diff options
Diffstat (limited to 'arch/ia64')
197 files changed, 69771 insertions, 0 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig new file mode 100644 index 000000000000..33fcb205fcb7 --- /dev/null +++ b/arch/ia64/Kconfig @@ -0,0 +1,420 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "IA-64 Linux Kernel Configuration" + +source "init/Kconfig" + +menu "Processor type and features" + +config IA64 + bool + default y + help + The Itanium Processor Family is Intel's 64-bit successor to + the 32-bit X86 line. The IA-64 Linux project has a home + page at <http://www.linuxia64.org/> and a mailing list at + <linux-ia64@vger.kernel.org>. + +config 64BIT + bool + default y + +config MMU + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config TIME_INTERPOLATION + bool + default y + +config EFI + bool + default y + +config GENERIC_IOMAP + bool + default y + +choice + prompt "System type" + default IA64_GENERIC + +config IA64_GENERIC + bool "generic" + select NUMA + select ACPI_NUMA + select VIRTUAL_MEM_MAP + select DISCONTIGMEM + help + This selects the system type of your hardware. A "generic" kernel + will run on any supported IA-64 system. However, if you configure + a kernel for your specific system, it will be faster and smaller. + + generic For any supported IA-64 system + DIG-compliant For DIG ("Developer's Interface Guide") compliant systems + HP-zx1/sx1000 For HP systems + HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices. + SGI-SN2 For SGI Altix systems + Ski-simulator For the HP simulator <http://www.hpl.hp.com/research/linux/ski/> + + If you don't know what to do, choose "generic". + +config IA64_DIG + bool "DIG-compliant" + +config IA64_HP_ZX1 + bool "HP-zx1/sx1000" + help + Build a kernel that runs on HP zx1 and sx1000 systems. This adds + support for the HP I/O MMU. + +config IA64_HP_ZX1_SWIOTLB + bool "HP-zx1/sx1000 with software I/O TLB" + help + Build a kernel that runs on HP zx1 and sx1000 systems even when they + have broken PCI devices which cannot DMA to full 32 bits. Apart + from support for the HP I/O MMU, this includes support for the software + I/O TLB, which allows supporting the broken devices at the expense of + wasting some kernel memory (about 2MB by default). + +config IA64_SGI_SN2 + bool "SGI-SN2" + help + Selecting this option will optimize the kernel for use on sn2 based + systems, but the resulting kernel binary will not run on other + types of ia64 systems. If you have an SGI Altix system, it's safe + to select this option. If in doubt, select ia64 generic support + instead. + +config IA64_HP_SIM + bool "Ski-simulator" + +endchoice + +choice + prompt "Processor type" + default ITANIUM + +config ITANIUM + bool "Itanium" + help + Select your IA-64 processor type. The default is Itanium. + This choice is safe for all IA-64 systems, but may not perform + optimally on systems with, say, Itanium 2 or newer processors. + +config MCKINLEY + bool "Itanium 2" + help + Select this to configure for an Itanium 2 (McKinley) processor. + +endchoice + +choice + prompt "Kernel page size" + default IA64_PAGE_SIZE_16KB + +config IA64_PAGE_SIZE_4KB + bool "4KB" + help + This lets you select the page size of the kernel. For best IA-64 + performance, a page size of 8KB or 16KB is recommended. For best + IA-32 compatibility, a page size of 4KB should be selected (the vast + majority of IA-32 binaries work perfectly fine with a larger page + size). For Itanium 2 or newer systems, a page size of 64KB can also + be selected. + + 4KB For best IA-32 compatibility + 8KB For best IA-64 performance + 16KB For best IA-64 performance + 64KB Requires Itanium 2 or newer processor. + + If you don't know what to do, choose 16KB. + +config IA64_PAGE_SIZE_8KB + bool "8KB" + +config IA64_PAGE_SIZE_16KB + bool "16KB" + +config IA64_PAGE_SIZE_64KB + depends on !ITANIUM + bool "64KB" + +endchoice + +config IA64_BRL_EMU + bool + depends on ITANIUM + default y + +# align cache-sensitive data to 128 bytes +config IA64_L1_CACHE_SHIFT + int + default "7" if MCKINLEY + default "6" if ITANIUM + +# align cache-sensitive data to 64 bytes +config NUMA + bool "NUMA support" + depends on !IA64_HP_SIM + default y if IA64_SGI_SN2 + select ACPI_NUMA + help + Say Y to compile the kernel to support NUMA (Non-Uniform Memory + Access). This option is for configuring high-end multiprocessor + server systems. If in doubt, say N. + +config VIRTUAL_MEM_MAP + bool "Virtual mem map" + default y if !IA64_HP_SIM + help + Say Y to compile the kernel with support for a virtual mem map. + This code also only takes effect if a memory hole of greater than + 1 Gb is found during boot. You must turn this option on if you + require the DISCONTIGMEM option for your machine. If you are + unsure, say Y. + +config HOLES_IN_ZONE + bool + default y if VIRTUAL_MEM_MAP + +config DISCONTIGMEM + bool "Discontiguous memory support" + depends on (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) && NUMA && VIRTUAL_MEM_MAP + default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA + help + Say Y to support efficient handling of discontiguous physical memory, + for architectures which are either NUMA (Non-Uniform Memory Access) + or have huge holes in the physical address space for other reasons. + See <file:Documentation/vm/numa> for more. + +config IA64_CYCLONE + bool "Cyclone (EXA) Time Source support" + help + Say Y here to enable support for IBM EXA Cyclone time source. + If you're unsure, answer N. + +config IOSAPIC + bool + depends on !IA64_HP_SIM + default y + +config IA64_SGI_SN_SIM + bool "SGI Medusa Simulator Support" + depends on IA64_SGI_SN2 + help + If you are compiling a kernel that will run under SGI's IA-64 + simulator (Medusa) then say Y, otherwise say N. + +config FORCE_MAX_ZONEORDER + int + default "18" + +config SMP + bool "Symmetric multi-processing support" + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + systems, but will use only one CPU of a multiprocessor system. If + you say Y here, the kernel will run on many, but not all, + single processor systems. On a single processor system, the kernel + will run faster if you say N here. + + See also the <file:Documentation/smp.txt> and the SMP-HOWTO + available at <http://www.tldp.org/docs.html#howto>. + + If you don't know what to do here, say N. + +config NR_CPUS + int "Maximum number of CPUs (2-512)" + range 2 512 + depends on SMP + default "64" + help + You should set this to the number of CPUs in your system, but + keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but + only use 2 CPUs on a >2 CPU system. Setting this to a value larger + than 64 will cause the use of a CPU mask array, causing a small + performance hit. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && EXPERIMENTAL + select HOTPLUG + default n + ---help--- + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu/cpu#. + Say N if you want to disable CPU hotplug. + +config PREEMPT + bool "Preemptible Kernel" + help + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + This allows applications to run more reliably even when the system is + under load. + + Say Y here if you are building a kernel for a desktop, embedded + or real-time system. Say N if you are unsure. + +config HAVE_DEC_LOCK + bool + depends on (SMP || PREEMPT) + default y + +config IA32_SUPPORT + bool "Support for Linux/x86 binaries" + help + IA-64 processors can execute IA-32 (X86) instructions. By + saying Y here, the kernel will include IA-32 system call + emulation support which makes it possible to transparently + run IA-32 Linux binaries on an IA-64 Linux system. + If in doubt, say Y. + +config COMPAT + bool + depends on IA32_SUPPORT + default y + +config IA64_MCA_RECOVERY + tristate "MCA recovery from errors other than TLB." + +config PERFMON + bool "Performance monitor support" + help + Selects whether support for the IA-64 performance monitor hardware + is included in the kernel. This makes some kernel data-structures a + little bigger and slows down execution a bit, but it is generally + a good idea to turn this on. If you're unsure, say Y. + +config IA64_PALINFO + tristate "/proc/pal support" + help + If you say Y here, you are able to get PAL (Processor Abstraction + Layer) information in /proc/pal. This contains useful information + about the processors in your systems, such as cache and TLB sizes + and the PAL firmware version in use. + + To use this option, you have to ensure that the "/proc file system + support" (CONFIG_PROC_FS) is enabled, too. + +config ACPI_DEALLOCATE_IRQ + bool + depends on IOSAPIC && EXPERIMENTAL + default y + +source "drivers/firmware/Kconfig" + +source "fs/Kconfig.binfmt" + +endmenu + +menu "Power management and ACPI" + +config PM + bool "Power Management support" + depends on IA64_GENERIC || IA64_DIG || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB + default y + help + "Power Management" means that parts of your computer are shut + off or put into a power conserving "sleep" mode if they are not + being used. There are two competing standards for doing this: APM + and ACPI. If you want to use either one, say Y here and then also + to the requisite support below. + + Power Management is most important for battery powered laptop + computers; if you have a laptop, check out the Linux Laptop home + page on the WWW at <http://www.linux-on-laptops.com/> and the + Battery Powered Linux mini-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. + + Note that, even if you say N here, Linux on the x86 architecture + will issue the hlt instruction if nothing is to be done, thereby + sending the processor to sleep and saving power. + +config ACPI + bool + depends on !IA64_HP_SIM + default y + +if !IA64_HP_SIM + +source "drivers/acpi/Kconfig" + +endif + +endmenu + +if !IA64_HP_SIM + +menu "Bus options (PCI, PCMCIA)" + +config PCI + bool "PCI support" + help + Find out whether you have a PCI motherboard. PCI is the name of a + bus system, i.e. the way the CPU talks to the other stuff inside + your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or + VESA. If you have PCI, say Y, otherwise N. + + The PCI-HOWTO, available from + <http://www.tldp.org/docs.html#howto>, contains valuable + information about which PCI hardware does work under Linux and which + doesn't. + +config PCI_DOMAINS + bool + default PCI + +source "drivers/pci/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +source "drivers/pcmcia/Kconfig" + +endmenu + +endif + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "lib/Kconfig" + +# +# Use the generic interrupt handling code in kernel/irq/: +# +config GENERIC_HARDIRQS + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + +source "arch/ia64/hp/sim/Kconfig" + +source "arch/ia64/oprofile/Kconfig" + +source "arch/ia64/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug new file mode 100644 index 000000000000..de9d507ba0fd --- /dev/null +++ b/arch/ia64/Kconfig.debug @@ -0,0 +1,64 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +choice + prompt "Physical memory granularity" + default IA64_GRANULE_64MB + +config IA64_GRANULE_16MB + bool "16MB" + help + IA-64 identity-mapped regions use a large page size called "granules". + + Select "16MB" for a small granule size. + Select "64MB" for a large granule size. This is the current default. + +config IA64_GRANULE_64MB + bool "64MB" + depends on !(IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_SGI_SN2) + +endchoice + +config IA64_PRINT_HAZARDS + bool "Print possible IA-64 dependency violations to console" + depends on DEBUG_KERNEL + help + Selecting this option prints more information for Illegal Dependency + Faults, that is, for Read-after-Write (RAW), Write-after-Write (WAW), + or Write-after-Read (WAR) violations. This option is ignored if you + are compiling for an Itanium A step processor + (CONFIG_ITANIUM_ASTEP_SPECIFIC). If you're unsure, select Y. + +config DISABLE_VHPT + bool "Disable VHPT" + depends on DEBUG_KERNEL + help + The Virtual Hash Page Table (VHPT) enhances virtual address + translation performance. Normally you want the VHPT active but you + can select this option to disable the VHPT for debugging. If you're + unsure, answer N. + +config IA64_DEBUG_CMPXCHG + bool "Turn on compare-and-exchange bug checking (slow!)" + depends on DEBUG_KERNEL + help + Selecting this option turns on bug checking for the IA-64 + compare-and-exchange instructions. This is slow! Itaniums + from step B3 or later don't have this problem. If you're unsure, + select N. + +config IA64_DEBUG_IRQ + bool "Turn on irq debug checks (slow!)" + depends on DEBUG_KERNEL + help + Selecting this option turns on bug checking for the IA-64 irq_save + and restore instructions. It's useful for tracking down spinlock + problems, but slow! If you're unsure, select N. + +config SYSVIPC_COMPAT + bool + depends on COMPAT && SYSVIPC + default y + +endmenu diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile new file mode 100644 index 000000000000..f9bd88ada708 --- /dev/null +++ b/arch/ia64/Makefile @@ -0,0 +1,115 @@ +# +# ia64/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com> +# + +NM := $(CROSS_COMPILE)nm -B +READELF := $(CROSS_COMPILE)readelf + +export AWK + +CHECKFLAGS += -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__ + +OBJCOPYFLAGS := --strip-all +LDFLAGS_vmlinux := -static +LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds +AFLAGS_KERNEL := -mconstant-gp +EXTRA := + +cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ + -falign-functions=32 -frename-registers -fno-optimize-sibling-calls +CFLAGS_KERNEL := -mconstant-gp + +GCC_VERSION := $(call cc-version) +GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)") +CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)") + +ifeq ($(GAS_STATUS),buggy) +$(error Sorry, you need a newer version of the assember, one that is built from \ + a source-tree that post-dates 18-Dec-2002. You can find a pre-compiled \ + static binary of such an assembler at: \ + \ + ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) +endif + +ifneq ($(shell if [ $(GCC_VERSION) -lt 0300 ] ; then echo "bad"; fi ;),) +$(error Sorry, your compiler is too old. GCC v2.96 is known to generate bad code.) +endif + +ifeq ($(GCC_VERSION),0304) + cflags-$(CONFIG_ITANIUM) += -mtune=merced + cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley +endif + +CFLAGS += $(cflags-y) +head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o + +libs-y += arch/ia64/lib/ +core-y += arch/ia64/kernel/ arch/ia64/mm/ +core-$(CONFIG_IA32_SUPPORT) += arch/ia64/ia32/ +core-$(CONFIG_IA64_DIG) += arch/ia64/dig/ +core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ +core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ +core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ +core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ + +drivers-$(CONFIG_PCI) += arch/ia64/pci/ +drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ +drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ +drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ +drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/ +drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/ + +boot := arch/ia64/hp/sim/boot + +.PHONY: boot compressed check + +all: compressed unwcheck + +compressed: vmlinux.gz + +vmlinux.gz: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +unwcheck: vmlinux + -$(Q)READELF=$(READELF) $(srctree)/arch/ia64/scripts/unwcheck.py $< + +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + +CLEAN_FILES += include/asm-ia64/.offsets.h.stamp vmlinux.gz bootloader + +MRPROPER_FILES += include/asm-ia64/offsets.h + +prepare: include/asm-ia64/offsets.h + +arch/ia64/kernel/asm-offsets.s: include/asm include/linux/version.h include/config/MARKER + +include/asm-ia64/offsets.h: arch/ia64/kernel/asm-offsets.s + $(call filechk,gen-asm-offsets) + +arch/ia64/kernel/asm-offsets.s: include/asm-ia64/.offsets.h.stamp + +include/asm-ia64/.offsets.h.stamp: + mkdir -p include/asm-ia64 + [ -s include/asm-ia64/offsets.h ] \ + || echo "#define IA64_TASK_SIZE 0" > include/asm-ia64/offsets.h + touch $@ + +boot: lib/lib.a vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +install: vmlinux.gz + sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)" + +define archhelp + echo '* compressed - Build compressed kernel image' + echo ' install - Install compressed kernel image' + echo ' boot - Build vmlinux and bootloader for Ski simulator' + echo '* unwcheck - Check vmlinux for invalid unwind info' +endef diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig new file mode 100644 index 000000000000..b95fcf86ea00 --- /dev/null +++ b/arch/ia64/configs/bigsur_defconfig @@ -0,0 +1,1172 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.10-rc2 +# Mon Nov 29 13:27:48 2004 +# + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_LOCK_KERNEL=y + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_HOTPLUG=y +CONFIG_KOBJECT_UEVENT=y +# CONFIG_IKCONFIG is not set +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_FUTEX=y +CONFIG_EPOLL=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SHMEM=y +CONFIG_CC_ALIGN_FUNCTIONS=0 +CONFIG_CC_ALIGN_LABELS=0 +CONFIG_CC_ALIGN_LOOPS=0 +CONFIG_CC_ALIGN_JUMPS=0 +# CONFIG_TINY_SHMEM is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_OBSOLETE_MODPARM=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Processor type and features +# +CONFIG_IA64=y +CONFIG_64BIT=y +CONFIG_MMU=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_TIME_INTERPOLATION=y +CONFIG_EFI=y +CONFIG_GENERIC_IOMAP=y +# CONFIG_IA64_GENERIC is not set +CONFIG_IA64_DIG=y +# CONFIG_IA64_HP_ZX1 is not set +# CONFIG_IA64_SGI_SN2 is not set +# CONFIG_IA64_HP_SIM is not set +CONFIG_ITANIUM=y +# CONFIG_MCKINLEY is not set +# CONFIG_IA64_PAGE_SIZE_4KB is not set +# CONFIG_IA64_PAGE_SIZE_8KB is not set +CONFIG_IA64_PAGE_SIZE_16KB=y +# CONFIG_IA64_PAGE_SIZE_64KB is not set +CONFIG_IA64_BRL_EMU=y +CONFIG_IA64_L1_CACHE_SHIFT=6 +# CONFIG_NUMA is not set +# CONFIG_VIRTUAL_MEM_MAP is not set +# CONFIG_IA64_CYCLONE is not set +CONFIG_IOSAPIC=y +CONFIG_FORCE_MAX_ZONEORDER=18 +CONFIG_SMP=y +CONFIG_NR_CPUS=2 +# CONFIG_HOTPLUG_CPU is not set +CONFIG_PREEMPT=y +CONFIG_HAVE_DEC_LOCK=y +CONFIG_IA32_SUPPORT=y +CONFIG_COMPAT=y +# CONFIG_IA64_MCA_RECOVERY is not set +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y + +# +# Firmware Drivers +# +CONFIG_EFI_VARS=y +CONFIG_EFI_PCDP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m + +# +# Power management and ACPI +# +CONFIG_PM=y +CONFIG_ACPI=y + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_VIDEO=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_ACPI_THERMAL=m +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BUS=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SYSTEM=y + +# +# Bus options (PCI, PCMCIA) +# +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_PCI_MSI is not set +CONFIG_PCI_LEGACY_PROC=y +CONFIG_PCI_NAMES=y + +# +# PCI Hotplug Support +# +# CONFIG_HOTPLUG_PCI is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PC-card bridges +# + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CDROM_PKTCDVD is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=m +CONFIG_BLK_DEV_IDE=m + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=m +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=m +# CONFIG_BLK_DEV_IDETAPE is not set +CONFIG_BLK_DEV_IDEFLOPPY=m +# CONFIG_BLK_DEV_IDESCSI is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=m +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=m +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +CONFIG_BLK_DEV_PIIX=m +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +# CONFIG_BLK_DEV_SR is not set +# CONFIG_CHR_DEV_SG is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=m +# CONFIG_SCSI_FC_ATTRS is not set + +# +# SCSI low-level drivers +# +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_ISP is not set +# CONFIG_SCSI_QLOGIC_FC is not set +CONFIG_SCSI_QLOGIC_1280=y +# CONFIG_SCSI_QLOGIC_1280_1040 is not set +CONFIG_SCSI_QLA2XXX=y +# CONFIG_SCSI_QLA21XX is not set +# CONFIG_SCSI_QLA22XX is not set +# CONFIG_SCSI_QLA2300 is not set +# CONFIG_SCSI_QLA2322 is not set +# CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_QLA6322 is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_RAID5=m +CONFIG_MD_RAID6=m +CONFIG_MD_MULTIPATH=m +# CONFIG_MD_FAULTY is not set +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +# CONFIG_NETLINK_DEV is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_IP_TCPDIAG=y +# CONFIG_IP_TCPDIAG_IPV6 is not set +# CONFIG_IPV6 is not set +# CONFIG_NETFILTER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set +# CONFIG_NET_CLS_ROUTE is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set + +# +# Tulip family network device support +# +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_DGRS is not set +CONFIG_EEPRO100=y +# CONFIG_EEPRO100_PIO is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_VIA_RHINE is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set + +# +# Ethernet (10000 Mbit) +# +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +# CONFIG_GAMEPORT is not set +CONFIG_SOUND_GAMEPORT=y +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set +# CONFIG_SERIO_RAW is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_ACPI=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_MULTIPORT is not set +# CONFIG_SERIAL_8250_RSA is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_DRM=y +# CONFIG_DRM_TDFX is not set +CONFIG_DRM_R128=m +# CONFIG_DRM_RADEON is not set +# CONFIG_DRM_MGA is not set +# CONFIG_DRM_SIS is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_HPET is not set + +# +# I2C support +# +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y + +# +# I2C Algorithms +# +CONFIG_I2C_ALGOBIT=y +# CONFIG_I2C_ALGOPCF is not set +# CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_ISA is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_SCx200_ACB is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_ISA is not set + +# +# Hardware Sensors Chip support +# +# CONFIG_I2C_SENSOR is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83627HF is not set + +# +# Other I2C Chip support +# +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_RTC8564 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +CONFIG_SOUND=m + +# +# Advanced Linux Sound Architecture +# +CONFIG_SND=m +CONFIG_SND_TIMER=m +CONFIG_SND_PCM=m +CONFIG_SND_HWDEP=m +CONFIG_SND_RAWMIDI=m +CONFIG_SND_SEQUENCER=m +# CONFIG_SND_SEQ_DUMMY is not set +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +# CONFIG_SND_SEQUENCER_OSS is not set +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set + +# +# Generic devices +# +CONFIG_SND_OPL3_LIB=m +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + +# +# PCI devices +# +CONFIG_SND_AC97_CODEC=m +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CS46XX is not set +CONFIG_SND_CS4281=m +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_YMFPCI is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_FM801 is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VX222 is not set + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set + +# +# Open Sound System +# +# CONFIG_SOUND_PRIME is not set + +# +# USB support +# +CONFIG_USB=m +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_SUSPEND is not set +# CONFIG_USB_OTG is not set +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y + +# +# USB Host Controller Drivers +# +# CONFIG_USB_EHCI_HCD is not set +# CONFIG_USB_OHCI_HCD is not set +CONFIG_USB_UHCI_HCD=m + +# +# USB Device Class drivers +# +CONFIG_USB_AUDIO=m +CONFIG_USB_BLUETOOTH_TTY=m +CONFIG_USB_MIDI=m +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_RW_DETECT is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_HP8200e is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=m +CONFIG_USB_HIDINPUT=y +# CONFIG_HID_FF is not set +CONFIG_USB_HIDDEV=y + +# +# USB HID Boot Protocol drivers +# +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_MTOUCH is not set +# CONFIG_USB_EGALAX is not set +# CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set +# CONFIG_USB_HPUSBSCSI is not set + +# +# USB Multimedia devices +# +# CONFIG_USB_DABUSB is not set + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_TIGL is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_TEST is not set + +# +# USB ATM/DSL drivers +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +# CONFIG_EXT3_FS_POSIX_ACL is not set +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_XFS_FS=y +# CONFIG_XFS_RT is not set +CONFIG_XFS_QUOTA=y +CONFIG_XFS_SECURITY=y +CONFIG_XFS_POSIX_ACL=y +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_QUOTA is not set +CONFIG_QUOTACTL=y +CONFIG_DNOTIFY=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=m +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +# CONFIG_MSDOS_FS is not set +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +# CONFIG_DEVFS_FS is not set +CONFIG_DEVPTS_FS_XATTR=y +CONFIG_DEVPTS_FS_SECURITY=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_XATTR is not set +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFS_V4=y +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m +CONFIG_SUNRPC=m +CONFIG_SUNRPC_GSS=m +CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +CONFIG_EFI_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=m + +# +# Library routines +# +# CONFIG_CRC_CCITT is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set + +# +# Profiling support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=y + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_IA64_GRANULE_16MB is not set +CONFIG_IA64_GRANULE_64MB=y +# CONFIG_IA64_PRINT_HAZARDS is not set +# CONFIG_DISABLE_VHPT is not set +# CONFIG_IA64_DEBUG_CMPXCHG is not set +# CONFIG_IA64_DEBUG_IRQ is not set +CONFIG_SYSVIPC_COMPAT=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_TEST is not set diff --git a/arch/ia64/configs/sim_defconfig b/arch/ia64/configs/sim_defconfig new file mode 100644 index 000000000000..a26781cfe8bf --- /dev/null +++ b/arch/ia64/configs/sim_defconfig @@ -0,0 +1,534 @@ +# +# Automatically generated make config: don't edit +# + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +# CONFIG_CLEAN_COMPILE is not set +# CONFIG_STANDALONE is not set +CONFIG_BROKEN=y +CONFIG_BROKEN_ON_SMP=y + +# +# General setup +# +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +CONFIG_LOG_BUF_SHIFT=16 +# CONFIG_HOTPLUG is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_OBSOLETE_MODPARM=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Processor type and features +# +CONFIG_IA64=y +CONFIG_64BIT=y +CONFIG_MMU=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_TIME_INTERPOLATION=y +CONFIG_EFI=y +# CONFIG_IA64_GENERIC is not set +# CONFIG_IA64_DIG is not set +# CONFIG_IA64_HP_ZX1 is not set +# CONFIG_IA64_SGI_SN2 is not set +CONFIG_IA64_HP_SIM=y +# CONFIG_ITANIUM is not set +CONFIG_MCKINLEY=y +# CONFIG_IA64_PAGE_SIZE_4KB is not set +# CONFIG_IA64_PAGE_SIZE_8KB is not set +# CONFIG_IA64_PAGE_SIZE_16KB is not set +CONFIG_IA64_PAGE_SIZE_64KB=y +CONFIG_IA64_L1_CACHE_SHIFT=7 +# CONFIG_MCKINLEY_ASTEP_SPECIFIC is not set +# CONFIG_VIRTUAL_MEM_MAP is not set +# CONFIG_IA64_CYCLONE is not set +CONFIG_FORCE_MAX_ZONEORDER=18 +CONFIG_SMP=y +CONFIG_NR_CPUS=64 +CONFIG_PREEMPT=y +CONFIG_HAVE_DEC_LOCK=y +CONFIG_IA32_SUPPORT=y +CONFIG_COMPAT=y +# CONFIG_PERFMON is not set +CONFIG_IA64_PALINFO=m + +# +# Firmware Drivers +# +CONFIG_EFI_VARS=y +# CONFIG_SMBIOS is not set +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y + +# +# Power management and ACPI +# + +# +# Device Drivers +# + +# +# Generic Driver Options +# +# CONFIG_DEBUG_DRIVER is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +# CONFIG_BLK_DEV_INITRD is not set + +# +# ATA/ATAPI/MFM/RLL support +# +# CONFIG_IDE is not set + +# +# SCSI device support +# +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +# CONFIG_BLK_DEV_SR is not set +# CONFIG_CHR_DEV_SG is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set + +# +# SCSI low-level drivers +# +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_DEBUG is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Fusion MPT device support +# + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +# CONFIG_NETLINK_DEV is not set +# CONFIG_UNIX is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_IPV6 is not set +# CONFIG_NETFILTER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_NETDEVICES is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +# CONFIG_GAMEPORT is not set +CONFIG_SOUND_GAMEPORT=y +CONFIG_SERIO=y +# CONFIG_SERIO_I8042 is not set +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_CT82C710 is not set + +# +# Input Device Drivers +# +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_QIC02_TAPE is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +CONFIG_EFI_RTC=y +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_RAW_DRIVER is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FB is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +CONFIG_EXT3_FS=y +# CONFIG_EXT3_FS_XATTR is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_XFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_FAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVPTS_FS_XATTR is not set +# CONFIG_TMPFS is not set +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +# CONFIG_NFS_V3 is not set +# CONFIG_NFS_V4 is not set +CONFIG_NFS_DIRECTIO=y +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V4 is not set +# CONFIG_NFSD_TCP is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_NEC98_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +CONFIG_EFI_PARTITION=y + +# +# Native Language Support +# +# CONFIG_NLS is not set + +# +# Library routines +# +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set + +# +# HP Simulator drivers +# +CONFIG_HP_SIMETH=y +CONFIG_HP_SIMSERIAL=y +CONFIG_HP_SIMSERIAL_CONSOLE=y +CONFIG_HP_SIMSCSI=y + +# +# Profiling support +# +# CONFIG_PROFILING is not set + +# +# Kernel hacking +# +# CONFIG_IA64_GRANULE_16MB is not set +CONFIG_IA64_GRANULE_64MB=y +CONFIG_DEBUG_KERNEL=y +# CONFIG_IA64_PRINT_HAZARDS is not set +# CONFIG_DISABLE_VHPT is not set +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_IA64_DEBUG_CMPXCHG is not set +# CONFIG_IA64_DEBUG_IRQ is not set +CONFIG_DEBUG_INFO=y +CONFIG_SYSVIPC_COMPAT=y + +# +# Security options +# +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig new file mode 100644 index 000000000000..bfeb952fe8e2 --- /dev/null +++ b/arch/ia64/configs/sn2_defconfig @@ -0,0 +1,1038 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.10 +# Mon Jan 10 13:57:35 2005 +# + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_LOCK_KERNEL=y + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_HOTPLUG=y +CONFIG_KOBJECT_UEVENT=y +# CONFIG_IKCONFIG is not set +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_CPUSETS=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SHMEM=y +CONFIG_CC_ALIGN_FUNCTIONS=0 +CONFIG_CC_ALIGN_LABELS=0 +CONFIG_CC_ALIGN_LOOPS=0 +CONFIG_CC_ALIGN_JUMPS=0 +# CONFIG_TINY_SHMEM is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_OBSOLETE_MODPARM=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Processor type and features +# +CONFIG_IA64=y +CONFIG_64BIT=y +CONFIG_MMU=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_TIME_INTERPOLATION=y +CONFIG_EFI=y +CONFIG_GENERIC_IOMAP=y +# CONFIG_IA64_GENERIC is not set +# CONFIG_IA64_DIG is not set +# CONFIG_IA64_HP_ZX1 is not set +CONFIG_IA64_SGI_SN2=y +# CONFIG_IA64_HP_SIM is not set +# CONFIG_ITANIUM is not set +CONFIG_MCKINLEY=y +# CONFIG_IA64_PAGE_SIZE_4KB is not set +# CONFIG_IA64_PAGE_SIZE_8KB is not set +CONFIG_IA64_PAGE_SIZE_16KB=y +# CONFIG_IA64_PAGE_SIZE_64KB is not set +CONFIG_IA64_L1_CACHE_SHIFT=7 +CONFIG_NUMA=y +CONFIG_VIRTUAL_MEM_MAP=y +CONFIG_HOLES_IN_ZONE=y +CONFIG_DISCONTIGMEM=y +# CONFIG_IA64_CYCLONE is not set +CONFIG_IOSAPIC=y +CONFIG_IA64_SGI_SN_SIM=y +CONFIG_FORCE_MAX_ZONEORDER=18 +CONFIG_SMP=y +CONFIG_NR_CPUS=512 +# CONFIG_HOTPLUG_CPU is not set +CONFIG_PREEMPT=y +CONFIG_HAVE_DEC_LOCK=y +CONFIG_IA32_SUPPORT=y +CONFIG_COMPAT=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_ACPI_DEALLOCATE_IRQ=y + +# +# Firmware Drivers +# +CONFIG_EFI_VARS=y +# CONFIG_EFI_PCDP is not set +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set + +# +# Power management and ACPI +# +CONFIG_ACPI=y + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y +# CONFIG_ACPI_BUTTON is not set +CONFIG_ACPI_VIDEO=m +# CONFIG_ACPI_FAN is not set +# CONFIG_ACPI_PROCESSOR is not set +CONFIG_ACPI_NUMA=y +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BUS=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SYSTEM=y +# CONFIG_ACPI_CONTAINER is not set + +# +# Bus options (PCI, PCMCIA) +# +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_PCI_MSI is not set +CONFIG_PCI_LEGACY_PROC=y +CONFIG_PCI_NAMES=y + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=y +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_PCIE is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set +CONFIG_HOTPLUG_PCI_SGI=y + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PC-card bridges +# + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=m +# CONFIG_DEBUG_DRIVER is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CDROM_PKTCDVD is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_ATA_OVER_ETH=m + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_BLK_DEV_IDESCSI is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_GENERIC is not set +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +# CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +CONFIG_BLK_DEV_SGIIOC4=y +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=m +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=m + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +CONFIG_SCSI_CONSTANTS=y +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set + +# +# SCSI low-level drivers +# +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +CONFIG_SCSI_SATA=y +# CONFIG_SCSI_SATA_AHCI is not set +# CONFIG_SCSI_SATA_SVW is not set +# CONFIG_SCSI_ATA_PIIX is not set +# CONFIG_SCSI_SATA_NV is not set +# CONFIG_SCSI_SATA_PROMISE is not set +# CONFIG_SCSI_SATA_SX4 is not set +# CONFIG_SCSI_SATA_SIL is not set +# CONFIG_SCSI_SATA_SIS is not set +# CONFIG_SCSI_SATA_ULI is not set +# CONFIG_SCSI_SATA_VIA is not set +CONFIG_SCSI_SATA_VITESSE=y +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_ISP is not set +# CONFIG_SCSI_QLOGIC_FC is not set +CONFIG_SCSI_QLOGIC_1280=y +# CONFIG_SCSI_QLOGIC_1280_1040 is not set +CONFIG_SCSI_QLA2XXX=y +# CONFIG_SCSI_QLA21XX is not set +CONFIG_SCSI_QLA22XX=y +CONFIG_SCSI_QLA2300=y +CONFIG_SCSI_QLA2322=y +# CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=y +CONFIG_MD_RAID0=y +CONFIG_MD_RAID1=y +# CONFIG_MD_RAID10 is not set +CONFIG_MD_RAID5=y +# CONFIG_MD_RAID6 is not set +CONFIG_MD_MULTIPATH=y +# CONFIG_MD_FAULTY is not set +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=y +CONFIG_FUSION_MAX_SGE=128 +CONFIG_FUSION_CTL=m + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK_DEV=y +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_IP_TCPDIAG=y +# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_IPV6=m +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_IPV6_TUNNEL is not set +# CONFIG_NETFILTER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set +# CONFIG_NET_CLS_ROUTE is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +CONFIG_NETPOLL=y +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_ETHERTAP is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# Ethernet (10 or 100Mbit) +# +# CONFIG_NET_ETHERNET is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SK98LIN is not set +CONFIG_TIGON3=y + +# +# Ethernet (10000 Mbit) +# +# CONFIG_IXGB is not set +CONFIG_S2IO=m +# CONFIG_S2IO_NAPI is not set +# CONFIG_2BUFF_MODE is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +CONFIG_NETCONSOLE=y + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +# CONFIG_GAMEPORT is not set +CONFIG_SOUND_GAMEPORT=y +# CONFIG_SERIO is not set +# CONFIG_SERIO_I8042 is not set + +# +# Input Device Drivers +# +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_ISI is not set +# CONFIG_SYNCLINK is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_N_HDLC is not set +# CONFIG_STALDRV is not set +CONFIG_SGI_SNSC=y + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_SERIAL_SGI_L1_CONSOLE=y +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_AGP is not set +# CONFIG_DRM is not set +CONFIG_RAW_DRIVER=m +# CONFIG_HPET is not set +CONFIG_MAX_RAW_DEVS=256 +CONFIG_MMTIMER=y + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FB is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +CONFIG_USB=m +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +# CONFIG_USB_DEVICEFS is not set +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_OTG is not set +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=m +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=m +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_BLUETOOTH_TTY is not set +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# +# CONFIG_USB_STORAGE is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=m +CONFIG_USB_HIDINPUT=y +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set + +# +# USB HID Boot Protocol drivers +# +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_MTOUCH is not set +# CONFIG_USB_EGALAX is not set +# CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB Multimedia devices +# +# CONFIG_USB_DABUSB is not set + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set + +# +# USB ATM/DSL drivers +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# InfiniBand support +# +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_MTHCA=m +# CONFIG_INFINIBAND_MTHCA_DEBUG is not set +CONFIG_INFINIBAND_IPOIB=m +# CONFIG_INFINIBAND_IPOIB_DEBUG is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_XFS_FS=y +CONFIG_XFS_RT=y +CONFIG_XFS_QUOTA=y +# CONFIG_XFS_SECURITY is not set +CONFIG_XFS_POSIX_ACL=y +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_QUOTA=y +# CONFIG_QFMT_V1 is not set +# CONFIG_QFMT_V2 is not set +CONFIG_QUOTACTL=y +CONFIG_DNOTIFY=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=m +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +# CONFIG_MSDOS_FS is not set +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVPTS_FS_XATTR is not set +CONFIG_TMPFS=y +CONFIG_TMPFS_XATTR=y +CONFIG_TMPFS_SECURITY=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFS_V4=y +CONFIG_NFS_DIRECTIO=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m +CONFIG_SUNRPC=m +CONFIG_SUNRPC_GSS=m +CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_SMB_FS=m +# CONFIG_SMB_NLS_DEFAULT is not set +CONFIG_CIFS=m +# CONFIG_CIFS_STATS is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +CONFIG_EFI_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=y + +# +# Library routines +# +# CONFIG_CRC_CCITT is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_DEFLATE=m + +# +# Profiling support +# +# CONFIG_PROFILING is not set + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_FS is not set +CONFIG_IA64_GRANULE_16MB=y +# CONFIG_IA64_GRANULE_64MB is not set +# CONFIG_IA64_PRINT_HAZARDS is not set +# CONFIG_DISABLE_VHPT is not set +# CONFIG_IA64_DEBUG_CMPXCHG is not set +# CONFIG_IA64_DEBUG_IRQ is not set +CONFIG_SYSVIPC_COMPAT=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=m +CONFIG_CRYPTO_SHA1=m +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +CONFIG_CRYPTO_DES=m +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_DEFLATE=m +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Hardware crypto devices +# diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig new file mode 100644 index 000000000000..99830e8fc9ba --- /dev/null +++ b/arch/ia64/configs/tiger_defconfig @@ -0,0 +1,1098 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.11-rc2 +# Sat Jan 22 11:17:02 2005 +# + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_LOCK_KERNEL=y + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_HOTPLUG=y +CONFIG_KOBJECT_UEVENT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_FUTEX=y +CONFIG_EPOLL=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SHMEM=y +CONFIG_CC_ALIGN_FUNCTIONS=0 +CONFIG_CC_ALIGN_LABELS=0 +CONFIG_CC_ALIGN_LOOPS=0 +CONFIG_CC_ALIGN_JUMPS=0 +# CONFIG_TINY_SHMEM is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_OBSOLETE_MODPARM=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Processor type and features +# +CONFIG_IA64=y +CONFIG_64BIT=y +CONFIG_MMU=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_TIME_INTERPOLATION=y +CONFIG_EFI=y +CONFIG_GENERIC_IOMAP=y +# CONFIG_IA64_GENERIC is not set +CONFIG_IA64_DIG=y +# CONFIG_IA64_HP_ZX1 is not set +# CONFIG_IA64_HP_ZX1_SWIOTLB is not set +# CONFIG_IA64_SGI_SN2 is not set +# CONFIG_IA64_HP_SIM is not set +# CONFIG_ITANIUM is not set +CONFIG_MCKINLEY=y +# CONFIG_IA64_PAGE_SIZE_4KB is not set +# CONFIG_IA64_PAGE_SIZE_8KB is not set +CONFIG_IA64_PAGE_SIZE_16KB=y +# CONFIG_IA64_PAGE_SIZE_64KB is not set +CONFIG_IA64_L1_CACHE_SHIFT=7 +# CONFIG_NUMA is not set +CONFIG_VIRTUAL_MEM_MAP=y +CONFIG_HOLES_IN_ZONE=y +CONFIG_IA64_CYCLONE=y +CONFIG_IOSAPIC=y +CONFIG_FORCE_MAX_ZONEORDER=18 +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +# CONFIG_PREEMPT is not set +CONFIG_HAVE_DEC_LOCK=y +CONFIG_IA32_SUPPORT=y +CONFIG_COMPAT=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_ACPI_DEALLOCATE_IRQ=y + +# +# Firmware Drivers +# +CONFIG_EFI_VARS=y +CONFIG_EFI_PCDP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m + +# +# Power management and ACPI +# +CONFIG_PM=y +CONFIG_ACPI=y + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_BUTTON=m +# CONFIG_ACPI_VIDEO is not set +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +# CONFIG_ACPI_HOTPLUG_CPU is not set +CONFIG_ACPI_THERMAL=m +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BUS=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SYSTEM=y +# CONFIG_ACPI_CONTAINER is not set + +# +# Bus options (PCI, PCMCIA) +# +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_PCI_MSI is not set +CONFIG_PCI_LEGACY_PROC=y +CONFIG_PCI_NAMES=y + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m +# CONFIG_HOTPLUG_PCI_FAKE is not set +CONFIG_HOTPLUG_PCI_ACPI=m +# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PC-card bridges +# + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CDROM_PKTCDVD is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_ATA_OVER_ETH is not set + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +CONFIG_BLK_DEV_IDEFLOPPY=y +CONFIG_BLK_DEV_IDESCSI=m +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=y +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +CONFIG_BLK_DEV_CMD64X=y +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=m +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=m + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set + +# +# SCSI low-level drivers +# +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_ISP is not set +CONFIG_SCSI_QLOGIC_FC=y +# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set +CONFIG_SCSI_QLOGIC_1280=y +# CONFIG_SCSI_QLOGIC_1280_1040 is not set +CONFIG_SCSI_QLA2XXX=y +CONFIG_SCSI_QLA21XX=m +CONFIG_SCSI_QLA22XX=m +CONFIG_SCSI_QLA2300=m +CONFIG_SCSI_QLA2322=m +# CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +# CONFIG_MD_RAID10 is not set +CONFIG_MD_RAID5=m +CONFIG_MD_RAID6=m +CONFIG_MD_MULTIPATH=m +# CONFIG_MD_FAULTY is not set +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=y +CONFIG_FUSION_MAX_SGE=40 +# CONFIG_FUSION_CTL is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_NETLINK_DEV=y +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +CONFIG_ARPD=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_IP_TCPDIAG=y +# CONFIG_IP_TCPDIAG_IPV6 is not set +# CONFIG_IPV6 is not set +# CONFIG_NETFILTER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set +# CONFIG_NET_CLS_ROUTE is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +CONFIG_NETPOLL=y +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_ETHERTAP is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=m +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set + +# +# Tulip family network device support +# +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +CONFIG_TULIP=m +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +# CONFIG_TULIP_NAPI is not set +# CONFIG_DE4X5 is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_DGRS is not set +CONFIG_EEPRO100=m +CONFIG_E100=m +# CONFIG_E100_NAPI is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_VIA_RHINE is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +CONFIG_TIGON3=y + +# +# Ethernet (10000 Mbit) +# +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +CONFIG_NETCONSOLE=y + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +CONFIG_GAMEPORT=m +CONFIG_SOUND_GAMEPORT=m +# CONFIG_GAMEPORT_NS558 is not set +# CONFIG_GAMEPORT_L4 is not set +# CONFIG_GAMEPORT_EMU10K1 is not set +# CONFIG_GAMEPORT_VORTEX is not set +# CONFIG_GAMEPORT_FM801 is not set +# CONFIG_GAMEPORT_CS461X is not set +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_ISI is not set +# CONFIG_SYNCLINK is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_N_HDLC is not set +# CONFIG_STALDRV is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_ACPI=y +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_MULTIPORT is not set +# CONFIG_SERIAL_8250_RSA is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_MGA=m +CONFIG_DRM_SIS=m +CONFIG_RAW_DRIVER=m +CONFIG_HPET=y +# CONFIG_HPET_RTC_IRQ is not set +CONFIG_HPET_MMAP=y +CONFIG_MAX_RAW_DEVS=256 + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_DUMMY_CONSOLE=y +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_SUSPEND is not set +# CONFIG_USB_OTG is not set +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=m +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_BLUETOOTH_TTY is not set +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_RW_DETECT is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_HP8200e is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT=y +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_MTOUCH is not set +# CONFIG_USB_EGALAX is not set +# CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB Multimedia devices +# +# CONFIG_USB_DABUSB is not set + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_TEST is not set + +# +# USB ATM/DSL drivers +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# InfiniBand support +# +# CONFIG_INFINIBAND is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_XFS_FS=y +# CONFIG_XFS_RT is not set +# CONFIG_XFS_QUOTA is not set +# CONFIG_XFS_SECURITY is not set +# CONFIG_XFS_POSIX_ACL is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=m +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +# CONFIG_MSDOS_FS is not set +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +CONFIG_NTFS_FS=m +# CONFIG_NTFS_DEBUG is not set +# CONFIG_NTFS_RW is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVPTS_FS_XATTR is not set +CONFIG_TMPFS=y +CONFIG_TMPFS_XATTR=y +CONFIG_TMPFS_SECURITY=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFS_V4=y +CONFIG_NFS_DIRECTIO=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m +CONFIG_SUNRPC=m +CONFIG_SUNRPC_GSS=m +CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_SMB_FS=m +CONFIG_SMB_NLS_DEFAULT=y +CONFIG_SMB_NLS_REMOTE="cp437" +CONFIG_CIFS=m +# CONFIG_CIFS_STATS is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +CONFIG_EFI_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Library routines +# +# CONFIG_CRC_CCITT is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y + +# +# Profiling support +# +# CONFIG_PROFILING is not set + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +CONFIG_IA64_GRANULE_16MB=y +# CONFIG_IA64_GRANULE_64MB is not set +# CONFIG_IA64_PRINT_HAZARDS is not set +# CONFIG_DISABLE_VHPT is not set +# CONFIG_IA64_DEBUG_CMPXCHG is not set +# CONFIG_IA64_DEBUG_IRQ is not set +CONFIG_SYSVIPC_COMPAT=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=m +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +CONFIG_CRYPTO_DES=m +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Hardware crypto devices +# diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig new file mode 100644 index 000000000000..21d6f9bab5e9 --- /dev/null +++ b/arch/ia64/configs/zx1_defconfig @@ -0,0 +1,1273 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.10 +# Wed Dec 29 09:05:48 2004 +# + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +# CONFIG_CLEAN_COMPILE is not set +CONFIG_BROKEN=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_LOCK_KERNEL=y + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +# CONFIG_POSIX_MQUEUE is not set +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_HOTPLUG=y +CONFIG_KOBJECT_UEVENT=y +# CONFIG_IKCONFIG is not set +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_FUTEX=y +CONFIG_EPOLL=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SHMEM=y +CONFIG_CC_ALIGN_FUNCTIONS=0 +CONFIG_CC_ALIGN_LABELS=0 +CONFIG_CC_ALIGN_LOOPS=0 +CONFIG_CC_ALIGN_JUMPS=0 +# CONFIG_TINY_SHMEM is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +# CONFIG_MODULE_UNLOAD is not set +CONFIG_OBSOLETE_MODPARM=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set + +# +# Processor type and features +# +CONFIG_IA64=y +CONFIG_64BIT=y +CONFIG_MMU=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_TIME_INTERPOLATION=y +CONFIG_EFI=y +CONFIG_GENERIC_IOMAP=y +# CONFIG_IA64_GENERIC is not set +# CONFIG_IA64_DIG is not set +CONFIG_IA64_HP_ZX1=y +# CONFIG_IA64_SGI_SN2 is not set +# CONFIG_IA64_HP_SIM is not set +# CONFIG_ITANIUM is not set +CONFIG_MCKINLEY=y +# CONFIG_IA64_PAGE_SIZE_4KB is not set +# CONFIG_IA64_PAGE_SIZE_8KB is not set +CONFIG_IA64_PAGE_SIZE_16KB=y +# CONFIG_IA64_PAGE_SIZE_64KB is not set +CONFIG_IA64_L1_CACHE_SHIFT=7 +# CONFIG_NUMA is not set +CONFIG_VIRTUAL_MEM_MAP=y +# CONFIG_IA64_CYCLONE is not set +CONFIG_IOSAPIC=y +CONFIG_FORCE_MAX_ZONEORDER=18 +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +# CONFIG_HOTPLUG_CPU is not set +# CONFIG_PREEMPT is not set +CONFIG_HAVE_DEC_LOCK=y +CONFIG_IA32_SUPPORT=y +CONFIG_COMPAT=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y + +# +# Firmware Drivers +# +CONFIG_EFI_VARS=y +CONFIG_EFI_PCDP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y + +# +# Power management and ACPI +# +CONFIG_PM=y +CONFIG_ACPI=y + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_BUTTON=y +CONFIG_ACPI_VIDEO=m +CONFIG_ACPI_FAN=y +CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_THERMAL=y +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BUS=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SYSTEM=y + +# +# Bus options (PCI, PCMCIA) +# +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_PCI_MSI is not set +CONFIG_PCI_LEGACY_PROC=y +CONFIG_PCI_NAMES=y + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=y +# CONFIG_HOTPLUG_PCI_FAKE is not set +CONFIG_HOTPLUG_PCI_ACPI=y +# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_PCIE is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PC-card bridges +# + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CDROM_PKTCDVD is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_BLK_DEV_IDESCSI is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=y +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +# CONFIG_IDEDMA_PCI_AUTO is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +CONFIG_BLK_DEV_CMD64X=y +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +# CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_IDEDMA_AUTO is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +CONFIG_CHR_DEV_OSST=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set + +# +# SCSI low-level drivers +# +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_CPQFCTS is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_PCI2000 is not set +# CONFIG_SCSI_PCI2220I is not set +# CONFIG_SCSI_QLOGIC_ISP is not set +# CONFIG_SCSI_QLOGIC_FC is not set +CONFIG_SCSI_QLOGIC_1280=y +# CONFIG_SCSI_QLOGIC_1280_1040 is not set +CONFIG_SCSI_QLA2XXX=y +# CONFIG_SCSI_QLA21XX is not set +# CONFIG_SCSI_QLA22XX is not set +# CONFIG_SCSI_QLA2300 is not set +# CONFIG_SCSI_QLA2322 is not set +# CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_QLA6322 is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +CONFIG_FUSION=y +CONFIG_FUSION_MAX_SGE=40 +# CONFIG_FUSION_CTL is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +# CONFIG_NETLINK_DEV is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_IP_TCPDIAG is not set +# CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# IP: Virtual Server Configuration +# +# CONFIG_IP_VS is not set +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set + +# +# IP: Netfilter Configuration +# +# CONFIG_IP_NF_CONNTRACK is not set +# CONFIG_IP_NF_CONNTRACK_MARK is not set +# CONFIG_IP_NF_QUEUE is not set +# CONFIG_IP_NF_IPTABLES is not set +CONFIG_IP_NF_ARPTABLES=y +# CONFIG_IP_NF_ARPFILTER is not set +# CONFIG_IP_NF_ARP_MANGLE is not set +# CONFIG_IP_NF_COMPAT_IPCHAINS is not set +# CONFIG_IP_NF_COMPAT_IPFWADM is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set +# CONFIG_NET_CLS_ROUTE is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set + +# +# Tulip family network device support +# +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +CONFIG_TULIP=y +CONFIG_TULIP_MWI=y +CONFIG_TULIP_MMIO=y +CONFIG_TULIP_NAPI=y +CONFIG_TULIP_NAPI_HW_MITIGATION=y +# CONFIG_DE4X5 is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +CONFIG_E100=y +# CONFIG_E100_NAPI is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_VIA_RHINE is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +CONFIG_TIGON3=y + +# +# Ethernet (10000 Mbit) +# +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=y +# CONFIG_INPUT_TSDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +# CONFIG_GAMEPORT is not set +CONFIG_SOUND_GAMEPORT=y +CONFIG_SERIO=y +# CONFIG_SERIO_I8042 is not set +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set +# CONFIG_SERIO_RAW is not set + +# +# Input Device Drivers +# +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_ACPI=y +CONFIG_SERIAL_8250_NR_UARTS=8 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_MULTIPORT is not set +# CONFIG_SERIAL_8250_RSA is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +CONFIG_AGP=y +CONFIG_AGP_HP_ZX1=y +CONFIG_DRM=y +# CONFIG_DRM_TDFX is not set +# CONFIG_DRM_GAMMA is not set +# CONFIG_DRM_R128 is not set +CONFIG_DRM_RADEON=y +# CONFIG_DRM_MGA is not set +# CONFIG_DRM_SIS is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_HPET is not set + +# +# I2C support +# +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y + +# +# I2C Algorithms +# +CONFIG_I2C_ALGOBIT=y +CONFIG_I2C_ALGOPCF=y +# CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_ISA is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_SCx200_ACB is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_ISA is not set + +# +# Hardware Sensors Chip support +# +# CONFIG_I2C_SENSOR is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83627HF is not set + +# +# Other I2C Chip support +# +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_RTC8564 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +CONFIG_VIDEO_DEV=y + +# +# Video For Linux +# + +# +# Video Adapters +# +# CONFIG_VIDEO_BT848 is not set +# CONFIG_VIDEO_CPIA is not set +# CONFIG_VIDEO_SAA5246A is not set +# CONFIG_VIDEO_SAA5249 is not set +# CONFIG_TUNER_3036 is not set +# CONFIG_VIDEO_STRADIS is not set +# CONFIG_VIDEO_ZORAN is not set +# CONFIG_VIDEO_ZR36120 is not set +# CONFIG_VIDEO_SAA7134 is not set +# CONFIG_VIDEO_MXB is not set +# CONFIG_VIDEO_DPC is not set +# CONFIG_VIDEO_HEXIUM_ORION is not set +# CONFIG_VIDEO_HEXIUM_GEMINI is not set +# CONFIG_VIDEO_CX88 is not set +# CONFIG_VIDEO_OVCAMCHIP is not set + +# +# Radio Adapters +# +# CONFIG_RADIO_GEMTEK_PCI is not set +# CONFIG_RADIO_MAXIRADIO is not set +# CONFIG_RADIO_MAESTRO is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +CONFIG_FB=y +CONFIG_FB_MODE_HELPERS=y +# CONFIG_FB_TILEBLITTING is not set +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON_OLD is not set +CONFIG_FB_RADEON=y +CONFIG_FB_RADEON_I2C=y +CONFIG_FB_RADEON_DEBUG=y +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_VIRTUAL is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y + +# +# Logo configuration +# +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_LOGO_LINUX_CLUT224=y + +# +# Sound +# +CONFIG_SOUND=y + +# +# Advanced Linux Sound Architecture +# +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_RAWMIDI=y +CONFIG_SND_SEQUENCER=y +# CONFIG_SND_SEQ_DUMMY is not set +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_SEQUENCER_OSS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set + +# +# Generic devices +# +CONFIG_SND_MPU401_UART=y +CONFIG_SND_OPL3_LIB=y +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + +# +# PCI devices +# +CONFIG_SND_AC97_CODEC=y +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_YMFPCI is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_MAESTRO3 is not set +CONFIG_SND_FM801=y +CONFIG_SND_FM801_TEA575X=y +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VX222 is not set + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set + +# +# Open Sound System +# +# CONFIG_SOUND_PRIME is not set + +# +# USB support +# +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +# CONFIG_USB_DEVICEFS is not set +CONFIG_USB_BANDWIDTH=y +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_SUSPEND is not set +# CONFIG_USB_OTG is not set +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_AUDIO is not set +# CONFIG_USB_BLUETOOTH_TTY is not set +# CONFIG_USB_MIDI is not set +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_RW_DETECT is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_HP8200e is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT=y +# CONFIG_HID_FF is not set +CONFIG_USB_HIDDEV=y +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_MTOUCH is not set +# CONFIG_USB_EGALAX is not set +# CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set +# CONFIG_USB_HPUSBSCSI is not set + +# +# USB Multimedia devices +# +# CONFIG_USB_DABUSB is not set +# CONFIG_USB_VICAM is not set +# CONFIG_USB_DSBR is not set +# CONFIG_USB_IBMCAM is not set +# CONFIG_USB_KONICAWC is not set +# CONFIG_USB_OV511 is not set +# CONFIG_USB_SE401 is not set +# CONFIG_USB_SN9C102 is not set +# CONFIG_USB_STV680 is not set + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_TIGL is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set + +# +# USB ATM/DSL drivers +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +# CONFIG_EXT2_FS_POSIX_ACL is not set +# CONFIG_EXT2_FS_SECURITY is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +# CONFIG_EXT3_FS_POSIX_ACL is not set +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_XFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +CONFIG_AUTOFS_FS=y +# CONFIG_AUTOFS4_FS is not set + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=y +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVPTS_FS_XATTR is not set +CONFIG_TMPFS=y +CONFIG_TMPFS_XATTR=y +CONFIG_TMPFS_SECURITY=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V4=y +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V4 is not set +# CONFIG_NFSD_TCP is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +CONFIG_RPCSEC_GSS_KRB5=y +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +CONFIG_EFI_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=y +CONFIG_NLS_CODEPAGE_775=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_CODEPAGE_852=y +CONFIG_NLS_CODEPAGE_855=y +CONFIG_NLS_CODEPAGE_857=y +CONFIG_NLS_CODEPAGE_860=y +CONFIG_NLS_CODEPAGE_861=y +CONFIG_NLS_CODEPAGE_862=y +CONFIG_NLS_CODEPAGE_863=y +CONFIG_NLS_CODEPAGE_864=y +CONFIG_NLS_CODEPAGE_865=y +CONFIG_NLS_CODEPAGE_866=y +CONFIG_NLS_CODEPAGE_869=y +CONFIG_NLS_CODEPAGE_936=y +CONFIG_NLS_CODEPAGE_950=y +CONFIG_NLS_CODEPAGE_932=y +CONFIG_NLS_CODEPAGE_949=y +CONFIG_NLS_CODEPAGE_874=y +CONFIG_NLS_ISO8859_8=y +# CONFIG_NLS_CODEPAGE_1250 is not set +CONFIG_NLS_CODEPAGE_1251=y +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=y +CONFIG_NLS_ISO8859_3=y +CONFIG_NLS_ISO8859_4=y +CONFIG_NLS_ISO8859_5=y +CONFIG_NLS_ISO8859_6=y +CONFIG_NLS_ISO8859_7=y +CONFIG_NLS_ISO8859_9=y +CONFIG_NLS_ISO8859_13=y +CONFIG_NLS_ISO8859_14=y +CONFIG_NLS_ISO8859_15=y +CONFIG_NLS_KOI8_R=y +CONFIG_NLS_KOI8_U=y +CONFIG_NLS_UTF8=y + +# +# Library routines +# +# CONFIG_CRC_CCITT is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set + +# +# Profiling support +# +# CONFIG_PROFILING is not set + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set +CONFIG_IA64_GRANULE_16MB=y +# CONFIG_IA64_GRANULE_64MB is not set +CONFIG_IA64_PRINT_HAZARDS=y +# CONFIG_DISABLE_VHPT is not set +# CONFIG_IA64_DEBUG_CMPXCHG is not set +# CONFIG_IA64_DEBUG_IRQ is not set +CONFIG_SYSVIPC_COMPAT=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Hardware crypto devices +# diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig new file mode 100644 index 000000000000..7539e83bf054 --- /dev/null +++ b/arch/ia64/defconfig @@ -0,0 +1,1199 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.10 +# Thu Jan 6 11:13:13 2005 +# + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_LOCK_KERNEL=y + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_HOTPLUG=y +CONFIG_KOBJECT_UEVENT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_FUTEX=y +CONFIG_EPOLL=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SHMEM=y +CONFIG_CC_ALIGN_FUNCTIONS=0 +CONFIG_CC_ALIGN_LABELS=0 +CONFIG_CC_ALIGN_LOOPS=0 +CONFIG_CC_ALIGN_JUMPS=0 +# CONFIG_TINY_SHMEM is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_OBSOLETE_MODPARM=y +CONFIG_MODVERSIONS=y +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Processor type and features +# +CONFIG_IA64=y +CONFIG_64BIT=y +CONFIG_MMU=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_TIME_INTERPOLATION=y +CONFIG_EFI=y +CONFIG_GENERIC_IOMAP=y +CONFIG_IA64_GENERIC=y +# CONFIG_IA64_DIG is not set +# CONFIG_IA64_HP_ZX1 is not set +# CONFIG_IA64_SGI_SN2 is not set +# CONFIG_IA64_HP_SIM is not set +# CONFIG_ITANIUM is not set +CONFIG_MCKINLEY=y +# CONFIG_IA64_PAGE_SIZE_4KB is not set +# CONFIG_IA64_PAGE_SIZE_8KB is not set +CONFIG_IA64_PAGE_SIZE_16KB=y +# CONFIG_IA64_PAGE_SIZE_64KB is not set +CONFIG_IA64_L1_CACHE_SHIFT=7 +CONFIG_NUMA=y +CONFIG_VIRTUAL_MEM_MAP=y +CONFIG_DISCONTIGMEM=y +CONFIG_IA64_CYCLONE=y +CONFIG_IOSAPIC=y +CONFIG_FORCE_MAX_ZONEORDER=18 +CONFIG_SMP=y +CONFIG_NR_CPUS=512 +CONFIG_HOTPLUG_CPU=y +# CONFIG_PREEMPT is not set +CONFIG_HAVE_DEC_LOCK=y +CONFIG_IA32_SUPPORT=y +CONFIG_COMPAT=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_ACPI_DEALLOCATE_IRQ=y + +# +# Firmware Drivers +# +CONFIG_EFI_VARS=y +CONFIG_EFI_PCDP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m + +# +# Power management and ACPI +# +CONFIG_PM=y +CONFIG_ACPI=y + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_VIDEO=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_ACPI_HOTPLUG_CPU=y +CONFIG_ACPI_THERMAL=m +CONFIG_ACPI_NUMA=y +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BUS=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SYSTEM=y +CONFIG_ACPI_CONTAINER=m + +# +# Bus options (PCI, PCMCIA) +# +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_PCI_MSI is not set +CONFIG_PCI_LEGACY_PROC=y +CONFIG_PCI_NAMES=y + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m +# CONFIG_HOTPLUG_PCI_FAKE is not set +CONFIG_HOTPLUG_PCI_ACPI=m +# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_PCIE is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PC-card bridges +# + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CDROM_PKTCDVD is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +CONFIG_BLK_DEV_IDEFLOPPY=y +CONFIG_BLK_DEV_IDESCSI=m +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=y +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +CONFIG_BLK_DEV_CMD64X=y +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +CONFIG_BLK_DEV_SGIIOC4=y +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=m +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=m + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set + +# +# SCSI low-level drivers +# +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_ISP is not set +CONFIG_SCSI_QLOGIC_FC=y +# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set +CONFIG_SCSI_QLOGIC_1280=y +# CONFIG_SCSI_QLOGIC_1280_1040 is not set +CONFIG_SCSI_QLA2XXX=y +CONFIG_SCSI_QLA21XX=m +CONFIG_SCSI_QLA22XX=m +CONFIG_SCSI_QLA2300=m +CONFIG_SCSI_QLA2322=m +# CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +# CONFIG_MD_RAID10 is not set +CONFIG_MD_RAID5=m +CONFIG_MD_RAID6=m +CONFIG_MD_MULTIPATH=m +# CONFIG_MD_FAULTY is not set +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=y +CONFIG_FUSION_MAX_SGE=40 +# CONFIG_FUSION_CTL is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_NETLINK_DEV=y +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +CONFIG_ARPD=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_IP_TCPDIAG=y +# CONFIG_IP_TCPDIAG_IPV6 is not set +# CONFIG_IPV6 is not set +# CONFIG_NETFILTER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set +# CONFIG_NET_CLS_ROUTE is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +CONFIG_NETPOLL=y +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_ETHERTAP is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=m +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set + +# +# Tulip family network device support +# +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +CONFIG_TULIP=m +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +# CONFIG_TULIP_NAPI is not set +# CONFIG_DE4X5 is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_DGRS is not set +CONFIG_EEPRO100=m +# CONFIG_EEPRO100_PIO is not set +CONFIG_E100=m +# CONFIG_E100_NAPI is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_VIA_RHINE is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +CONFIG_TIGON3=y + +# +# Ethernet (10000 Mbit) +# +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +CONFIG_NETCONSOLE=y + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +CONFIG_GAMEPORT=m +CONFIG_SOUND_GAMEPORT=m +# CONFIG_GAMEPORT_NS558 is not set +# CONFIG_GAMEPORT_L4 is not set +# CONFIG_GAMEPORT_EMU10K1 is not set +# CONFIG_GAMEPORT_VORTEX is not set +# CONFIG_GAMEPORT_FM801 is not set +# CONFIG_GAMEPORT_CS461x is not set +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_SYNCLINK is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_N_HDLC is not set +# CONFIG_STALDRV is not set +CONFIG_SGI_SNSC=y + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_ACPI=y +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_MULTIPORT is not set +# CONFIG_SERIAL_8250_RSA is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_SERIAL_SGI_L1_CONSOLE=y +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_AGP_HP_ZX1=m +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_MGA=m +CONFIG_DRM_SIS=m +CONFIG_RAW_DRIVER=m +CONFIG_HPET=y +# CONFIG_HPET_RTC_IRQ is not set +CONFIG_HPET_MMAP=y +CONFIG_MAX_RAW_DEVS=256 +CONFIG_MMTIMER=y + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +CONFIG_SOUND=m + +# +# Advanced Linux Sound Architecture +# +CONFIG_SND=m +CONFIG_SND_TIMER=m +CONFIG_SND_PCM=m +CONFIG_SND_HWDEP=m +CONFIG_SND_RAWMIDI=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_VERBOSE_PRINTK=y +# CONFIG_SND_DEBUG is not set + +# +# Generic devices +# +CONFIG_SND_MPU401_UART=m +CONFIG_SND_OPL3_LIB=m +CONFIG_SND_DUMMY=m +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m +CONFIG_SND_SERIAL_U16550=m +CONFIG_SND_MPU401=m + +# +# PCI devices +# +CONFIG_SND_AC97_CODEC=m +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +CONFIG_SND_CS46XX=m +CONFIG_SND_CS46XX_NEW_DSP=y +CONFIG_SND_CS4281=m +CONFIG_SND_EMU10K1=m +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_YMFPCI is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_MAESTRO3 is not set +CONFIG_SND_FM801=m +# CONFIG_SND_FM801_TEA575X is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VX222 is not set + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set + +# +# Open Sound System +# +# CONFIG_SOUND_PRIME is not set + +# +# USB support +# +CONFIG_USB=m +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_SUSPEND is not set +# CONFIG_USB_OTG is not set +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=m +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=m +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_AUDIO is not set +# CONFIG_USB_BLUETOOTH_TTY is not set +# CONFIG_USB_MIDI is not set +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_RW_DETECT is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_HP8200e is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=m +CONFIG_USB_HIDINPUT=y +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set + +# +# USB HID Boot Protocol drivers +# +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_MTOUCH is not set +# CONFIG_USB_EGALAX is not set +# CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB Multimedia devices +# +# CONFIG_USB_DABUSB is not set + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_TIGL is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_TEST is not set + +# +# USB ATM/DSL drivers +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# InfiniBand support +# +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_MTHCA=m +# CONFIG_INFINIBAND_MTHCA_DEBUG is not set +CONFIG_INFINIBAND_IPOIB=m +# CONFIG_INFINIBAND_IPOIB_DEBUG is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_XFS_FS=y +# CONFIG_XFS_RT is not set +# CONFIG_XFS_QUOTA is not set +# CONFIG_XFS_SECURITY is not set +# CONFIG_XFS_POSIX_ACL is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=m +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +# CONFIG_MSDOS_FS is not set +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +CONFIG_NTFS_FS=m +# CONFIG_NTFS_DEBUG is not set +# CONFIG_NTFS_RW is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVPTS_FS_XATTR is not set +CONFIG_TMPFS=y +CONFIG_TMPFS_XATTR=y +CONFIG_TMPFS_SECURITY=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFS_V4=y +CONFIG_NFS_DIRECTIO=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m +CONFIG_SUNRPC=m +CONFIG_SUNRPC_GSS=m +CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_SMB_FS=m +CONFIG_SMB_NLS_DEFAULT=y +CONFIG_SMB_NLS_REMOTE="cp437" +CONFIG_CIFS=m +# CONFIG_CIFS_STATS is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +CONFIG_EFI_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Library routines +# +# CONFIG_CRC_CCITT is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set + +# +# HP Simulator drivers +# +# CONFIG_HP_SIMETH is not set +# CONFIG_HP_SIMSERIAL is not set +# CONFIG_HP_SIMSCSI is not set + +# +# Profiling support +# +# CONFIG_PROFILING is not set + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set +CONFIG_IA64_GRANULE_16MB=y +# CONFIG_IA64_GRANULE_64MB is not set +# CONFIG_IA64_PRINT_HAZARDS is not set +# CONFIG_DISABLE_VHPT is not set +# CONFIG_IA64_DEBUG_CMPXCHG is not set +# CONFIG_IA64_DEBUG_IRQ is not set +CONFIG_SYSVIPC_COMPAT=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=m +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +CONFIG_CRYPTO_DES=m +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Hardware crypto devices +# diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile new file mode 100644 index 000000000000..971cd7870dd4 --- /dev/null +++ b/arch/ia64/dig/Makefile @@ -0,0 +1,9 @@ +# +# ia64/platform/dig/Makefile +# +# Copyright (C) 1999 Silicon Graphics, Inc. +# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) +# + +obj-y := setup.o +obj-$(CONFIG_IA64_GENERIC) += machvec.o diff --git a/arch/ia64/dig/machvec.c b/arch/ia64/dig/machvec.c new file mode 100644 index 000000000000..0c55bdafb473 --- /dev/null +++ b/arch/ia64/dig/machvec.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME dig +#define MACHVEC_PLATFORM_HEADER <asm/machvec_dig.h> +#include <asm/machvec_init.h> diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c new file mode 100644 index 000000000000..d58003f1ad02 --- /dev/null +++ b/arch/ia64/dig/setup.c @@ -0,0 +1,86 @@ +/* + * Platform dependent support for DIG64 platforms. + * + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999, 2001 Hewlett-Packard Co + * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com> + */ +#include <linux/config.h> + +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/kdev_t.h> +#include <linux/string.h> +#include <linux/tty.h> +#include <linux/console.h> +#include <linux/timex.h> +#include <linux/sched.h> +#include <linux/root_dev.h> + +#include <asm/io.h> +#include <asm/machvec.h> +#include <asm/system.h> + +/* + * This is here so we can use the CMOS detection in ide-probe.c to + * determine what drives are present. In theory, we don't need this + * as the auto-detection could be done via ide-probe.c:do_probe() but + * in practice that would be much slower, which is painful when + * running in the simulator. Note that passing zeroes in DRIVE_INFO + * is sufficient (the IDE driver will autodetect the drive geometry). + */ +char drive_info[4*16]; + +void __init +dig_setup (char **cmdline_p) +{ + unsigned int orig_x, orig_y, num_cols, num_rows, font_height; + + /* + * Default to /dev/sda2. This assumes that the EFI partition + * is physical disk 1 partition 1 and the Linux root disk is + * physical disk 1 partition 2. + */ + ROOT_DEV = Root_SDA2; /* default to second partition on first drive */ + +#ifdef CONFIG_SMP + init_smp_config(); +#endif + + memset(&screen_info, 0, sizeof(screen_info)); + + if (!ia64_boot_param->console_info.num_rows + || !ia64_boot_param->console_info.num_cols) + { + printk(KERN_WARNING "dig_setup: warning: invalid screen-info, guessing 80x25\n"); + orig_x = 0; + orig_y = 0; + num_cols = 80; + num_rows = 25; + font_height = 16; + } else { + orig_x = ia64_boot_param->console_info.orig_x; + orig_y = ia64_boot_param->console_info.orig_y; + num_cols = ia64_boot_param->console_info.num_cols; + num_rows = ia64_boot_param->console_info.num_rows; + font_height = 400 / num_rows; + } + + screen_info.orig_x = orig_x; + screen_info.orig_y = orig_y; + screen_info.orig_video_cols = num_cols; + screen_info.orig_video_lines = num_rows; + screen_info.orig_video_points = font_height; + screen_info.orig_video_mode = 3; /* XXX fake */ + screen_info.orig_video_isVGA = 1; /* XXX fake */ + screen_info.orig_video_ega_bx = 3; /* XXX fake */ +} + +void __init +dig_irq_init (void) +{ +} diff --git a/arch/ia64/hp/common/Makefile b/arch/ia64/hp/common/Makefile new file mode 100644 index 000000000000..f61a60057ff7 --- /dev/null +++ b/arch/ia64/hp/common/Makefile @@ -0,0 +1,10 @@ +# +# ia64/platform/hp/common/Makefile +# +# Copyright (C) 2002 Hewlett Packard +# Copyright (C) Alex Williamson (alex_williamson@hp.com) +# + +obj-y := sba_iommu.o +obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += hwsw_iommu.o +obj-$(CONFIG_IA64_GENERIC) += hwsw_iommu.o diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c new file mode 100644 index 000000000000..80f8ef013939 --- /dev/null +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. + * Contributed by David Mosberger-Tang <davidm@hpl.hp.com> + * + * This is a pseudo I/O MMU which dispatches to the hardware I/O MMU + * whenever possible. We assume that the hardware I/O MMU requires + * full 32-bit addressability, as is the case, e.g., for HP zx1-based + * systems (there, the I/O MMU window is mapped at 3-4GB). If a + * device doesn't provide full 32-bit addressability, we fall back on + * the sw I/O TLB. This is good enough to let us support broken + * hardware such as soundcards which have a DMA engine that can + * address only 28 bits. + */ + +#include <linux/device.h> + +#include <asm/machvec.h> + +/* swiotlb declarations & definitions: */ +extern void swiotlb_init_with_default_size (size_t size); +extern ia64_mv_dma_alloc_coherent swiotlb_alloc_coherent; +extern ia64_mv_dma_free_coherent swiotlb_free_coherent; +extern ia64_mv_dma_map_single swiotlb_map_single; +extern ia64_mv_dma_unmap_single swiotlb_unmap_single; +extern ia64_mv_dma_map_sg swiotlb_map_sg; +extern ia64_mv_dma_unmap_sg swiotlb_unmap_sg; +extern ia64_mv_dma_supported swiotlb_dma_supported; +extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error; + +/* hwiommu declarations & definitions: */ + +extern ia64_mv_dma_alloc_coherent sba_alloc_coherent; +extern ia64_mv_dma_free_coherent sba_free_coherent; +extern ia64_mv_dma_map_single sba_map_single; +extern ia64_mv_dma_unmap_single sba_unmap_single; +extern ia64_mv_dma_map_sg sba_map_sg; +extern ia64_mv_dma_unmap_sg sba_unmap_sg; +extern ia64_mv_dma_supported sba_dma_supported; +extern ia64_mv_dma_mapping_error sba_dma_mapping_error; + +#define hwiommu_alloc_coherent sba_alloc_coherent +#define hwiommu_free_coherent sba_free_coherent +#define hwiommu_map_single sba_map_single +#define hwiommu_unmap_single sba_unmap_single +#define hwiommu_map_sg sba_map_sg +#define hwiommu_unmap_sg sba_unmap_sg +#define hwiommu_dma_supported sba_dma_supported +#define hwiommu_dma_mapping_error sba_dma_mapping_error +#define hwiommu_sync_single_for_cpu machvec_dma_sync_single +#define hwiommu_sync_sg_for_cpu machvec_dma_sync_sg +#define hwiommu_sync_single_for_device machvec_dma_sync_single +#define hwiommu_sync_sg_for_device machvec_dma_sync_sg + + +/* + * Note: we need to make the determination of whether or not to use + * the sw I/O TLB based purely on the device structure. Anything else + * would be unreliable or would be too intrusive. + */ +static inline int +use_swiotlb (struct device *dev) +{ + return dev && dev->dma_mask && !hwiommu_dma_supported(dev, *dev->dma_mask); +} + +void +hwsw_init (void) +{ + /* default to a smallish 2MB sw I/O TLB */ + swiotlb_init_with_default_size (2 * (1<<20)); +} + +void * +hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags) +{ + if (use_swiotlb(dev)) + return swiotlb_alloc_coherent(dev, size, dma_handle, flags); + else + return hwiommu_alloc_coherent(dev, size, dma_handle, flags); +} + +void +hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) +{ + if (use_swiotlb(dev)) + swiotlb_free_coherent(dev, size, vaddr, dma_handle); + else + hwiommu_free_coherent(dev, size, vaddr, dma_handle); +} + +dma_addr_t +hwsw_map_single (struct device *dev, void *addr, size_t size, int dir) +{ + if (use_swiotlb(dev)) + return swiotlb_map_single(dev, addr, size, dir); + else + return hwiommu_map_single(dev, addr, size, dir); +} + +void +hwsw_unmap_single (struct device *dev, dma_addr_t iova, size_t size, int dir) +{ + if (use_swiotlb(dev)) + return swiotlb_unmap_single(dev, iova, size, dir); + else + return hwiommu_unmap_single(dev, iova, size, dir); +} + + +int +hwsw_map_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir) +{ + if (use_swiotlb(dev)) + return swiotlb_map_sg(dev, sglist, nents, dir); + else + return hwiommu_map_sg(dev, sglist, nents, dir); +} + +void +hwsw_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir) +{ + if (use_swiotlb(dev)) + return swiotlb_unmap_sg(dev, sglist, nents, dir); + else + return hwiommu_unmap_sg(dev, sglist, nents, dir); +} + +void +hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir) +{ + if (use_swiotlb(dev)) + swiotlb_sync_single_for_cpu(dev, addr, size, dir); + else + hwiommu_sync_single_for_cpu(dev, addr, size, dir); +} + +void +hwsw_sync_sg_for_cpu (struct device *dev, struct scatterlist *sg, int nelems, int dir) +{ + if (use_swiotlb(dev)) + swiotlb_sync_sg_for_cpu(dev, sg, nelems, dir); + else + hwiommu_sync_sg_for_cpu(dev, sg, nelems, dir); +} + +void +hwsw_sync_single_for_device (struct device *dev, dma_addr_t addr, size_t size, int dir) +{ + if (use_swiotlb(dev)) + swiotlb_sync_single_for_device(dev, addr, size, dir); + else + hwiommu_sync_single_for_device(dev, addr, size, dir); +} + +void +hwsw_sync_sg_for_device (struct device *dev, struct scatterlist *sg, int nelems, int dir) +{ + if (use_swiotlb(dev)) + swiotlb_sync_sg_for_device(dev, sg, nelems, dir); + else + hwiommu_sync_sg_for_device(dev, sg, nelems, dir); +} + +int +hwsw_dma_supported (struct device *dev, u64 mask) +{ + if (hwiommu_dma_supported(dev, mask)) + return 1; + return swiotlb_dma_supported(dev, mask); +} + +int +hwsw_dma_mapping_error (dma_addr_t dma_addr) +{ + return hwiommu_dma_mapping_error (dma_addr) || swiotlb_dma_mapping_error(dma_addr); +} + +EXPORT_SYMBOL(hwsw_dma_mapping_error); +EXPORT_SYMBOL(hwsw_map_single); +EXPORT_SYMBOL(hwsw_unmap_single); +EXPORT_SYMBOL(hwsw_map_sg); +EXPORT_SYMBOL(hwsw_unmap_sg); +EXPORT_SYMBOL(hwsw_dma_supported); +EXPORT_SYMBOL(hwsw_alloc_coherent); +EXPORT_SYMBOL(hwsw_free_coherent); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c new file mode 100644 index 000000000000..017c9ab5fc1b --- /dev/null +++ b/arch/ia64/hp/common/sba_iommu.c @@ -0,0 +1,2121 @@ +/* +** IA64 System Bus Adapter (SBA) I/O MMU manager +** +** (c) Copyright 2002-2004 Alex Williamson +** (c) Copyright 2002-2003 Grant Grundler +** (c) Copyright 2002-2004 Hewlett-Packard Company +** +** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code) +** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code) +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** +** This module initializes the IOC (I/O Controller) found on HP +** McKinley machines and their successors. +** +*/ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/nodemask.h> +#include <linux/bitops.h> /* hweight64() */ + +#include <asm/delay.h> /* ia64_get_itc() */ +#include <asm/io.h> +#include <asm/page.h> /* PAGE_OFFSET */ +#include <asm/dma.h> +#include <asm/system.h> /* wmb() */ + +#include <asm/acpi-ext.h> + +#define PFX "IOC: " + +/* +** Enabling timing search of the pdir resource map. Output in /proc. +** Disabled by default to optimize performance. +*/ +#undef PDIR_SEARCH_TIMING + +/* +** This option allows cards capable of 64bit DMA to bypass the IOMMU. If +** not defined, all DMA will be 32bit and go through the TLB. +** There's potentially a conflict in the bio merge code with us +** advertising an iommu, but then bypassing it. Since I/O MMU bypassing +** appears to give more performance than bio-level virtual merging, we'll +** do the former for now. NOTE: BYPASS_SG also needs to be undef'd to +** completely restrict DMA to the IOMMU. +*/ +#define ALLOW_IOV_BYPASS + +/* +** This option specifically allows/disallows bypassing scatterlists with +** multiple entries. Coalescing these entries can allow better DMA streaming +** and in some cases shows better performance than entirely bypassing the +** IOMMU. Performance increase on the order of 1-2% sequential output/input +** using bonnie++ on a RAID0 MD device (sym2 & mpt). +*/ +#undef ALLOW_IOV_BYPASS_SG + +/* +** If a device prefetches beyond the end of a valid pdir entry, it will cause +** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should +** disconnect on 4k boundaries and prevent such issues. If the device is +** particularly agressive, this option will keep the entire pdir valid such +** that prefetching will hit a valid address. This could severely impact +** error containment, and is therefore off by default. The page that is +** used for spill-over is poisoned, so that should help debugging somewhat. +*/ +#undef FULL_VALID_PDIR + +#define ENABLE_MARK_CLEAN + +/* +** The number of debug flags is a clue - this code is fragile. NOTE: since +** tightening the use of res_lock the resource bitmap and actual pdir are no +** longer guaranteed to stay in sync. The sanity checking code isn't going to +** like that. +*/ +#undef DEBUG_SBA_INIT +#undef DEBUG_SBA_RUN +#undef DEBUG_SBA_RUN_SG +#undef DEBUG_SBA_RESOURCE +#undef ASSERT_PDIR_SANITY +#undef DEBUG_LARGE_SG_ENTRIES +#undef DEBUG_BYPASS + +#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY) +#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive +#endif + +#define SBA_INLINE __inline__ +/* #define SBA_INLINE */ + +#ifdef DEBUG_SBA_INIT +#define DBG_INIT(x...) printk(x) +#else +#define DBG_INIT(x...) +#endif + +#ifdef DEBUG_SBA_RUN +#define DBG_RUN(x...) printk(x) +#else +#define DBG_RUN(x...) +#endif + +#ifdef DEBUG_SBA_RUN_SG +#define DBG_RUN_SG(x...) printk(x) +#else +#define DBG_RUN_SG(x...) +#endif + + +#ifdef DEBUG_SBA_RESOURCE +#define DBG_RES(x...) printk(x) +#else +#define DBG_RES(x...) +#endif + +#ifdef DEBUG_BYPASS +#define DBG_BYPASS(x...) printk(x) +#else +#define DBG_BYPASS(x...) +#endif + +#ifdef ASSERT_PDIR_SANITY +#define ASSERT(expr) \ + if(!(expr)) { \ + printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \ + panic(#expr); \ + } +#else +#define ASSERT(expr) +#endif + +/* +** The number of pdir entries to "free" before issuing +** a read to PCOM register to flush out PCOM writes. +** Interacts with allocation granularity (ie 4 or 8 entries +** allocated and free'd/purged at a time might make this +** less interesting). +*/ +#define DELAYED_RESOURCE_CNT 64 + +#define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP) +#define ZX2_IOC_ID ((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP) +#define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP) +#define SX1000_IOC_ID ((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP) + +#define ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */ + +#define IOC_FUNC_ID 0x000 +#define IOC_FCLASS 0x008 /* function class, bist, header, rev... */ +#define IOC_IBASE 0x300 /* IO TLB */ +#define IOC_IMASK 0x308 +#define IOC_PCOM 0x310 +#define IOC_TCNFG 0x318 +#define IOC_PDIR_BASE 0x320 + +#define IOC_ROPE0_CFG 0x500 +#define IOC_ROPE_AO 0x10 /* Allow "Relaxed Ordering" */ + + +/* AGP GART driver looks for this */ +#define ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL + +/* +** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register) +** +** Some IOCs (sx1000) can run at the above pages sizes, but are +** really only supported using the IOC at a 4k page size. +** +** iovp_size could only be greater than PAGE_SIZE if we are +** confident the drivers really only touch the next physical +** page iff that driver instance owns it. +*/ +static unsigned long iovp_size; +static unsigned long iovp_shift; +static unsigned long iovp_mask; + +struct ioc { + void __iomem *ioc_hpa; /* I/O MMU base address */ + char *res_map; /* resource map, bit == pdir entry */ + u64 *pdir_base; /* physical base address */ + unsigned long ibase; /* pdir IOV Space base */ + unsigned long imask; /* pdir IOV Space mask */ + + unsigned long *res_hint; /* next avail IOVP - circular search */ + unsigned long dma_mask; + spinlock_t res_lock; /* protects the resource bitmap, but must be held when */ + /* clearing pdir to prevent races with allocations. */ + unsigned int res_bitshift; /* from the RIGHT! */ + unsigned int res_size; /* size of resource map in bytes */ +#ifdef CONFIG_NUMA + unsigned int node; /* node where this IOC lives */ +#endif +#if DELAYED_RESOURCE_CNT > 0 + spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */ + /* than res_lock for bigger systems. */ + int saved_cnt; + struct sba_dma_pair { + dma_addr_t iova; + size_t size; + } saved[DELAYED_RESOURCE_CNT]; +#endif + +#ifdef PDIR_SEARCH_TIMING +#define SBA_SEARCH_SAMPLE 0x100 + unsigned long avg_search[SBA_SEARCH_SAMPLE]; + unsigned long avg_idx; /* current index into avg_search */ +#endif + + /* Stuff we don't need in performance path */ + struct ioc *next; /* list of IOC's in system */ + acpi_handle handle; /* for multiple IOC's */ + const char *name; + unsigned int func_id; + unsigned int rev; /* HW revision of chip */ + u32 iov_size; + unsigned int pdir_size; /* in bytes, determined by IOV Space size */ + struct pci_dev *sac_only_dev; +}; + +static struct ioc *ioc_list; +static int reserve_sba_gart = 1; + +static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t); +static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t); + +#define sba_sg_address(sg) (page_address((sg)->page) + (sg)->offset) + +#ifdef FULL_VALID_PDIR +static u64 prefetch_spill_page; +#endif + +#ifdef CONFIG_PCI +# define GET_IOC(dev) (((dev)->bus == &pci_bus_type) \ + ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL) +#else +# define GET_IOC(dev) NULL +#endif + +/* +** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up +** (or rather not merge) DMA's into managable chunks. +** On parisc, this is more of the software/tuning constraint +** rather than the HW. I/O MMU allocation alogorithms can be +** faster with smaller size is (to some degree). +*/ +#define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size) + +#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1)) + +/************************************ +** SBA register read and write support +** +** BE WARNED: register writes are posted. +** (ie follow writes which must reach HW with a read) +** +*/ +#define READ_REG(addr) __raw_readq(addr) +#define WRITE_REG(val, addr) __raw_writeq(val, addr) + +#ifdef DEBUG_SBA_INIT + +/** + * sba_dump_tlb - debugging only - print IOMMU operating parameters + * @hpa: base address of the IOMMU + * + * Print the size/location of the IO MMU PDIR. + */ +static void +sba_dump_tlb(char *hpa) +{ + DBG_INIT("IO TLB at 0x%p\n", (void *)hpa); + DBG_INIT("IOC_IBASE : %016lx\n", READ_REG(hpa+IOC_IBASE)); + DBG_INIT("IOC_IMASK : %016lx\n", READ_REG(hpa+IOC_IMASK)); + DBG_INIT("IOC_TCNFG : %016lx\n", READ_REG(hpa+IOC_TCNFG)); + DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE)); + DBG_INIT("\n"); +} +#endif + + +#ifdef ASSERT_PDIR_SANITY + +/** + * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @msg: text to print ont the output line. + * @pide: pdir index. + * + * Print one entry of the IO MMU PDIR in human readable form. + */ +static void +sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide) +{ + /* start printing from lowest pde in rval */ + u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)]; + unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)]; + uint rcnt; + + printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", + msg, rptr, pide & (BITS_PER_LONG - 1), *rptr); + + rcnt = 0; + while (rcnt < BITS_PER_LONG) { + printk(KERN_DEBUG "%s %2d %p %016Lx\n", + (rcnt == (pide & (BITS_PER_LONG - 1))) + ? " -->" : " ", + rcnt, ptr, (unsigned long long) *ptr ); + rcnt++; + ptr++; + } + printk(KERN_DEBUG "%s", msg); +} + + +/** + * sba_check_pdir - debugging only - consistency checker + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @msg: text to print ont the output line. + * + * Verify the resource map and pdir state is consistent + */ +static int +sba_check_pdir(struct ioc *ioc, char *msg) +{ + u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]); + u64 *rptr = (u64 *) ioc->res_map; /* resource map ptr */ + u64 *pptr = ioc->pdir_base; /* pdir ptr */ + uint pide = 0; + + while (rptr < rptr_end) { + u64 rval; + int rcnt; /* number of bits we might check */ + + rval = *rptr; + rcnt = 64; + + while (rcnt) { + /* Get last byte and highest bit from that */ + u32 pde = ((u32)((*pptr >> (63)) & 0x1)); + if ((rval & 0x1) ^ pde) + { + /* + ** BUMMER! -- res_map != pdir -- + ** Dump rval and matching pdir entries + */ + sba_dump_pdir_entry(ioc, msg, pide); + return(1); + } + rcnt--; + rval >>= 1; /* try the next bit */ + pptr++; + pide++; + } + rptr++; /* look at next word of res_map */ + } + /* It'd be nice if we always got here :^) */ + return 0; +} + + +/** + * sba_dump_sg - debugging only - print Scatter-Gather list + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @startsg: head of the SG list + * @nents: number of entries in SG list + * + * print the SG list so we can verify it's correct by hand. + */ +static void +sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) +{ + while (nents-- > 0) { + printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents, + startsg->dma_address, startsg->dma_length, + sba_sg_address(startsg)); + startsg++; + } +} + +static void +sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) +{ + struct scatterlist *the_sg = startsg; + int the_nents = nents; + + while (the_nents-- > 0) { + if (sba_sg_address(the_sg) == 0x0UL) + sba_dump_sg(NULL, startsg, nents); + the_sg++; + } +} + +#endif /* ASSERT_PDIR_SANITY */ + + + + +/************************************************************** +* +* I/O Pdir Resource Management +* +* Bits set in the resource map are in use. +* Each bit can represent a number of pages. +* LSbs represent lower addresses (IOVA's). +* +***************************************************************/ +#define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */ + +/* Convert from IOVP to IOVA and vice versa. */ +#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset)) +#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase)) + +#define PDIR_ENTRY_SIZE sizeof(u64) + +#define PDIR_INDEX(iovp) ((iovp)>>iovp_shift) + +#define RESMAP_MASK(n) ~(~0UL << (n)) +#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1) + + +/** + * For most cases the normal get_order is sufficient, however it limits us + * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity. + * It only incurs about 1 clock cycle to use this one with the static variable + * and makes the code more intuitive. + */ +static SBA_INLINE int +get_iovp_order (unsigned long size) +{ + long double d = size - 1; + long order; + + order = ia64_getf_exp(d); + order = order - iovp_shift - 0xffff + 1; + if (order < 0) + order = 0; + return order; +} + +/** + * sba_search_bitmap - find free space in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @bits_wanted: number of entries we need. + * + * Find consecutive free bits in resource bitmap. + * Each bit represents one entry in the IO Pdir. + * Cool perf optimization: search for log2(size) bits at a time. + */ +static SBA_INLINE unsigned long +sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted) +{ + unsigned long *res_ptr = ioc->res_hint; + unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]); + unsigned long pide = ~0UL; + + ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); + ASSERT(res_ptr < res_end); + + /* + * N.B. REO/Grande defect AR2305 can cause TLB fetch timeouts + * if a TLB entry is purged while in use. sba_mark_invalid() + * purges IOTLB entries in power-of-two sizes, so we also + * allocate IOVA space in power-of-two sizes. + */ + bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift); + + if (likely(bits_wanted == 1)) { + unsigned int bitshiftcnt; + for(; res_ptr < res_end ; res_ptr++) { + if (likely(*res_ptr != ~0UL)) { + bitshiftcnt = ffz(*res_ptr); + *res_ptr |= (1UL << bitshiftcnt); + pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); + pide <<= 3; /* convert to bit address */ + pide += bitshiftcnt; + ioc->res_bitshift = bitshiftcnt + bits_wanted; + goto found_it; + } + } + goto not_found; + + } + + if (likely(bits_wanted <= BITS_PER_LONG/2)) { + /* + ** Search the resource bit map on well-aligned values. + ** "o" is the alignment. + ** We need the alignment to invalidate I/O TLB using + ** SBA HW features in the unmap path. + */ + unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift); + uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o); + unsigned long mask, base_mask; + + base_mask = RESMAP_MASK(bits_wanted); + mask = base_mask << bitshiftcnt; + + DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr); + for(; res_ptr < res_end ; res_ptr++) + { + DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr); + ASSERT(0 != mask); + for (; mask ; mask <<= o, bitshiftcnt += o) { + if(0 == ((*res_ptr) & mask)) { + *res_ptr |= mask; /* mark resources busy! */ + pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); + pide <<= 3; /* convert to bit address */ + pide += bitshiftcnt; + ioc->res_bitshift = bitshiftcnt + bits_wanted; + goto found_it; + } + } + + bitshiftcnt = 0; + mask = base_mask; + + } + + } else { + int qwords, bits, i; + unsigned long *end; + + qwords = bits_wanted >> 6; /* /64 */ + bits = bits_wanted - (qwords * BITS_PER_LONG); + + end = res_end - qwords; + + for (; res_ptr < end; res_ptr++) { + for (i = 0 ; i < qwords ; i++) { + if (res_ptr[i] != 0) + goto next_ptr; + } + if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits)) + continue; + + /* Found it, mark it */ + for (i = 0 ; i < qwords ; i++) + res_ptr[i] = ~0UL; + res_ptr[i] |= RESMAP_MASK(bits); + + pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); + pide <<= 3; /* convert to bit address */ + res_ptr += qwords; + ioc->res_bitshift = bits; + goto found_it; +next_ptr: + ; + } + } + +not_found: + prefetch(ioc->res_map); + ioc->res_hint = (unsigned long *) ioc->res_map; + ioc->res_bitshift = 0; + return (pide); + +found_it: + ioc->res_hint = res_ptr; + return (pide); +} + + +/** + * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @size: number of bytes to create a mapping for + * + * Given a size, find consecutive unmarked and then mark those bits in the + * resource bit map. + */ +static int +sba_alloc_range(struct ioc *ioc, size_t size) +{ + unsigned int pages_needed = size >> iovp_shift; +#ifdef PDIR_SEARCH_TIMING + unsigned long itc_start; +#endif + unsigned long pide; + unsigned long flags; + + ASSERT(pages_needed); + ASSERT(0 == (size & ~iovp_mask)); + + spin_lock_irqsave(&ioc->res_lock, flags); + +#ifdef PDIR_SEARCH_TIMING + itc_start = ia64_get_itc(); +#endif + /* + ** "seek and ye shall find"...praying never hurts either... + */ + pide = sba_search_bitmap(ioc, pages_needed); + if (unlikely(pide >= (ioc->res_size << 3))) { + pide = sba_search_bitmap(ioc, pages_needed); + if (unlikely(pide >= (ioc->res_size << 3))) { +#if DELAYED_RESOURCE_CNT > 0 + /* + ** With delayed resource freeing, we can give this one more shot. We're + ** getting close to being in trouble here, so do what we can to make this + ** one count. + */ + spin_lock(&ioc->saved_lock); + if (ioc->saved_cnt > 0) { + struct sba_dma_pair *d; + int cnt = ioc->saved_cnt; + + d = &(ioc->saved[ioc->saved_cnt]); + + while (cnt--) { + sba_mark_invalid(ioc, d->iova, d->size); + sba_free_range(ioc, d->iova, d->size); + d--; + } + ioc->saved_cnt = 0; + READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + } + spin_unlock(&ioc->saved_lock); + + pide = sba_search_bitmap(ioc, pages_needed); + if (unlikely(pide >= (ioc->res_size << 3))) + panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n", + ioc->ioc_hpa); +#else + panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n", + ioc->ioc_hpa); +#endif + } + } + +#ifdef PDIR_SEARCH_TIMING + ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed; + ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1; +#endif + + prefetchw(&(ioc->pdir_base[pide])); + +#ifdef ASSERT_PDIR_SANITY + /* verify the first enable bit is clear */ + if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) { + sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide); + } +#endif + + DBG_RES("%s(%x) %d -> %lx hint %x/%x\n", + __FUNCTION__, size, pages_needed, pide, + (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map), + ioc->res_bitshift ); + + spin_unlock_irqrestore(&ioc->res_lock, flags); + + return (pide); +} + + +/** + * sba_free_range - unmark bits in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @iova: IO virtual address which was previously allocated. + * @size: number of bytes to create a mapping for + * + * clear bits in the ioc's resource map + */ +static SBA_INLINE void +sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size) +{ + unsigned long iovp = SBA_IOVP(ioc, iova); + unsigned int pide = PDIR_INDEX(iovp); + unsigned int ridx = pide >> 3; /* convert bit to byte address */ + unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]); + int bits_not_wanted = size >> iovp_shift; + unsigned long m; + + /* Round up to power-of-two size: see AR2305 note above */ + bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift); + for (; bits_not_wanted > 0 ; res_ptr++) { + + if (unlikely(bits_not_wanted > BITS_PER_LONG)) { + + /* these mappings start 64bit aligned */ + *res_ptr = 0UL; + bits_not_wanted -= BITS_PER_LONG; + pide += BITS_PER_LONG; + + } else { + + /* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */ + m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1)); + bits_not_wanted = 0; + + DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size, + bits_not_wanted, m, pide, res_ptr, *res_ptr); + + ASSERT(m != 0); + ASSERT(bits_not_wanted); + ASSERT((*res_ptr & m) == m); /* verify same bits are set */ + *res_ptr &= ~m; + } + } +} + + +/************************************************************** +* +* "Dynamic DMA Mapping" support (aka "Coherent I/O") +* +***************************************************************/ + +/** + * sba_io_pdir_entry - fill in one IO PDIR entry + * @pdir_ptr: pointer to IO PDIR entry + * @vba: Virtual CPU address of buffer to map + * + * SBA Mapping Routine + * + * Given a virtual address (vba, arg1) sba_io_pdir_entry() + * loads the I/O PDIR entry pointed to by pdir_ptr (arg0). + * Each IO Pdir entry consists of 8 bytes as shown below + * (LSB == bit 0): + * + * 63 40 11 7 0 + * +-+---------------------+----------------------------------+----+--------+ + * |V| U | PPN[39:12] | U | FF | + * +-+---------------------+----------------------------------+----+--------+ + * + * V == Valid Bit + * U == Unused + * PPN == Physical Page Number + * + * The physical address fields are filled with the results of virt_to_phys() + * on the vba. + */ + +#if 1 +#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL) \ + | 0x8000000000000000ULL) +#else +void SBA_INLINE +sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba) +{ + *pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL); +} +#endif + +#ifdef ENABLE_MARK_CLEAN +/** + * Since DMA is i-cache coherent, any (complete) pages that were written via + * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to + * flush them when they get mapped into an executable vm-area. + */ +static void +mark_clean (void *addr, size_t size) +{ + unsigned long pg_addr, end; + + pg_addr = PAGE_ALIGN((unsigned long) addr); + end = (unsigned long) addr + size; + while (pg_addr + PAGE_SIZE <= end) { + struct page *page = virt_to_page((void *)pg_addr); + set_bit(PG_arch_1, &page->flags); + pg_addr += PAGE_SIZE; + } +} +#endif + +/** + * sba_mark_invalid - invalidate one or more IO PDIR entries + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @iova: IO Virtual Address mapped earlier + * @byte_cnt: number of bytes this mapping covers. + * + * Marking the IO PDIR entry(ies) as Invalid and invalidate + * corresponding IO TLB entry. The PCOM (Purge Command Register) + * is to purge stale entries in the IO TLB when unmapping entries. + * + * The PCOM register supports purging of multiple pages, with a minium + * of 1 page and a maximum of 2GB. Hardware requires the address be + * aligned to the size of the range being purged. The size of the range + * must be a power of 2. The "Cool perf optimization" in the + * allocation routine helps keep that true. + */ +static SBA_INLINE void +sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) +{ + u32 iovp = (u32) SBA_IOVP(ioc,iova); + + int off = PDIR_INDEX(iovp); + + /* Must be non-zero and rounded up */ + ASSERT(byte_cnt > 0); + ASSERT(0 == (byte_cnt & ~iovp_mask)); + +#ifdef ASSERT_PDIR_SANITY + /* Assert first pdir entry is set */ + if (!(ioc->pdir_base[off] >> 60)) { + sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp)); + } +#endif + + if (byte_cnt <= iovp_size) + { + ASSERT(off < ioc->pdir_size); + + iovp |= iovp_shift; /* set "size" field for PCOM */ + +#ifndef FULL_VALID_PDIR + /* + ** clear I/O PDIR entry "valid" bit + ** Do NOT clear the rest - save it for debugging. + ** We should only clear bits that have previously + ** been enabled. + */ + ioc->pdir_base[off] &= ~(0x80000000000000FFULL); +#else + /* + ** If we want to maintain the PDIR as valid, put in + ** the spill page so devices prefetching won't + ** cause a hard fail. + */ + ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); +#endif + } else { + u32 t = get_iovp_order(byte_cnt) + iovp_shift; + + iovp |= t; + ASSERT(t <= 31); /* 2GB! Max value of "size" field */ + + do { + /* verify this pdir entry is enabled */ + ASSERT(ioc->pdir_base[off] >> 63); +#ifndef FULL_VALID_PDIR + /* clear I/O Pdir entry "valid" bit first */ + ioc->pdir_base[off] &= ~(0x80000000000000FFULL); +#else + ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); +#endif + off++; + byte_cnt -= iovp_size; + } while (byte_cnt > 0); + } + + WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM); +} + +/** + * sba_map_single - map one buffer and return IOVA for DMA + * @dev: instance of PCI owned by the driver that's asking. + * @addr: driver buffer to map. + * @size: number of bytes to map in driver buffer. + * @dir: R/W or both. + * + * See Documentation/DMA-mapping.txt + */ +dma_addr_t +sba_map_single(struct device *dev, void *addr, size_t size, int dir) +{ + struct ioc *ioc; + dma_addr_t iovp; + dma_addr_t offset; + u64 *pdir_start; + int pide; +#ifdef ASSERT_PDIR_SANITY + unsigned long flags; +#endif +#ifdef ALLOW_IOV_BYPASS + unsigned long pci_addr = virt_to_phys(addr); +#endif + +#ifdef ALLOW_IOV_BYPASS + ASSERT(to_pci_dev(dev)->dma_mask); + /* + ** Check if the PCI device can DMA to ptr... if so, just return ptr + */ + if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) { + /* + ** Device is bit capable of DMA'ing to the buffer... + ** just return the PCI address of ptr + */ + DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n", + to_pci_dev(dev)->dma_mask, pci_addr); + return pci_addr; + } +#endif + ioc = GET_IOC(dev); + ASSERT(ioc); + + prefetch(ioc->res_hint); + + ASSERT(size > 0); + ASSERT(size <= DMA_CHUNK_SIZE); + + /* save offset bits */ + offset = ((dma_addr_t) (long) addr) & ~iovp_mask; + + /* round up to nearest iovp_size */ + size = (size + offset + ~iovp_mask) & iovp_mask; + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + if (sba_check_pdir(ioc,"Check before sba_map_single()")) + panic("Sanity check failed"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + pide = sba_alloc_range(ioc, size); + + iovp = (dma_addr_t) pide << iovp_shift; + + DBG_RUN("%s() 0x%p -> 0x%lx\n", + __FUNCTION__, addr, (long) iovp | offset); + + pdir_start = &(ioc->pdir_base[pide]); + + while (size > 0) { + ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */ + sba_io_pdir_entry(pdir_start, (unsigned long) addr); + + DBG_RUN(" pdir 0x%p %lx\n", pdir_start, *pdir_start); + + addr += iovp_size; + size -= iovp_size; + pdir_start++; + } + /* force pdir update */ + wmb(); + + /* form complete address */ +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + sba_check_pdir(ioc,"Check after sba_map_single()"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + return SBA_IOVA(ioc, iovp, offset); +} + +/** + * sba_unmap_single - unmap one IOVA and free resources + * @dev: instance of PCI owned by the driver that's asking. + * @iova: IOVA of driver buffer previously mapped. + * @size: number of bytes mapped in driver buffer. + * @dir: R/W or both. + * + * See Documentation/DMA-mapping.txt + */ +void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) +{ + struct ioc *ioc; +#if DELAYED_RESOURCE_CNT > 0 + struct sba_dma_pair *d; +#endif + unsigned long flags; + dma_addr_t offset; + + ioc = GET_IOC(dev); + ASSERT(ioc); + +#ifdef ALLOW_IOV_BYPASS + if (likely((iova & ioc->imask) != ioc->ibase)) { + /* + ** Address does not fall w/in IOVA, must be bypassing + */ + DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova); + +#ifdef ENABLE_MARK_CLEAN + if (dir == DMA_FROM_DEVICE) { + mark_clean(phys_to_virt(iova), size); + } +#endif + return; + } +#endif + offset = iova & ~iovp_mask; + + DBG_RUN("%s() iovp 0x%lx/%x\n", + __FUNCTION__, (long) iova, size); + + iova ^= offset; /* clear offset bits */ + size += offset; + size = ROUNDUP(size, iovp_size); + + +#if DELAYED_RESOURCE_CNT > 0 + spin_lock_irqsave(&ioc->saved_lock, flags); + d = &(ioc->saved[ioc->saved_cnt]); + d->iova = iova; + d->size = size; + if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) { + int cnt = ioc->saved_cnt; + spin_lock(&ioc->res_lock); + while (cnt--) { + sba_mark_invalid(ioc, d->iova, d->size); + sba_free_range(ioc, d->iova, d->size); + d--; + } + ioc->saved_cnt = 0; + READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + spin_unlock(&ioc->res_lock); + } + spin_unlock_irqrestore(&ioc->saved_lock, flags); +#else /* DELAYED_RESOURCE_CNT == 0 */ + spin_lock_irqsave(&ioc->res_lock, flags); + sba_mark_invalid(ioc, iova, size); + sba_free_range(ioc, iova, size); + READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif /* DELAYED_RESOURCE_CNT == 0 */ +#ifdef ENABLE_MARK_CLEAN + if (dir == DMA_FROM_DEVICE) { + u32 iovp = (u32) SBA_IOVP(ioc,iova); + int off = PDIR_INDEX(iovp); + void *addr; + + if (size <= iovp_size) { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, size); + } else { + size_t byte_cnt = size; + + do { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, min(byte_cnt, iovp_size)); + off++; + byte_cnt -= iovp_size; + + } while (byte_cnt > 0); + } + } +#endif +} + + +/** + * sba_alloc_coherent - allocate/map shared mem for DMA + * @dev: instance of PCI owned by the driver that's asking. + * @size: number of bytes mapped in driver buffer. + * @dma_handle: IOVA of new buffer. + * + * See Documentation/DMA-mapping.txt + */ +void * +sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags) +{ + struct ioc *ioc; + void *addr; + + ioc = GET_IOC(dev); + ASSERT(ioc); + +#ifdef CONFIG_NUMA + { + struct page *page; + page = alloc_pages_node(ioc->node == MAX_NUMNODES ? + numa_node_id() : ioc->node, flags, + get_order(size)); + + if (unlikely(!page)) + return NULL; + + addr = page_address(page); + } +#else + addr = (void *) __get_free_pages(flags, get_order(size)); +#endif + if (unlikely(!addr)) + return NULL; + + memset(addr, 0, size); + *dma_handle = virt_to_phys(addr); + +#ifdef ALLOW_IOV_BYPASS + ASSERT(dev->coherent_dma_mask); + /* + ** Check if the PCI device can DMA to ptr... if so, just return ptr + */ + if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) { + DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n", + dev->coherent_dma_mask, *dma_handle); + + return addr; + } +#endif + + /* + * If device can't bypass or bypass is disabled, pass the 32bit fake + * device to map single to get an iova mapping. + */ + *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0); + + return addr; +} + + +/** + * sba_free_coherent - free/unmap shared mem for DMA + * @dev: instance of PCI owned by the driver that's asking. + * @size: number of bytes mapped in driver buffer. + * @vaddr: virtual address IOVA of "consistent" buffer. + * @dma_handler: IO virtual address of "consistent" buffer. + * + * See Documentation/DMA-mapping.txt + */ +void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) +{ + sba_unmap_single(dev, dma_handle, size, 0); + free_pages((unsigned long) vaddr, get_order(size)); +} + + +/* +** Since 0 is a valid pdir_base index value, can't use that +** to determine if a value is valid or not. Use a flag to indicate +** the SG list entry contains a valid pdir index. +*/ +#define PIDE_FLAG 0x1UL + +#ifdef DEBUG_LARGE_SG_ENTRIES +int dump_run_sg = 0; +#endif + + +/** + * sba_fill_pdir - write allocated SG entries into IO PDIR + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @startsg: list of IOVA/size pairs + * @nents: number of entries in startsg list + * + * Take preprocessed SG list and write corresponding entries + * in the IO PDIR. + */ + +static SBA_INLINE int +sba_fill_pdir( + struct ioc *ioc, + struct scatterlist *startsg, + int nents) +{ + struct scatterlist *dma_sg = startsg; /* pointer to current DMA */ + int n_mappings = 0; + u64 *pdirp = NULL; + unsigned long dma_offset = 0; + + dma_sg--; + while (nents-- > 0) { + int cnt = startsg->dma_length; + startsg->dma_length = 0; + +#ifdef DEBUG_LARGE_SG_ENTRIES + if (dump_run_sg) + printk(" %2d : %08lx/%05x %p\n", + nents, startsg->dma_address, cnt, + sba_sg_address(startsg)); +#else + DBG_RUN_SG(" %d : %08lx/%05x %p\n", + nents, startsg->dma_address, cnt, + sba_sg_address(startsg)); +#endif + /* + ** Look for the start of a new DMA stream + */ + if (startsg->dma_address & PIDE_FLAG) { + u32 pide = startsg->dma_address & ~PIDE_FLAG; + dma_offset = (unsigned long) pide & ~iovp_mask; + startsg->dma_address = 0; + dma_sg++; + dma_sg->dma_address = pide | ioc->ibase; + pdirp = &(ioc->pdir_base[pide >> iovp_shift]); + n_mappings++; + } + + /* + ** Look for a VCONTIG chunk + */ + if (cnt) { + unsigned long vaddr = (unsigned long) sba_sg_address(startsg); + ASSERT(pdirp); + + /* Since multiple Vcontig blocks could make up + ** one DMA stream, *add* cnt to dma_len. + */ + dma_sg->dma_length += cnt; + cnt += dma_offset; + dma_offset=0; /* only want offset on first chunk */ + cnt = ROUNDUP(cnt, iovp_size); + do { + sba_io_pdir_entry(pdirp, vaddr); + vaddr += iovp_size; + cnt -= iovp_size; + pdirp++; + } while (cnt > 0); + } + startsg++; + } + /* force pdir update */ + wmb(); + +#ifdef DEBUG_LARGE_SG_ENTRIES + dump_run_sg = 0; +#endif + return(n_mappings); +} + + +/* +** Two address ranges are DMA contiguous *iff* "end of prev" and +** "start of next" are both on an IOV page boundary. +** +** (shift left is a quick trick to mask off upper bits) +*/ +#define DMA_CONTIG(__X, __Y) \ + (((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL) + + +/** + * sba_coalesce_chunks - preprocess the SG list + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @startsg: list of IOVA/size pairs + * @nents: number of entries in startsg list + * + * First pass is to walk the SG list and determine where the breaks are + * in the DMA stream. Allocates PDIR entries but does not fill them. + * Returns the number of DMA chunks. + * + * Doing the fill separate from the coalescing/allocation keeps the + * code simpler. Future enhancement could make one pass through + * the sglist do both. + */ +static SBA_INLINE int +sba_coalesce_chunks( struct ioc *ioc, + struct scatterlist *startsg, + int nents) +{ + struct scatterlist *vcontig_sg; /* VCONTIG chunk head */ + unsigned long vcontig_len; /* len of VCONTIG chunk */ + unsigned long vcontig_end; + struct scatterlist *dma_sg; /* next DMA stream head */ + unsigned long dma_offset, dma_len; /* start/len of DMA stream */ + int n_mappings = 0; + + while (nents > 0) { + unsigned long vaddr = (unsigned long) sba_sg_address(startsg); + + /* + ** Prepare for first/next DMA stream + */ + dma_sg = vcontig_sg = startsg; + dma_len = vcontig_len = vcontig_end = startsg->length; + vcontig_end += vaddr; + dma_offset = vaddr & ~iovp_mask; + + /* PARANOID: clear entries */ + startsg->dma_address = startsg->dma_length = 0; + + /* + ** This loop terminates one iteration "early" since + ** it's always looking one "ahead". + */ + while (--nents > 0) { + unsigned long vaddr; /* tmp */ + + startsg++; + + /* PARANOID */ + startsg->dma_address = startsg->dma_length = 0; + + /* catch brokenness in SCSI layer */ + ASSERT(startsg->length <= DMA_CHUNK_SIZE); + + /* + ** First make sure current dma stream won't + ** exceed DMA_CHUNK_SIZE if we coalesce the + ** next entry. + */ + if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask) + > DMA_CHUNK_SIZE) + break; + + /* + ** Then look for virtually contiguous blocks. + ** + ** append the next transaction? + */ + vaddr = (unsigned long) sba_sg_address(startsg); + if (vcontig_end == vaddr) + { + vcontig_len += startsg->length; + vcontig_end += startsg->length; + dma_len += startsg->length; + continue; + } + +#ifdef DEBUG_LARGE_SG_ENTRIES + dump_run_sg = (vcontig_len > iovp_size); +#endif + + /* + ** Not virtually contigous. + ** Terminate prev chunk. + ** Start a new chunk. + ** + ** Once we start a new VCONTIG chunk, dma_offset + ** can't change. And we need the offset from the first + ** chunk - not the last one. Ergo Successive chunks + ** must start on page boundaries and dove tail + ** with it's predecessor. + */ + vcontig_sg->dma_length = vcontig_len; + + vcontig_sg = startsg; + vcontig_len = startsg->length; + + /* + ** 3) do the entries end/start on page boundaries? + ** Don't update vcontig_end until we've checked. + */ + if (DMA_CONTIG(vcontig_end, vaddr)) + { + vcontig_end = vcontig_len + vaddr; + dma_len += vcontig_len; + continue; + } else { + break; + } + } + + /* + ** End of DMA Stream + ** Terminate last VCONTIG block. + ** Allocate space for DMA stream. + */ + vcontig_sg->dma_length = vcontig_len; + dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask; + ASSERT(dma_len <= DMA_CHUNK_SIZE); + dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG + | (sba_alloc_range(ioc, dma_len) << iovp_shift) + | dma_offset); + n_mappings++; + } + + return n_mappings; +} + + +/** + * sba_map_sg - map Scatter/Gather list + * @dev: instance of PCI owned by the driver that's asking. + * @sglist: array of buffer/length pairs + * @nents: number of entries in list + * @dir: R/W or both. + * + * See Documentation/DMA-mapping.txt + */ +int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir) +{ + struct ioc *ioc; + int coalesced, filled = 0; +#ifdef ASSERT_PDIR_SANITY + unsigned long flags; +#endif +#ifdef ALLOW_IOV_BYPASS_SG + struct scatterlist *sg; +#endif + + DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents); + ioc = GET_IOC(dev); + ASSERT(ioc); + +#ifdef ALLOW_IOV_BYPASS_SG + ASSERT(to_pci_dev(dev)->dma_mask); + if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) { + for (sg = sglist ; filled < nents ; filled++, sg++){ + sg->dma_length = sg->length; + sg->dma_address = virt_to_phys(sba_sg_address(sg)); + } + return filled; + } +#endif + /* Fast path single entry scatterlists. */ + if (nents == 1) { + sglist->dma_length = sglist->length; + sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir); + return 1; + } + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + if (sba_check_pdir(ioc,"Check before sba_map_sg()")) + { + sba_dump_sg(ioc, sglist, nents); + panic("Check before sba_map_sg()"); + } + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + prefetch(ioc->res_hint); + + /* + ** First coalesce the chunks and allocate I/O pdir space + ** + ** If this is one DMA stream, we can properly map using the + ** correct virtual address associated with each DMA page. + ** w/o this association, we wouldn't have coherent DMA! + ** Access to the virtual address is what forces a two pass algorithm. + */ + coalesced = sba_coalesce_chunks(ioc, sglist, nents); + + /* + ** Program the I/O Pdir + ** + ** map the virtual addresses to the I/O Pdir + ** o dma_address will contain the pdir index + ** o dma_len will contain the number of bytes to map + ** o address contains the virtual address. + */ + filled = sba_fill_pdir(ioc, sglist, nents); + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + if (sba_check_pdir(ioc,"Check after sba_map_sg()")) + { + sba_dump_sg(ioc, sglist, nents); + panic("Check after sba_map_sg()\n"); + } + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + ASSERT(coalesced == filled); + DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled); + + return filled; +} + + +/** + * sba_unmap_sg - unmap Scatter/Gather list + * @dev: instance of PCI owned by the driver that's asking. + * @sglist: array of buffer/length pairs + * @nents: number of entries in list + * @dir: R/W or both. + * + * See Documentation/DMA-mapping.txt + */ +void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir) +{ +#ifdef ASSERT_PDIR_SANITY + struct ioc *ioc; + unsigned long flags; +#endif + + DBG_RUN_SG("%s() START %d entries, %p,%x\n", + __FUNCTION__, nents, sba_sg_address(sglist), sglist->length); + +#ifdef ASSERT_PDIR_SANITY + ioc = GET_IOC(dev); + ASSERT(ioc); + + spin_lock_irqsave(&ioc->res_lock, flags); + sba_check_pdir(ioc,"Check before sba_unmap_sg()"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + while (nents && sglist->dma_length) { + + sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir); + sglist++; + nents--; + } + + DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__, nents); + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + sba_check_pdir(ioc,"Check after sba_unmap_sg()"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + +} + +/************************************************************** +* +* Initialization and claim +* +***************************************************************/ + +static void __init +ioc_iova_init(struct ioc *ioc) +{ + int tcnfg; + int agp_found = 0; + struct pci_dev *device = NULL; +#ifdef FULL_VALID_PDIR + unsigned long index; +#endif + + /* + ** Firmware programs the base and size of a "safe IOVA space" + ** (one that doesn't overlap memory or LMMIO space) in the + ** IBASE and IMASK registers. + */ + ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL; + ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL; + + ioc->iov_size = ~ioc->imask + 1; + + DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n", + __FUNCTION__, ioc->ioc_hpa, ioc->ibase, ioc->imask, + ioc->iov_size >> 20); + + switch (iovp_size) { + case 4*1024: tcnfg = 0; break; + case 8*1024: tcnfg = 1; break; + case 16*1024: tcnfg = 2; break; + case 64*1024: tcnfg = 3; break; + default: + panic(PFX "Unsupported IOTLB page size %ldK", + iovp_size >> 10); + break; + } + WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG); + + ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE; + ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL, + get_order(ioc->pdir_size)); + if (!ioc->pdir_base) + panic(PFX "Couldn't allocate I/O Page Table\n"); + + memset(ioc->pdir_base, 0, ioc->pdir_size); + + DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __FUNCTION__, + iovp_size >> 10, ioc->pdir_base, ioc->pdir_size); + + ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base); + WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE); + + /* + ** If an AGP device is present, only use half of the IOV space + ** for PCI DMA. Unfortunately we can't know ahead of time + ** whether GART support will actually be used, for now we + ** can just key on an AGP device found in the system. + ** We program the next pdir index after we stop w/ a key for + ** the GART code to handshake on. + */ + for_each_pci_dev(device) + agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP); + + if (agp_found && reserve_sba_gart) { + printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n", + ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2); + ioc->pdir_size /= 2; + ((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE; + } +#ifdef FULL_VALID_PDIR + /* + ** Check to see if the spill page has been allocated, we don't need more than + ** one across multiple SBAs. + */ + if (!prefetch_spill_page) { + char *spill_poison = "SBAIOMMU POISON"; + int poison_size = 16; + void *poison_addr, *addr; + + addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size)); + if (!addr) + panic(PFX "Couldn't allocate PDIR spill page\n"); + + poison_addr = addr; + for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size) + memcpy(poison_addr, spill_poison, poison_size); + + prefetch_spill_page = virt_to_phys(addr); + + DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __FUNCTION__, prefetch_spill_page); + } + /* + ** Set all the PDIR entries valid w/ the spill page as the target + */ + for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++) + ((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page); +#endif + + /* Clear I/O TLB of any possible entries */ + WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM); + READ_REG(ioc->ioc_hpa + IOC_PCOM); + + /* Enable IOVA translation */ + WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE); + READ_REG(ioc->ioc_hpa + IOC_IBASE); +} + +static void __init +ioc_resource_init(struct ioc *ioc) +{ + spin_lock_init(&ioc->res_lock); +#if DELAYED_RESOURCE_CNT > 0 + spin_lock_init(&ioc->saved_lock); +#endif + + /* resource map size dictated by pdir_size */ + ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */ + ioc->res_size >>= 3; /* convert bit count to byte count */ + DBG_INIT("%s() res_size 0x%x\n", __FUNCTION__, ioc->res_size); + + ioc->res_map = (char *) __get_free_pages(GFP_KERNEL, + get_order(ioc->res_size)); + if (!ioc->res_map) + panic(PFX "Couldn't allocate resource map\n"); + + memset(ioc->res_map, 0, ioc->res_size); + /* next available IOVP - circular search */ + ioc->res_hint = (unsigned long *) ioc->res_map; + +#ifdef ASSERT_PDIR_SANITY + /* Mark first bit busy - ie no IOVA 0 */ + ioc->res_map[0] = 0x1; + ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE; +#endif +#ifdef FULL_VALID_PDIR + /* Mark the last resource used so we don't prefetch beyond IOVA space */ + ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */ + ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF + | prefetch_spill_page); +#endif + + DBG_INIT("%s() res_map %x %p\n", __FUNCTION__, + ioc->res_size, (void *) ioc->res_map); +} + +static void __init +ioc_sac_init(struct ioc *ioc) +{ + struct pci_dev *sac = NULL; + struct pci_controller *controller = NULL; + + /* + * pci_alloc_coherent() must return a DMA address which is + * SAC (single address cycle) addressable, so allocate a + * pseudo-device to enforce that. + */ + sac = kmalloc(sizeof(*sac), GFP_KERNEL); + if (!sac) + panic(PFX "Couldn't allocate struct pci_dev"); + memset(sac, 0, sizeof(*sac)); + + controller = kmalloc(sizeof(*controller), GFP_KERNEL); + if (!controller) + panic(PFX "Couldn't allocate struct pci_controller"); + memset(controller, 0, sizeof(*controller)); + + controller->iommu = ioc; + sac->sysdata = controller; + sac->dma_mask = 0xFFFFFFFFUL; +#ifdef CONFIG_PCI + sac->dev.bus = &pci_bus_type; +#endif + ioc->sac_only_dev = sac; +} + +static void __init +ioc_zx1_init(struct ioc *ioc) +{ + unsigned long rope_config; + unsigned int i; + + if (ioc->rev < 0x20) + panic(PFX "IOC 2.0 or later required for IOMMU support\n"); + + /* 38 bit memory controller + extra bit for range displaced by MMIO */ + ioc->dma_mask = (0x1UL << 39) - 1; + + /* + ** Clear ROPE(N)_CONFIG AO bit. + ** Disables "NT Ordering" (~= !"Relaxed Ordering") + ** Overrides bit 1 in DMA Hint Sets. + ** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701. + */ + for (i=0; i<(8*8); i+=8) { + rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i); + rope_config &= ~IOC_ROPE_AO; + WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i); + } +} + +typedef void (initfunc)(struct ioc *); + +struct ioc_iommu { + u32 func_id; + char *name; + initfunc *init; +}; + +static struct ioc_iommu ioc_iommu_info[] __initdata = { + { ZX1_IOC_ID, "zx1", ioc_zx1_init }, + { ZX2_IOC_ID, "zx2", NULL }, + { SX1000_IOC_ID, "sx1000", NULL }, +}; + +static struct ioc * __init +ioc_init(u64 hpa, void *handle) +{ + struct ioc *ioc; + struct ioc_iommu *info; + + ioc = kmalloc(sizeof(*ioc), GFP_KERNEL); + if (!ioc) + return NULL; + + memset(ioc, 0, sizeof(*ioc)); + + ioc->next = ioc_list; + ioc_list = ioc; + + ioc->handle = handle; + ioc->ioc_hpa = ioremap(hpa, 0x1000); + + ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID); + ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL; + ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL; /* conservative */ + + for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) { + if (ioc->func_id == info->func_id) { + ioc->name = info->name; + if (info->init) + (info->init)(ioc); + } + } + + iovp_size = (1 << iovp_shift); + iovp_mask = ~(iovp_size - 1); + + DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __FUNCTION__, + PAGE_SIZE >> 10, iovp_size >> 10); + + if (!ioc->name) { + ioc->name = kmalloc(24, GFP_KERNEL); + if (ioc->name) + sprintf((char *) ioc->name, "Unknown (%04x:%04x)", + ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF); + else + ioc->name = "Unknown"; + } + + ioc_iova_init(ioc); + ioc_resource_init(ioc); + ioc_sac_init(ioc); + + if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask) + ia64_max_iommu_merge_mask = ~iovp_mask; + + printk(KERN_INFO PFX + "%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n", + ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF, + hpa, ioc->iov_size >> 20, ioc->ibase); + + return ioc; +} + + + +/************************************************************************** +** +** SBA initialization code (HW and SW) +** +** o identify SBA chip itself +** o FIXME: initialize DMA hints for reasonable defaults +** +**************************************************************************/ + +#ifdef CONFIG_PROC_FS +static void * +ioc_start(struct seq_file *s, loff_t *pos) +{ + struct ioc *ioc; + loff_t n = *pos; + + for (ioc = ioc_list; ioc; ioc = ioc->next) + if (!n--) + return ioc; + + return NULL; +} + +static void * +ioc_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct ioc *ioc = v; + + ++*pos; + return ioc->next; +} + +static void +ioc_stop(struct seq_file *s, void *v) +{ +} + +static int +ioc_show(struct seq_file *s, void *v) +{ + struct ioc *ioc = v; + unsigned long *res_ptr = (unsigned long *)ioc->res_map; + int i, used = 0; + + seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n", + ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF)); +#ifdef CONFIG_NUMA + if (ioc->node != MAX_NUMNODES) + seq_printf(s, "NUMA node : %d\n", ioc->node); +#endif + seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024)); + seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024); + + for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr) + used += hweight64(*res_ptr); + + seq_printf(s, "PDIR size : %d entries\n", ioc->pdir_size >> 3); + seq_printf(s, "PDIR used : %d entries\n", used); + +#ifdef PDIR_SEARCH_TIMING + { + unsigned long i = 0, avg = 0, min, max; + min = max = ioc->avg_search[0]; + for (i = 0; i < SBA_SEARCH_SAMPLE; i++) { + avg += ioc->avg_search[i]; + if (ioc->avg_search[i] > max) max = ioc->avg_search[i]; + if (ioc->avg_search[i] < min) min = ioc->avg_search[i]; + } + avg /= SBA_SEARCH_SAMPLE; + seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n", + min, avg, max); + } +#endif +#ifndef ALLOW_IOV_BYPASS + seq_printf(s, "IOVA bypass disabled\n"); +#endif + return 0; +} + +static struct seq_operations ioc_seq_ops = { + .start = ioc_start, + .next = ioc_next, + .stop = ioc_stop, + .show = ioc_show +}; + +static int +ioc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &ioc_seq_ops); +} + +static struct file_operations ioc_fops = { + .open = ioc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static void __init +ioc_proc_init(void) +{ + struct proc_dir_entry *dir, *entry; + + dir = proc_mkdir("bus/mckinley", NULL); + if (!dir) + return; + + entry = create_proc_entry(ioc_list->name, 0, dir); + if (entry) + entry->proc_fops = &ioc_fops; +} +#endif + +static void +sba_connect_bus(struct pci_bus *bus) +{ + acpi_handle handle, parent; + acpi_status status; + struct ioc *ioc; + + if (!PCI_CONTROLLER(bus)) + panic(PFX "no sysdata on bus %d!\n", bus->number); + + if (PCI_CONTROLLER(bus)->iommu) + return; + + handle = PCI_CONTROLLER(bus)->acpi_handle; + if (!handle) + return; + + /* + * The IOC scope encloses PCI root bridges in the ACPI + * namespace, so work our way out until we find an IOC we + * claimed previously. + */ + do { + for (ioc = ioc_list; ioc; ioc = ioc->next) + if (ioc->handle == handle) { + PCI_CONTROLLER(bus)->iommu = ioc; + return; + } + + status = acpi_get_parent(handle, &parent); + handle = parent; + } while (ACPI_SUCCESS(status)); + + printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number); +} + +#ifdef CONFIG_NUMA +static void __init +sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *obj; + acpi_handle phandle; + unsigned int node; + + ioc->node = MAX_NUMNODES; + + /* + * Check for a _PXM on this node first. We don't typically see + * one here, so we'll end up getting it from the parent. + */ + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) { + if (ACPI_FAILURE(acpi_get_parent(handle, &phandle))) + return; + + /* Reset the acpi buffer */ + buffer.length = ACPI_ALLOCATE_BUFFER; + buffer.pointer = NULL; + + if (ACPI_FAILURE(acpi_evaluate_object(phandle, "_PXM", NULL, + &buffer))) + return; + } + + if (!buffer.length || !buffer.pointer) + return; + + obj = buffer.pointer; + + if (obj->type != ACPI_TYPE_INTEGER || + obj->integer.value >= MAX_PXM_DOMAINS) { + acpi_os_free(buffer.pointer); + return; + } + + node = pxm_to_nid_map[obj->integer.value]; + acpi_os_free(buffer.pointer); + + if (node >= MAX_NUMNODES || !node_online(node)) + return; + + ioc->node = node; + return; +} +#else +#define sba_map_ioc_to_node(ioc, handle) +#endif + +static int __init +acpi_sba_ioc_add(struct acpi_device *device) +{ + struct ioc *ioc; + acpi_status status; + u64 hpa, length; + struct acpi_buffer buffer; + struct acpi_device_info *dev_info; + + status = hp_acpi_csr_space(device->handle, &hpa, &length); + if (ACPI_FAILURE(status)) + return 1; + + buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; + status = acpi_get_object_info(device->handle, &buffer); + if (ACPI_FAILURE(status)) + return 1; + dev_info = buffer.pointer; + + /* + * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI + * root bridges, and its CSR space includes the IOC function. + */ + if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) { + hpa += ZX1_IOC_OFFSET; + /* zx1 based systems default to kernel page size iommu pages */ + if (!iovp_shift) + iovp_shift = min(PAGE_SHIFT, 16); + } + ACPI_MEM_FREE(dev_info); + + /* + * default anything not caught above or specified on cmdline to 4k + * iommu page size + */ + if (!iovp_shift) + iovp_shift = 12; + + ioc = ioc_init(hpa, device->handle); + if (!ioc) + return 1; + + /* setup NUMA node association */ + sba_map_ioc_to_node(ioc, device->handle); + return 0; +} + +static struct acpi_driver acpi_sba_ioc_driver = { + .name = "IOC IOMMU Driver", + .ids = "HWP0001,HWP0004", + .ops = { + .add = acpi_sba_ioc_add, + }, +}; + +static int __init +sba_init(void) +{ + acpi_bus_register_driver(&acpi_sba_ioc_driver); + if (!ioc_list) + return 0; + +#ifdef CONFIG_PCI + { + struct pci_bus *b = NULL; + while ((b = pci_find_next_bus(b)) != NULL) + sba_connect_bus(b); + } +#endif + +#ifdef CONFIG_PROC_FS + ioc_proc_init(); +#endif + return 0; +} + +subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */ + +extern void dig_setup(char**); +/* + * MAX_DMA_ADDRESS needs to be setup prior to paging_init to do any good, + * so we use the platform_setup hook to fix it up. + */ +void __init +sba_setup(char **cmdline_p) +{ + MAX_DMA_ADDRESS = ~0UL; + dig_setup(cmdline_p); +} + +static int __init +nosbagart(char *str) +{ + reserve_sba_gart = 0; + return 1; +} + +int +sba_dma_supported (struct device *dev, u64 mask) +{ + /* make sure it's at least 32bit capable */ + return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL); +} + +int +sba_dma_mapping_error (dma_addr_t dma_addr) +{ + return 0; +} + +__setup("nosbagart", nosbagart); + +static int __init +sba_page_override(char *str) +{ + unsigned long page_size; + + page_size = memparse(str, &str); + switch (page_size) { + case 4096: + case 8192: + case 16384: + case 65536: + iovp_shift = ffs(page_size) - 1; + break; + default: + printk("%s: unknown/unsupported iommu page size %ld\n", + __FUNCTION__, page_size); + } + + return 1; +} + +__setup("sbapagesize=",sba_page_override); + +EXPORT_SYMBOL(sba_dma_mapping_error); +EXPORT_SYMBOL(sba_map_single); +EXPORT_SYMBOL(sba_unmap_single); +EXPORT_SYMBOL(sba_map_sg); +EXPORT_SYMBOL(sba_unmap_sg); +EXPORT_SYMBOL(sba_dma_supported); +EXPORT_SYMBOL(sba_alloc_coherent); +EXPORT_SYMBOL(sba_free_coherent); diff --git a/arch/ia64/hp/sim/Kconfig b/arch/ia64/hp/sim/Kconfig new file mode 100644 index 000000000000..18ccb1266e18 --- /dev/null +++ b/arch/ia64/hp/sim/Kconfig @@ -0,0 +1,20 @@ + +menu "HP Simulator drivers" + depends on IA64_HP_SIM || IA64_GENERIC + +config HP_SIMETH + bool "Simulated Ethernet " + +config HP_SIMSERIAL + bool "Simulated serial driver support" + +config HP_SIMSERIAL_CONSOLE + bool "Console for HP simulator" + depends on HP_SIMSERIAL + +config HP_SIMSCSI + tristate "Simulated SCSI disk" + depends on SCSI + +endmenu + diff --git a/arch/ia64/hp/sim/Makefile b/arch/ia64/hp/sim/Makefile new file mode 100644 index 000000000000..d10da47931d7 --- /dev/null +++ b/arch/ia64/hp/sim/Makefile @@ -0,0 +1,16 @@ +# +# ia64/platform/hp/sim/Makefile +# +# Copyright (C) 2002 Hewlett-Packard Co. +# David Mosberger-Tang <davidm@hpl.hp.com> +# Copyright (C) 1999 Silicon Graphics, Inc. +# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) +# + +obj-y := hpsim_irq.o hpsim_setup.o hpsim.o +obj-$(CONFIG_IA64_GENERIC) += hpsim_machvec.o + +obj-$(CONFIG_HP_SIMETH) += simeth.o +obj-$(CONFIG_HP_SIMSERIAL) += simserial.o +obj-$(CONFIG_HP_SIMSERIAL_CONSOLE) += hpsim_console.o +obj-$(CONFIG_HP_SIMSCSI) += simscsi.o diff --git a/arch/ia64/hp/sim/boot/Makefile b/arch/ia64/hp/sim/boot/Makefile new file mode 100644 index 000000000000..df6e9968c845 --- /dev/null +++ b/arch/ia64/hp/sim/boot/Makefile @@ -0,0 +1,37 @@ +# +# ia64/boot/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1998, 2003 by David Mosberger-Tang <davidm@hpl.hp.com> +# + +targets-$(CONFIG_IA64_HP_SIM) += bootloader +targets := vmlinux.bin vmlinux.gz $(targets-y) + +quiet_cmd_cptotop = LN $@ + cmd_cptotop = ln -f $< $@ + +vmlinux.gz: $(obj)/vmlinux.gz $(addprefix $(obj)/,$(targets-y)) + $(call cmd,cptotop) + @echo ' Kernel: $@ is ready' + +boot: bootloader + +bootloader: $(obj)/bootloader + $(call cmd,cptotop) + +$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + + +LDFLAGS_bootloader = -static -T + +$(obj)/bootloader: $(src)/bootloader.lds $(obj)/bootloader.o $(obj)/boot_head.o $(obj)/fw-emu.o \ + lib/lib.a arch/ia64/lib/lib.a FORCE + $(call if_changed,ld) diff --git a/arch/ia64/hp/sim/boot/boot_head.S b/arch/ia64/hp/sim/boot/boot_head.S new file mode 100644 index 000000000000..9364199e5632 --- /dev/null +++ b/arch/ia64/hp/sim/boot/boot_head.S @@ -0,0 +1,144 @@ +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <asm/asmmacro.h> + + .bss + .align 16 +stack_mem: + .skip 16834 + + .text + +/* This needs to be defined because lib/string.c:strlcat() calls it in case of error... */ +GLOBAL_ENTRY(printk) + break 0 +END(printk) + +GLOBAL_ENTRY(_start) + .prologue + .save rp, r0 + .body + movl gp = __gp + movl sp = stack_mem + bsw.1 + br.call.sptk.many rp=start_bootloader +END(_start) + +/* + * Set a break point on this function so that symbols are available to set breakpoints in + * the kernel being debugged. + */ +GLOBAL_ENTRY(debug_break) + br.ret.sptk.many b0 +END(debug_break) + +GLOBAL_ENTRY(ssc) + .regstk 5,0,0,0 + mov r15=in4 + break 0x80001 + br.ret.sptk.many b0 +END(ssc) + +GLOBAL_ENTRY(jmp_to_kernel) + .regstk 2,0,0,0 + mov r28=in0 + mov b7=in1 + br.sptk.few b7 +END(jmp_to_kernel) + + +GLOBAL_ENTRY(pal_emulator_static) + mov r8=-1 + mov r9=256 + ;; + cmp.gtu p6,p7=r9,r28 /* r28 <= 255? */ +(p6) br.cond.sptk.few static + ;; + mov r9=512 + ;; + cmp.gtu p6,p7=r9,r28 +(p6) br.cond.sptk.few stacked + ;; +static: cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */ +(p7) br.cond.sptk.few 1f + ;; + mov r8=0 /* status = 0 */ + movl r9=0x100000000 /* tc.base */ + movl r10=0x0000000200000003 /* count[0], count[1] */ + movl r11=0x1000000000002000 /* stride[0], stride[1] */ + br.cond.sptk.few rp +1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */ +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x100000064 /* proc_ratio (1/100) */ + movl r10=0x100000100 /* bus_ratio<<32 (1/256) */ + movl r11=0x100000064 /* itc_ratio<<32 (1/100) */ + ;; +1: cmp.eq p6,p7=19,r28 /* PAL_RSE_INFO */ +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + mov r9=96 /* num phys stacked */ + mov r10=0 /* hints */ + mov r11=0 + br.cond.sptk.few rp +1: cmp.eq p6,p7=1,r28 /* PAL_CACHE_FLUSH */ +(p7) br.cond.sptk.few 1f + mov r9=ar.lc + movl r8=524288 /* flush 512k million cache lines (16MB) */ + ;; + mov ar.lc=r8 + movl r8=0xe000000000000000 + ;; +.loop: fc r8 + add r8=32,r8 + br.cloop.sptk.few .loop + sync.i + ;; + srlz.i + ;; + mov ar.lc=r9 + mov r8=r0 + ;; +1: cmp.eq p6,p7=15,r28 /* PAL_PERF_MON_INFO */ +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x08122f04 /* generic=4 width=47 retired=8 cycles=18 */ + mov r10=0 /* reserved */ + mov r11=0 /* reserved */ + mov r16=0xffff /* implemented PMC */ + mov r17=0x3ffff /* implemented PMD */ + add r18=8,r29 /* second index */ + ;; + st8 [r29]=r16,16 /* store implemented PMC */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r17,16 /* store implemented PMD */ + st8 [r18]=r0,16 /* clear remaining bits */ + mov r16=0xf0 /* cycles count capable PMC */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + mov r17=0xf0 /* retired bundles capable PMC */ + ;; + st8 [r29]=r16,16 /* store cycles capable */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r17,16 /* store retired bundle capable */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; +1: br.cond.sptk.few rp +stacked: + br.ret.sptk.few rp +END(pal_emulator_static) diff --git a/arch/ia64/hp/sim/boot/bootloader.c b/arch/ia64/hp/sim/boot/bootloader.c new file mode 100644 index 000000000000..51a7b7b4dd0e --- /dev/null +++ b/arch/ia64/hp/sim/boot/bootloader.c @@ -0,0 +1,176 @@ +/* + * arch/ia64/hp/sim/boot/bootloader.c + * + * Loads an ELF kernel. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * + * 01/07/99 S.Eranian modified to pass command line arguments to kernel + */ +struct task_struct; /* forward declaration for elf.h */ + +#include <linux/config.h> +#include <linux/elf.h> +#include <linux/init.h> +#include <linux/kernel.h> + +#include <asm/elf.h> +#include <asm/intrinsics.h> +#include <asm/pal.h> +#include <asm/pgtable.h> +#include <asm/sal.h> +#include <asm/system.h> + +#include "ssc.h" + +struct disk_req { + unsigned long addr; + unsigned len; +}; + +struct disk_stat { + int fd; + unsigned count; +}; + +extern void jmp_to_kernel (unsigned long bp, unsigned long e_entry); +extern struct ia64_boot_param *sys_fw_init (const char *args, int arglen); +extern void debug_break (void); + +static void +cons_write (const char *buf) +{ + unsigned long ch; + + while ((ch = *buf++) != '\0') { + ssc(ch, 0, 0, 0, SSC_PUTCHAR); + if (ch == '\n') + ssc('\r', 0, 0, 0, SSC_PUTCHAR); + } +} + +#define MAX_ARGS 32 + +void +start_bootloader (void) +{ + static char mem[4096]; + static char buffer[1024]; + unsigned long off; + int fd, i; + struct disk_req req; + struct disk_stat stat; + struct elfhdr *elf; + struct elf_phdr *elf_phdr; /* program header */ + unsigned long e_entry, e_phoff, e_phnum; + register struct ia64_boot_param *bp; + char *kpath, *args; + long arglen = 0; + + ssc(0, 0, 0, 0, SSC_CONSOLE_INIT); + + /* + * S.Eranian: extract the commandline argument from the simulator + * + * The expected format is as follows: + * + * kernelname args... + * + * Both are optional but you can't have the second one without the first. + */ + arglen = ssc((long) buffer, 0, 0, 0, SSC_GET_ARGS); + + kpath = "vmlinux"; + args = buffer; + if (arglen > 0) { + kpath = buffer; + while (*args != ' ' && *args != '\0') + ++args, --arglen; + if (*args == ' ') + *args++ = '\0', --arglen; + } + + if (arglen <= 0) { + args = ""; + arglen = 1; + } + + fd = ssc((long) kpath, 1, 0, 0, SSC_OPEN); + + if (fd < 0) { + cons_write(kpath); + cons_write(": file not found, reboot now\n"); + for(;;); + } + stat.fd = fd; + off = 0; + + req.len = sizeof(mem); + req.addr = (long) mem; + ssc(fd, 1, (long) &req, off, SSC_READ); + ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION); + + elf = (struct elfhdr *) mem; + if (elf->e_ident[0] == 0x7f && strncmp(elf->e_ident + 1, "ELF", 3) != 0) { + cons_write("not an ELF file\n"); + return; + } + if (elf->e_type != ET_EXEC) { + cons_write("not an ELF executable\n"); + return; + } + if (!elf_check_arch(elf)) { + cons_write("kernel not for this processor\n"); + return; + } + + e_entry = elf->e_entry; + e_phnum = elf->e_phnum; + e_phoff = elf->e_phoff; + + cons_write("loading "); + cons_write(kpath); + cons_write("...\n"); + + for (i = 0; i < e_phnum; ++i) { + req.len = sizeof(*elf_phdr); + req.addr = (long) mem; + ssc(fd, 1, (long) &req, e_phoff, SSC_READ); + ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION); + if (stat.count != sizeof(*elf_phdr)) { + cons_write("failed to read phdr\n"); + return; + } + e_phoff += sizeof(*elf_phdr); + + elf_phdr = (struct elf_phdr *) mem; + + if (elf_phdr->p_type != PT_LOAD) + continue; + + req.len = elf_phdr->p_filesz; + req.addr = __pa(elf_phdr->p_paddr); + ssc(fd, 1, (long) &req, elf_phdr->p_offset, SSC_READ); + ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION); + memset((char *)__pa(elf_phdr->p_paddr) + elf_phdr->p_filesz, 0, + elf_phdr->p_memsz - elf_phdr->p_filesz); + } + ssc(fd, 0, 0, 0, SSC_CLOSE); + + cons_write("starting kernel...\n"); + + /* fake an I/O base address: */ + ia64_setreg(_IA64_REG_AR_KR0, 0xffffc000000UL); + + bp = sys_fw_init(args, arglen); + + ssc(0, (long) kpath, 0, 0, SSC_LOAD_SYMBOLS); + + debug_break(); + jmp_to_kernel((unsigned long) bp, e_entry); + + cons_write("kernel returned!\n"); + ssc(-1, 0, 0, 0, SSC_EXIT); +} diff --git a/arch/ia64/hp/sim/boot/bootloader.lds b/arch/ia64/hp/sim/boot/bootloader.lds new file mode 100644 index 000000000000..69ae58531033 --- /dev/null +++ b/arch/ia64/hp/sim/boot/bootloader.lds @@ -0,0 +1,65 @@ +OUTPUT_FORMAT("elf64-ia64-little") +OUTPUT_ARCH(ia64) +ENTRY(_start) +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + . = 0x100000; + + _text = .; + .text : { *(__ivt_section) *(.text) } + _etext = .; + + /* Global data */ + _data = .; + .rodata : { *(.rodata) *(.rodata.*) } + .data : { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS } + __gp = ALIGN (8) + 0x200000; + .got : { *(.got.plt) *(.got) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : { *(.sdata) } + _edata = .; + + _bss = .; + .sbss : { *(.sbss) *(.scommon) } + .bss : { *(.bss) *(COMMON) } + . = ALIGN(64 / 8); + _end = . ; + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* These must appear regardless of . */ +} diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c new file mode 100644 index 000000000000..5c46928e3dc6 --- /dev/null +++ b/arch/ia64/hp/sim/boot/fw-emu.c @@ -0,0 +1,398 @@ +/* + * PAL & SAL emulation. + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/config.h> + +#ifdef CONFIG_PCI +# include <linux/pci.h> +#endif + +#include <linux/efi.h> +#include <asm/io.h> +#include <asm/pal.h> +#include <asm/sal.h> + +#include "ssc.h" + +#define MB (1024*1024UL) + +#define SIMPLE_MEMMAP 1 + +#if SIMPLE_MEMMAP +# define NUM_MEM_DESCS 4 +#else +# define NUM_MEM_DESCS 16 +#endif + +static char fw_mem[( sizeof(struct ia64_boot_param) + + sizeof(efi_system_table_t) + + sizeof(efi_runtime_services_t) + + 1*sizeof(efi_config_table_t) + + sizeof(struct ia64_sal_systab) + + sizeof(struct ia64_sal_desc_entry_point) + + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t)) + + 1024)] __attribute__ ((aligned (8))); + +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + +/* Compute the `struct tm' representation of *T, + offset OFFSET seconds east of UTC, + and store year, yday, mon, mday, wday, hour, min, sec into *TP. + Return nonzero if successful. */ +int +offtime (unsigned long t, efi_time_t *tp) +{ + const unsigned short int __mon_yday[2][13] = + { + /* Normal years. */ + { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + /* Leap years. */ + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } + }; + long int days, rem, y; + const unsigned short int *ip; + + days = t / SECS_PER_DAY; + rem = t % SECS_PER_DAY; + while (rem < 0) { + rem += SECS_PER_DAY; + --days; + } + while (rem >= SECS_PER_DAY) { + rem -= SECS_PER_DAY; + ++days; + } + tp->hour = rem / SECS_PER_HOUR; + rem %= SECS_PER_HOUR; + tp->minute = rem / 60; + tp->second = rem % 60; + /* January 1, 1970 was a Thursday. */ + y = 1970; + +# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) +# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) +# define __isleap(year) \ + ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) + + while (days < 0 || days >= (__isleap (y) ? 366 : 365)) { + /* Guess a corrected year, assuming 365 days per year. */ + long int yg = y + days / 365 - (days % 365 < 0); + + /* Adjust DAYS and Y to match the guessed year. */ + days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1) + - LEAPS_THRU_END_OF (y - 1)); + y = yg; + } + tp->year = y; + ip = __mon_yday[__isleap(y)]; + for (y = 11; days < (long int) ip[y]; --y) + continue; + days -= ip[y]; + tp->month = y + 1; + tp->day = days + 1; + return 1; +} + +extern void pal_emulator_static (void); + +/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */ + +#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3) + +#define REG_OFFSET(addr) (0x00000000000000FF & (addr)) +#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr)) +#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr)) + +static efi_status_t +fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ +#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) + struct { + int tv_sec; /* must be 32bits to work */ + int tv_usec; + } tv32bits; + + ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD); + + memset(tm, 0, sizeof(*tm)); + offtime(tv32bits.tv_sec, tm); + + if (tc) + memset(tc, 0, sizeof(*tc)); +#else +# error Not implemented yet... +#endif + return EFI_SUCCESS; +} + +static void +efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data) +{ +#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) + ssc(status, 0, 0, 0, SSC_EXIT); +#else +# error Not implemented yet... +#endif +} + +static efi_status_t +efi_unimplemented (void) +{ + return EFI_UNSUPPORTED; +} + +static struct sal_ret_values +sal_emulator (long index, unsigned long in1, unsigned long in2, + unsigned long in3, unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + long r9 = 0; + long r10 = 0; + long r11 = 0; + long status; + + /* + * Don't do a "switch" here since that gives us code that + * isn't self-relocatable. + */ + status = 0; + if (index == SAL_FREQ_BASE) { + switch (in1) { + case SAL_FREQ_BASE_PLATFORM: + r9 = 200000000; + break; + + case SAL_FREQ_BASE_INTERVAL_TIMER: + /* + * Is this supposed to be the cr.itc frequency + * or something platform specific? The SAL + * doc ain't exactly clear on this... + */ + r9 = 700000000; + break; + + case SAL_FREQ_BASE_REALTIME_CLOCK: + r9 = 1; + break; + + default: + status = -1; + break; + } + } else if (index == SAL_SET_VECTORS) { + ; + } else if (index == SAL_GET_STATE_INFO) { + ; + } else if (index == SAL_GET_STATE_INFO_SIZE) { + ; + } else if (index == SAL_CLEAR_STATE_INFO) { + ; + } else if (index == SAL_MC_RENDEZ) { + ; + } else if (index == SAL_MC_SET_PARAMS) { + ; + } else if (index == SAL_CACHE_FLUSH) { + ; + } else if (index == SAL_CACHE_INIT) { + ; +#ifdef CONFIG_PCI + } else if (index == SAL_PCI_CONFIG_READ) { + /* + * in1 contains the PCI configuration address and in2 + * the size of the read. The value that is read is + * returned via the general register r9. + */ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Reading byte */ + r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Reading word */ + r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Reading dword */ + r9 = inl(0xCFC); + status = PCIBIOS_SUCCESSFUL; + } else if (index == SAL_PCI_CONFIG_WRITE) { + /* + * in1 contains the PCI configuration address, in2 the + * size of the write, and in3 the actual value to be + * written out. + */ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Writing byte */ + outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Writing word */ + outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Writing dword */ + outl(in3, 0xCFC); + status = PCIBIOS_SUCCESSFUL; +#endif /* CONFIG_PCI */ + } else if (index == SAL_UPDATE_PAL) { + ; + } else { + status = -1; + } + return ((struct sal_ret_values) {status, r9, r10, r11}); +} + + +/* + * This is here to work around a bug in egcs-1.1.1b that causes the + * compiler to crash (seems like a bug in the new alias analysis code. + */ +void * +id (long addr) +{ + return (void *) addr; +} + +struct ia64_boot_param * +sys_fw_init (const char *args, int arglen) +{ + efi_system_table_t *efi_systab; + efi_runtime_services_t *efi_runtime; + efi_config_table_t *efi_tables; + struct ia64_sal_systab *sal_systab; + efi_memory_desc_t *efi_memmap, *md; + unsigned long *pal_desc, *sal_desc; + struct ia64_sal_desc_entry_point *sal_ed; + struct ia64_boot_param *bp; + unsigned char checksum = 0; + char *cp, *cmd_line; + int i = 0; +# define MAKE_MD(typ, attr, start, end) \ + do { \ + md = efi_memmap + i++; \ + md->type = typ; \ + md->pad = 0; \ + md->phys_addr = start; \ + md->virt_addr = 0; \ + md->num_pages = (end - start) >> 12; \ + md->attribute = attr; \ + } while (0) + + memset(fw_mem, 0, sizeof(fw_mem)); + + pal_desc = (unsigned long *) &pal_emulator_static; + sal_desc = (unsigned long *) &sal_emulator; + + cp = fw_mem; + efi_systab = (void *) cp; cp += sizeof(*efi_systab); + efi_runtime = (void *) cp; cp += sizeof(*efi_runtime); + efi_tables = (void *) cp; cp += sizeof(*efi_tables); + sal_systab = (void *) cp; cp += sizeof(*sal_systab); + sal_ed = (void *) cp; cp += sizeof(*sal_ed); + efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap); + bp = (void *) cp; cp += sizeof(*bp); + cmd_line = (void *) cp; + + if (args) { + if (arglen >= 1024) + arglen = 1023; + memcpy(cmd_line, args, arglen); + } else { + arglen = 0; + } + cmd_line[arglen] = '\0'; + + memset(efi_systab, 0, sizeof(efi_systab)); + efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; + efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; + efi_systab->hdr.headersize = sizeof(efi_systab->hdr); + efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0"); + efi_systab->fw_revision = 1; + efi_systab->runtime = (void *) __pa(efi_runtime); + efi_systab->nr_tables = 1; + efi_systab->tables = __pa(efi_tables); + + efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; + efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; + efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); + efi_runtime->get_time = __pa(&fw_efi_get_time); + efi_runtime->set_time = __pa(&efi_unimplemented); + efi_runtime->get_wakeup_time = __pa(&efi_unimplemented); + efi_runtime->set_wakeup_time = __pa(&efi_unimplemented); + efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented); + efi_runtime->get_variable = __pa(&efi_unimplemented); + efi_runtime->get_next_variable = __pa(&efi_unimplemented); + efi_runtime->set_variable = __pa(&efi_unimplemented); + efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented); + efi_runtime->reset_system = __pa(&efi_reset_system); + + efi_tables->guid = SAL_SYSTEM_TABLE_GUID; + efi_tables->table = __pa(sal_systab); + + /* fill in the SAL system table: */ + memcpy(sal_systab->signature, "SST_", 4); + sal_systab->size = sizeof(*sal_systab); + sal_systab->sal_rev_minor = 1; + sal_systab->sal_rev_major = 0; + sal_systab->entry_count = 1; + +#ifdef CONFIG_IA64_GENERIC + strcpy(sal_systab->oem_id, "Generic"); + strcpy(sal_systab->product_id, "IA-64 system"); +#endif + +#ifdef CONFIG_IA64_HP_SIM + strcpy(sal_systab->oem_id, "Hewlett-Packard"); + strcpy(sal_systab->product_id, "HP-simulator"); +#endif + +#ifdef CONFIG_IA64_SDV + strcpy(sal_systab->oem_id, "Intel"); + strcpy(sal_systab->product_id, "SDV"); +#endif + + /* fill in an entry point: */ + sal_ed->type = SAL_DESC_ENTRY_POINT; + sal_ed->pal_proc = __pa(pal_desc[0]); + sal_ed->sal_proc = __pa(sal_desc[0]); + sal_ed->gp = __pa(sal_desc[1]); + + for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp) + checksum += *cp; + + sal_systab->checksum = -checksum; + +#if SIMPLE_MEMMAP + /* simulate free memory at physical address zero */ + MAKE_MD(EFI_BOOT_SERVICES_DATA, EFI_MEMORY_WB, 0*MB, 1*MB); + MAKE_MD(EFI_PAL_CODE, EFI_MEMORY_WB, 1*MB, 2*MB); + MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, 2*MB, 130*MB); + MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, 4096*MB, 4128*MB); +#else + MAKE_MD( 4, 0x9, 0x0000000000000000, 0x0000000000001000); + MAKE_MD( 7, 0x9, 0x0000000000001000, 0x000000000008a000); + MAKE_MD( 4, 0x9, 0x000000000008a000, 0x00000000000a0000); + MAKE_MD( 5, 0x8000000000000009, 0x00000000000c0000, 0x0000000000100000); + MAKE_MD( 7, 0x9, 0x0000000000100000, 0x0000000004400000); + MAKE_MD( 2, 0x9, 0x0000000004400000, 0x0000000004be5000); + MAKE_MD( 7, 0x9, 0x0000000004be5000, 0x000000007f77e000); + MAKE_MD( 6, 0x8000000000000009, 0x000000007f77e000, 0x000000007fb94000); + MAKE_MD( 6, 0x8000000000000009, 0x000000007fb94000, 0x000000007fb95000); + MAKE_MD( 6, 0x8000000000000009, 0x000000007fb95000, 0x000000007fc00000); + MAKE_MD(13, 0x8000000000000009, 0x000000007fc00000, 0x000000007fc3a000); + MAKE_MD( 7, 0x9, 0x000000007fc3a000, 0x000000007fea0000); + MAKE_MD( 5, 0x8000000000000009, 0x000000007fea0000, 0x000000007fea8000); + MAKE_MD( 7, 0x9, 0x000000007fea8000, 0x000000007feab000); + MAKE_MD( 5, 0x8000000000000009, 0x000000007feab000, 0x000000007ffff000); + MAKE_MD( 7, 0x9, 0x00000000ff400000, 0x0000000104000000); +#endif + + bp->efi_systab = __pa(&fw_mem); + bp->efi_memmap = __pa(efi_memmap); + bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t); + bp->efi_memdesc_size = sizeof(efi_memory_desc_t); + bp->efi_memdesc_version = 1; + bp->command_line = __pa(cmd_line); + bp->console_info.num_cols = 80; + bp->console_info.num_rows = 25; + bp->console_info.orig_x = 0; + bp->console_info.orig_y = 24; + bp->fpswa = 0; + + return bp; +} diff --git a/arch/ia64/hp/sim/boot/ssc.h b/arch/ia64/hp/sim/boot/ssc.h new file mode 100644 index 000000000000..3b94c03e43a9 --- /dev/null +++ b/arch/ia64/hp/sim/boot/ssc.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + */ +#ifndef ssc_h +#define ssc_h + +/* Simulator system calls: */ + +#define SSC_CONSOLE_INIT 20 +#define SSC_GETCHAR 21 +#define SSC_PUTCHAR 31 +#define SSC_OPEN 50 +#define SSC_CLOSE 51 +#define SSC_READ 52 +#define SSC_WRITE 53 +#define SSC_GET_COMPLETION 54 +#define SSC_WAIT_COMPLETION 55 +#define SSC_CONNECT_INTERRUPT 58 +#define SSC_GENERATE_INTERRUPT 59 +#define SSC_SET_PERIODIC_INTERRUPT 60 +#define SSC_GET_RTC 65 +#define SSC_EXIT 66 +#define SSC_LOAD_SYMBOLS 69 +#define SSC_GET_TOD 74 + +#define SSC_GET_ARGS 75 + +/* + * Simulator system call. + */ +extern long ssc (long arg0, long arg1, long arg2, long arg3, int nr); + +#endif /* ssc_h */ diff --git a/arch/ia64/hp/sim/hpsim.S b/arch/ia64/hp/sim/hpsim.S new file mode 100644 index 000000000000..ff16e8a857d1 --- /dev/null +++ b/arch/ia64/hp/sim/hpsim.S @@ -0,0 +1,10 @@ +#include <asm/asmmacro.h> + +/* + * Simulator system call. + */ +GLOBAL_ENTRY(ia64_ssc) + mov r15=r36 + break 0x80001 + br.ret.sptk.many rp +END(ia64_ssc) diff --git a/arch/ia64/hp/sim/hpsim_console.c b/arch/ia64/hp/sim/hpsim_console.c new file mode 100644 index 000000000000..5deff21e5877 --- /dev/null +++ b/arch/ia64/hp/sim/hpsim_console.c @@ -0,0 +1,65 @@ +/* + * Platform dependent support for HP simulator. + * + * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com> + */ +#include <linux/config.h> + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/tty.h> +#include <linux/kdev_t.h> +#include <linux/console.h> + +#include <asm/delay.h> +#include <asm/irq.h> +#include <asm/pal.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/sal.h> + +#include "hpsim_ssc.h" + +static int simcons_init (struct console *, char *); +static void simcons_write (struct console *, const char *, unsigned); +static struct tty_driver *simcons_console_device (struct console *, int *); + +struct console hpsim_cons = { + .name = "simcons", + .write = simcons_write, + .device = simcons_console_device, + .setup = simcons_init, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +static int +simcons_init (struct console *cons, char *options) +{ + return 0; +} + +static void +simcons_write (struct console *cons, const char *buf, unsigned count) +{ + unsigned long ch; + + while (count-- > 0) { + ch = *buf++; + ia64_ssc(ch, 0, 0, 0, SSC_PUTCHAR); + if (ch == '\n') + ia64_ssc('\r', 0, 0, 0, SSC_PUTCHAR); + } +} + +static struct tty_driver *simcons_console_device (struct console *c, int *index) +{ + extern struct tty_driver *hp_simserial_driver; + *index = c->index; + return hp_simserial_driver; +} diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c new file mode 100644 index 000000000000..c0d25a2a3e9c --- /dev/null +++ b/arch/ia64/hp/sim/hpsim_irq.c @@ -0,0 +1,51 @@ +/* + * Platform dependent support for HP simulator. + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/irq.h> + +static unsigned int +hpsim_irq_startup (unsigned int irq) +{ + return 0; +} + +static void +hpsim_irq_noop (unsigned int irq) +{ +} + +static void +hpsim_set_affinity_noop (unsigned int a, cpumask_t b) +{ +} + +static struct hw_interrupt_type irq_type_hp_sim = { + .typename = "hpsim", + .startup = hpsim_irq_startup, + .shutdown = hpsim_irq_noop, + .enable = hpsim_irq_noop, + .disable = hpsim_irq_noop, + .ack = hpsim_irq_noop, + .end = hpsim_irq_noop, + .set_affinity = hpsim_set_affinity_noop, +}; + +void __init +hpsim_irq_init (void) +{ + irq_desc_t *idesc; + int i; + + for (i = 0; i < NR_IRQS; ++i) { + idesc = irq_descp(i); + if (idesc->handler == &no_irq_type) + idesc->handler = &irq_type_hp_sim; + } +} diff --git a/arch/ia64/hp/sim/hpsim_machvec.c b/arch/ia64/hp/sim/hpsim_machvec.c new file mode 100644 index 000000000000..c21419359185 --- /dev/null +++ b/arch/ia64/hp/sim/hpsim_machvec.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME hpsim +#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpsim.h> +#include <asm/machvec_init.h> diff --git a/arch/ia64/hp/sim/hpsim_setup.c b/arch/ia64/hp/sim/hpsim_setup.c new file mode 100644 index 000000000000..694fc86bfbd5 --- /dev/null +++ b/arch/ia64/hp/sim/hpsim_setup.c @@ -0,0 +1,52 @@ +/* + * Platform dependent support for HP simulator. + * + * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com> + */ +#include <linux/config.h> +#include <linux/console.h> +#include <linux/init.h> +#include <linux/kdev_t.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/param.h> +#include <linux/root_dev.h> +#include <linux/string.h> +#include <linux/types.h> + +#include <asm/delay.h> +#include <asm/irq.h> +#include <asm/pal.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/sal.h> + +#include "hpsim_ssc.h" + +void +ia64_ssc_connect_irq (long intr, long irq) +{ + ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT); +} + +void +ia64_ctl_trace (long on) +{ + ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE); +} + +void __init +hpsim_setup (char **cmdline_p) +{ + ROOT_DEV = Root_SDA1; /* default to first SCSI drive */ + +#ifdef CONFIG_HP_SIMSERIAL_CONSOLE + { + extern struct console hpsim_cons; + if (ia64_platform_is("hpsim")) + register_console(&hpsim_cons); + } +#endif +} diff --git a/arch/ia64/hp/sim/hpsim_ssc.h b/arch/ia64/hp/sim/hpsim_ssc.h new file mode 100644 index 000000000000..bfa3906274b3 --- /dev/null +++ b/arch/ia64/hp/sim/hpsim_ssc.h @@ -0,0 +1,36 @@ +/* + * Platform dependent support for HP simulator. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com> + */ +#ifndef _IA64_PLATFORM_HPSIM_SSC_H +#define _IA64_PLATFORM_HPSIM_SSC_H + +/* Simulator system calls: */ + +#define SSC_CONSOLE_INIT 20 +#define SSC_GETCHAR 21 +#define SSC_PUTCHAR 31 +#define SSC_CONNECT_INTERRUPT 58 +#define SSC_GENERATE_INTERRUPT 59 +#define SSC_SET_PERIODIC_INTERRUPT 60 +#define SSC_GET_RTC 65 +#define SSC_EXIT 66 +#define SSC_LOAD_SYMBOLS 69 +#define SSC_GET_TOD 74 +#define SSC_CTL_TRACE 76 + +#define SSC_NETDEV_PROBE 100 +#define SSC_NETDEV_SEND 101 +#define SSC_NETDEV_RECV 102 +#define SSC_NETDEV_ATTACH 103 +#define SSC_NETDEV_DETACH 104 + +/* + * Simulator system call. + */ +extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr); + +#endif /* _IA64_PLATFORM_HPSIM_SSC_H */ diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c new file mode 100644 index 000000000000..ae84a1018a89 --- /dev/null +++ b/arch/ia64/hp/sim/simeth.c @@ -0,0 +1,530 @@ +/* + * Simulated Ethernet Driver + * + * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/in.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/inetdevice.h> +#include <linux/if_ether.h> +#include <linux/if_arp.h> +#include <linux/skbuff.h> +#include <linux/notifier.h> +#include <linux/bitops.h> +#include <asm/system.h> +#include <asm/irq.h> + +#define SIMETH_RECV_MAX 10 + +/* + * Maximum possible received frame for Ethernet. + * We preallocate an sk_buff of that size to avoid costly + * memcpy for temporary buffer into sk_buff. We do basically + * what's done in other drivers, like eepro with a ring. + * The difference is, of course, that we don't have real DMA !!! + */ +#define SIMETH_FRAME_SIZE ETH_FRAME_LEN + + +#define SSC_NETDEV_PROBE 100 +#define SSC_NETDEV_SEND 101 +#define SSC_NETDEV_RECV 102 +#define SSC_NETDEV_ATTACH 103 +#define SSC_NETDEV_DETACH 104 + +#define NETWORK_INTR 8 + +struct simeth_local { + struct net_device_stats stats; + int simfd; /* descriptor in the simulator */ +}; + +static int simeth_probe1(void); +static int simeth_open(struct net_device *dev); +static int simeth_close(struct net_device *dev); +static int simeth_tx(struct sk_buff *skb, struct net_device *dev); +static int simeth_rx(struct net_device *dev); +static struct net_device_stats *simeth_get_stats(struct net_device *dev); +static irqreturn_t simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs); +static void set_multicast_list(struct net_device *dev); +static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr); + +static char *simeth_version="0.3"; + +/* + * This variable is used to establish a mapping between the Linux/ia64 kernel + * and the host linux kernel. + * + * As of today, we support only one card, even though most of the code + * is ready for many more. The mapping is then: + * linux/ia64 -> linux/x86 + * eth0 -> eth1 + * + * In the future, we some string operations, we could easily support up + * to 10 cards (0-9). + * + * The default mapping can be changed on the kernel command line by + * specifying simeth=ethX (or whatever string you want). + */ +static char *simeth_device="eth0"; /* default host interface to use */ + + + +static volatile unsigned int card_count; /* how many cards "found" so far */ +static int simeth_debug; /* set to 1 to get debug information */ + +/* + * Used to catch IFF_UP & IFF_DOWN events + */ +static struct notifier_block simeth_dev_notifier = { + simeth_device_event, + 0 +}; + + +/* + * Function used when using a kernel command line option. + * + * Format: simeth=interface_name (like eth0) + */ +static int __init +simeth_setup(char *str) +{ + simeth_device = str; + return 1; +} + +__setup("simeth=", simeth_setup); + +/* + * Function used to probe for simeth devices when not installed + * as a loadable module + */ + +int __init +simeth_probe (void) +{ + int r; + + printk(KERN_INFO "simeth: v%s\n", simeth_version); + + r = simeth_probe1(); + + if (r == 0) register_netdevice_notifier(&simeth_dev_notifier); + + return r; +} + +extern long ia64_ssc (long, long, long, long, int); +extern void ia64_ssc_connect_irq (long intr, long irq); + +static inline int +netdev_probe(char *name, unsigned char *ether) +{ + return ia64_ssc(__pa(name), __pa(ether), 0,0, SSC_NETDEV_PROBE); +} + + +static inline int +netdev_connect(int irq) +{ + /* XXX Fix me + * this does not support multiple cards + * also no return value + */ + ia64_ssc_connect_irq(NETWORK_INTR, irq); + return 0; +} + +static inline int +netdev_attach(int fd, int irq, unsigned int ipaddr) +{ + /* this puts the host interface in the right mode (start interrupting) */ + return ia64_ssc(fd, ipaddr, 0,0, SSC_NETDEV_ATTACH); +} + + +static inline int +netdev_detach(int fd) +{ + /* + * inactivate the host interface (don't interrupt anymore) */ + return ia64_ssc(fd, 0,0,0, SSC_NETDEV_DETACH); +} + +static inline int +netdev_send(int fd, unsigned char *buf, unsigned int len) +{ + return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_SEND); +} + +static inline int +netdev_read(int fd, unsigned char *buf, unsigned int len) +{ + return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_RECV); +} + +/* + * Function shared with module code, so cannot be in init section + * + * So far this function "detects" only one card (test_&_set) but could + * be extended easily. + * + * Return: + * - -ENODEV is no device found + * - -ENOMEM is no more memory + * - 0 otherwise + */ +static int +simeth_probe1(void) +{ + unsigned char mac_addr[ETH_ALEN]; + struct simeth_local *local; + struct net_device *dev; + int fd, i, err; + + /* + * XXX Fix me + * let's support just one card for now + */ + if (test_and_set_bit(0, &card_count)) + return -ENODEV; + + /* + * check with the simulator for the device + */ + fd = netdev_probe(simeth_device, mac_addr); + if (fd == -1) + return -ENODEV; + + dev = alloc_etherdev(sizeof(struct simeth_local)); + if (!dev) + return -ENOMEM; + + memcpy(dev->dev_addr, mac_addr, sizeof(mac_addr)); + + local = dev->priv; + local->simfd = fd; /* keep track of underlying file descriptor */ + + dev->open = simeth_open; + dev->stop = simeth_close; + dev->hard_start_xmit = simeth_tx; + dev->get_stats = simeth_get_stats; + dev->set_multicast_list = set_multicast_list; /* no yet used */ + + err = register_netdev(dev); + if (err) { + free_netdev(dev); + return err; + } + + dev->irq = assign_irq_vector(AUTO_ASSIGN); + + /* + * attach the interrupt in the simulator, this does enable interrupts + * until a netdev_attach() is called + */ + netdev_connect(dev->irq); + + printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr", + dev->name, simeth_device, local->simfd); + for(i = 0; i < ETH_ALEN; i++) { + printk(" %2.2x", dev->dev_addr[i]); + } + printk(", IRQ %d\n", dev->irq); + + return 0; +} + +/* + * actually binds the device to an interrupt vector + */ +static int +simeth_open(struct net_device *dev) +{ + if (request_irq(dev->irq, simeth_interrupt, 0, "simeth", dev)) { + printk(KERN_WARNING "simeth: unable to get IRQ %d.\n", dev->irq); + return -EAGAIN; + } + + netif_start_queue(dev); + + return 0; +} + +/* copied from lapbether.c */ +static __inline__ int dev_is_ethdev(struct net_device *dev) +{ + return ( dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5)); +} + + +/* + * Handler for IFF_UP or IFF_DOWN + * + * The reason for that is that we don't want to be interrupted when the + * interface is down. There is no way to unconnect in the simualtor. Instead + * we use this function to shutdown packet processing in the frame filter + * in the simulator. Thus no interrupts are generated + * + * + * That's also the place where we pass the IP address of this device to the + * simulator so that that we can start filtering packets for it + * + * There may be a better way of doing this, but I don't know which yet. + */ +static int +simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct simeth_local *local; + struct in_device *in_dev; + struct in_ifaddr **ifap = NULL; + struct in_ifaddr *ifa = NULL; + int r; + + + if ( ! dev ) { + printk(KERN_WARNING "simeth_device_event dev=0\n"); + return NOTIFY_DONE; + } + + if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE; + + /* + * Check whether or not it's for an ethernet device + * + * XXX Fixme: This works only as long as we support one + * type of ethernet device. + */ + if ( !dev_is_ethdev(dev) ) return NOTIFY_DONE; + + if ((in_dev=dev->ip_ptr) != NULL) { + for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) + if (strcmp(dev->name, ifa->ifa_label) == 0) break; + } + if ( ifa == NULL ) { + printk(KERN_ERR "simeth_open: can't find device %s's ifa\n", dev->name); + return NOTIFY_DONE; + } + + printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n", + dev->name, htonl(ifa->ifa_local)); + + /* + * XXX Fix me + * if the device was up, and we're simply reconfiguring it, not sure + * we get DOWN then UP. + */ + + local = dev->priv; + /* now do it for real */ + r = event == NETDEV_UP ? + netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)): + netdev_detach(local->simfd); + + printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n", + event == NETDEV_UP ? "attach":"detach", r); + + return NOTIFY_DONE; +} + +static int +simeth_close(struct net_device *dev) +{ + netif_stop_queue(dev); + + free_irq(dev->irq, dev); + + return 0; +} + +/* + * Only used for debug + */ +static void +frame_print(unsigned char *from, unsigned char *frame, int len) +{ + int i; + + printk("%s: (%d) %02x", from, len, frame[0] & 0xff); + for(i=1; i < 6; i++ ) { + printk(":%02x", frame[i] &0xff); + } + printk(" %2x", frame[6] &0xff); + for(i=7; i < 12; i++ ) { + printk(":%02x", frame[i] &0xff); + } + printk(" [%02x%02x]\n", frame[12], frame[13]); + + for(i=14; i < len; i++ ) { + printk("%02x ", frame[i] &0xff); + if ( (i%10)==0) printk("\n"); + } + printk("\n"); +} + + +/* + * Function used to transmit of frame, very last one on the path before + * going to the simulator. + */ +static int +simeth_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct simeth_local *local = dev->priv; + +#if 0 + /* ensure we have at least ETH_ZLEN bytes (min frame size) */ + unsigned int length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN; + /* Where do the extra padding bytes comes from inthe skbuff ? */ +#else + /* the real driver in the host system is going to take care of that + * or maybe it's the NIC itself. + */ + unsigned int length = skb->len; +#endif + + local->stats.tx_bytes += skb->len; + local->stats.tx_packets++; + + + if (simeth_debug > 5) frame_print("simeth_tx", skb->data, length); + + netdev_send(local->simfd, skb->data, length); + + /* + * we are synchronous on write, so we don't simulate a + * trasnmit complete interrupt, thus we don't need to arm a tx + */ + + dev_kfree_skb(skb); + return 0; +} + +static inline struct sk_buff * +make_new_skb(struct net_device *dev) +{ + struct sk_buff *nskb; + + /* + * The +2 is used to make sure that the IP header is nicely + * aligned (on 4byte boundary I assume 14+2=16) + */ + nskb = dev_alloc_skb(SIMETH_FRAME_SIZE + 2); + if ( nskb == NULL ) { + printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name); + return NULL; + } + nskb->dev = dev; + + skb_reserve(nskb, 2); /* Align IP on 16 byte boundaries */ + + skb_put(nskb,SIMETH_FRAME_SIZE); + + return nskb; +} + +/* + * called from interrupt handler to process a received frame + */ +static int +simeth_rx(struct net_device *dev) +{ + struct simeth_local *local; + struct sk_buff *skb; + int len; + int rcv_count = SIMETH_RECV_MAX; + + local = dev->priv; + /* + * the loop concept has been borrowed from other drivers + * looks to me like it's a throttling thing to avoid pushing to many + * packets at one time into the stack. Making sure we can process them + * upstream and make forward progress overall + */ + do { + if ( (skb=make_new_skb(dev)) == NULL ) { + printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name); + local->stats.rx_dropped++; + return 0; + } + /* + * Read only one frame at a time + */ + len = netdev_read(local->simfd, skb->data, SIMETH_FRAME_SIZE); + if ( len == 0 ) { + if ( simeth_debug > 0 ) printk(KERN_WARNING "%s: count=%d netdev_read=0\n", + dev->name, SIMETH_RECV_MAX-rcv_count); + break; + } +#if 0 + /* + * XXX Fix me + * Should really do a csum+copy here + */ + memcpy(skb->data, frame, len); +#endif + skb->protocol = eth_type_trans(skb, dev); + + if ( simeth_debug > 6 ) frame_print("simeth_rx", skb->data, len); + + /* + * push the packet up & trigger software interrupt + */ + netif_rx(skb); + + local->stats.rx_packets++; + local->stats.rx_bytes += len; + + } while ( --rcv_count ); + + return len; /* 0 = nothing left to read, otherwise, we can try again */ +} + +/* + * Interrupt handler (Yes, we can do it too !!!) + */ +static irqreturn_t +simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + struct net_device *dev = dev_id; + + if ( dev == NULL ) { + printk(KERN_WARNING "simeth: irq %d for unknown device\n", irq); + return IRQ_NONE; + } + + /* + * very simple loop because we get interrupts only when receiving + */ + while (simeth_rx(dev)); + return IRQ_HANDLED; +} + +static struct net_device_stats * +simeth_get_stats(struct net_device *dev) +{ + struct simeth_local *local = dev->priv; + + return &local->stats; +} + +/* fake multicast ability */ +static void +set_multicast_list(struct net_device *dev) +{ + printk(KERN_WARNING "%s: set_multicast_list called\n", dev->name); +} + +__initcall(simeth_probe); diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c new file mode 100644 index 000000000000..56405dbfd739 --- /dev/null +++ b/arch/ia64/hp/sim/simscsi.c @@ -0,0 +1,404 @@ +/* + * Simulated SCSI driver. + * + * Copyright (C) 1999, 2001-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * + * 02/01/15 David Mosberger Updated for v2.5.1 + * 99/12/18 David Mosberger Added support for READ10/WRITE10 needed by linux v2.3.33 + */ +#include <linux/blkdev.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/timer.h> +#include <asm/irq.h> + +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_device.h> +#include <scsi/scsi_host.h> + +#define DEBUG_SIMSCSI 0 + +#define SIMSCSI_REQ_QUEUE_LEN 64 +#define DEFAULT_SIMSCSI_ROOT "/var/ski-disks/sd" + +/* Simulator system calls: */ + +#define SSC_OPEN 50 +#define SSC_CLOSE 51 +#define SSC_READ 52 +#define SSC_WRITE 53 +#define SSC_GET_COMPLETION 54 +#define SSC_WAIT_COMPLETION 55 + +#define SSC_WRITE_ACCESS 2 +#define SSC_READ_ACCESS 1 + +#if DEBUG_SIMSCSI + int simscsi_debug; +# define DBG simscsi_debug +#else +# define DBG 0 +#endif + +static struct Scsi_Host *host; + +static void simscsi_interrupt (unsigned long val); +static DECLARE_TASKLET(simscsi_tasklet, simscsi_interrupt, 0); + +struct disk_req { + unsigned long addr; + unsigned len; +}; + +struct disk_stat { + int fd; + unsigned count; +}; + +extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr); + +static int desc[16] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}; + +static struct queue_entry { + struct scsi_cmnd *sc; +} queue[SIMSCSI_REQ_QUEUE_LEN]; + +static int rd, wr; +static atomic_t num_reqs = ATOMIC_INIT(0); + +/* base name for default disks */ +static char *simscsi_root = DEFAULT_SIMSCSI_ROOT; + +#define MAX_ROOT_LEN 128 + +/* + * used to setup a new base for disk images + * to use /foo/bar/disk[a-z] as disk images + * you have to specify simscsi=/foo/bar/disk on the command line + */ +static int __init +simscsi_setup (char *s) +{ + /* XXX Fix me we may need to strcpy() ? */ + if (strlen(s) > MAX_ROOT_LEN) { + printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n", + simscsi_root); + } + simscsi_root = s; + return 1; +} + +__setup("simscsi=", simscsi_setup); + +static void +simscsi_interrupt (unsigned long val) +{ + struct scsi_cmnd *sc; + + while ((sc = queue[rd].sc) != 0) { + atomic_dec(&num_reqs); + queue[rd].sc = 0; + if (DBG) + printk("simscsi_interrupt: done with %ld\n", sc->serial_number); + (*sc->scsi_done)(sc); + rd = (rd + 1) % SIMSCSI_REQ_QUEUE_LEN; + } +} + +static int +simscsi_biosparam (struct scsi_device *sdev, struct block_device *n, + sector_t capacity, int ip[]) +{ + ip[0] = 64; /* heads */ + ip[1] = 32; /* sectors */ + ip[2] = capacity >> 11; /* cylinders */ + return 0; +} + +static void +simscsi_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset, unsigned long len) +{ + struct disk_stat stat; + struct disk_req req; + + req.addr = __pa(sc->request_buffer); + req.len = len; /* # of bytes to transfer */ + + if (sc->request_bufflen < req.len) + return; + + stat.fd = desc[sc->device->id]; + if (DBG) + printk("simscsi_%s @ %lx (off %lx)\n", + mode == SSC_READ ? "read":"write", req.addr, offset); + ia64_ssc(stat.fd, 1, __pa(&req), offset, mode); + ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION); + + if (stat.count == req.len) { + sc->result = GOOD; + } else { + sc->result = DID_ERROR << 16; + } +} + +static void +simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset) +{ + int list_len = sc->use_sg; + struct scatterlist *sl = (struct scatterlist *)sc->buffer; + struct disk_stat stat; + struct disk_req req; + + stat.fd = desc[sc->device->id]; + + while (list_len) { + req.addr = __pa(page_address(sl->page) + sl->offset); + req.len = sl->length; + if (DBG) + printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n", + mode == SSC_READ ? "read":"write", req.addr, offset, + list_len, sl->length); + ia64_ssc(stat.fd, 1, __pa(&req), offset, mode); + ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION); + + /* should not happen in our case */ + if (stat.count != req.len) { + sc->result = DID_ERROR << 16; + return; + } + offset += sl->length; + sl++; + list_len--; + } + sc->result = GOOD; +} + +/* + * function handling both READ_6/WRITE_6 (non-scatter/gather mode) + * commands. + * Added 02/26/99 S.Eranian + */ +static void +simscsi_readwrite6 (struct scsi_cmnd *sc, int mode) +{ + unsigned long offset; + + offset = (((sc->cmnd[1] & 0x1f) << 16) | (sc->cmnd[2] << 8) | sc->cmnd[3])*512; + if (sc->use_sg > 0) + simscsi_sg_readwrite(sc, mode, offset); + else + simscsi_readwrite(sc, mode, offset, sc->cmnd[4]*512); +} + +static size_t +simscsi_get_disk_size (int fd) +{ + struct disk_stat stat; + size_t bit, sectors = 0; + struct disk_req req; + char buf[512]; + + /* + * This is a bit kludgey: the simulator doesn't provide a direct way of determining + * the disk size, so we do a binary search, assuming a maximum disk size of 4GB. + */ + for (bit = (4UL << 30)/512; bit != 0; bit >>= 1) { + req.addr = __pa(&buf); + req.len = sizeof(buf); + ia64_ssc(fd, 1, __pa(&req), ((sectors | bit) - 1)*512, SSC_READ); + stat.fd = fd; + ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION); + if (stat.count == sizeof(buf)) + sectors |= bit; + } + return sectors - 1; /* return last valid sector number */ +} + +static void +simscsi_readwrite10 (struct scsi_cmnd *sc, int mode) +{ + unsigned long offset; + + offset = ( (sc->cmnd[2] << 24) | (sc->cmnd[3] << 16) + | (sc->cmnd[4] << 8) | (sc->cmnd[5] << 0))*512; + if (sc->use_sg > 0) + simscsi_sg_readwrite(sc, mode, offset); + else + simscsi_readwrite(sc, mode, offset, ((sc->cmnd[7] << 8) | sc->cmnd[8])*512); +} + +static int +simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) +{ + unsigned int target_id = sc->device->id; + char fname[MAX_ROOT_LEN+16]; + size_t disk_size; + char *buf; +#if DEBUG_SIMSCSI + register long sp asm ("sp"); + + if (DBG) + printk("simscsi_queuecommand: target=%d,cmnd=%u,sc=%lu,sp=%lx,done=%p\n", + target_id, sc->cmnd[0], sc->serial_number, sp, done); +#endif + + sc->result = DID_BAD_TARGET << 16; + sc->scsi_done = done; + if (target_id <= 15 && sc->device->lun == 0) { + switch (sc->cmnd[0]) { + case INQUIRY: + if (sc->request_bufflen < 35) { + break; + } + sprintf (fname, "%s%c", simscsi_root, 'a' + target_id); + desc[target_id] = ia64_ssc(__pa(fname), SSC_READ_ACCESS|SSC_WRITE_ACCESS, + 0, 0, SSC_OPEN); + if (desc[target_id] < 0) { + /* disk doesn't exist... */ + break; + } + buf = sc->request_buffer; + buf[0] = 0; /* magnetic disk */ + buf[1] = 0; /* not a removable medium */ + buf[2] = 2; /* SCSI-2 compliant device */ + buf[3] = 2; /* SCSI-2 response data format */ + buf[4] = 31; /* additional length (bytes) */ + buf[5] = 0; /* reserved */ + buf[6] = 0; /* reserved */ + buf[7] = 0; /* various flags */ + memcpy(buf + 8, "HP SIMULATED DISK 0.00", 28); + sc->result = GOOD; + break; + + case TEST_UNIT_READY: + sc->result = GOOD; + break; + + case READ_6: + if (desc[target_id] < 0 ) + break; + simscsi_readwrite6(sc, SSC_READ); + break; + + case READ_10: + if (desc[target_id] < 0 ) + break; + simscsi_readwrite10(sc, SSC_READ); + break; + + case WRITE_6: + if (desc[target_id] < 0) + break; + simscsi_readwrite6(sc, SSC_WRITE); + break; + + case WRITE_10: + if (desc[target_id] < 0) + break; + simscsi_readwrite10(sc, SSC_WRITE); + break; + + + case READ_CAPACITY: + if (desc[target_id] < 0 || sc->request_bufflen < 8) { + break; + } + buf = sc->request_buffer; + + disk_size = simscsi_get_disk_size(desc[target_id]); + + /* pretend to be a 1GB disk (partition table contains real stuff): */ + buf[0] = (disk_size >> 24) & 0xff; + buf[1] = (disk_size >> 16) & 0xff; + buf[2] = (disk_size >> 8) & 0xff; + buf[3] = (disk_size >> 0) & 0xff; + /* set block size of 512 bytes: */ + buf[4] = 0; + buf[5] = 0; + buf[6] = 2; + buf[7] = 0; + sc->result = GOOD; + break; + + case MODE_SENSE: + case MODE_SENSE_10: + /* sd.c uses this to determine whether disk does write-caching. */ + memset(sc->request_buffer, 0, 128); + sc->result = GOOD; + break; + + case START_STOP: + printk(KERN_ERR "START_STOP\n"); + break; + + default: + panic("simscsi: unknown SCSI command %u\n", sc->cmnd[0]); + } + } + if (sc->result == DID_BAD_TARGET) { + sc->result |= DRIVER_SENSE << 24; + sc->sense_buffer[0] = 0x70; + sc->sense_buffer[2] = 0x00; + } + if (atomic_read(&num_reqs) >= SIMSCSI_REQ_QUEUE_LEN) { + panic("Attempt to queue command while command is pending!!"); + } + atomic_inc(&num_reqs); + queue[wr].sc = sc; + wr = (wr + 1) % SIMSCSI_REQ_QUEUE_LEN; + + tasklet_schedule(&simscsi_tasklet); + return 0; +} + +static int +simscsi_host_reset (struct scsi_cmnd *sc) +{ + printk(KERN_ERR "simscsi_host_reset: not implemented\n"); + return 0; +} + +static struct scsi_host_template driver_template = { + .name = "simulated SCSI host adapter", + .proc_name = "simscsi", + .queuecommand = simscsi_queuecommand, + .eh_host_reset_handler = simscsi_host_reset, + .bios_param = simscsi_biosparam, + .can_queue = SIMSCSI_REQ_QUEUE_LEN, + .this_id = -1, + .sg_tablesize = SG_ALL, + .max_sectors = 1024, + .cmd_per_lun = SIMSCSI_REQ_QUEUE_LEN, + .use_clustering = DISABLE_CLUSTERING, +}; + +static int __init +simscsi_init(void) +{ + int error; + + host = scsi_host_alloc(&driver_template, 0); + if (!host) + return -ENOMEM; + + error = scsi_add_host(host, NULL); + if (!error) + scsi_scan_host(host); + return error; +} + +static void __exit +simscsi_exit(void) +{ + scsi_remove_host(host); + scsi_host_put(host); +} + +module_init(simscsi_init); +module_exit(simscsi_exit); diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c new file mode 100644 index 000000000000..786e70718ce4 --- /dev/null +++ b/arch/ia64/hp/sim/simserial.c @@ -0,0 +1,1032 @@ +/* + * Simulated Serial Driver (fake serial) + * + * This driver is mostly used for bringup purposes and will go away. + * It has a strong dependency on the system console. All outputs + * are rerouted to the same facility as the one used by printk which, in our + * case means sys_sim.c console (goes via the simulator). The code hereafter + * is completely leveraged from the serial.c driver. + * + * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 02/04/00 D. Mosberger Merged in serial.c bug fixes in rs_close(). + * 02/25/00 D. Mosberger Synced up with 2.3.99pre-5 version of serial.c. + * 07/30/02 D. Mosberger Replace sti()/cli() with explicit spinlocks & local irq masking + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include <linux/major.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/console.h> +#include <linux/module.h> +#include <linux/serial.h> +#include <linux/serialP.h> + +#include <asm/irq.h> +#include <asm/hw_irq.h> +#include <asm/uaccess.h> + +#ifdef CONFIG_KDB +# include <linux/kdb.h> +#endif + +#undef SIMSERIAL_DEBUG /* define this to get some debug information */ + +#define KEYBOARD_INTR 3 /* must match with simulator! */ + +#define NR_PORTS 1 /* only one port for now */ +#define SERIAL_INLINE 1 + +#ifdef SERIAL_INLINE +#define _INLINE_ inline +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) + +#define SSC_GETCHAR 21 + +extern long ia64_ssc (long, long, long, long, int); +extern void ia64_ssc_connect_irq (long intr, long irq); + +static char *serial_name = "SimSerial driver"; +static char *serial_version = "0.6"; + +/* + * This has been extracted from asm/serial.h. We need one eventually but + * I don't know exactly what we're going to put in it so just fake one + * for now. + */ +#define BASE_BAUD ( 1843200 / 16 ) + +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) + +/* + * Most of the values here are meaningless to this particular driver. + * However some values must be preserved for the code (leveraged from serial.c + * to work correctly). + * port must not be 0 + * type must not be UNKNOWN + * So I picked arbitrary (guess from where?) values instead + */ +static struct serial_state rs_table[NR_PORTS]={ + /* UART CLK PORT IRQ FLAGS */ + { 0, BASE_BAUD, 0x3F8, 0, STD_COM_FLAGS,0,PORT_16550 } /* ttyS0 */ +}; + +/* + * Just for the fun of it ! + */ +static struct serial_uart_config uart_config[] = { + { "unknown", 1, 0 }, + { "8250", 1, 0 }, + { "16450", 1, 0 }, + { "16550", 1, 0 }, + { "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO }, + { "cirrus", 1, 0 }, + { "ST16650", 1, UART_CLEAR_FIFO | UART_STARTECH }, + { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO | + UART_STARTECH }, + { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO}, + { 0, 0} +}; + +struct tty_driver *hp_simserial_driver; + +static struct async_struct *IRQ_ports[NR_IRQS]; + +static struct console *console; + +static unsigned char *tmp_buf; +static DECLARE_MUTEX(tmp_buf_sem); + +extern struct console *console_drivers; /* from kernel/printk.c */ + +/* + * ------------------------------------------------------------ + * rs_stop() and rs_start() + * + * This routines are called before setting or resetting tty->stopped. + * They enable or disable transmitter interrupts, as necessary. + * ------------------------------------------------------------ + */ +static void rs_stop(struct tty_struct *tty) +{ +#ifdef SIMSERIAL_DEBUG + printk("rs_stop: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n", + tty->stopped, tty->hw_stopped, tty->flow_stopped); +#endif + +} + +static void rs_start(struct tty_struct *tty) +{ +#if SIMSERIAL_DEBUG + printk("rs_start: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n", + tty->stopped, tty->hw_stopped, tty->flow_stopped); +#endif +} + +static void receive_chars(struct tty_struct *tty, struct pt_regs *regs) +{ + unsigned char ch; + static unsigned char seen_esc = 0; + + while ( (ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR)) ) { + if ( ch == 27 && seen_esc == 0 ) { + seen_esc = 1; + continue; + } else { + if ( seen_esc==1 && ch == 'O' ) { + seen_esc = 2; + continue; + } else if ( seen_esc == 2 ) { + if ( ch == 'P' ) show_state(); /* F1 key */ +#ifdef CONFIG_KDB + if ( ch == 'S' ) + kdb(KDB_REASON_KEYBOARD, 0, (kdb_eframe_t) regs); +#endif + + seen_esc = 0; + continue; + } + } + seen_esc = 0; + if (tty->flip.count >= TTY_FLIPBUF_SIZE) break; + + *tty->flip.char_buf_ptr = ch; + + *tty->flip.flag_buf_ptr = 0; + + tty->flip.flag_buf_ptr++; + tty->flip.char_buf_ptr++; + tty->flip.count++; + } + tty_flip_buffer_push(tty); +} + +/* + * This is the serial driver's interrupt routine for a single port + */ +static irqreturn_t rs_interrupt_single(int irq, void *dev_id, struct pt_regs * regs) +{ + struct async_struct * info; + + /* + * I don't know exactly why they don't use the dev_id opaque data + * pointer instead of this extra lookup table + */ + info = IRQ_ports[irq]; + if (!info || !info->tty) { + printk(KERN_INFO "simrs_interrupt_single: info|tty=0 info=%p problem\n", info); + return IRQ_NONE; + } + /* + * pretty simple in our case, because we only get interrupts + * on inbound traffic + */ + receive_chars(info->tty, regs); + return IRQ_HANDLED; +} + +/* + * ------------------------------------------------------------------- + * Here ends the serial interrupt routines. + * ------------------------------------------------------------------- + */ + +#if 0 +/* + * not really used in our situation so keep them commented out for now + */ +static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */ +static void do_serial_bh(void) +{ + run_task_queue(&tq_serial); + printk(KERN_ERR "do_serial_bh: called\n"); +} +#endif + +static void do_softint(void *private_) +{ + printk(KERN_ERR "simserial: do_softint called\n"); +} + +static void rs_put_char(struct tty_struct *tty, unsigned char ch) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + unsigned long flags; + + if (!tty || !info->xmit.buf) return; + + local_irq_save(flags); + if (CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) == 0) { + local_irq_restore(flags); + return; + } + info->xmit.buf[info->xmit.head] = ch; + info->xmit.head = (info->xmit.head + 1) & (SERIAL_XMIT_SIZE-1); + local_irq_restore(flags); +} + +static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done) +{ + int count; + unsigned long flags; + + + local_irq_save(flags); + + if (info->x_char) { + char c = info->x_char; + + console->write(console, &c, 1); + + info->state->icount.tx++; + info->x_char = 0; + + goto out; + } + + if (info->xmit.head == info->xmit.tail || info->tty->stopped || info->tty->hw_stopped) { +#ifdef SIMSERIAL_DEBUG + printk("transmit_chars: head=%d, tail=%d, stopped=%d\n", + info->xmit.head, info->xmit.tail, info->tty->stopped); +#endif + goto out; + } + /* + * We removed the loop and try to do it in to chunks. We need + * 2 operations maximum because it's a ring buffer. + * + * First from current to tail if possible. + * Then from the beginning of the buffer until necessary + */ + + count = min(CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE), + SERIAL_XMIT_SIZE - info->xmit.tail); + console->write(console, info->xmit.buf+info->xmit.tail, count); + + info->xmit.tail = (info->xmit.tail+count) & (SERIAL_XMIT_SIZE-1); + + /* + * We have more at the beginning of the buffer + */ + count = CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); + if (count) { + console->write(console, info->xmit.buf, count); + info->xmit.tail += count; + } +out: + local_irq_restore(flags); +} + +static void rs_flush_chars(struct tty_struct *tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + if (info->xmit.head == info->xmit.tail || tty->stopped || tty->hw_stopped || + !info->xmit.buf) + return; + + transmit_chars(info, NULL); +} + + +static int rs_write(struct tty_struct * tty, + const unsigned char *buf, int count) +{ + int c, ret = 0; + struct async_struct *info = (struct async_struct *)tty->driver_data; + unsigned long flags; + + if (!tty || !info->xmit.buf || !tmp_buf) return 0; + + local_irq_save(flags); + while (1) { + c = CIRC_SPACE_TO_END(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); + if (count < c) + c = count; + if (c <= 0) { + break; + } + memcpy(info->xmit.buf + info->xmit.head, buf, c); + info->xmit.head = ((info->xmit.head + c) & + (SERIAL_XMIT_SIZE-1)); + buf += c; + count -= c; + ret += c; + } + local_irq_restore(flags); + /* + * Hey, we transmit directly from here in our case + */ + if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) + && !tty->stopped && !tty->hw_stopped) { + transmit_chars(info, NULL); + } + return ret; +} + +static int rs_write_room(struct tty_struct *tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + return CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); +} + +static int rs_chars_in_buffer(struct tty_struct *tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + return CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); +} + +static void rs_flush_buffer(struct tty_struct *tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + unsigned long flags; + + local_irq_save(flags); + info->xmit.head = info->xmit.tail = 0; + local_irq_restore(flags); + + wake_up_interruptible(&tty->write_wait); + + if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && + tty->ldisc.write_wakeup) + (tty->ldisc.write_wakeup)(tty); +} + +/* + * This function is used to send a high-priority XON/XOFF character to + * the device + */ +static void rs_send_xchar(struct tty_struct *tty, char ch) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + info->x_char = ch; + if (ch) { + /* + * I guess we could call console->write() directly but + * let's do that for now. + */ + transmit_chars(info, NULL); + } +} + +/* + * ------------------------------------------------------------ + * rs_throttle() + * + * This routine is called by the upper-layer tty layer to signal that + * incoming characters should be throttled. + * ------------------------------------------------------------ + */ +static void rs_throttle(struct tty_struct * tty) +{ + if (I_IXOFF(tty)) rs_send_xchar(tty, STOP_CHAR(tty)); + + printk(KERN_INFO "simrs_throttle called\n"); +} + +static void rs_unthrottle(struct tty_struct * tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + if (I_IXOFF(tty)) { + if (info->x_char) + info->x_char = 0; + else + rs_send_xchar(tty, START_CHAR(tty)); + } + printk(KERN_INFO "simrs_unthrottle called\n"); +} + +/* + * rs_break() --- routine which turns the break handling on or off + */ +static void rs_break(struct tty_struct *tty, int break_state) +{ +} + +static int rs_ioctl(struct tty_struct *tty, struct file * file, + unsigned int cmd, unsigned long arg) +{ + if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) && + (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) && + (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) { + if (tty->flags & (1 << TTY_IO_ERROR)) + return -EIO; + } + + switch (cmd) { + case TIOCMGET: + printk(KERN_INFO "rs_ioctl: TIOCMGET called\n"); + return -EINVAL; + case TIOCMBIS: + case TIOCMBIC: + case TIOCMSET: + printk(KERN_INFO "rs_ioctl: TIOCMBIS/BIC/SET called\n"); + return -EINVAL; + case TIOCGSERIAL: + printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n"); + return 0; + case TIOCSSERIAL: + printk(KERN_INFO "simrs_ioctl TIOCSSERIAL called\n"); + return 0; + case TIOCSERCONFIG: + printk(KERN_INFO "rs_ioctl: TIOCSERCONFIG called\n"); + return -EINVAL; + + case TIOCSERGETLSR: /* Get line status register */ + printk(KERN_INFO "rs_ioctl: TIOCSERGETLSR called\n"); + return -EINVAL; + + case TIOCSERGSTRUCT: + printk(KERN_INFO "rs_ioctl: TIOCSERGSTRUCT called\n"); +#if 0 + if (copy_to_user((struct async_struct *) arg, + info, sizeof(struct async_struct))) + return -EFAULT; +#endif + return 0; + + /* + * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change + * - mask passed in arg for lines of interest + * (use |'ed TIOCM_RNG/DSR/CD/CTS for masking) + * Caller should use TIOCGICOUNT to see which one it was + */ + case TIOCMIWAIT: + printk(KERN_INFO "rs_ioctl: TIOCMIWAIT: called\n"); + return 0; + /* + * Get counter of input serial line interrupts (DCD,RI,DSR,CTS) + * Return: write counters to the user passed counter struct + * NB: both 1->0 and 0->1 transitions are counted except for + * RI where only 0->1 is counted. + */ + case TIOCGICOUNT: + printk(KERN_INFO "rs_ioctl: TIOCGICOUNT called\n"); + return 0; + + case TIOCSERGWILD: + case TIOCSERSWILD: + /* "setserial -W" is called in Debian boot */ + printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n"); + return 0; + + default: + return -ENOIOCTLCMD; + } + return 0; +} + +#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) + +static void rs_set_termios(struct tty_struct *tty, struct termios *old_termios) +{ + unsigned int cflag = tty->termios->c_cflag; + + if ( (cflag == old_termios->c_cflag) + && ( RELEVANT_IFLAG(tty->termios->c_iflag) + == RELEVANT_IFLAG(old_termios->c_iflag))) + return; + + + /* Handle turning off CRTSCTS */ + if ((old_termios->c_cflag & CRTSCTS) && + !(tty->termios->c_cflag & CRTSCTS)) { + tty->hw_stopped = 0; + rs_start(tty); + } +} +/* + * This routine will shutdown a serial port; interrupts are disabled, and + * DTR is dropped if the hangup on close termio flag is on. + */ +static void shutdown(struct async_struct * info) +{ + unsigned long flags; + struct serial_state *state; + int retval; + + if (!(info->flags & ASYNC_INITIALIZED)) return; + + state = info->state; + +#ifdef SIMSERIAL_DEBUG + printk("Shutting down serial port %d (irq %d)....", info->line, + state->irq); +#endif + + local_irq_save(flags); + { + /* + * First unlink the serial port from the IRQ chain... + */ + if (info->next_port) + info->next_port->prev_port = info->prev_port; + if (info->prev_port) + info->prev_port->next_port = info->next_port; + else + IRQ_ports[state->irq] = info->next_port; + + /* + * Free the IRQ, if necessary + */ + if (state->irq && (!IRQ_ports[state->irq] || + !IRQ_ports[state->irq]->next_port)) { + if (IRQ_ports[state->irq]) { + free_irq(state->irq, NULL); + retval = request_irq(state->irq, rs_interrupt_single, + IRQ_T(info), "serial", NULL); + + if (retval) + printk(KERN_ERR "serial shutdown: request_irq: error %d" + " Couldn't reacquire IRQ.\n", retval); + } else + free_irq(state->irq, NULL); + } + + if (info->xmit.buf) { + free_page((unsigned long) info->xmit.buf); + info->xmit.buf = 0; + } + + if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags); + + info->flags &= ~ASYNC_INITIALIZED; + } + local_irq_restore(flags); +} + +/* + * ------------------------------------------------------------ + * rs_close() + * + * This routine is called when the serial port gets closed. First, we + * wait for the last remaining data to be sent. Then, we unlink its + * async structure from the interrupt chain if necessary, and we free + * that IRQ if nothing is left in the chain. + * ------------------------------------------------------------ + */ +static void rs_close(struct tty_struct *tty, struct file * filp) +{ + struct async_struct * info = (struct async_struct *)tty->driver_data; + struct serial_state *state; + unsigned long flags; + + if (!info ) return; + + state = info->state; + + local_irq_save(flags); + if (tty_hung_up_p(filp)) { +#ifdef SIMSERIAL_DEBUG + printk("rs_close: hung_up\n"); +#endif + local_irq_restore(flags); + return; + } +#ifdef SIMSERIAL_DEBUG + printk("rs_close ttys%d, count = %d\n", info->line, state->count); +#endif + if ((tty->count == 1) && (state->count != 1)) { + /* + * Uh, oh. tty->count is 1, which means that the tty + * structure will be freed. state->count should always + * be one in these conditions. If it's greater than + * one, we've got real problems, since it means the + * serial port won't be shutdown. + */ + printk(KERN_ERR "rs_close: bad serial port count; tty->count is 1, " + "state->count is %d\n", state->count); + state->count = 1; + } + if (--state->count < 0) { + printk(KERN_ERR "rs_close: bad serial port count for ttys%d: %d\n", + info->line, state->count); + state->count = 0; + } + if (state->count) { + local_irq_restore(flags); + return; + } + info->flags |= ASYNC_CLOSING; + local_irq_restore(flags); + + /* + * Now we wait for the transmit buffer to clear; and we notify + * the line discipline to only process XON/XOFF characters. + */ + shutdown(info); + if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty); + if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty); + info->event = 0; + info->tty = 0; + if (info->blocked_open) { + if (info->close_delay) { + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(info->close_delay); + } + wake_up_interruptible(&info->open_wait); + } + info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); + wake_up_interruptible(&info->close_wait); +} + +/* + * rs_wait_until_sent() --- wait until the transmitter is empty + */ +static void rs_wait_until_sent(struct tty_struct *tty, int timeout) +{ +} + + +/* + * rs_hangup() --- called by tty_hangup() when a hangup is signaled. + */ +static void rs_hangup(struct tty_struct *tty) +{ + struct async_struct * info = (struct async_struct *)tty->driver_data; + struct serial_state *state = info->state; + +#ifdef SIMSERIAL_DEBUG + printk("rs_hangup: called\n"); +#endif + + state = info->state; + + rs_flush_buffer(tty); + if (info->flags & ASYNC_CLOSING) + return; + shutdown(info); + + info->event = 0; + state->count = 0; + info->flags &= ~ASYNC_NORMAL_ACTIVE; + info->tty = 0; + wake_up_interruptible(&info->open_wait); +} + + +static int get_async_struct(int line, struct async_struct **ret_info) +{ + struct async_struct *info; + struct serial_state *sstate; + + sstate = rs_table + line; + sstate->count++; + if (sstate->info) { + *ret_info = sstate->info; + return 0; + } + info = kmalloc(sizeof(struct async_struct), GFP_KERNEL); + if (!info) { + sstate->count--; + return -ENOMEM; + } + memset(info, 0, sizeof(struct async_struct)); + init_waitqueue_head(&info->open_wait); + init_waitqueue_head(&info->close_wait); + init_waitqueue_head(&info->delta_msr_wait); + info->magic = SERIAL_MAGIC; + info->port = sstate->port; + info->flags = sstate->flags; + info->xmit_fifo_size = sstate->xmit_fifo_size; + info->line = line; + INIT_WORK(&info->work, do_softint, info); + info->state = sstate; + if (sstate->info) { + kfree(info); + *ret_info = sstate->info; + return 0; + } + *ret_info = sstate->info = info; + return 0; +} + +static int +startup(struct async_struct *info) +{ + unsigned long flags; + int retval=0; + irqreturn_t (*handler)(int, void *, struct pt_regs *); + struct serial_state *state= info->state; + unsigned long page; + + page = get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + local_irq_save(flags); + + if (info->flags & ASYNC_INITIALIZED) { + free_page(page); + goto errout; + } + + if (!state->port || !state->type) { + if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags); + free_page(page); + goto errout; + } + if (info->xmit.buf) + free_page(page); + else + info->xmit.buf = (unsigned char *) page; + +#ifdef SIMSERIAL_DEBUG + printk("startup: ttys%d (irq %d)...", info->line, state->irq); +#endif + + /* + * Allocate the IRQ if necessary + */ + if (state->irq && (!IRQ_ports[state->irq] || + !IRQ_ports[state->irq]->next_port)) { + if (IRQ_ports[state->irq]) { + retval = -EBUSY; + goto errout; + } else + handler = rs_interrupt_single; + + retval = request_irq(state->irq, handler, IRQ_T(info), "simserial", NULL); + if (retval) { + if (capable(CAP_SYS_ADMIN)) { + if (info->tty) + set_bit(TTY_IO_ERROR, + &info->tty->flags); + retval = 0; + } + goto errout; + } + } + + /* + * Insert serial port into IRQ chain. + */ + info->prev_port = 0; + info->next_port = IRQ_ports[state->irq]; + if (info->next_port) + info->next_port->prev_port = info; + IRQ_ports[state->irq] = info; + + if (info->tty) clear_bit(TTY_IO_ERROR, &info->tty->flags); + + info->xmit.head = info->xmit.tail = 0; + +#if 0 + /* + * Set up serial timers... + */ + timer_table[RS_TIMER].expires = jiffies + 2*HZ/100; + timer_active |= 1 << RS_TIMER; +#endif + + /* + * Set up the tty->alt_speed kludge + */ + if (info->tty) { + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) + info->tty->alt_speed = 57600; + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI) + info->tty->alt_speed = 115200; + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI) + info->tty->alt_speed = 230400; + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP) + info->tty->alt_speed = 460800; + } + + info->flags |= ASYNC_INITIALIZED; + local_irq_restore(flags); + return 0; + +errout: + local_irq_restore(flags); + return retval; +} + + +/* + * This routine is called whenever a serial port is opened. It + * enables interrupts for a serial port, linking in its async structure into + * the IRQ chain. It also performs the serial-specific + * initialization for the tty structure. + */ +static int rs_open(struct tty_struct *tty, struct file * filp) +{ + struct async_struct *info; + int retval, line; + unsigned long page; + + line = tty->index; + if ((line < 0) || (line >= NR_PORTS)) + return -ENODEV; + retval = get_async_struct(line, &info); + if (retval) + return retval; + tty->driver_data = info; + info->tty = tty; + +#ifdef SIMSERIAL_DEBUG + printk("rs_open %s, count = %d\n", tty->name, info->state->count); +#endif + info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + + if (!tmp_buf) { + page = get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + if (tmp_buf) + free_page(page); + else + tmp_buf = (unsigned char *) page; + } + + /* + * If the port is the middle of closing, bail out now + */ + if (tty_hung_up_p(filp) || + (info->flags & ASYNC_CLOSING)) { + if (info->flags & ASYNC_CLOSING) + interruptible_sleep_on(&info->close_wait); +#ifdef SERIAL_DO_RESTART + return ((info->flags & ASYNC_HUP_NOTIFY) ? + -EAGAIN : -ERESTARTSYS); +#else + return -EAGAIN; +#endif + } + + /* + * Start up serial port + */ + retval = startup(info); + if (retval) { + return retval; + } + + /* + * figure out which console to use (should be one already) + */ + console = console_drivers; + while (console) { + if ((console->flags & CON_ENABLED) && console->write) break; + console = console->next; + } + +#ifdef SIMSERIAL_DEBUG + printk("rs_open ttys%d successful\n", info->line); +#endif + return 0; +} + +/* + * /proc fs routines.... + */ + +static inline int line_info(char *buf, struct serial_state *state) +{ + return sprintf(buf, "%d: uart:%s port:%lX irq:%d\n", + state->line, uart_config[state->type].name, + state->port, state->irq); +} + +static int rs_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int i, len = 0, l; + off_t begin = 0; + + len += sprintf(page, "simserinfo:1.0 driver:%s\n", serial_version); + for (i = 0; i < NR_PORTS && len < 4000; i++) { + l = line_info(page + len, &rs_table[i]); + len += l; + if (len+begin > off+count) + goto done; + if (len+begin < off) { + begin += len; + len = 0; + } + } + *eof = 1; +done: + if (off >= len+begin) + return 0; + *start = page + (begin-off); + return ((count < begin+len-off) ? count : begin+len-off); +} + +/* + * --------------------------------------------------------------------- + * rs_init() and friends + * + * rs_init() is called at boot-time to initialize the serial driver. + * --------------------------------------------------------------------- + */ + +/* + * This routine prints out the appropriate serial driver version + * number, and identifies which options were configured into this + * driver. + */ +static inline void show_serial_version(void) +{ + printk(KERN_INFO "%s version %s with", serial_name, serial_version); + printk(KERN_INFO " no serial options enabled\n"); +} + +static struct tty_operations hp_ops = { + .open = rs_open, + .close = rs_close, + .write = rs_write, + .put_char = rs_put_char, + .flush_chars = rs_flush_chars, + .write_room = rs_write_room, + .chars_in_buffer = rs_chars_in_buffer, + .flush_buffer = rs_flush_buffer, + .ioctl = rs_ioctl, + .throttle = rs_throttle, + .unthrottle = rs_unthrottle, + .send_xchar = rs_send_xchar, + .set_termios = rs_set_termios, + .stop = rs_stop, + .start = rs_start, + .hangup = rs_hangup, + .break_ctl = rs_break, + .wait_until_sent = rs_wait_until_sent, + .read_proc = rs_read_proc, +}; + +/* + * The serial driver boot-time initialization code! + */ +static int __init +simrs_init (void) +{ + int i; + struct serial_state *state; + + if (!ia64_platform_is("hpsim")) + return -ENODEV; + + hp_simserial_driver = alloc_tty_driver(1); + if (!hp_simserial_driver) + return -ENOMEM; + + show_serial_version(); + + /* Initialize the tty_driver structure */ + + hp_simserial_driver->owner = THIS_MODULE; + hp_simserial_driver->driver_name = "simserial"; + hp_simserial_driver->name = "ttyS"; + hp_simserial_driver->major = TTY_MAJOR; + hp_simserial_driver->minor_start = 64; + hp_simserial_driver->type = TTY_DRIVER_TYPE_SERIAL; + hp_simserial_driver->subtype = SERIAL_TYPE_NORMAL; + hp_simserial_driver->init_termios = tty_std_termios; + hp_simserial_driver->init_termios.c_cflag = + B9600 | CS8 | CREAD | HUPCL | CLOCAL; + hp_simserial_driver->flags = TTY_DRIVER_REAL_RAW; + tty_set_operations(hp_simserial_driver, &hp_ops); + + /* + * Let's have a little bit of fun ! + */ + for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) { + + if (state->type == PORT_UNKNOWN) continue; + + if (!state->irq) { + state->irq = assign_irq_vector(AUTO_ASSIGN); + ia64_ssc_connect_irq(KEYBOARD_INTR, state->irq); + } + + printk(KERN_INFO "ttyS%d at 0x%04lx (irq = %d) is a %s\n", + state->line, + state->port, state->irq, + uart_config[state->type].name); + } + + if (tty_register_driver(hp_simserial_driver)) + panic("Couldn't register simserial driver\n"); + + return 0; +} + +#ifndef MODULE +__initcall(simrs_init); +#endif diff --git a/arch/ia64/hp/zx1/Makefile b/arch/ia64/hp/zx1/Makefile new file mode 100644 index 000000000000..61e878729d1e --- /dev/null +++ b/arch/ia64/hp/zx1/Makefile @@ -0,0 +1,8 @@ +# +# ia64/hp/zx1/Makefile +# +# Copyright (C) 2002 Hewlett Packard +# Copyright (C) Alex Williamson (alex_williamson@hp.com) +# + +obj-$(CONFIG_IA64_GENERIC) += hpzx1_machvec.o hpzx1_swiotlb_machvec.o diff --git a/arch/ia64/hp/zx1/hpzx1_machvec.c b/arch/ia64/hp/zx1/hpzx1_machvec.c new file mode 100644 index 000000000000..32518b0f923e --- /dev/null +++ b/arch/ia64/hp/zx1/hpzx1_machvec.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME hpzx1 +#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpzx1.h> +#include <asm/machvec_init.h> diff --git a/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c b/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c new file mode 100644 index 000000000000..4392a96b3c58 --- /dev/null +++ b/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME hpzx1_swiotlb +#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpzx1_swiotlb.h> +#include <asm/machvec_init.h> diff --git a/arch/ia64/ia32/Makefile b/arch/ia64/ia32/Makefile new file mode 100644 index 000000000000..2ed90da81166 --- /dev/null +++ b/arch/ia64/ia32/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the ia32 kernel emulation subsystem. +# + +obj-y := ia32_entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o \ + ia32_support.o ia32_traps.o binfmt_elf32.o ia32_ldt.o + +CFLAGS_ia32_ioctl.o += -Ifs/ + +# Don't let GCC uses f16-f31 so that save_ia32_fpstate_live() and +# restore_ia32_fpstate_live() can be sure the live register contain user-level state. +CFLAGS_ia32_signal.o += -mfixed-range=f16-f31 diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c new file mode 100644 index 000000000000..31de70b7c67f --- /dev/null +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -0,0 +1,294 @@ +/* + * IA-32 ELF support. + * + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 2001 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 06/16/00 A. Mallick initialize csd/ssd/tssd/cflg for ia32_load_state + * 04/13/01 D. Mosberger dropped saving tssd in ar.k1---it's not needed + * 09/14/01 D. Mosberger fixed memory management for gdt/tss page + */ +#include <linux/config.h> + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/security.h> + +#include <asm/param.h> +#include <asm/signal.h> + +#include "ia32priv.h" +#include "elfcore32.h" + +/* Override some function names */ +#undef start_thread +#define start_thread ia32_start_thread +#define elf_format elf32_format +#define init_elf_binfmt init_elf32_binfmt +#define exit_elf_binfmt exit_elf32_binfmt + +#undef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC IA32_CLOCKS_PER_SEC + +extern void ia64_elf32_init (struct pt_regs *regs); + +static void elf32_set_personality (void); + +#define setup_arg_pages(bprm,tos,exec) ia32_setup_arg_pages(bprm,exec) +#define elf_map elf32_map + +#undef SET_PERSONALITY +#define SET_PERSONALITY(ex, ibcs2) elf32_set_personality() + +#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) + +/* Ugly but avoids duplication */ +#include "../../../fs/binfmt_elf.c" + +extern struct page *ia32_shared_page[]; +extern unsigned long *ia32_gdt; +extern struct page *ia32_gate_page; + +struct page * +ia32_install_shared_page (struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page *pg = ia32_shared_page[smp_processor_id()]; + get_page(pg); + if (type) + *type = VM_FAULT_MINOR; + return pg; +} + +struct page * +ia32_install_gate_page (struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page *pg = ia32_gate_page; + get_page(pg); + if (type) + *type = VM_FAULT_MINOR; + return pg; +} + + +static struct vm_operations_struct ia32_shared_page_vm_ops = { + .nopage = ia32_install_shared_page +}; + +static struct vm_operations_struct ia32_gate_page_vm_ops = { + .nopage = ia32_install_gate_page +}; + +void +ia64_elf32_init (struct pt_regs *regs) +{ + struct vm_area_struct *vma; + + /* + * Map GDT below 4GB, where the processor can find it. We need to map + * it with privilege level 3 because the IVE uses non-privileged accesses to these + * tables. IA-32 segmentation is used to protect against IA-32 accesses to them. + */ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = IA32_GDT_OFFSET; + vma->vm_end = vma->vm_start + PAGE_SIZE; + vma->vm_page_prot = PAGE_SHARED; + vma->vm_flags = VM_READ|VM_MAYREAD|VM_RESERVED; + vma->vm_ops = &ia32_shared_page_vm_ops; + down_write(¤t->mm->mmap_sem); + { + if (insert_vm_struct(current->mm, vma)) { + kmem_cache_free(vm_area_cachep, vma); + up_write(¤t->mm->mmap_sem); + BUG(); + } + } + up_write(¤t->mm->mmap_sem); + } + + /* + * When user stack is not executable, push sigreturn code to stack makes + * segmentation fault raised when returning to kernel. So now sigreturn + * code is locked in specific gate page, which is pointed by pretcode + * when setup_frame_ia32 + */ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = IA32_GATE_OFFSET; + vma->vm_end = vma->vm_start + PAGE_SIZE; + vma->vm_page_prot = PAGE_COPY_EXEC; + vma->vm_flags = VM_READ | VM_MAYREAD | VM_EXEC + | VM_MAYEXEC | VM_RESERVED; + vma->vm_ops = &ia32_gate_page_vm_ops; + down_write(¤t->mm->mmap_sem); + { + if (insert_vm_struct(current->mm, vma)) { + kmem_cache_free(vm_area_cachep, vma); + up_write(¤t->mm->mmap_sem); + BUG(); + } + } + up_write(¤t->mm->mmap_sem); + } + + /* + * Install LDT as anonymous memory. This gives us all-zero segment descriptors + * until a task modifies them via modify_ldt(). + */ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = IA32_LDT_OFFSET; + vma->vm_end = vma->vm_start + PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE); + vma->vm_page_prot = PAGE_SHARED; + vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE; + down_write(¤t->mm->mmap_sem); + { + if (insert_vm_struct(current->mm, vma)) { + kmem_cache_free(vm_area_cachep, vma); + up_write(¤t->mm->mmap_sem); + BUG(); + } + } + up_write(¤t->mm->mmap_sem); + } + + ia64_psr(regs)->ac = 0; /* turn off alignment checking */ + regs->loadrs = 0; + /* + * According to the ABI %edx points to an `atexit' handler. Since we don't have + * one we'll set it to 0 and initialize all the other registers just to make + * things more deterministic, ala the i386 implementation. + */ + regs->r8 = 0; /* %eax */ + regs->r11 = 0; /* %ebx */ + regs->r9 = 0; /* %ecx */ + regs->r10 = 0; /* %edx */ + regs->r13 = 0; /* %ebp */ + regs->r14 = 0; /* %esi */ + regs->r15 = 0; /* %edi */ + + current->thread.eflag = IA32_EFLAG; + current->thread.fsr = IA32_FSR_DEFAULT; + current->thread.fcr = IA32_FCR_DEFAULT; + current->thread.fir = 0; + current->thread.fdr = 0; + + /* + * Setup GDTD. Note: GDTD is the descrambled version of the pseudo-descriptor + * format defined by Figure 3-11 "Pseudo-Descriptor Format" in the IA-32 + * architecture manual. Also note that the only fields that are not ignored are + * `base', `limit', 'G', `P' (must be 1) and `S' (must be 0). + */ + regs->r31 = IA32_SEG_UNSCRAMBLE(IA32_SEG_DESCRIPTOR(IA32_GDT_OFFSET, IA32_PAGE_SIZE - 1, + 0, 0, 0, 1, 0, 0, 0)); + /* Setup the segment selectors */ + regs->r16 = (__USER_DS << 16) | __USER_DS; /* ES == DS, GS, FS are zero */ + regs->r17 = (__USER_DS << 16) | __USER_CS; /* SS, CS; ia32_load_state() sets TSS and LDT */ + + ia32_load_segment_descriptors(current); + ia32_load_state(current); +} + +int +ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) +{ + unsigned long stack_base; + struct vm_area_struct *mpnt; + struct mm_struct *mm = current->mm; + int i, ret; + + stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; + mm->arg_start = bprm->p + stack_base; + + bprm->p += stack_base; + if (bprm->loader) + bprm->loader += stack_base; + bprm->exec += stack_base; + + mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!mpnt) + return -ENOMEM; + + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p)) + >> PAGE_SHIFT)) { + kmem_cache_free(vm_area_cachep, mpnt); + return -ENOMEM; + } + + memset(mpnt, 0, sizeof(*mpnt)); + + down_write(¤t->mm->mmap_sem); + { + mpnt->vm_mm = current->mm; + mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; + mpnt->vm_end = IA32_STACK_TOP; + if (executable_stack == EXSTACK_ENABLE_X) + mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; + else if (executable_stack == EXSTACK_DISABLE_X) + mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC; + else + mpnt->vm_flags = VM_STACK_FLAGS; + mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)? + PAGE_COPY_EXEC: PAGE_COPY; + if ((ret = insert_vm_struct(current->mm, mpnt))) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, mpnt); + return ret; + } + current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt); + } + + for (i = 0 ; i < MAX_ARG_PAGES ; i++) { + struct page *page = bprm->page[i]; + if (page) { + bprm->page[i] = NULL; + install_arg_page(mpnt, page, stack_base); + } + stack_base += PAGE_SIZE; + } + up_write(¤t->mm->mmap_sem); + + /* Can't do it in ia64_elf32_init(). Needs to be done before calls to + elf32_map() */ + current->thread.ppl = ia32_init_pp_list(); + + return 0; +} + +static void +elf32_set_personality (void) +{ + set_personality(PER_LINUX32); + current->thread.map_base = IA32_PAGE_OFFSET/3; + current->thread.task_size = IA32_PAGE_OFFSET; /* use what Linux/x86 uses... */ + set_fs(USER_DS); /* set addr limit for new TASK_SIZE */ +} + +static unsigned long +elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) +{ + unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK; + + return ia32_do_mmap(filep, (addr & IA32_PAGE_MASK), eppnt->p_filesz + pgoff, prot, type, + eppnt->p_offset - pgoff); +} + +#define cpu_uses_ia32el() (local_cpu_data->family > 0x1f) + +static int __init check_elf32_binfmt(void) +{ + if (cpu_uses_ia32el()) { + printk("Please use IA-32 EL for executing IA-32 binaries\n"); + return unregister_binfmt(&elf_format); + } + return 0; +} + +module_init(check_elf32_binfmt) diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h new file mode 100644 index 000000000000..b73b8b6b10c1 --- /dev/null +++ b/arch/ia64/ia32/elfcore32.h @@ -0,0 +1,138 @@ +/* + * IA-32 ELF core dump support. + * + * Copyright (C) 2003 Arun Sharma <arun.sharma@intel.com> + * + * Derived from the x86_64 version + */ +#ifndef _ELFCORE32_H_ +#define _ELFCORE32_H_ + +#include <asm/intrinsics.h> +#include <asm/uaccess.h> + +#define USE_ELF_CORE_DUMP 1 + +/* Override elfcore.h */ +#define _LINUX_ELFCORE_H 1 +typedef unsigned int elf_greg_t; + +#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct ia32_user_i387_struct elf_fpregset_t; +typedef struct ia32_user_fxsr_struct elf_fpxregset_t; + +struct elf_siginfo +{ + int si_signo; /* signal number */ + int si_code; /* extra code */ + int si_errno; /* errno */ +}; + +#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0) + +struct elf_prstatus +{ + struct elf_siginfo pr_info; /* Info associated with signal */ + short pr_cursig; /* Current signal */ + unsigned int pr_sigpend; /* Set of pending signals */ + unsigned int pr_sighold; /* Set of held signals */ + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct compat_timeval pr_utime; /* User time */ + struct compat_timeval pr_stime; /* System time */ + struct compat_timeval pr_cutime; /* Cumulative user time */ + struct compat_timeval pr_cstime; /* Cumulative system time */ + elf_gregset_t pr_reg; /* GP registers */ + int pr_fpvalid; /* True if math co-processor being used. */ +}; + +#define ELF_PRARGSZ (80) /* Number of chars for args */ + +struct elf_prpsinfo +{ + char pr_state; /* numeric process state */ + char pr_sname; /* char for pr_state */ + char pr_zomb; /* zombie */ + char pr_nice; /* nice val */ + unsigned int pr_flag; /* flags */ + __u16 pr_uid; + __u16 pr_gid; + pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; + /* Lots missing */ + char pr_fname[16]; /* filename of executable */ + char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ +}; + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + pr_reg[0] = regs->r11; \ + pr_reg[1] = regs->r9; \ + pr_reg[2] = regs->r10; \ + pr_reg[3] = regs->r14; \ + pr_reg[4] = regs->r15; \ + pr_reg[5] = regs->r13; \ + pr_reg[6] = regs->r8; \ + pr_reg[7] = regs->r16 & 0xffff; \ + pr_reg[8] = (regs->r16 >> 16) & 0xffff; \ + pr_reg[9] = (regs->r16 >> 32) & 0xffff; \ + pr_reg[10] = (regs->r16 >> 48) & 0xffff; \ + pr_reg[11] = regs->r1; \ + pr_reg[12] = regs->cr_iip; \ + pr_reg[13] = regs->r17 & 0xffff; \ + pr_reg[14] = ia64_getreg(_IA64_REG_AR_EFLAG); \ + pr_reg[15] = regs->r12; \ + pr_reg[16] = (regs->r17 >> 16) & 0xffff; + +static inline void elf_core_copy_regs(elf_gregset_t *elfregs, + struct pt_regs *regs) +{ + ELF_CORE_COPY_REGS((*elfregs), regs) +} + +static inline int elf_core_copy_task_regs(struct task_struct *t, + elf_gregset_t* elfregs) +{ + struct pt_regs *pp = ia64_task_regs(t); + ELF_CORE_COPY_REGS((*elfregs), pp); + return 1; +} + +static inline int +elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu) +{ + struct ia32_user_i387_struct *fpstate = (void*)fpu; + mm_segment_t old_fs; + + if (!tsk_used_math(tsk)) + return 0; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + save_ia32_fpstate(tsk, (struct ia32_user_i387_struct __user *) fpstate); + set_fs(old_fs); + + return 1; +} + +#define ELF_CORE_COPY_XFPREGS 1 +static inline int +elf_core_copy_task_xfpregs(struct task_struct *tsk, elf_fpxregset_t *xfpu) +{ + struct ia32_user_fxsr_struct *fpxstate = (void*) xfpu; + mm_segment_t old_fs; + + if (!tsk_used_math(tsk)) + return 0; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + save_ia32_fpxstate(tsk, (struct ia32_user_fxsr_struct __user *) fpxstate); + set_fs(old_fs); + + return 1; +} + +#endif /* _ELFCORE32_H_ */ diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S new file mode 100644 index 000000000000..829a6d80711c --- /dev/null +++ b/arch/ia64/ia32/ia32_entry.S @@ -0,0 +1,500 @@ +#include <asm/asmmacro.h> +#include <asm/ia32.h> +#include <asm/offsets.h> +#include <asm/signal.h> +#include <asm/thread_info.h> + +#include "../kernel/minstate.h" + + /* + * execve() is special because in case of success, we need to + * setup a null register window frame (in case an IA-32 process + * is exec'ing an IA-64 program). + */ +ENTRY(ia32_execve) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(3) + alloc loc1=ar.pfs,3,2,4,0 + mov loc0=rp + .body + zxt4 out0=in0 // filename + ;; // stop bit between alloc and call + zxt4 out1=in1 // argv + zxt4 out2=in2 // envp + add out3=16,sp // regs + br.call.sptk.few rp=sys32_execve +1: cmp.ge p6,p0=r8,r0 + mov ar.pfs=loc1 // restore ar.pfs + ;; +(p6) mov ar.pfs=r0 // clear ar.pfs in case of success + sxt4 r8=r8 // return 64-bit result + mov rp=loc0 + br.ret.sptk.few rp +END(ia32_execve) + +ENTRY(ia32_clone) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) + alloc r16=ar.pfs,5,2,6,0 + DO_SAVE_SWITCH_STACK + mov loc0=rp + mov loc1=r16 // save ar.pfs across do_fork + .body + zxt4 out1=in1 // newsp + mov out3=16 // stacksize (compensates for 16-byte scratch area) + adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = ®s + mov out0=in0 // out0 = clone_flags + zxt4 out4=in2 // out4 = parent_tidptr + zxt4 out5=in4 // out5 = child_tidptr + br.call.sptk.many rp=do_fork +.ret0: .restore sp + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(ia32_clone) + +ENTRY(sys32_rt_sigsuspend) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs + mov loc0=rp + mov out0=in0 // mask + mov out1=in1 // sigsetsize + mov out2=sp // out2 = &sigscratch + .fframe 16 + adds sp=-16,sp // allocate dummy "sigscratch" + ;; + .body + br.call.sptk.many rp=ia32_rt_sigsuspend +1: .restore sp + adds sp=16,sp + mov rp=loc0 + mov ar.pfs=loc1 + br.ret.sptk.many rp +END(sys32_rt_sigsuspend) + +ENTRY(sys32_sigsuspend) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs + mov loc0=rp + mov out0=in2 // mask (first two args are ignored) + ;; + mov out1=sp // out1 = &sigscratch + .fframe 16 + adds sp=-16,sp // allocate dummy "sigscratch" + .body + br.call.sptk.many rp=ia32_sigsuspend +1: .restore sp + adds sp=16,sp + mov rp=loc0 + mov ar.pfs=loc1 + br.ret.sptk.many rp +END(sys32_sigsuspend) + +GLOBAL_ENTRY(ia32_ret_from_clone) + PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 + /* + * We need to call schedule_tail() to complete the scheduling process. + * Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the + * address of the previously executing task. + */ + br.call.sptk.many rp=ia64_invoke_schedule_tail +} +.ret1: + adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; + ld4 r2=[r2] + ;; + mov r8=0 + and r2=_TIF_SYSCALL_TRACEAUDIT,r2 + ;; + cmp.ne p6,p0=r2,r0 +(p6) br.cond.spnt .ia32_strace_check_retval + ;; // prevent RAW on r8 +END(ia32_ret_from_clone) + // fall thrugh +GLOBAL_ENTRY(ia32_ret_from_syscall) + PT_REGS_UNWIND_INFO(0) + + cmp.ge p6,p7=r8,r0 // syscall executed successfully? + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + ;; + alloc r3=ar.pfs,0,0,0,0 // drop the syscall argument frame + st8 [r2]=r8 // store return value in slot for r8 + br.cond.sptk.many ia64_leave_kernel +END(ia32_ret_from_syscall) + + // + // Invoke a system call, but do some tracing before and after the call. + // We MUST preserve the current register frame throughout this routine + // because some system calls (such as ia64_execve) directly + // manipulate ar.pfs. + // + // Input: + // r8 = syscall number + // b6 = syscall entry point + // +GLOBAL_ENTRY(ia32_trace_syscall) + PT_REGS_UNWIND_INFO(0) + mov r3=-38 + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp + ;; + st8 [r2]=r3 // initialize return code to -ENOSYS + br.call.sptk.few rp=syscall_trace_enter // give parent a chance to catch syscall args +.ret2: // Need to reload arguments (they may be changed by the tracing process) + adds r2=IA64_PT_REGS_R1_OFFSET+16,sp // r2 = &pt_regs.r1 + adds r3=IA64_PT_REGS_R13_OFFSET+16,sp // r3 = &pt_regs.r13 + mov r15=IA32_NR_syscalls + ;; + ld4 r8=[r2],IA64_PT_REGS_R9_OFFSET-IA64_PT_REGS_R1_OFFSET + movl r16=ia32_syscall_table + ;; + ld4 r33=[r2],8 // r9 == ecx + ld4 r37=[r3],16 // r13 == ebp + cmp.ltu.unc p6,p7=r8,r15 + ;; + ld4 r34=[r2],8 // r10 == edx + ld4 r36=[r3],8 // r15 == edi +(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number + ;; + ld8 r16=[r16] + ;; + ld4 r32=[r2],8 // r11 == ebx + mov b6=r16 + ld4 r35=[r3],8 // r14 == esi + br.call.sptk.few rp=b6 // do the syscall +.ia32_strace_check_retval: + cmp.lt p6,p0=r8,r0 // syscall failed? + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + ;; + st8.spill [r2]=r8 // store return value in slot for r8 + br.call.sptk.few rp=syscall_trace_leave // give parent a chance to catch return value +.ret4: alloc r2=ar.pfs,0,0,0,0 // drop the syscall argument frame + br.cond.sptk.many ia64_leave_kernel +END(ia32_trace_syscall) + +GLOBAL_ENTRY(sys32_vfork) + alloc r16=ar.pfs,2,2,4,0;; + mov out0=IA64_CLONE_VFORK|IA64_CLONE_VM|SIGCHLD // out0 = clone_flags + br.cond.sptk.few .fork1 // do the work +END(sys32_vfork) + +GLOBAL_ENTRY(sys32_fork) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc r16=ar.pfs,2,2,4,0 + mov out0=SIGCHLD // out0 = clone_flags + ;; +.fork1: + mov loc0=rp + mov loc1=r16 // save ar.pfs across do_fork + DO_SAVE_SWITCH_STACK + + .body + + mov out1=0 + mov out3=0 + adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = ®s + br.call.sptk.few rp=do_fork +.ret5: .restore sp + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(sys32_fork) + + .rodata + .align 8 + .globl ia32_syscall_table +ia32_syscall_table: + data8 sys_ni_syscall /* 0 - old "setup(" system call*/ + data8 sys_exit + data8 sys32_fork + data8 sys_read + data8 sys_write + data8 sys32_open /* 5 */ + data8 sys_close + data8 sys32_waitpid + data8 sys_creat + data8 sys_link + data8 sys_unlink /* 10 */ + data8 ia32_execve + data8 sys_chdir + data8 compat_sys_time + data8 sys_mknod + data8 sys_chmod /* 15 */ + data8 sys_lchown /* 16-bit version */ + data8 sys_ni_syscall /* old break syscall holder */ + data8 sys_ni_syscall + data8 sys32_lseek + data8 sys_getpid /* 20 */ + data8 compat_sys_mount + data8 sys_oldumount + data8 sys_setuid /* 16-bit version */ + data8 sys_getuid /* 16-bit version */ + data8 compat_sys_stime /* 25 */ + data8 sys32_ptrace + data8 sys32_alarm + data8 sys_ni_syscall + data8 sys32_pause + data8 compat_sys_utime /* 30 */ + data8 sys_ni_syscall /* old stty syscall holder */ + data8 sys_ni_syscall /* old gtty syscall holder */ + data8 sys_access + data8 sys_nice + data8 sys_ni_syscall /* 35 */ /* old ftime syscall holder */ + data8 sys_sync + data8 sys_kill + data8 sys_rename + data8 sys_mkdir + data8 sys_rmdir /* 40 */ + data8 sys_dup + data8 sys32_pipe + data8 compat_sys_times + data8 sys_ni_syscall /* old prof syscall holder */ + data8 sys32_brk /* 45 */ + data8 sys_setgid /* 16-bit version */ + data8 sys_getgid /* 16-bit version */ + data8 sys32_signal + data8 sys_geteuid /* 16-bit version */ + data8 sys_getegid /* 16-bit version */ /* 50 */ + data8 sys_acct + data8 sys_umount /* recycled never used phys( */ + data8 sys_ni_syscall /* old lock syscall holder */ + data8 compat_sys_ioctl + data8 compat_sys_fcntl /* 55 */ + data8 sys_ni_syscall /* old mpx syscall holder */ + data8 sys_setpgid + data8 sys_ni_syscall /* old ulimit syscall holder */ + data8 sys_ni_syscall + data8 sys_umask /* 60 */ + data8 sys_chroot + data8 sys_ustat + data8 sys_dup2 + data8 sys_getppid + data8 sys_getpgrp /* 65 */ + data8 sys_setsid + data8 sys32_sigaction + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_setreuid /* 16-bit version */ /* 70 */ + data8 sys_setregid /* 16-bit version */ + data8 sys32_sigsuspend + data8 compat_sys_sigpending + data8 sys_sethostname + data8 compat_sys_setrlimit /* 75 */ + data8 compat_sys_old_getrlimit + data8 compat_sys_getrusage + data8 sys32_gettimeofday + data8 sys32_settimeofday + data8 sys32_getgroups16 /* 80 */ + data8 sys32_setgroups16 + data8 sys32_old_select + data8 sys_symlink + data8 sys_ni_syscall + data8 sys_readlink /* 85 */ + data8 sys_uselib + data8 sys_swapon + data8 sys_reboot + data8 sys32_readdir + data8 sys32_mmap /* 90 */ + data8 sys32_munmap + data8 sys_truncate + data8 sys_ftruncate + data8 sys_fchmod + data8 sys_fchown /* 16-bit version */ /* 95 */ + data8 sys_getpriority + data8 sys_setpriority + data8 sys_ni_syscall /* old profil syscall holder */ + data8 compat_sys_statfs + data8 compat_sys_fstatfs /* 100 */ + data8 sys_ni_syscall /* ioperm */ + data8 compat_sys_socketcall + data8 sys_syslog + data8 compat_sys_setitimer + data8 compat_sys_getitimer /* 105 */ + data8 compat_sys_newstat + data8 compat_sys_newlstat + data8 compat_sys_newfstat + data8 sys_ni_syscall + data8 sys_ni_syscall /* iopl */ /* 110 */ + data8 sys_vhangup + data8 sys_ni_syscall /* used to be sys_idle */ + data8 sys_ni_syscall + data8 compat_sys_wait4 + data8 sys_swapoff /* 115 */ + data8 sys32_sysinfo + data8 sys32_ipc + data8 sys_fsync + data8 sys32_sigreturn + data8 ia32_clone /* 120 */ + data8 sys_setdomainname + data8 sys32_newuname + data8 sys32_modify_ldt + data8 sys_ni_syscall /* adjtimex */ + data8 sys32_mprotect /* 125 */ + data8 compat_sys_sigprocmask + data8 sys_ni_syscall /* create_module */ + data8 sys_ni_syscall /* init_module */ + data8 sys_ni_syscall /* delete_module */ + data8 sys_ni_syscall /* get_kernel_syms */ /* 130 */ + data8 sys_quotactl + data8 sys_getpgid + data8 sys_fchdir + data8 sys_ni_syscall /* sys_bdflush */ + data8 sys_sysfs /* 135 */ + data8 sys32_personality + data8 sys_ni_syscall /* for afs_syscall */ + data8 sys_setfsuid /* 16-bit version */ + data8 sys_setfsgid /* 16-bit version */ + data8 sys_llseek /* 140 */ + data8 compat_sys_getdents + data8 compat_sys_select + data8 sys_flock + data8 sys32_msync + data8 compat_sys_readv /* 145 */ + data8 compat_sys_writev + data8 sys_getsid + data8 sys_fdatasync + data8 sys32_sysctl + data8 sys_mlock /* 150 */ + data8 sys_munlock + data8 sys_mlockall + data8 sys_munlockall + data8 sys_sched_setparam + data8 sys_sched_getparam /* 155 */ + data8 sys_sched_setscheduler + data8 sys_sched_getscheduler + data8 sys_sched_yield + data8 sys_sched_get_priority_max + data8 sys_sched_get_priority_min /* 160 */ + data8 sys32_sched_rr_get_interval + data8 compat_sys_nanosleep + data8 sys32_mremap + data8 sys_setresuid /* 16-bit version */ + data8 sys32_getresuid16 /* 16-bit version */ /* 165 */ + data8 sys_ni_syscall /* vm86 */ + data8 sys_ni_syscall /* sys_query_module */ + data8 sys_poll + data8 sys_ni_syscall /* nfsservctl */ + data8 sys_setresgid /* 170 */ + data8 sys32_getresgid16 + data8 sys_prctl + data8 sys32_rt_sigreturn + data8 sys32_rt_sigaction + data8 sys32_rt_sigprocmask /* 175 */ + data8 sys_rt_sigpending + data8 compat_sys_rt_sigtimedwait + data8 sys32_rt_sigqueueinfo + data8 sys32_rt_sigsuspend + data8 sys32_pread /* 180 */ + data8 sys32_pwrite + data8 sys_chown /* 16-bit version */ + data8 sys_getcwd + data8 sys_capget + data8 sys_capset /* 185 */ + data8 sys32_sigaltstack + data8 sys32_sendfile + data8 sys_ni_syscall /* streams1 */ + data8 sys_ni_syscall /* streams2 */ + data8 sys32_vfork /* 190 */ + data8 compat_sys_getrlimit + data8 sys32_mmap2 + data8 sys32_truncate64 + data8 sys32_ftruncate64 + data8 sys32_stat64 /* 195 */ + data8 sys32_lstat64 + data8 sys32_fstat64 + data8 sys_lchown + data8 sys_getuid + data8 sys_getgid /* 200 */ + data8 sys_geteuid + data8 sys_getegid + data8 sys_setreuid + data8 sys_setregid + data8 sys_getgroups /* 205 */ + data8 sys_setgroups + data8 sys_fchown + data8 sys_setresuid + data8 sys_getresuid + data8 sys_setresgid /* 210 */ + data8 sys_getresgid + data8 sys_chown + data8 sys_setuid + data8 sys_setgid + data8 sys_setfsuid /* 215 */ + data8 sys_setfsgid + data8 sys_pivot_root + data8 sys_mincore + data8 sys_madvise + data8 compat_sys_getdents64 /* 220 */ + data8 compat_sys_fcntl64 + data8 sys_ni_syscall /* reserved for TUX */ + data8 sys_ni_syscall /* reserved for Security */ + data8 sys_gettid + data8 sys_readahead /* 225 */ + data8 sys_setxattr + data8 sys_lsetxattr + data8 sys_fsetxattr + data8 sys_getxattr + data8 sys_lgetxattr /* 230 */ + data8 sys_fgetxattr + data8 sys_listxattr + data8 sys_llistxattr + data8 sys_flistxattr + data8 sys_removexattr /* 235 */ + data8 sys_lremovexattr + data8 sys_fremovexattr + data8 sys_tkill + data8 sys_sendfile64 + data8 compat_sys_futex /* 240 */ + data8 compat_sys_sched_setaffinity + data8 compat_sys_sched_getaffinity + data8 sys32_set_thread_area + data8 sys32_get_thread_area + data8 compat_sys_io_setup /* 245 */ + data8 sys_io_destroy + data8 compat_sys_io_getevents + data8 compat_sys_io_submit + data8 sys_io_cancel + data8 sys_fadvise64 /* 250 */ + data8 sys_ni_syscall + data8 sys_exit_group + data8 sys_lookup_dcookie + data8 sys_epoll_create + data8 sys32_epoll_ctl /* 255 */ + data8 sys32_epoll_wait + data8 sys_remap_file_pages + data8 sys_set_tid_address + data8 sys32_timer_create + data8 compat_sys_timer_settime /* 260 */ + data8 compat_sys_timer_gettime + data8 sys_timer_getoverrun + data8 sys_timer_delete + data8 compat_sys_clock_settime + data8 compat_sys_clock_gettime /* 265 */ + data8 compat_sys_clock_getres + data8 compat_sys_clock_nanosleep + data8 compat_sys_statfs64 + data8 compat_sys_fstatfs64 + data8 sys_tgkill /* 270 */ + data8 compat_sys_utimes + data8 sys32_fadvise64_64 + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall /* 275 */ + data8 sys_ni_syscall + data8 compat_sys_mq_open + data8 sys_mq_unlink + data8 compat_sys_mq_timedsend + data8 compat_sys_mq_timedreceive /* 280 */ + data8 compat_sys_mq_notify + data8 compat_sys_mq_getsetattr + data8 sys_ni_syscall /* reserved for kexec */ + data8 compat_sys_waitid + + // guard against failures to increase IA32_NR_syscalls + .org ia32_syscall_table + 8*IA32_NR_syscalls diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c new file mode 100644 index 000000000000..9845dabe2613 --- /dev/null +++ b/arch/ia64/ia32/ia32_ioctl.c @@ -0,0 +1,48 @@ +/* + * IA32 Architecture-specific ioctl shim code + * + * Copyright (C) 2000 VA Linux Co + * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> + * Copyright (C) 2001-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <linux/signal.h> /* argh, msdos_fs.h isn't self-contained... */ +#include <linux/syscalls.h> +#include "ia32priv.h" + +#define INCLUDES +#include "compat_ioctl.c" +#include <asm/ioctl32.h> + +#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) + +#define DO_IOCTL(fd, cmd, arg) ({ \ + int _ret; \ + mm_segment_t _old_fs = get_fs(); \ + \ + set_fs(KERNEL_DS); \ + _ret = sys_ioctl(fd, cmd, (unsigned long)arg); \ + set_fs(_old_fs); \ + _ret; \ +}) + +#define CODE +#include "compat_ioctl.c" + +typedef int (* ioctl32_handler_t)(unsigned int, unsigned int, unsigned long, struct file *); + +#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL((cmd),sys_ioctl) +#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl32_handler_t)(handler), NULL }, +#define IOCTL_TABLE_START \ + struct ioctl_trans ioctl_start[] = { +#define IOCTL_TABLE_END \ + }; + +IOCTL_TABLE_START +#define DECLARES +#include "compat_ioctl.c" +#include <linux/compat_ioctl.h> +IOCTL_TABLE_END + +int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/ia64/ia32/ia32_ldt.c b/arch/ia64/ia32/ia32_ldt.c new file mode 100644 index 000000000000..a152738c7d0d --- /dev/null +++ b/arch/ia64/ia32/ia32_ldt.c @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2001, 2004 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * Adapted from arch/i386/kernel/ldt.c + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/vmalloc.h> + +#include <asm/uaccess.h> + +#include "ia32priv.h" + +/* + * read_ldt() is not really atomic - this is not a problem since synchronization of reads + * and writes done to the LDT has to be assured by user-space anyway. Writes are atomic, + * to protect the security checks done on new descriptors. + */ +static int +read_ldt (void __user *ptr, unsigned long bytecount) +{ + unsigned long bytes_left, n; + char __user *src, *dst; + char buf[256]; /* temporary buffer (don't overflow kernel stack!) */ + + if (bytecount > IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE) + bytecount = IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE; + + bytes_left = bytecount; + + src = (void __user *) IA32_LDT_OFFSET; + dst = ptr; + + while (bytes_left) { + n = sizeof(buf); + if (n > bytes_left) + n = bytes_left; + + /* + * We know we're reading valid memory, but we still must guard against + * running out of memory. + */ + if (__copy_from_user(buf, src, n)) + return -EFAULT; + + if (copy_to_user(dst, buf, n)) + return -EFAULT; + + src += n; + dst += n; + bytes_left -= n; + } + return bytecount; +} + +static int +read_default_ldt (void __user * ptr, unsigned long bytecount) +{ + unsigned long size; + int err; + + /* XXX fix me: should return equivalent of default_ldt[0] */ + err = 0; + size = 8; + if (size > bytecount) + size = bytecount; + + err = size; + if (clear_user(ptr, size)) + err = -EFAULT; + + return err; +} + +static int +write_ldt (void __user * ptr, unsigned long bytecount, int oldmode) +{ + struct ia32_user_desc ldt_info; + __u64 entry; + int ret; + + if (bytecount != sizeof(ldt_info)) + return -EINVAL; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + return -EFAULT; + + if (ldt_info.entry_number >= IA32_LDT_ENTRIES) + return -EINVAL; + if (ldt_info.contents == 3) { + if (oldmode) + return -EINVAL; + if (ldt_info.seg_not_present == 0) + return -EINVAL; + } + + if (ldt_info.base_addr == 0 && ldt_info.limit == 0 + && (oldmode || (ldt_info.contents == 0 && ldt_info.read_exec_only == 1 + && ldt_info.seg_32bit == 0 && ldt_info.limit_in_pages == 0 + && ldt_info.seg_not_present == 1 && ldt_info.useable == 0))) + /* allow LDTs to be cleared by the user */ + entry = 0; + else + /* we must set the "Accessed" bit as IVE doesn't emulate it */ + entry = IA32_SEG_DESCRIPTOR(ldt_info.base_addr, ldt_info.limit, + (((ldt_info.read_exec_only ^ 1) << 1) + | (ldt_info.contents << 2)) | 1, + 1, 3, ldt_info.seg_not_present ^ 1, + (oldmode ? 0 : ldt_info.useable), + ldt_info.seg_32bit, + ldt_info.limit_in_pages); + /* + * Install the new entry. We know we're accessing valid (mapped) user-level + * memory, but we still need to guard against out-of-memory, hence we must use + * put_user(). + */ + ret = __put_user(entry, (__u64 __user *) IA32_LDT_OFFSET + ldt_info.entry_number); + ia32_load_segment_descriptors(current); + return ret; +} + +asmlinkage int +sys32_modify_ldt (int func, unsigned int ptr, unsigned int bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(compat_ptr(ptr), bytecount); + break; + case 1: + ret = write_ldt(compat_ptr(ptr), bytecount, 1); + break; + case 2: + ret = read_default_ldt(compat_ptr(ptr), bytecount); + break; + case 0x11: + ret = write_ldt(compat_ptr(ptr), bytecount, 0); + break; + } + return ret; +} diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c new file mode 100644 index 000000000000..19b02adce68c --- /dev/null +++ b/arch/ia64/ia32/ia32_signal.c @@ -0,0 +1,1036 @@ +/* + * IA32 Architecture-specific signal handling support. + * + * Copyright (C) 1999, 2001-2002, 2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 2000 VA Linux Co + * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> + * + * Derived from i386 and Alpha versions. + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/personality.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/syscalls.h> +#include <linux/unistd.h> +#include <linux/wait.h> +#include <linux/compat.h> + +#include <asm/intrinsics.h> +#include <asm/uaccess.h> +#include <asm/rse.h> +#include <asm/sigcontext.h> +#include <asm/segment.h> + +#include "ia32priv.h" + +#include "../kernel/sigframe.h" + +#define A(__x) ((unsigned long)(__x)) + +#define DEBUG_SIG 0 +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#define __IA32_NR_sigreturn 119 +#define __IA32_NR_rt_sigreturn 173 + +struct sigframe_ia32 +{ + int pretcode; + int sig; + struct sigcontext_ia32 sc; + struct _fpstate_ia32 fpstate; + unsigned int extramask[_COMPAT_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe_ia32 +{ + int pretcode; + int sig; + int pinfo; + int puc; + compat_siginfo_t info; + struct ucontext_ia32 uc; + struct _fpstate_ia32 fpstate; + char retcode[8]; +}; + +int +copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from) +{ + unsigned long tmp; + int err; + + if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + err = __get_user(to->si_signo, &from->si_signo); + err |= __get_user(to->si_errno, &from->si_errno); + err |= __get_user(to->si_code, &from->si_code); + + if (to->si_code < 0) + err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); + else { + switch (to->si_code >> 16) { + case __SI_CHLD >> 16: + err |= __get_user(to->si_utime, &from->si_utime); + err |= __get_user(to->si_stime, &from->si_stime); + err |= __get_user(to->si_status, &from->si_status); + default: + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + break; + case __SI_FAULT >> 16: + err |= __get_user(tmp, &from->si_addr); + to->si_addr = (void __user *) tmp; + break; + case __SI_POLL >> 16: + err |= __get_user(to->si_band, &from->si_band); + err |= __get_user(to->si_fd, &from->si_fd); + break; + case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + err |= __get_user(to->si_int, &from->si_int); + break; + } + } + return err; +} + +int +copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from) +{ + unsigned int addr; + int err; + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + return -EFAULT; + + /* If you change siginfo_t structure, please be sure + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. + This routine must convert siginfo from 64bit to 32bit as well + at the same time. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + if (from->si_code < 0) + err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); + else { + switch (from->si_code >> 16) { + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + default: + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + break; + case __SI_FAULT >> 16: + /* avoid type-checking warnings by copying _pad[0] in lieu of si_addr... */ + err |= __put_user(from->_sifields._pad[0], &to->si_addr); + break; + case __SI_POLL >> 16: + err |= __put_user(from->si_band, &to->si_band); + err |= __put_user(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + err |= __put_user(from->si_tid, &to->si_tid); + err |= __put_user(from->si_overrun, &to->si_overrun); + addr = (unsigned long) from->si_ptr; + err |= __put_user(addr, &to->si_ptr); + break; + case __SI_RT >> 16: /* Not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_pid, &to->si_pid); + addr = (unsigned long) from->si_ptr; + err |= __put_user(addr, &to->si_ptr); + break; + } + } + return err; +} + + +/* + * SAVE and RESTORE of ia32 fpstate info, from ia64 current state + * Used in exception handler to pass the fpstate to the user, and restore + * the fpstate while returning from the exception handler. + * + * fpstate info and their mapping to IA64 regs: + * fpstate REG(BITS) Attribute Comments + * cw ar.fcr(0:12) with bits 7 and 6 not used + * sw ar.fsr(0:15) + * tag ar.fsr(16:31) with odd numbered bits not used + * (read returns 0, writes ignored) + * ipoff ar.fir(0:31) + * cssel ar.fir(32:47) + * dataoff ar.fdr(0:31) + * datasel ar.fdr(32:47) + * + * _st[(0+TOS)%8] f8 + * _st[(1+TOS)%8] f9 + * _st[(2+TOS)%8] f10 + * _st[(3+TOS)%8] f11 (f8..f11 from ptregs) + * : : : (f12..f15 from live reg) + * : : : + * _st[(7+TOS)%8] f15 TOS=sw.top(bits11:13) + * + * status Same as sw RO + * magic 0 as X86_FXSR_MAGIC in ia32 + * mxcsr Bits(7:15)=ar.fcr(39:47) + * Bits(0:5) =ar.fsr(32:37) with bit 6 reserved + * _xmm[0..7] f16..f31 (live registers) + * with _xmm[0] + * Bit(64:127)=f17(0:63) + * Bit(0:63)=f16(0:63) + * All other fields unused... + */ + +static int +save_ia32_fpstate_live (struct _fpstate_ia32 __user *save) +{ + struct task_struct *tsk = current; + struct pt_regs *ptp; + struct _fpreg_ia32 *fpregp; + char buf[32]; + unsigned long fsr, fcr, fir, fdr; + unsigned long new_fsr; + unsigned long num128[2]; + unsigned long mxcsr=0; + int fp_tos, fr8_st_map; + + if (!access_ok(VERIFY_WRITE, save, sizeof(*save))) + return -EFAULT; + + /* Read in fsr, fcr, fir, fdr and copy onto fpstate */ + fsr = ia64_getreg(_IA64_REG_AR_FSR); + fcr = ia64_getreg(_IA64_REG_AR_FCR); + fir = ia64_getreg(_IA64_REG_AR_FIR); + fdr = ia64_getreg(_IA64_REG_AR_FDR); + + /* + * We need to clear the exception state before calling the signal handler. Clear + * the bits 15, bits 0-7 in fp status word. Similar to the functionality of fnclex + * instruction. + */ + new_fsr = fsr & ~0x80ff; + ia64_setreg(_IA64_REG_AR_FSR, new_fsr); + + __put_user(fcr & 0xffff, &save->cw); + __put_user(fsr & 0xffff, &save->sw); + __put_user((fsr>>16) & 0xffff, &save->tag); + __put_user(fir, &save->ipoff); + __put_user((fir>>32) & 0xffff, &save->cssel); + __put_user(fdr, &save->dataoff); + __put_user((fdr>>32) & 0xffff, &save->datasel); + __put_user(fsr & 0xffff, &save->status); + + mxcsr = ((fcr>>32) & 0xff80) | ((fsr>>32) & 0x3f); + __put_user(mxcsr & 0xffff, &save->mxcsr); + __put_user( 0, &save->magic); //#define X86_FXSR_MAGIC 0x0000 + + /* + * save f8..f11 from pt_regs + * save f12..f15 from live register set + */ + /* + * Find the location where f8 has to go in fp reg stack. This depends on + * TOP(11:13) field of sw. Other f reg continue sequentially from where f8 maps + * to. + */ + fp_tos = (fsr>>11)&0x7; + fr8_st_map = (8-fp_tos)&0x7; + ptp = ia64_task_regs(tsk); + fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15); + ia64f2ia32f(fpregp, &ptp->f8); + copy_to_user(&save->_st[(0+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + ia64f2ia32f(fpregp, &ptp->f9); + copy_to_user(&save->_st[(1+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + ia64f2ia32f(fpregp, &ptp->f10); + copy_to_user(&save->_st[(2+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + ia64f2ia32f(fpregp, &ptp->f11); + copy_to_user(&save->_st[(3+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + + ia64_stfe(fpregp, 12); + copy_to_user(&save->_st[(4+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + ia64_stfe(fpregp, 13); + copy_to_user(&save->_st[(5+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + ia64_stfe(fpregp, 14); + copy_to_user(&save->_st[(6+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + ia64_stfe(fpregp, 15); + copy_to_user(&save->_st[(7+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32)); + + ia64_stf8(&num128[0], 16); + ia64_stf8(&num128[1], 17); + copy_to_user(&save->_xmm[0], num128, sizeof(struct _xmmreg_ia32)); + + ia64_stf8(&num128[0], 18); + ia64_stf8(&num128[1], 19); + copy_to_user(&save->_xmm[1], num128, sizeof(struct _xmmreg_ia32)); + + ia64_stf8(&num128[0], 20); + ia64_stf8(&num128[1], 21); + copy_to_user(&save->_xmm[2], num128, sizeof(struct _xmmreg_ia32)); + + ia64_stf8(&num128[0], 22); + ia64_stf8(&num128[1], 23); + copy_to_user(&save->_xmm[3], num128, sizeof(struct _xmmreg_ia32)); + + ia64_stf8(&num128[0], 24); + ia64_stf8(&num128[1], 25); + copy_to_user(&save->_xmm[4], num128, sizeof(struct _xmmreg_ia32)); + + ia64_stf8(&num128[0], 26); + ia64_stf8(&num128[1], 27); + copy_to_user(&save->_xmm[5], num128, sizeof(struct _xmmreg_ia32)); + + ia64_stf8(&num128[0], 28); + ia64_stf8(&num128[1], 29); + copy_to_user(&save->_xmm[6], num128, sizeof(struct _xmmreg_ia32)); + + ia64_stf8(&num128[0], 30); + ia64_stf8(&num128[1], 31); + copy_to_user(&save->_xmm[7], num128, sizeof(struct _xmmreg_ia32)); + return 0; +} + +static int +restore_ia32_fpstate_live (struct _fpstate_ia32 __user *save) +{ + struct task_struct *tsk = current; + struct pt_regs *ptp; + unsigned int lo, hi; + unsigned long num128[2]; + unsigned long num64, mxcsr; + struct _fpreg_ia32 *fpregp; + char buf[32]; + unsigned long fsr, fcr, fir, fdr; + int fp_tos, fr8_st_map; + + if (!access_ok(VERIFY_READ, save, sizeof(*save))) + return(-EFAULT); + + /* + * Updating fsr, fcr, fir, fdr. + * Just a bit more complicated than save. + * - Need to make sure that we don't write any value other than the + * specific fpstate info + * - Need to make sure that the untouched part of frs, fdr, fir, fcr + * should remain same while writing. + * So, we do a read, change specific fields and write. + */ + fsr = ia64_getreg(_IA64_REG_AR_FSR); + fcr = ia64_getreg(_IA64_REG_AR_FCR); + fir = ia64_getreg(_IA64_REG_AR_FIR); + fdr = ia64_getreg(_IA64_REG_AR_FDR); + + __get_user(mxcsr, (unsigned int __user *)&save->mxcsr); + /* setting bits 0..5 8..12 with cw and 39..47 from mxcsr */ + __get_user(lo, (unsigned int __user *)&save->cw); + num64 = mxcsr & 0xff10; + num64 = (num64 << 32) | (lo & 0x1f3f); + fcr = (fcr & (~0xff1000001f3fUL)) | num64; + + /* setting bits 0..31 with sw and tag and 32..37 from mxcsr */ + __get_user(lo, (unsigned int __user *)&save->sw); + /* set bits 15,7 (fsw.b, fsw.es) to reflect the current error status */ + if ( !(lo & 0x7f) ) + lo &= (~0x8080); + __get_user(hi, (unsigned int __user *)&save->tag); + num64 = mxcsr & 0x3f; + num64 = (num64 << 16) | (hi & 0xffff); + num64 = (num64 << 16) | (lo & 0xffff); + fsr = (fsr & (~0x3fffffffffUL)) | num64; + + /* setting bits 0..47 with cssel and ipoff */ + __get_user(lo, (unsigned int __user *)&save->ipoff); + __get_user(hi, (unsigned int __user *)&save->cssel); + num64 = hi & 0xffff; + num64 = (num64 << 32) | lo; + fir = (fir & (~0xffffffffffffUL)) | num64; + + /* setting bits 0..47 with datasel and dataoff */ + __get_user(lo, (unsigned int __user *)&save->dataoff); + __get_user(hi, (unsigned int __user *)&save->datasel); + num64 = hi & 0xffff; + num64 = (num64 << 32) | lo; + fdr = (fdr & (~0xffffffffffffUL)) | num64; + + ia64_setreg(_IA64_REG_AR_FSR, fsr); + ia64_setreg(_IA64_REG_AR_FCR, fcr); + ia64_setreg(_IA64_REG_AR_FIR, fir); + ia64_setreg(_IA64_REG_AR_FDR, fdr); + + /* + * restore f8..f11 onto pt_regs + * restore f12..f15 onto live registers + */ + /* + * Find the location where f8 has to go in fp reg stack. This depends on + * TOP(11:13) field of sw. Other f reg continue sequentially from where f8 maps + * to. + */ + fp_tos = (fsr>>11)&0x7; + fr8_st_map = (8-fp_tos)&0x7; + fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15); + + ptp = ia64_task_regs(tsk); + copy_from_user(fpregp, &save->_st[(0+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia32f2ia64f(&ptp->f8, fpregp); + copy_from_user(fpregp, &save->_st[(1+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia32f2ia64f(&ptp->f9, fpregp); + copy_from_user(fpregp, &save->_st[(2+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia32f2ia64f(&ptp->f10, fpregp); + copy_from_user(fpregp, &save->_st[(3+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia32f2ia64f(&ptp->f11, fpregp); + + copy_from_user(fpregp, &save->_st[(4+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia64_ldfe(12, fpregp); + copy_from_user(fpregp, &save->_st[(5+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia64_ldfe(13, fpregp); + copy_from_user(fpregp, &save->_st[(6+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia64_ldfe(14, fpregp); + copy_from_user(fpregp, &save->_st[(7+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32)); + ia64_ldfe(15, fpregp); + + copy_from_user(num128, &save->_xmm[0], sizeof(struct _xmmreg_ia32)); + ia64_ldf8(16, &num128[0]); + ia64_ldf8(17, &num128[1]); + + copy_from_user(num128, &save->_xmm[1], sizeof(struct _xmmreg_ia32)); + ia64_ldf8(18, &num128[0]); + ia64_ldf8(19, &num128[1]); + + copy_from_user(num128, &save->_xmm[2], sizeof(struct _xmmreg_ia32)); + ia64_ldf8(20, &num128[0]); + ia64_ldf8(21, &num128[1]); + + copy_from_user(num128, &save->_xmm[3], sizeof(struct _xmmreg_ia32)); + ia64_ldf8(22, &num128[0]); + ia64_ldf8(23, &num128[1]); + + copy_from_user(num128, &save->_xmm[4], sizeof(struct _xmmreg_ia32)); + ia64_ldf8(24, &num128[0]); + ia64_ldf8(25, &num128[1]); + + copy_from_user(num128, &save->_xmm[5], sizeof(struct _xmmreg_ia32)); + ia64_ldf8(26, &num128[0]); + ia64_ldf8(27, &num128[1]); + + copy_from_user(num128, &save->_xmm[6], sizeof(struct _xmmreg_ia32)); + ia64_ldf8(28, &num128[0]); + ia64_ldf8(29, &num128[1]); + + copy_from_user(num128, &save->_xmm[7], sizeof(struct _xmmreg_ia32)); + ia64_ldf8(30, &num128[0]); + ia64_ldf8(31, &num128[1]); + return 0; +} + +static inline void +sigact_set_handler (struct k_sigaction *sa, unsigned int handler, unsigned int restorer) +{ + if (handler + 1 <= 2) + /* SIG_DFL, SIG_IGN, or SIG_ERR: must sign-extend to 64-bits */ + sa->sa.sa_handler = (__sighandler_t) A((int) handler); + else + sa->sa.sa_handler = (__sighandler_t) (((unsigned long) restorer << 32) | handler); +} + +long +__ia32_rt_sigsuspend (compat_sigset_t *sset, unsigned int sigsetsize, struct sigscratch *scr) +{ + extern long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall); + sigset_t oldset, set; + + scr->scratch_unat = 0; /* avoid leaking kernel bits to user level */ + memset(&set, 0, sizeof(&set)); + + if (memcpy(&set.sig, &sset->sig, sigsetsize)) + return -EFAULT; + + sigdelsetmask(&set, ~_BLOCKABLE); + + spin_lock_irq(¤t->sighand->siglock); + { + oldset = current->blocked; + current->blocked = set; + recalc_sigpending(); + } + spin_unlock_irq(¤t->sighand->siglock); + + /* + * The return below usually returns to the signal handler. We need to pre-set the + * correct error code here to ensure that the right values get saved in sigcontext + * by ia64_do_signal. + */ + scr->pt.r8 = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (ia64_do_signal(&oldset, scr, 1)) + return -EINTR; + } +} + +asmlinkage long +ia32_rt_sigsuspend (compat_sigset_t __user *uset, unsigned int sigsetsize, struct sigscratch *scr) +{ + compat_sigset_t set; + + if (sigsetsize > sizeof(compat_sigset_t)) + return -EINVAL; + + if (copy_from_user(&set.sig, &uset->sig, sigsetsize)) + return -EFAULT; + + return __ia32_rt_sigsuspend(&set, sigsetsize, scr); +} + +asmlinkage long +ia32_sigsuspend (unsigned int mask, struct sigscratch *scr) +{ + return __ia32_rt_sigsuspend((compat_sigset_t *) &mask, sizeof(mask), scr); +} + +asmlinkage long +sys32_signal (int sig, unsigned int handler) +{ + struct k_sigaction new_sa, old_sa; + int ret; + + sigact_set_handler(&new_sa, handler, 0); + new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; + + ret = do_sigaction(sig, &new_sa, &old_sa); + + return ret ? ret : IA32_SA_HANDLER(&old_sa); +} + +asmlinkage long +sys32_rt_sigaction (int sig, struct sigaction32 __user *act, + struct sigaction32 __user *oact, unsigned int sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + unsigned int handler, restorer; + int ret; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + + if (act) { + ret = get_user(handler, &act->sa_handler); + ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= get_user(restorer, &act->sa_restorer); + ret |= copy_from_user(&new_ka.sa.sa_mask, &act->sa_mask, sizeof(compat_sigset_t)); + if (ret) + return -EFAULT; + + sigact_set_handler(&new_ka, handler, restorer); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + ret = put_user(IA32_SA_HANDLER(&old_ka), &oact->sa_handler); + ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= put_user(IA32_SA_RESTORER(&old_ka), &oact->sa_restorer); + ret |= copy_to_user(&oact->sa_mask, &old_ka.sa.sa_mask, sizeof(compat_sigset_t)); + } + return ret; +} + + +asmlinkage long +sys32_rt_sigprocmask (int how, compat_sigset_t __user *set, compat_sigset_t __user *oset, + unsigned int sigsetsize) +{ + mm_segment_t old_fs = get_fs(); + sigset_t s; + long ret; + + if (sigsetsize > sizeof(s)) + return -EINVAL; + + if (set) { + memset(&s, 0, sizeof(s)); + if (copy_from_user(&s.sig, set, sigsetsize)) + return -EFAULT; + } + set_fs(KERNEL_DS); + ret = sys_rt_sigprocmask(how, + set ? (sigset_t __user *) &s : NULL, + oset ? (sigset_t __user *) &s : NULL, sizeof(s)); + set_fs(old_fs); + if (ret) + return ret; + if (oset) { + if (copy_to_user(oset, &s.sig, sigsetsize)) + return -EFAULT; + } + return 0; +} + +asmlinkage long +sys32_rt_sigqueueinfo (int pid, int sig, compat_siginfo_t __user *uinfo) +{ + mm_segment_t old_fs = get_fs(); + siginfo_t info; + int ret; + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *) &info); + set_fs(old_fs); + return ret; +} + +asmlinkage long +sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact) +{ + struct k_sigaction new_ka, old_ka; + unsigned int handler, restorer; + int ret; + + if (act) { + compat_old_sigset_t mask; + + ret = get_user(handler, &act->sa_handler); + ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= get_user(restorer, &act->sa_restorer); + ret |= get_user(mask, &act->sa_mask); + if (ret) + return ret; + + sigact_set_handler(&new_ka, handler, restorer); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + ret = put_user(IA32_SA_HANDLER(&old_ka), &oact->sa_handler); + ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= put_user(IA32_SA_RESTORER(&old_ka), &oact->sa_restorer); + ret |= put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +static int +setup_sigcontext_ia32 (struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate, + struct pt_regs *regs, unsigned long mask) +{ + int err = 0; + unsigned long flag; + + if (!access_ok(VERIFY_WRITE, sc, sizeof(*sc))) + return -EFAULT; + + err |= __put_user((regs->r16 >> 32) & 0xffff, (unsigned int __user *)&sc->fs); + err |= __put_user((regs->r16 >> 48) & 0xffff, (unsigned int __user *)&sc->gs); + err |= __put_user((regs->r16 >> 16) & 0xffff, (unsigned int __user *)&sc->es); + err |= __put_user(regs->r16 & 0xffff, (unsigned int __user *)&sc->ds); + err |= __put_user(regs->r15, &sc->edi); + err |= __put_user(regs->r14, &sc->esi); + err |= __put_user(regs->r13, &sc->ebp); + err |= __put_user(regs->r12, &sc->esp); + err |= __put_user(regs->r11, &sc->ebx); + err |= __put_user(regs->r10, &sc->edx); + err |= __put_user(regs->r9, &sc->ecx); + err |= __put_user(regs->r8, &sc->eax); +#if 0 + err |= __put_user(current->tss.trap_no, &sc->trapno); + err |= __put_user(current->tss.error_code, &sc->err); +#endif + err |= __put_user(regs->cr_iip, &sc->eip); + err |= __put_user(regs->r17 & 0xffff, (unsigned int __user *)&sc->cs); + /* + * `eflags' is in an ar register for this context + */ + flag = ia64_getreg(_IA64_REG_AR_EFLAG); + err |= __put_user((unsigned int)flag, &sc->eflags); + err |= __put_user(regs->r12, &sc->esp_at_signal); + err |= __put_user((regs->r17 >> 16) & 0xffff, (unsigned int __user *)&sc->ss); + + if ( save_ia32_fpstate_live(fpstate) < 0 ) + err = -EFAULT; + else + err |= __put_user((u32)(u64)fpstate, &sc->fpstate); + +#if 0 + tmp = save_i387(fpstate); + if (tmp < 0) + err = 1; + else + err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + + /* non-iBCS2 extensions.. */ +#endif + err |= __put_user(mask, &sc->oldmask); +#if 0 + err |= __put_user(current->tss.cr2, &sc->cr2); +#endif + return err; +} + +static int +restore_sigcontext_ia32 (struct pt_regs *regs, struct sigcontext_ia32 __user *sc, int *peax) +{ + unsigned int err = 0; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + if (!access_ok(VERIFY_READ, sc, sizeof(*sc))) + return(-EFAULT); + +#define COPY(ia64x, ia32x) err |= __get_user(regs->ia64x, &sc->ia32x) + +#define copyseg_gs(tmp) (regs->r16 |= (unsigned long) (tmp) << 48) +#define copyseg_fs(tmp) (regs->r16 |= (unsigned long) (tmp) << 32) +#define copyseg_cs(tmp) (regs->r17 |= tmp) +#define copyseg_ss(tmp) (regs->r17 |= (unsigned long) (tmp) << 16) +#define copyseg_es(tmp) (regs->r16 |= (unsigned long) (tmp) << 16) +#define copyseg_ds(tmp) (regs->r16 |= tmp) + +#define COPY_SEG(seg) \ + { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + copyseg_##seg(tmp); \ + } +#define COPY_SEG_STRICT(seg) \ + { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + copyseg_##seg(tmp|3); \ + } + + /* To make COPY_SEGs easier, we zero r16, r17 */ + regs->r16 = 0; + regs->r17 = 0; + + COPY_SEG(gs); + COPY_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); + COPY(r15, edi); + COPY(r14, esi); + COPY(r13, ebp); + COPY(r12, esp); + COPY(r11, ebx); + COPY(r10, edx); + COPY(r9, ecx); + COPY(cr_iip, eip); + COPY_SEG_STRICT(cs); + COPY_SEG_STRICT(ss); + ia32_load_segment_descriptors(current); + { + unsigned int tmpflags; + unsigned long flag; + + /* + * IA32 `eflags' is not part of `pt_regs', it's in an ar register which + * is part of the thread context. Fortunately, we are executing in the + * IA32 process's context. + */ + err |= __get_user(tmpflags, &sc->eflags); + flag = ia64_getreg(_IA64_REG_AR_EFLAG); + flag &= ~0x40DD5; + flag |= (tmpflags & 0x40DD5); + ia64_setreg(_IA64_REG_AR_EFLAG, flag); + + regs->r1 = -1; /* disable syscall checks, r1 is orig_eax */ + } + + { + struct _fpstate_ia32 __user *buf = NULL; + u32 fpstate_ptr; + err |= get_user(fpstate_ptr, &(sc->fpstate)); + buf = compat_ptr(fpstate_ptr); + if (buf) { + err |= restore_ia32_fpstate_live(buf); + } + } + +#if 0 + { + struct _fpstate * buf; + err |= __get_user(buf, &sc->fpstate); + if (buf) { + if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387(buf); + } + } +#endif + + err |= __get_user(*peax, &sc->eax); + return err; + +#if 0 + badframe: + return 1; +#endif +} + +/* + * Determine which stack to use.. + */ +static inline void __user * +get_sigframe (struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long esp; + + /* Default to using normal stack (truncate off sign-extension of bit 31: */ + esp = (unsigned int) regs->r12; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (!on_sig_stack(esp)) + esp = current->sas_ss_sp + current->sas_ss_size; + } + /* Legacy stack switching not supported */ + + return (void __user *)((esp - frame_size) & -8ul); +} + +static int +setup_frame_ia32 (int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) +{ + struct exec_domain *ed = current_thread_info()->exec_domain; + struct sigframe_ia32 __user *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((ed && ed->signal_invmap && sig < 32 + ? (int)(ed->signal_invmap[sig]) : sig), &frame->sig); + + err |= setup_sigcontext_ia32(&frame->sc, &frame->fpstate, regs, set->sig[0]); + + if (_COMPAT_NSIG_WORDS > 1) + err |= __copy_to_user(frame->extramask, (char *) &set->sig + 4, + sizeof(frame->extramask)); + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + unsigned int restorer = IA32_SA_RESTORER(ka); + err |= __put_user(restorer, &frame->pretcode); + } else { + /* Pointing to restorer in ia32 gate page */ + err |= __put_user(IA32_GATE_OFFSET, &frame->pretcode); + } + + /* This is popl %eax ; movl $,%eax ; int $0x80 + * and there for historical reasons only. + * See arch/i386/kernel/signal.c + */ + + err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); + err |= __put_user(__IA32_NR_sigreturn, (int __user *)(frame->retcode+2)); + err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->r12 = (unsigned long) frame; + regs->cr_iip = IA32_SA_HANDLER(ka); + + set_fs(USER_DS); + +#if 0 + regs->eflags &= ~TF_MASK; +#endif + +#if 0 + printk("SIG deliver (%s:%d): sig=%d sp=%p pc=%lx ra=%x\n", + current->comm, current->pid, sig, (void *) frame, regs->cr_iip, frame->pretcode); +#endif + + return 1; + + give_sigsegv: + force_sigsegv(sig, current); + return 0; +} + +static int +setup_rt_frame_ia32 (int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs * regs) +{ + struct exec_domain *ed = current_thread_info()->exec_domain; + compat_uptr_t pinfo, puc; + struct rt_sigframe_ia32 __user *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((ed && ed->signal_invmap + && sig < 32 ? ed->signal_invmap[sig] : sig), &frame->sig); + + pinfo = (long __user) &frame->info; + puc = (long __user) &frame->uc; + err |= __put_user(pinfo, &frame->pinfo); + err |= __put_user(puc, &frame->puc); + err |= copy_siginfo_to_user32(&frame->info, info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->r12), &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext_ia32(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + unsigned int restorer = IA32_SA_RESTORER(ka); + err |= __put_user(restorer, &frame->pretcode); + } else { + /* Pointing to rt_restorer in ia32 gate page */ + err |= __put_user(IA32_GATE_OFFSET + 8, &frame->pretcode); + } + + /* This is movl $,%eax ; int $0x80 + * and there for historical reasons only. + * See arch/i386/kernel/signal.c + */ + + err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); + err |= __put_user(__IA32_NR_rt_sigreturn, (int __user *)(frame->retcode+1)); + err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->r12 = (unsigned long) frame; + regs->cr_iip = IA32_SA_HANDLER(ka); + + set_fs(USER_DS); + +#if 0 + regs->eflags &= ~TF_MASK; +#endif + +#if 0 + printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%x\n", + current->comm, current->pid, (void *) frame, regs->cr_iip, frame->pretcode); +#endif + + return 1; + +give_sigsegv: + force_sigsegv(sig, current); + return 0; +} + +int +ia32_setup_frame1 (int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + return setup_rt_frame_ia32(sig, ka, info, set, regs); + else + return setup_frame_ia32(sig, ka, set, regs); +} + +asmlinkage long +sys32_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4, int arg5, + int arg6, int arg7, struct pt_regs regs) +{ + unsigned long esp = (unsigned int) regs.r12; + struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(esp - 8); + sigset_t set; + int eax; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_COMPAT_NSIG_WORDS > 1 && __copy_from_user((char *) &set.sig + 4, &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + if (restore_sigcontext_ia32(®s, &frame->sc, &eax)) + goto badframe; + return eax; + + badframe: + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage long +sys32_rt_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4, + int arg5, int arg6, int arg7, struct pt_regs regs) +{ + unsigned long esp = (unsigned int) regs.r12; + struct rt_sigframe_ia32 __user *frame = (struct rt_sigframe_ia32 __user *)(esp - 4); + sigset_t set; + int eax; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + if (restore_sigcontext_ia32(®s, &frame->uc.uc_mcontext, &eax)) + goto badframe; + + /* It is more difficult to avoid calling this function than to + call it and ignore errors. */ + do_sigaltstack((stack_t __user *) &frame->uc.uc_stack, NULL, esp); + + return eax; + + badframe: + force_sig(SIGSEGV, current); + return 0; +} diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c new file mode 100644 index 000000000000..4f630043b3ae --- /dev/null +++ b/arch/ia64/ia32/ia32_support.c @@ -0,0 +1,264 @@ +/* + * IA32 helper functions + * + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com> + * Copyright (C) 2001-2002 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 06/16/00 A. Mallick added csd/ssd/tssd for ia32 thread context + * 02/19/01 D. Mosberger dropped tssd; it's not needed + * 09/14/01 D. Mosberger fixed memory management for gdt/tss page + * 09/29/01 D. Mosberger added ia32_load_segment_descriptors() + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/personality.h> +#include <linux/sched.h> + +#include <asm/intrinsics.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/uaccess.h> + +#include "ia32priv.h" + +extern void die_if_kernel (char *str, struct pt_regs *regs, long err); + +struct exec_domain ia32_exec_domain; +struct page *ia32_shared_page[NR_CPUS]; +unsigned long *ia32_boot_gdt; +unsigned long *cpu_gdt_table[NR_CPUS]; +struct page *ia32_gate_page; + +static unsigned long +load_desc (u16 selector) +{ + unsigned long *table, limit, index; + + if (!selector) + return 0; + if (selector & IA32_SEGSEL_TI) { + table = (unsigned long *) IA32_LDT_OFFSET; + limit = IA32_LDT_ENTRIES; + } else { + table = cpu_gdt_table[smp_processor_id()]; + limit = IA32_PAGE_SIZE / sizeof(ia32_boot_gdt[0]); + } + index = selector >> IA32_SEGSEL_INDEX_SHIFT; + if (index >= limit) + return 0; + return IA32_SEG_UNSCRAMBLE(table[index]); +} + +void +ia32_load_segment_descriptors (struct task_struct *task) +{ + struct pt_regs *regs = ia64_task_regs(task); + + /* Setup the segment descriptors */ + regs->r24 = load_desc(regs->r16 >> 16); /* ESD */ + regs->r27 = load_desc(regs->r16 >> 0); /* DSD */ + regs->r28 = load_desc(regs->r16 >> 32); /* FSD */ + regs->r29 = load_desc(regs->r16 >> 48); /* GSD */ + regs->ar_csd = load_desc(regs->r17 >> 0); /* CSD */ + regs->ar_ssd = load_desc(regs->r17 >> 16); /* SSD */ +} + +int +ia32_clone_tls (struct task_struct *child, struct pt_regs *childregs) +{ + struct desc_struct *desc; + struct ia32_user_desc info; + int idx; + + if (copy_from_user(&info, (void __user *)(childregs->r14 & 0xffffffff), sizeof(info))) + return -EFAULT; + if (LDT_empty(&info)) + return -EINVAL; + + idx = info.entry_number; + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + + /* XXX: can this be done in a cleaner way ? */ + load_TLS(&child->thread, smp_processor_id()); + ia32_load_segment_descriptors(child); + load_TLS(¤t->thread, smp_processor_id()); + + return 0; +} + +void +ia32_save_state (struct task_struct *t) +{ + t->thread.eflag = ia64_getreg(_IA64_REG_AR_EFLAG); + t->thread.fsr = ia64_getreg(_IA64_REG_AR_FSR); + t->thread.fcr = ia64_getreg(_IA64_REG_AR_FCR); + t->thread.fir = ia64_getreg(_IA64_REG_AR_FIR); + t->thread.fdr = ia64_getreg(_IA64_REG_AR_FDR); + ia64_set_kr(IA64_KR_IO_BASE, t->thread.old_iob); + ia64_set_kr(IA64_KR_TSSD, t->thread.old_k1); +} + +void +ia32_load_state (struct task_struct *t) +{ + unsigned long eflag, fsr, fcr, fir, fdr, tssd; + struct pt_regs *regs = ia64_task_regs(t); + + eflag = t->thread.eflag; + fsr = t->thread.fsr; + fcr = t->thread.fcr; + fir = t->thread.fir; + fdr = t->thread.fdr; + tssd = load_desc(_TSS); /* TSSD */ + + ia64_setreg(_IA64_REG_AR_EFLAG, eflag); + ia64_setreg(_IA64_REG_AR_FSR, fsr); + ia64_setreg(_IA64_REG_AR_FCR, fcr); + ia64_setreg(_IA64_REG_AR_FIR, fir); + ia64_setreg(_IA64_REG_AR_FDR, fdr); + current->thread.old_iob = ia64_get_kr(IA64_KR_IO_BASE); + current->thread.old_k1 = ia64_get_kr(IA64_KR_TSSD); + ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE); + ia64_set_kr(IA64_KR_TSSD, tssd); + + regs->r17 = (_TSS << 48) | (_LDT << 32) | (__u32) regs->r17; + regs->r30 = load_desc(_LDT); /* LDTD */ + load_TLS(&t->thread, smp_processor_id()); +} + +/* + * Setup IA32 GDT and TSS + */ +void +ia32_gdt_init (void) +{ + int cpu = smp_processor_id(); + + ia32_shared_page[cpu] = alloc_page(GFP_KERNEL); + if (!ia32_shared_page[cpu]) + panic("failed to allocate ia32_shared_page[%d]\n", cpu); + + cpu_gdt_table[cpu] = page_address(ia32_shared_page[cpu]); + + /* Copy from the boot cpu's GDT */ + memcpy(cpu_gdt_table[cpu], ia32_boot_gdt, PAGE_SIZE); +} + + +/* + * Setup IA32 GDT and TSS + */ +static void +ia32_boot_gdt_init (void) +{ + unsigned long ldt_size; + + ia32_shared_page[0] = alloc_page(GFP_KERNEL); + if (!ia32_shared_page[0]) + panic("failed to allocate ia32_shared_page[0]\n"); + + ia32_boot_gdt = page_address(ia32_shared_page[0]); + cpu_gdt_table[0] = ia32_boot_gdt; + + /* CS descriptor in IA-32 (scrambled) format */ + ia32_boot_gdt[__USER_CS >> 3] + = IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT, + 0xb, 1, 3, 1, 1, 1, 1); + + /* DS descriptor in IA-32 (scrambled) format */ + ia32_boot_gdt[__USER_DS >> 3] + = IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT, + 0x3, 1, 3, 1, 1, 1, 1); + + ldt_size = PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE); + ia32_boot_gdt[TSS_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_TSS_OFFSET, 235, + 0xb, 0, 3, 1, 1, 1, 0); + ia32_boot_gdt[LDT_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_LDT_OFFSET, ldt_size - 1, + 0x2, 0, 3, 1, 1, 1, 0); +} + +static void +ia32_gate_page_init(void) +{ + unsigned long *sr; + + ia32_gate_page = alloc_page(GFP_KERNEL); + sr = page_address(ia32_gate_page); + /* This is popl %eax ; movl $,%eax ; int $0x80 */ + *sr++ = 0xb858 | (__IA32_NR_sigreturn << 16) | (0x80cdUL << 48); + + /* This is movl $,%eax ; int $0x80 */ + *sr = 0xb8 | (__IA32_NR_rt_sigreturn << 8) | (0x80cdUL << 40); +} + +void +ia32_mem_init(void) +{ + ia32_boot_gdt_init(); + ia32_gate_page_init(); +} + +/* + * Handle bad IA32 interrupt via syscall + */ +void +ia32_bad_interrupt (unsigned long int_num, struct pt_regs *regs) +{ + siginfo_t siginfo; + + die_if_kernel("Bad IA-32 interrupt", regs, int_num); + + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = int_num; /* XXX is it OK to abuse si_errno like this? */ + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_addr = NULL; + siginfo.si_imm = 0; + siginfo.si_code = TRAP_BRKPT; + force_sig_info(SIGTRAP, &siginfo, current); +} + +void +ia32_cpu_init (void) +{ + /* initialize global ia32 state - CR0 and CR4 */ + ia64_setreg(_IA64_REG_AR_CFLAG, (((ulong) IA32_CR4 << 32) | IA32_CR0)); +} + +static int __init +ia32_init (void) +{ + ia32_exec_domain.name = "Linux/x86"; + ia32_exec_domain.handler = NULL; + ia32_exec_domain.pers_low = PER_LINUX32; + ia32_exec_domain.pers_high = PER_LINUX32; + ia32_exec_domain.signal_map = default_exec_domain.signal_map; + ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap; + register_exec_domain(&ia32_exec_domain); + +#if PAGE_SHIFT > IA32_PAGE_SHIFT + { + extern kmem_cache_t *partial_page_cachep; + + partial_page_cachep = kmem_cache_create("partial_page_cache", + sizeof(struct partial_page), 0, 0, + NULL, NULL); + if (!partial_page_cachep) + panic("Cannot create partial page SLAB cache"); + } +#endif + return 0; +} + +__initcall(ia32_init); diff --git a/arch/ia64/ia32/ia32_traps.c b/arch/ia64/ia32/ia32_traps.c new file mode 100644 index 000000000000..e486042672f1 --- /dev/null +++ b/arch/ia64/ia32/ia32_traps.c @@ -0,0 +1,156 @@ +/* + * IA-32 exception handlers + * + * Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com> + * Copyright (C) 2001-2002 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 06/16/00 A. Mallick added siginfo for most cases (close to IA32) + * 09/29/00 D. Mosberger added ia32_intercept() + */ + +#include <linux/kernel.h> +#include <linux/sched.h> + +#include "ia32priv.h" + +#include <asm/intrinsics.h> +#include <asm/ptrace.h> + +int +ia32_intercept (struct pt_regs *regs, unsigned long isr) +{ + switch ((isr >> 16) & 0xff) { + case 0: /* Instruction intercept fault */ + case 4: /* Locked Data reference fault */ + case 1: /* Gate intercept trap */ + return -1; + + case 2: /* System flag trap */ + if (((isr >> 14) & 0x3) >= 2) { + /* MOV SS, POP SS instructions */ + ia64_psr(regs)->id = 1; + return 0; + } else + return -1; + } + return -1; +} + +int +ia32_exception (struct pt_regs *regs, unsigned long isr) +{ + struct siginfo siginfo; + + /* initialize these fields to avoid leaking kernel bits to user space: */ + siginfo.si_errno = 0; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_imm = 0; + switch ((isr >> 16) & 0xff) { + case 1: + case 2: + siginfo.si_signo = SIGTRAP; + if (isr == 0) + siginfo.si_code = TRAP_TRACE; + else if (isr & 0x4) + siginfo.si_code = TRAP_BRANCH; + else + siginfo.si_code = TRAP_BRKPT; + break; + + case 3: + siginfo.si_signo = SIGTRAP; + siginfo.si_code = TRAP_BRKPT; + break; + + case 0: /* Divide fault */ + siginfo.si_signo = SIGFPE; + siginfo.si_code = FPE_INTDIV; + break; + + case 4: /* Overflow */ + case 5: /* Bounds fault */ + siginfo.si_signo = SIGFPE; + siginfo.si_code = 0; + break; + + case 6: /* Invalid Op-code */ + siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_ILLOPN; + break; + + case 7: /* FP DNA */ + case 8: /* Double Fault */ + case 9: /* Invalid TSS */ + case 11: /* Segment not present */ + case 12: /* Stack fault */ + case 13: /* General Protection Fault */ + siginfo.si_signo = SIGSEGV; + siginfo.si_code = 0; + break; + + case 16: /* Pending FP error */ + { + unsigned long fsr, fcr; + + fsr = ia64_getreg(_IA64_REG_AR_FSR); + fcr = ia64_getreg(_IA64_REG_AR_FCR); + + siginfo.si_signo = SIGFPE; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit. We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't synchronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + siginfo.si_isr = isr; + siginfo.si_flags = __ISR_VALID; + switch(((~fcr) & (fsr & 0x3f)) | (fsr & 0x240)) { + case 0x000: + default: + siginfo.si_code = 0; + break; + case 0x001: /* Invalid Op */ + case 0x040: /* Stack Fault */ + case 0x240: /* Stack Fault | Direction */ + siginfo.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + siginfo.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + siginfo.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + siginfo.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + siginfo.si_code = FPE_FLTRES; + break; + } + + break; + } + + case 17: /* Alignment check */ + siginfo.si_signo = SIGSEGV; + siginfo.si_code = BUS_ADRALN; + break; + + case 19: /* SSE Numeric error */ + siginfo.si_signo = SIGFPE; + siginfo.si_code = 0; + break; + + default: + return -1; + } + force_sig_info(siginfo.si_signo, &siginfo, current); + return 0; +} diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h new file mode 100644 index 000000000000..b2de948bdaea --- /dev/null +++ b/arch/ia64/ia32/ia32priv.h @@ -0,0 +1,544 @@ +#ifndef _ASM_IA64_IA32_PRIV_H +#define _ASM_IA64_IA32_PRIV_H + +#include <linux/config.h> + +#include <asm/ia32.h> + +#ifdef CONFIG_IA32_SUPPORT + +#include <linux/binfmts.h> +#include <linux/compat.h> +#include <linux/rbtree.h> + +#include <asm/processor.h> + +/* + * 32 bit structures for IA32 support. + */ + +#define IA32_PAGE_SIZE (1UL << IA32_PAGE_SHIFT) +#define IA32_PAGE_MASK (~(IA32_PAGE_SIZE - 1)) +#define IA32_PAGE_ALIGN(addr) (((addr) + IA32_PAGE_SIZE - 1) & IA32_PAGE_MASK) +#define IA32_CLOCKS_PER_SEC 100 /* Cast in stone for IA32 Linux */ + +/* + * partially mapped pages provide precise accounting of which 4k sub pages + * are mapped and which ones are not, thereby improving IA-32 compatibility. + */ +struct partial_page { + struct partial_page *next; /* linked list, sorted by address */ + struct rb_node pp_rb; + /* 64K is the largest "normal" page supported by ia64 ABI. So 4K*32 + * should suffice.*/ + unsigned int bitmap; + unsigned int base; +}; + +struct partial_page_list { + struct partial_page *pp_head; /* list head, points to the lowest + * addressed partial page */ + struct rb_root ppl_rb; + struct partial_page *pp_hint; /* pp_hint->next is the last + * accessed partial page */ + atomic_t pp_count; /* reference count */ +}; + +#if PAGE_SHIFT > IA32_PAGE_SHIFT +struct partial_page_list* ia32_init_pp_list (void); +#else +# define ia32_init_pp_list() 0 +#endif + +/* sigcontext.h */ +/* + * As documented in the iBCS2 standard.. + * + * The first part of "struct _fpstate" is just the + * normal i387 hardware setup, the extra "status" + * word is used to save the coprocessor status word + * before entering the handler. + */ +struct _fpreg_ia32 { + unsigned short significand[4]; + unsigned short exponent; +}; + +struct _fpxreg_ia32 { + unsigned short significand[4]; + unsigned short exponent; + unsigned short padding[3]; +}; + +struct _xmmreg_ia32 { + unsigned int element[4]; +}; + + +struct _fpstate_ia32 { + unsigned int cw, + sw, + tag, + ipoff, + cssel, + dataoff, + datasel; + struct _fpreg_ia32 _st[8]; + unsigned short status; + unsigned short magic; /* 0xffff = regular FPU data only */ + + /* FXSR FPU environment */ + unsigned int _fxsr_env[6]; /* FXSR FPU env is ignored */ + unsigned int mxcsr; + unsigned int reserved; + struct _fpxreg_ia32 _fxsr_st[8]; /* FXSR FPU reg data is ignored */ + struct _xmmreg_ia32 _xmm[8]; + unsigned int padding[56]; +}; + +struct sigcontext_ia32 { + unsigned short gs, __gsh; + unsigned short fs, __fsh; + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned int edi; + unsigned int esi; + unsigned int ebp; + unsigned int esp; + unsigned int ebx; + unsigned int edx; + unsigned int ecx; + unsigned int eax; + unsigned int trapno; + unsigned int err; + unsigned int eip; + unsigned short cs, __csh; + unsigned int eflags; + unsigned int esp_at_signal; + unsigned short ss, __ssh; + unsigned int fpstate; /* really (struct _fpstate_ia32 *) */ + unsigned int oldmask; + unsigned int cr2; +}; + +/* user.h */ +/* + * IA32 (Pentium III/4) FXSR, SSE support + * + * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for + * interacting with the FXSR-format floating point environment. Floating + * point data can be accessed in the regular format in the usual manner, + * and both the standard and SIMD floating point data can be accessed via + * the new ptrace requests. In either case, changes to the FPU environment + * will be reflected in the task's state as expected. + */ +struct ia32_user_i387_struct { + int cwd; + int swd; + int twd; + int fip; + int fcs; + int foo; + int fos; + /* 8*10 bytes for each FP-reg = 80 bytes */ + struct _fpreg_ia32 st_space[8]; +}; + +struct ia32_user_fxsr_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + int fip; + int fcs; + int foo; + int fos; + int mxcsr; + int reserved; + int st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + int xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + int padding[56]; +}; + +/* signal.h */ +#define IA32_SET_SA_HANDLER(ka,handler,restorer) \ + ((ka)->sa.sa_handler = (__sighandler_t) \ + (((unsigned long)(restorer) << 32) \ + | ((handler) & 0xffffffff))) +#define IA32_SA_HANDLER(ka) ((unsigned long) (ka)->sa.sa_handler & 0xffffffff) +#define IA32_SA_RESTORER(ka) ((unsigned long) (ka)->sa.sa_handler >> 32) + +#define __IA32_NR_sigreturn 119 +#define __IA32_NR_rt_sigreturn 173 + +struct sigaction32 { + unsigned int sa_handler; /* Really a pointer, but need to deal with 32 bits */ + unsigned int sa_flags; + unsigned int sa_restorer; /* Another 32 bit pointer */ + compat_sigset_t sa_mask; /* A 32 bit mask */ +}; + +struct old_sigaction32 { + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ + compat_old_sigset_t sa_mask; /* A 32 bit mask */ + unsigned int sa_flags; + unsigned int sa_restorer; /* Another 32 bit pointer */ +}; + +typedef struct sigaltstack_ia32 { + unsigned int ss_sp; + int ss_flags; + unsigned int ss_size; +} stack_ia32_t; + +struct ucontext_ia32 { + unsigned int uc_flags; + unsigned int uc_link; + stack_ia32_t uc_stack; + struct sigcontext_ia32 uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +struct stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + unsigned int __st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned long long st_rdev; + unsigned char __pad3[4]; + unsigned int st_size_lo; + unsigned int st_size_hi; + unsigned int st_blksize; + unsigned int st_blocks; /* Number 512-byte blocks allocated. */ + unsigned int __pad4; /* future possible st_blocks high bits */ + unsigned int st_atime; + unsigned int st_atime_nsec; + unsigned int st_mtime; + unsigned int st_mtime_nsec; + unsigned int st_ctime; + unsigned int st_ctime_nsec; + unsigned int st_ino_lo; + unsigned int st_ino_hi; +}; + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[((128/sizeof(int)) - 3)]; + + /* kill() */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + char _pad[sizeof(unsigned int) - sizeof(int)]; + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + unsigned int _addr; /* faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + +struct old_linux32_dirent { + u32 d_ino; + u32 d_offset; + u16 d_namlen; + char d_name[1]; +}; + +/* + * IA-32 ELF specific definitions for IA-64. + */ + +#define _ASM_IA64_ELF_H /* Don't include elf.h */ + +#include <linux/sched.h> +#include <asm/processor.h> + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x)->e_machine == EM_386) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#define IA32_PAGE_OFFSET 0xc0000000 +#define IA32_STACK_TOP IA32_PAGE_OFFSET +#define IA32_GATE_OFFSET IA32_PAGE_OFFSET +#define IA32_GATE_END IA32_PAGE_OFFSET + PAGE_SIZE + +/* + * The system segments (GDT, TSS, LDT) have to be mapped below 4GB so the IA-32 engine can + * access them. + */ +#define IA32_GDT_OFFSET (IA32_PAGE_OFFSET + PAGE_SIZE) +#define IA32_TSS_OFFSET (IA32_PAGE_OFFSET + 2*PAGE_SIZE) +#define IA32_LDT_OFFSET (IA32_PAGE_OFFSET + 3*PAGE_SIZE) + +#define ELF_EXEC_PAGESIZE IA32_PAGE_SIZE + +/* + * This is the location that an ET_DYN program is loaded if exec'ed. + * Typical use of this is to invoke "./ld.so someprog" to test out a + * new version of the loader. We need to make sure that it is out of + * the way of the program that it will "exec", and that there is + * sufficient room for the brk. + */ +#define ELF_ET_DYN_BASE (IA32_PAGE_OFFSET/3 + 0x1000000) + +void ia64_elf32_init(struct pt_regs *regs); +#define ELF_PLAT_INIT(_r, load_addr) ia64_elf32_init(_r) + +#define elf_addr_t u32 + +/* This macro yields a bitmask that programs can use to figure out + what instruction set this CPU supports. */ +#define ELF_HWCAP 0 + +/* This macro yields a string that ld.so will use to load + implementation specific libraries for optimization. Not terribly + relevant until we have real hardware to play with... */ +#define ELF_PLATFORM NULL + +#ifdef __KERNEL__ +# define SET_PERSONALITY(EX,IBCS2) \ + (current->personality = (IBCS2) ? PER_SVR4 : PER_LINUX) +#endif + +#define IA32_EFLAG 0x200 + +/* + * IA-32 ELF specific definitions for IA-64. + */ + +#define __USER_CS 0x23 +#define __USER_DS 0x2B + +/* + * The per-cpu GDT has 32 entries: see <asm-i386/segment.h> + */ +#define GDT_ENTRIES 32 + +#define GDT_SIZE (GDT_ENTRIES * 8) + +#define TSS_ENTRY 14 +#define LDT_ENTRY (TSS_ENTRY + 1) + +#define IA32_SEGSEL_RPL (0x3 << 0) +#define IA32_SEGSEL_TI (0x1 << 2) +#define IA32_SEGSEL_INDEX_SHIFT 3 + +#define _TSS ((unsigned long) TSS_ENTRY << IA32_SEGSEL_INDEX_SHIFT) +#define _LDT ((unsigned long) LDT_ENTRY << IA32_SEGSEL_INDEX_SHIFT) + +#define IA32_SEG_BASE 16 +#define IA32_SEG_TYPE 40 +#define IA32_SEG_SYS 44 +#define IA32_SEG_DPL 45 +#define IA32_SEG_P 47 +#define IA32_SEG_HIGH_LIMIT 48 +#define IA32_SEG_AVL 52 +#define IA32_SEG_DB 54 +#define IA32_SEG_G 55 +#define IA32_SEG_HIGH_BASE 56 + +#define IA32_SEG_DESCRIPTOR(base, limit, segtype, nonsysseg, dpl, segpresent, avl, segdb, gran) \ + (((limit) & 0xffff) \ + | (((unsigned long) (base) & 0xffffff) << IA32_SEG_BASE) \ + | ((unsigned long) (segtype) << IA32_SEG_TYPE) \ + | ((unsigned long) (nonsysseg) << IA32_SEG_SYS) \ + | ((unsigned long) (dpl) << IA32_SEG_DPL) \ + | ((unsigned long) (segpresent) << IA32_SEG_P) \ + | ((((unsigned long) (limit) >> 16) & 0xf) << IA32_SEG_HIGH_LIMIT) \ + | ((unsigned long) (avl) << IA32_SEG_AVL) \ + | ((unsigned long) (segdb) << IA32_SEG_DB) \ + | ((unsigned long) (gran) << IA32_SEG_G) \ + | ((((unsigned long) (base) >> 24) & 0xff) << IA32_SEG_HIGH_BASE)) + +#define SEG_LIM 32 +#define SEG_TYPE 52 +#define SEG_SYS 56 +#define SEG_DPL 57 +#define SEG_P 59 +#define SEG_AVL 60 +#define SEG_DB 62 +#define SEG_G 63 + +/* Unscramble an IA-32 segment descriptor into the IA-64 format. */ +#define IA32_SEG_UNSCRAMBLE(sd) \ + ( (((sd) >> IA32_SEG_BASE) & 0xffffff) | ((((sd) >> IA32_SEG_HIGH_BASE) & 0xff) << 24) \ + | ((((sd) & 0xffff) | ((((sd) >> IA32_SEG_HIGH_LIMIT) & 0xf) << 16)) << SEG_LIM) \ + | ((((sd) >> IA32_SEG_TYPE) & 0xf) << SEG_TYPE) \ + | ((((sd) >> IA32_SEG_SYS) & 0x1) << SEG_SYS) \ + | ((((sd) >> IA32_SEG_DPL) & 0x3) << SEG_DPL) \ + | ((((sd) >> IA32_SEG_P) & 0x1) << SEG_P) \ + | ((((sd) >> IA32_SEG_AVL) & 0x1) << SEG_AVL) \ + | ((((sd) >> IA32_SEG_DB) & 0x1) << SEG_DB) \ + | ((((sd) >> IA32_SEG_G) & 0x1) << SEG_G)) + +#define IA32_IOBASE 0x2000000000000000UL /* Virtual address for I/O space */ + +#define IA32_CR0 0x80000001 /* Enable PG and PE bits */ +#define IA32_CR4 0x600 /* MMXEX and FXSR on */ + +/* + * IA32 floating point control registers starting values + */ + +#define IA32_FSR_DEFAULT 0x55550000 /* set all tag bits */ +#define IA32_FCR_DEFAULT 0x17800000037fUL /* extended precision, all masks */ + +#define IA32_PTRACE_GETREGS 12 +#define IA32_PTRACE_SETREGS 13 +#define IA32_PTRACE_GETFPREGS 14 +#define IA32_PTRACE_SETFPREGS 15 +#define IA32_PTRACE_GETFPXREGS 18 +#define IA32_PTRACE_SETFPXREGS 19 + +#define ia32_start_thread(regs,new_ip,new_sp) do { \ + set_fs(USER_DS); \ + ia64_psr(regs)->cpl = 3; /* set user mode */ \ + ia64_psr(regs)->ri = 0; /* clear return slot number */ \ + ia64_psr(regs)->is = 1; /* IA-32 instruction set */ \ + regs->cr_iip = new_ip; \ + regs->ar_rsc = 0xc; /* enforced lazy mode, priv. level 3 */ \ + regs->ar_rnat = 0; \ + regs->loadrs = 0; \ + regs->r12 = new_sp; \ +} while (0) + +/* + * Local Descriptor Table (LDT) related declarations. + */ + +#define IA32_LDT_ENTRIES 8192 /* Maximum number of LDT entries supported. */ +#define IA32_LDT_ENTRY_SIZE 8 /* The size of each LDT entry. */ + +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7100) + +#define LDT_empty(info) ( \ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +static inline void +load_TLS (struct thread_struct *t, unsigned int cpu) +{ + extern unsigned long *cpu_gdt_table[NR_CPUS]; + + memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0], sizeof(long)); + memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1], sizeof(long)); + memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2], sizeof(long)); +} + +struct ia32_user_desc { + unsigned int entry_number; + unsigned int base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; +}; + +struct linux_binprm; + +extern void ia32_init_addr_space (struct pt_regs *regs); +extern int ia32_setup_arg_pages (struct linux_binprm *bprm, int exec_stack); +extern unsigned long ia32_do_mmap (struct file *, unsigned long, unsigned long, int, int, loff_t); +extern void ia32_load_segment_descriptors (struct task_struct *task); + +#define ia32f2ia64f(dst,src) \ +do { \ + ia64_ldfe(6,src); \ + ia64_stop(); \ + ia64_stf_spill(dst, 6); \ +} while(0) + +#define ia64f2ia32f(dst,src) \ +do { \ + ia64_ldf_fill(6, src); \ + ia64_stop(); \ + ia64_stfe(dst, 6); \ +} while(0) + +struct user_regs_struct32 { + __u32 ebx, ecx, edx, esi, edi, ebp, eax; + unsigned short ds, __ds, es, __es; + unsigned short fs, __fs, gs, __gs; + __u32 orig_eax, eip; + unsigned short cs, __cs; + __u32 eflags, esp; + unsigned short ss, __ss; +}; + +/* Prototypes for use in elfcore32.h */ +extern int save_ia32_fpstate (struct task_struct *, struct ia32_user_i387_struct __user *); +extern int save_ia32_fpxstate (struct task_struct *, struct ia32_user_fxsr_struct __user *); + +#endif /* !CONFIG_IA32_SUPPORT */ + +#endif /* _ASM_IA64_IA32_PRIV_H */ diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c new file mode 100644 index 000000000000..247a21c64aea --- /dev/null +++ b/arch/ia64/ia32/sys_ia32.c @@ -0,0 +1,2747 @@ +/* + * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Derived from sys_sparc32.c. + * + * Copyright (C) 2000 VA Linux Co + * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 2000-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2004 Gordon Jin <gordon.jin@intel.com> + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/sysctl.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/signal.h> +#include <linux/resource.h> +#include <linux/times.h> +#include <linux/utsname.h> +#include <linux/timex.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/mm.h> +#include <linux/shm.h> +#include <linux/slab.h> +#include <linux/uio.h> +#include <linux/nfs_fs.h> +#include <linux/quota.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/nfsd/syscall.h> +#include <linux/poll.h> +#include <linux/eventpoll.h> +#include <linux/personality.h> +#include <linux/ptrace.h> +#include <linux/stat.h> +#include <linux/ipc.h> +#include <linux/compat.h> +#include <linux/vfs.h> +#include <linux/mman.h> + +#include <asm/intrinsics.h> +#include <asm/semaphore.h> +#include <asm/types.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> + +#include "ia32priv.h" + +#include <net/scm.h> +#include <net/sock.h> + +#define DEBUG 0 + +#if DEBUG +# define DBG(fmt...) printk(KERN_DEBUG fmt) +#else +# define DBG(fmt...) +#endif + +#define ROUND_UP(x,a) ((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1))) + +#define OFFSET4K(a) ((a) & 0xfff) +#define PAGE_START(addr) ((addr) & PAGE_MASK) +#define MINSIGSTKSZ_IA32 2048 + +#define high2lowuid(uid) ((uid) > 65535 ? 65534 : (uid)) +#define high2lowgid(gid) ((gid) > 65535 ? 65534 : (gid)) + +/* + * Anything that modifies or inspects ia32 user virtual memory must hold this semaphore + * while doing so. + */ +/* XXX make per-mm: */ +static DECLARE_MUTEX(ia32_mmap_sem); + +asmlinkage long +sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, + struct pt_regs *regs) +{ + long error; + char *filename; + unsigned long old_map_base, old_task_size, tssd; + + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; + + old_map_base = current->thread.map_base; + old_task_size = current->thread.task_size; + tssd = ia64_get_kr(IA64_KR_TSSD); + + /* we may be exec'ing a 64-bit process: reset map base, task-size, and io-base: */ + current->thread.map_base = DEFAULT_MAP_BASE; + current->thread.task_size = DEFAULT_TASK_SIZE; + ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob); + ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1); + + error = compat_do_execve(filename, argv, envp, regs); + putname(filename); + + if (error < 0) { + /* oops, execve failed, switch back to old values... */ + ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE); + ia64_set_kr(IA64_KR_TSSD, tssd); + current->thread.map_base = old_map_base; + current->thread.task_size = old_task_size; + } + + return error; +} + +int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) +{ + int err; + + if ((u64) stat->size > MAX_NON_LFS || + !old_valid_dev(stat->dev) || + !old_valid_dev(stat->rdev)) + return -EOVERFLOW; + + if (clear_user(ubuf, sizeof(*ubuf))) + return -EFAULT; + + err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); + err |= __put_user(stat->ino, &ubuf->st_ino); + err |= __put_user(stat->mode, &ubuf->st_mode); + err |= __put_user(stat->nlink, &ubuf->st_nlink); + err |= __put_user(high2lowuid(stat->uid), &ubuf->st_uid); + err |= __put_user(high2lowgid(stat->gid), &ubuf->st_gid); + err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); + err |= __put_user(stat->size, &ubuf->st_size); + err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); + err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); + err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); + err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec); + err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime); + err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec); + err |= __put_user(stat->blksize, &ubuf->st_blksize); + err |= __put_user(stat->blocks, &ubuf->st_blocks); + return err; +} + +#if PAGE_SHIFT > IA32_PAGE_SHIFT + + +static int +get_page_prot (struct vm_area_struct *vma, unsigned long addr) +{ + int prot = 0; + + if (!vma || vma->vm_start > addr) + return 0; + + if (vma->vm_flags & VM_READ) + prot |= PROT_READ; + if (vma->vm_flags & VM_WRITE) + prot |= PROT_WRITE; + if (vma->vm_flags & VM_EXEC) + prot |= PROT_EXEC; + return prot; +} + +/* + * Map a subpage by creating an anonymous page that contains the union of the old page and + * the subpage. + */ +static unsigned long +mmap_subpage (struct file *file, unsigned long start, unsigned long end, int prot, int flags, + loff_t off) +{ + void *page = NULL; + struct inode *inode; + unsigned long ret = 0; + struct vm_area_struct *vma = find_vma(current->mm, start); + int old_prot = get_page_prot(vma, start); + + DBG("mmap_subpage(file=%p,start=0x%lx,end=0x%lx,prot=%x,flags=%x,off=0x%llx)\n", + file, start, end, prot, flags, off); + + + /* Optimize the case where the old mmap and the new mmap are both anonymous */ + if ((old_prot & PROT_WRITE) && (flags & MAP_ANONYMOUS) && !vma->vm_file) { + if (clear_user((void __user *) start, end - start)) { + ret = -EFAULT; + goto out; + } + goto skip_mmap; + } + + page = (void *) get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + if (old_prot) + copy_from_user(page, (void __user *) PAGE_START(start), PAGE_SIZE); + + down_write(¤t->mm->mmap_sem); + { + ret = do_mmap(NULL, PAGE_START(start), PAGE_SIZE, prot | PROT_WRITE, + flags | MAP_FIXED | MAP_ANONYMOUS, 0); + } + up_write(¤t->mm->mmap_sem); + + if (IS_ERR((void *) ret)) + goto out; + + if (old_prot) { + /* copy back the old page contents. */ + if (offset_in_page(start)) + copy_to_user((void __user *) PAGE_START(start), page, + offset_in_page(start)); + if (offset_in_page(end)) + copy_to_user((void __user *) end, page + offset_in_page(end), + PAGE_SIZE - offset_in_page(end)); + } + + if (!(flags & MAP_ANONYMOUS)) { + /* read the file contents */ + inode = file->f_dentry->d_inode; + if (!inode->i_fop || !file->f_op->read + || ((*file->f_op->read)(file, (char __user *) start, end - start, &off) < 0)) + { + ret = -EINVAL; + goto out; + } + } + + skip_mmap: + if (!(prot & PROT_WRITE)) + ret = sys_mprotect(PAGE_START(start), PAGE_SIZE, prot | old_prot); + out: + if (page) + free_page((unsigned long) page); + return ret; +} + +/* SLAB cache for partial_page structures */ +kmem_cache_t *partial_page_cachep; + +/* + * init partial_page_list. + * return 0 means kmalloc fail. + */ +struct partial_page_list* +ia32_init_pp_list(void) +{ + struct partial_page_list *p; + + if ((p = kmalloc(sizeof(*p), GFP_KERNEL)) == NULL) + return p; + p->pp_head = NULL; + p->ppl_rb = RB_ROOT; + p->pp_hint = NULL; + atomic_set(&p->pp_count, 1); + return p; +} + +/* + * Search for the partial page with @start in partial page list @ppl. + * If finds the partial page, return the found partial page. + * Else, return 0 and provide @pprev, @rb_link, @rb_parent to + * be used by later __ia32_insert_pp(). + */ +static struct partial_page * +__ia32_find_pp(struct partial_page_list *ppl, unsigned int start, + struct partial_page **pprev, struct rb_node ***rb_link, + struct rb_node **rb_parent) +{ + struct partial_page *pp; + struct rb_node **__rb_link, *__rb_parent, *rb_prev; + + pp = ppl->pp_hint; + if (pp && pp->base == start) + return pp; + + __rb_link = &ppl->ppl_rb.rb_node; + rb_prev = __rb_parent = NULL; + + while (*__rb_link) { + __rb_parent = *__rb_link; + pp = rb_entry(__rb_parent, struct partial_page, pp_rb); + + if (pp->base == start) { + ppl->pp_hint = pp; + return pp; + } else if (pp->base < start) { + rb_prev = __rb_parent; + __rb_link = &__rb_parent->rb_right; + } else { + __rb_link = &__rb_parent->rb_left; + } + } + + *rb_link = __rb_link; + *rb_parent = __rb_parent; + *pprev = NULL; + if (rb_prev) + *pprev = rb_entry(rb_prev, struct partial_page, pp_rb); + return NULL; +} + +/* + * insert @pp into @ppl. + */ +static void +__ia32_insert_pp(struct partial_page_list *ppl, struct partial_page *pp, + struct partial_page *prev, struct rb_node **rb_link, + struct rb_node *rb_parent) +{ + /* link list */ + if (prev) { + pp->next = prev->next; + prev->next = pp; + } else { + ppl->pp_head = pp; + if (rb_parent) + pp->next = rb_entry(rb_parent, + struct partial_page, pp_rb); + else + pp->next = NULL; + } + + /* link rb */ + rb_link_node(&pp->pp_rb, rb_parent, rb_link); + rb_insert_color(&pp->pp_rb, &ppl->ppl_rb); + + ppl->pp_hint = pp; +} + +/* + * delete @pp from partial page list @ppl. + */ +static void +__ia32_delete_pp(struct partial_page_list *ppl, struct partial_page *pp, + struct partial_page *prev) +{ + if (prev) { + prev->next = pp->next; + if (ppl->pp_hint == pp) + ppl->pp_hint = prev; + } else { + ppl->pp_head = pp->next; + if (ppl->pp_hint == pp) + ppl->pp_hint = pp->next; + } + rb_erase(&pp->pp_rb, &ppl->ppl_rb); + kmem_cache_free(partial_page_cachep, pp); +} + +static struct partial_page * +__pp_prev(struct partial_page *pp) +{ + struct rb_node *prev = rb_prev(&pp->pp_rb); + if (prev) + return rb_entry(prev, struct partial_page, pp_rb); + else + return NULL; +} + +/* + * Delete partial pages with address between @start and @end. + * @start and @end are page aligned. + */ +static void +__ia32_delete_pp_range(unsigned int start, unsigned int end) +{ + struct partial_page *pp, *prev; + struct rb_node **rb_link, *rb_parent; + + if (start >= end) + return; + + pp = __ia32_find_pp(current->thread.ppl, start, &prev, + &rb_link, &rb_parent); + if (pp) + prev = __pp_prev(pp); + else { + if (prev) + pp = prev->next; + else + pp = current->thread.ppl->pp_head; + } + + while (pp && pp->base < end) { + struct partial_page *tmp = pp->next; + __ia32_delete_pp(current->thread.ppl, pp, prev); + pp = tmp; + } +} + +/* + * Set the range between @start and @end in bitmap. + * @start and @end should be IA32 page aligned and in the same IA64 page. + */ +static int +__ia32_set_pp(unsigned int start, unsigned int end, int flags) +{ + struct partial_page *pp, *prev; + struct rb_node ** rb_link, *rb_parent; + unsigned int pstart, start_bit, end_bit, i; + + pstart = PAGE_START(start); + start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE; + end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE; + if (end_bit == 0) + end_bit = PAGE_SIZE / IA32_PAGE_SIZE; + pp = __ia32_find_pp(current->thread.ppl, pstart, &prev, + &rb_link, &rb_parent); + if (pp) { + for (i = start_bit; i < end_bit; i++) + set_bit(i, &pp->bitmap); + /* + * Check: if this partial page has been set to a full page, + * then delete it. + */ + if (find_first_zero_bit(&pp->bitmap, sizeof(pp->bitmap)*8) >= + PAGE_SIZE/IA32_PAGE_SIZE) { + __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp)); + } + return 0; + } + + /* + * MAP_FIXED may lead to overlapping mmap. + * In this case, the requested mmap area may already mmaped as a full + * page. So check vma before adding a new partial page. + */ + if (flags & MAP_FIXED) { + struct vm_area_struct *vma = find_vma(current->mm, pstart); + if (vma && vma->vm_start <= pstart) + return 0; + } + + /* new a partial_page */ + pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL); + if (!pp) + return -ENOMEM; + pp->base = pstart; + pp->bitmap = 0; + for (i=start_bit; i<end_bit; i++) + set_bit(i, &(pp->bitmap)); + pp->next = NULL; + __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent); + return 0; +} + +/* + * @start and @end should be IA32 page aligned, but don't need to be in the + * same IA64 page. Split @start and @end to make sure they're in the same IA64 + * page, then call __ia32_set_pp(). + */ +static void +ia32_set_pp(unsigned int start, unsigned int end, int flags) +{ + down_write(¤t->mm->mmap_sem); + if (flags & MAP_FIXED) { + /* + * MAP_FIXED may lead to overlapping mmap. When this happens, + * a series of complete IA64 pages results in deletion of + * old partial pages in that range. + */ + __ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end)); + } + + if (end < PAGE_ALIGN(start)) { + __ia32_set_pp(start, end, flags); + } else { + if (offset_in_page(start)) + __ia32_set_pp(start, PAGE_ALIGN(start), flags); + if (offset_in_page(end)) + __ia32_set_pp(PAGE_START(end), end, flags); + } + up_write(¤t->mm->mmap_sem); +} + +/* + * Unset the range between @start and @end in bitmap. + * @start and @end should be IA32 page aligned and in the same IA64 page. + * After doing that, if the bitmap is 0, then free the page and return 1, + * else return 0; + * If not find the partial page in the list, then + * If the vma exists, then the full page is set to a partial page; + * Else return -ENOMEM. + */ +static int +__ia32_unset_pp(unsigned int start, unsigned int end) +{ + struct partial_page *pp, *prev; + struct rb_node ** rb_link, *rb_parent; + unsigned int pstart, start_bit, end_bit, i; + struct vm_area_struct *vma; + + pstart = PAGE_START(start); + start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE; + end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE; + if (end_bit == 0) + end_bit = PAGE_SIZE / IA32_PAGE_SIZE; + + pp = __ia32_find_pp(current->thread.ppl, pstart, &prev, + &rb_link, &rb_parent); + if (pp) { + for (i = start_bit; i < end_bit; i++) + clear_bit(i, &pp->bitmap); + if (pp->bitmap == 0) { + __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp)); + return 1; + } + return 0; + } + + vma = find_vma(current->mm, pstart); + if (!vma || vma->vm_start > pstart) { + return -ENOMEM; + } + + /* new a partial_page */ + pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL); + if (!pp) + return -ENOMEM; + pp->base = pstart; + pp->bitmap = 0; + for (i = 0; i < start_bit; i++) + set_bit(i, &(pp->bitmap)); + for (i = end_bit; i < PAGE_SIZE / IA32_PAGE_SIZE; i++) + set_bit(i, &(pp->bitmap)); + pp->next = NULL; + __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent); + return 0; +} + +/* + * Delete pp between PAGE_ALIGN(start) and PAGE_START(end) by calling + * __ia32_delete_pp_range(). Unset possible partial pages by calling + * __ia32_unset_pp(). + * The returned value see __ia32_unset_pp(). + */ +static int +ia32_unset_pp(unsigned int *startp, unsigned int *endp) +{ + unsigned int start = *startp, end = *endp; + int ret = 0; + + down_write(¤t->mm->mmap_sem); + + __ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end)); + + if (end < PAGE_ALIGN(start)) { + ret = __ia32_unset_pp(start, end); + if (ret == 1) { + *startp = PAGE_START(start); + *endp = PAGE_ALIGN(end); + } + if (ret == 0) { + /* to shortcut sys_munmap() in sys32_munmap() */ + *startp = PAGE_START(start); + *endp = PAGE_START(end); + } + } else { + if (offset_in_page(start)) { + ret = __ia32_unset_pp(start, PAGE_ALIGN(start)); + if (ret == 1) + *startp = PAGE_START(start); + if (ret == 0) + *startp = PAGE_ALIGN(start); + if (ret < 0) + goto out; + } + if (offset_in_page(end)) { + ret = __ia32_unset_pp(PAGE_START(end), end); + if (ret == 1) + *endp = PAGE_ALIGN(end); + if (ret == 0) + *endp = PAGE_START(end); + } + } + + out: + up_write(¤t->mm->mmap_sem); + return ret; +} + +/* + * Compare the range between @start and @end with bitmap in partial page. + * @start and @end should be IA32 page aligned and in the same IA64 page. + */ +static int +__ia32_compare_pp(unsigned int start, unsigned int end) +{ + struct partial_page *pp, *prev; + struct rb_node ** rb_link, *rb_parent; + unsigned int pstart, start_bit, end_bit, size; + unsigned int first_bit, next_zero_bit; /* the first range in bitmap */ + + pstart = PAGE_START(start); + + pp = __ia32_find_pp(current->thread.ppl, pstart, &prev, + &rb_link, &rb_parent); + if (!pp) + return 1; + + start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE; + end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE; + size = sizeof(pp->bitmap) * 8; + first_bit = find_first_bit(&pp->bitmap, size); + next_zero_bit = find_next_zero_bit(&pp->bitmap, size, first_bit); + if ((start_bit < first_bit) || (end_bit > next_zero_bit)) { + /* exceeds the first range in bitmap */ + return -ENOMEM; + } else if ((start_bit == first_bit) && (end_bit == next_zero_bit)) { + first_bit = find_next_bit(&pp->bitmap, size, next_zero_bit); + if ((next_zero_bit < first_bit) && (first_bit < size)) + return 1; /* has next range */ + else + return 0; /* no next range */ + } else + return 1; +} + +/* + * @start and @end should be IA32 page aligned, but don't need to be in the + * same IA64 page. Split @start and @end to make sure they're in the same IA64 + * page, then call __ia32_compare_pp(). + * + * Take this as example: the range is the 1st and 2nd 4K page. + * Return 0 if they fit bitmap exactly, i.e. bitmap = 00000011; + * Return 1 if the range doesn't cover whole bitmap, e.g. bitmap = 00001111; + * Return -ENOMEM if the range exceeds the bitmap, e.g. bitmap = 00000001 or + * bitmap = 00000101. + */ +static int +ia32_compare_pp(unsigned int *startp, unsigned int *endp) +{ + unsigned int start = *startp, end = *endp; + int retval = 0; + + down_write(¤t->mm->mmap_sem); + + if (end < PAGE_ALIGN(start)) { + retval = __ia32_compare_pp(start, end); + if (retval == 0) { + *startp = PAGE_START(start); + *endp = PAGE_ALIGN(end); + } + } else { + if (offset_in_page(start)) { + retval = __ia32_compare_pp(start, + PAGE_ALIGN(start)); + if (retval == 0) + *startp = PAGE_START(start); + if (retval < 0) + goto out; + } + if (offset_in_page(end)) { + retval = __ia32_compare_pp(PAGE_START(end), end); + if (retval == 0) + *endp = PAGE_ALIGN(end); + } + } + + out: + up_write(¤t->mm->mmap_sem); + return retval; +} + +static void +__ia32_drop_pp_list(struct partial_page_list *ppl) +{ + struct partial_page *pp = ppl->pp_head; + + while (pp) { + struct partial_page *next = pp->next; + kmem_cache_free(partial_page_cachep, pp); + pp = next; + } + + kfree(ppl); +} + +void +ia32_drop_partial_page_list(struct task_struct *task) +{ + struct partial_page_list* ppl = task->thread.ppl; + + if (ppl && atomic_dec_and_test(&ppl->pp_count)) + __ia32_drop_pp_list(ppl); +} + +/* + * Copy current->thread.ppl to ppl (already initialized). + */ +static int +__ia32_copy_pp_list(struct partial_page_list *ppl) +{ + struct partial_page *pp, *tmp, *prev; + struct rb_node **rb_link, *rb_parent; + + ppl->pp_head = NULL; + ppl->pp_hint = NULL; + ppl->ppl_rb = RB_ROOT; + rb_link = &ppl->ppl_rb.rb_node; + rb_parent = NULL; + prev = NULL; + + for (pp = current->thread.ppl->pp_head; pp; pp = pp->next) { + tmp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + *tmp = *pp; + __ia32_insert_pp(ppl, tmp, prev, rb_link, rb_parent); + prev = tmp; + rb_link = &tmp->pp_rb.rb_right; + rb_parent = &tmp->pp_rb; + } + return 0; +} + +int +ia32_copy_partial_page_list(struct task_struct *p, unsigned long clone_flags) +{ + int retval = 0; + + if (clone_flags & CLONE_VM) { + atomic_inc(¤t->thread.ppl->pp_count); + p->thread.ppl = current->thread.ppl; + } else { + p->thread.ppl = ia32_init_pp_list(); + if (!p->thread.ppl) + return -ENOMEM; + down_write(¤t->mm->mmap_sem); + { + retval = __ia32_copy_pp_list(p->thread.ppl); + } + up_write(¤t->mm->mmap_sem); + } + + return retval; +} + +static unsigned long +emulate_mmap (struct file *file, unsigned long start, unsigned long len, int prot, int flags, + loff_t off) +{ + unsigned long tmp, end, pend, pstart, ret, is_congruent, fudge = 0; + struct inode *inode; + loff_t poff; + + end = start + len; + pstart = PAGE_START(start); + pend = PAGE_ALIGN(end); + + if (flags & MAP_FIXED) { + ia32_set_pp((unsigned int)start, (unsigned int)end, flags); + if (start > pstart) { + if (flags & MAP_SHARED) + printk(KERN_INFO + "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n", + current->comm, current->pid, start); + ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags, + off); + if (IS_ERR((void *) ret)) + return ret; + pstart += PAGE_SIZE; + if (pstart >= pend) + goto out; /* done */ + } + if (end < pend) { + if (flags & MAP_SHARED) + printk(KERN_INFO + "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n", + current->comm, current->pid, end); + ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags, + (off + len) - offset_in_page(end)); + if (IS_ERR((void *) ret)) + return ret; + pend -= PAGE_SIZE; + if (pstart >= pend) + goto out; /* done */ + } + } else { + /* + * If a start address was specified, use it if the entire rounded out area + * is available. + */ + if (start && !pstart) + fudge = 1; /* handle case of mapping to range (0,PAGE_SIZE) */ + tmp = arch_get_unmapped_area(file, pstart - fudge, pend - pstart, 0, flags); + if (tmp != pstart) { + pstart = tmp; + start = pstart + offset_in_page(off); /* make start congruent with off */ + end = start + len; + pend = PAGE_ALIGN(end); + } + } + + poff = off + (pstart - start); /* note: (pstart - start) may be negative */ + is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0); + + if ((flags & MAP_SHARED) && !is_congruent) + printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap " + "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off); + + DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend, + is_congruent ? "congruent" : "not congruent", poff); + + down_write(¤t->mm->mmap_sem); + { + if (!(flags & MAP_ANONYMOUS) && is_congruent) + ret = do_mmap(file, pstart, pend - pstart, prot, flags | MAP_FIXED, poff); + else + ret = do_mmap(NULL, pstart, pend - pstart, + prot | ((flags & MAP_ANONYMOUS) ? 0 : PROT_WRITE), + flags | MAP_FIXED | MAP_ANONYMOUS, 0); + } + up_write(¤t->mm->mmap_sem); + + if (IS_ERR((void *) ret)) + return ret; + + if (!is_congruent) { + /* read the file contents */ + inode = file->f_dentry->d_inode; + if (!inode->i_fop || !file->f_op->read + || ((*file->f_op->read)(file, (char __user *) pstart, pend - pstart, &poff) + < 0)) + { + sys_munmap(pstart, pend - pstart); + return -EINVAL; + } + if (!(prot & PROT_WRITE) && sys_mprotect(pstart, pend - pstart, prot) < 0) + return -EINVAL; + } + + if (!(flags & MAP_FIXED)) + ia32_set_pp((unsigned int)start, (unsigned int)end, flags); +out: + return start; +} + +#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */ + +static inline unsigned int +get_prot32 (unsigned int prot) +{ + if (prot & PROT_WRITE) + /* on x86, PROT_WRITE implies PROT_READ which implies PROT_EEC */ + prot |= PROT_READ | PROT_WRITE | PROT_EXEC; + else if (prot & (PROT_READ | PROT_EXEC)) + /* on x86, there is no distinction between PROT_READ and PROT_EXEC */ + prot |= (PROT_READ | PROT_EXEC); + + return prot; +} + +unsigned long +ia32_do_mmap (struct file *file, unsigned long addr, unsigned long len, int prot, int flags, + loff_t offset) +{ + DBG("ia32_do_mmap(file=%p,addr=0x%lx,len=0x%lx,prot=%x,flags=%x,offset=0x%llx)\n", + file, addr, len, prot, flags, offset); + + if (file && (!file->f_op || !file->f_op->mmap)) + return -ENODEV; + + len = IA32_PAGE_ALIGN(len); + if (len == 0) + return addr; + + if (len > IA32_PAGE_OFFSET || addr > IA32_PAGE_OFFSET - len) + { + if (flags & MAP_FIXED) + return -ENOMEM; + else + return -EINVAL; + } + + if (OFFSET4K(offset)) + return -EINVAL; + + prot = get_prot32(prot); + +#if PAGE_SHIFT > IA32_PAGE_SHIFT + down(&ia32_mmap_sem); + { + addr = emulate_mmap(file, addr, len, prot, flags, offset); + } + up(&ia32_mmap_sem); +#else + down_write(¤t->mm->mmap_sem); + { + addr = do_mmap(file, addr, len, prot, flags, offset); + } + up_write(¤t->mm->mmap_sem); +#endif + DBG("ia32_do_mmap: returning 0x%lx\n", addr); + return addr; +} + +/* + * Linux/i386 didn't use to be able to handle more than 4 system call parameters, so these + * system calls used a memory block for parameter passing.. + */ + +struct mmap_arg_struct { + unsigned int addr; + unsigned int len; + unsigned int prot; + unsigned int flags; + unsigned int fd; + unsigned int offset; +}; + +asmlinkage long +sys32_mmap (struct mmap_arg_struct __user *arg) +{ + struct mmap_arg_struct a; + struct file *file = NULL; + unsigned long addr; + int flags; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + + if (OFFSET4K(a.offset)) + return -EINVAL; + + flags = a.flags; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(a.fd); + if (!file) + return -EBADF; + } + + addr = ia32_do_mmap(file, a.addr, a.len, a.prot, flags, a.offset); + + if (file) + fput(file); + return addr; +} + +asmlinkage long +sys32_mmap2 (unsigned int addr, unsigned int len, unsigned int prot, unsigned int flags, + unsigned int fd, unsigned int pgoff) +{ + struct file *file = NULL; + unsigned long retval; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return -EBADF; + } + + retval = ia32_do_mmap(file, addr, len, prot, flags, + (unsigned long) pgoff << IA32_PAGE_SHIFT); + + if (file) + fput(file); + return retval; +} + +asmlinkage long +sys32_munmap (unsigned int start, unsigned int len) +{ + unsigned int end = start + len; + long ret; + +#if PAGE_SHIFT <= IA32_PAGE_SHIFT + ret = sys_munmap(start, end - start); +#else + if (OFFSET4K(start)) + return -EINVAL; + + end = IA32_PAGE_ALIGN(end); + if (start >= end) + return -EINVAL; + + ret = ia32_unset_pp(&start, &end); + if (ret < 0) + return ret; + + if (start >= end) + return 0; + + down(&ia32_mmap_sem); + { + ret = sys_munmap(start, end - start); + } + up(&ia32_mmap_sem); +#endif + return ret; +} + +#if PAGE_SHIFT > IA32_PAGE_SHIFT + +/* + * When mprotect()ing a partial page, we set the permission to the union of the old + * settings and the new settings. In other words, it's only possible to make access to a + * partial page less restrictive. + */ +static long +mprotect_subpage (unsigned long address, int new_prot) +{ + int old_prot; + struct vm_area_struct *vma; + + if (new_prot == PROT_NONE) + return 0; /* optimize case where nothing changes... */ + vma = find_vma(current->mm, address); + old_prot = get_page_prot(vma, address); + return sys_mprotect(address, PAGE_SIZE, new_prot | old_prot); +} + +#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */ + +asmlinkage long +sys32_mprotect (unsigned int start, unsigned int len, int prot) +{ + unsigned int end = start + len; +#if PAGE_SHIFT > IA32_PAGE_SHIFT + long retval = 0; +#endif + + prot = get_prot32(prot); + +#if PAGE_SHIFT <= IA32_PAGE_SHIFT + return sys_mprotect(start, end - start, prot); +#else + if (OFFSET4K(start)) + return -EINVAL; + + end = IA32_PAGE_ALIGN(end); + if (end < start) + return -EINVAL; + + retval = ia32_compare_pp(&start, &end); + + if (retval < 0) + return retval; + + down(&ia32_mmap_sem); + { + if (offset_in_page(start)) { + /* start address is 4KB aligned but not page aligned. */ + retval = mprotect_subpage(PAGE_START(start), prot); + if (retval < 0) + goto out; + + start = PAGE_ALIGN(start); + if (start >= end) + goto out; /* retval is already zero... */ + } + + if (offset_in_page(end)) { + /* end address is 4KB aligned but not page aligned. */ + retval = mprotect_subpage(PAGE_START(end), prot); + if (retval < 0) + goto out; + + end = PAGE_START(end); + } + retval = sys_mprotect(start, end - start, prot); + } + out: + up(&ia32_mmap_sem); + return retval; +#endif +} + +asmlinkage long +sys32_mremap (unsigned int addr, unsigned int old_len, unsigned int new_len, + unsigned int flags, unsigned int new_addr) +{ + long ret; + +#if PAGE_SHIFT <= IA32_PAGE_SHIFT + ret = sys_mremap(addr, old_len, new_len, flags, new_addr); +#else + unsigned int old_end, new_end; + + if (OFFSET4K(addr)) + return -EINVAL; + + old_len = IA32_PAGE_ALIGN(old_len); + new_len = IA32_PAGE_ALIGN(new_len); + old_end = addr + old_len; + new_end = addr + new_len; + + if (!new_len) + return -EINVAL; + + if ((flags & MREMAP_FIXED) && (OFFSET4K(new_addr))) + return -EINVAL; + + if (old_len >= new_len) { + ret = sys32_munmap(addr + new_len, old_len - new_len); + if (ret && old_len != new_len) + return ret; + ret = addr; + if (!(flags & MREMAP_FIXED) || (new_addr == addr)) + return ret; + old_len = new_len; + } + + addr = PAGE_START(addr); + old_len = PAGE_ALIGN(old_end) - addr; + new_len = PAGE_ALIGN(new_end) - addr; + + down(&ia32_mmap_sem); + { + ret = sys_mremap(addr, old_len, new_len, flags, new_addr); + } + up(&ia32_mmap_sem); + + if ((ret >= 0) && (old_len < new_len)) { + /* mremap expanded successfully */ + ia32_set_pp(old_end, new_end, flags); + } +#endif + return ret; +} + +asmlinkage long +sys32_pipe (int __user *fd) +{ + int retval; + int fds[2]; + + retval = do_pipe(fds); + if (retval) + goto out; + if (copy_to_user(fd, fds, sizeof(fds))) + retval = -EFAULT; + out: + return retval; +} + +static inline long +get_tv32 (struct timeval *o, struct compat_timeval __user *i) +{ + return (!access_ok(VERIFY_READ, i, sizeof(*i)) || + (__get_user(o->tv_sec, &i->tv_sec) | __get_user(o->tv_usec, &i->tv_usec))); +} + +static inline long +put_tv32 (struct compat_timeval __user *o, struct timeval *i) +{ + return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || + (__put_user(i->tv_sec, &o->tv_sec) | __put_user(i->tv_usec, &o->tv_usec))); +} + +asmlinkage unsigned long +sys32_alarm (unsigned int seconds) +{ + struct itimerval it_new, it_old; + unsigned int oldalarm; + + it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; + it_new.it_value.tv_sec = seconds; + it_new.it_value.tv_usec = 0; + do_setitimer(ITIMER_REAL, &it_new, &it_old); + oldalarm = it_old.it_value.tv_sec; + /* ehhh.. We can't return 0 if we have an alarm pending.. */ + /* And we'd better return too much than too little anyway */ + if (it_old.it_value.tv_usec) + oldalarm++; + return oldalarm; +} + +/* Translations due to time_t size differences. Which affects all + sorts of things, like timeval and itimerval. */ + +extern struct timezone sys_tz; + +asmlinkage long +sys32_gettimeofday (struct compat_timeval __user *tv, struct timezone __user *tz) +{ + if (tv) { + struct timeval ktv; + do_gettimeofday(&ktv); + if (put_tv32(tv, &ktv)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + return 0; +} + +asmlinkage long +sys32_settimeofday (struct compat_timeval __user *tv, struct timezone __user *tz) +{ + struct timeval ktv; + struct timespec kts; + struct timezone ktz; + + if (tv) { + if (get_tv32(&ktv, tv)) + return -EFAULT; + kts.tv_sec = ktv.tv_sec; + kts.tv_nsec = ktv.tv_usec * 1000; + } + if (tz) { + if (copy_from_user(&ktz, tz, sizeof(ktz))) + return -EFAULT; + } + + return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); +} + +struct getdents32_callback { + struct compat_dirent __user *current_dir; + struct compat_dirent __user *previous; + int count; + int error; +}; + +struct readdir32_callback { + struct old_linux32_dirent __user * dirent; + int count; +}; + +static int +filldir32 (void *__buf, const char *name, int namlen, loff_t offset, ino_t ino, + unsigned int d_type) +{ + struct compat_dirent __user * dirent; + struct getdents32_callback * buf = (struct getdents32_callback *) __buf; + int reclen = ROUND_UP(offsetof(struct compat_dirent, d_name) + namlen + 1, 4); + + buf->error = -EINVAL; /* only used if we fail.. */ + if (reclen > buf->count) + return -EINVAL; + buf->error = -EFAULT; /* only used if we fail.. */ + dirent = buf->previous; + if (dirent) + if (put_user(offset, &dirent->d_off)) + return -EFAULT; + dirent = buf->current_dir; + buf->previous = dirent; + if (put_user(ino, &dirent->d_ino) + || put_user(reclen, &dirent->d_reclen) + || copy_to_user(dirent->d_name, name, namlen) + || put_user(0, dirent->d_name + namlen)) + return -EFAULT; + dirent = (struct compat_dirent __user *) ((char __user *) dirent + reclen); + buf->current_dir = dirent; + buf->count -= reclen; + return 0; +} + +asmlinkage long +sys32_getdents (unsigned int fd, struct compat_dirent __user *dirent, unsigned int count) +{ + struct file * file; + struct compat_dirent __user * lastdirent; + struct getdents32_callback buf; + int error; + + error = -EBADF; + file = fget(fd); + if (!file) + goto out; + + buf.current_dir = dirent; + buf.previous = NULL; + buf.count = count; + buf.error = 0; + + error = vfs_readdir(file, filldir32, &buf); + if (error < 0) + goto out_putf; + error = buf.error; + lastdirent = buf.previous; + if (lastdirent) { + error = -EINVAL; + if (put_user(file->f_pos, &lastdirent->d_off)) + goto out_putf; + error = count - buf.count; + } + +out_putf: + fput(file); +out: + return error; +} + +static int +fillonedir32 (void * __buf, const char * name, int namlen, loff_t offset, ino_t ino, + unsigned int d_type) +{ + struct readdir32_callback * buf = (struct readdir32_callback *) __buf; + struct old_linux32_dirent __user * dirent; + + if (buf->count) + return -EINVAL; + buf->count++; + dirent = buf->dirent; + if (put_user(ino, &dirent->d_ino) + || put_user(offset, &dirent->d_offset) + || put_user(namlen, &dirent->d_namlen) + || copy_to_user(dirent->d_name, name, namlen) + || put_user(0, dirent->d_name + namlen)) + return -EFAULT; + return 0; +} + +asmlinkage long +sys32_readdir (unsigned int fd, void __user *dirent, unsigned int count) +{ + int error; + struct file * file; + struct readdir32_callback buf; + + error = -EBADF; + file = fget(fd); + if (!file) + goto out; + + buf.count = 0; + buf.dirent = dirent; + + error = vfs_readdir(file, fillonedir32, &buf); + if (error >= 0) + error = buf.count; + fput(file); +out: + return error; +} + +struct sel_arg_struct { + unsigned int n; + unsigned int inp; + unsigned int outp; + unsigned int exp; + unsigned int tvp; +}; + +asmlinkage long +sys32_old_select (struct sel_arg_struct __user *arg) +{ + struct sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), + compat_ptr(a.exp), compat_ptr(a.tvp)); +} + +#define SEMOP 1 +#define SEMGET 2 +#define SEMCTL 3 +#define SEMTIMEDOP 4 +#define MSGSND 11 +#define MSGRCV 12 +#define MSGGET 13 +#define MSGCTL 14 +#define SHMAT 21 +#define SHMDT 22 +#define SHMGET 23 +#define SHMCTL 24 + +asmlinkage long +sys32_ipc(u32 call, int first, int second, int third, u32 ptr, u32 fifth) +{ + int version; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMTIMEDOP: + if (fifth) + return compat_sys_semtimedop(first, compat_ptr(ptr), + second, compat_ptr(fifth)); + /* else fall through for normal semop() */ + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + return sys_semtimedop(first, compat_ptr(ptr), second, + NULL); + case SEMGET: + return sys_semget(first, second, third); + case SEMCTL: + return compat_sys_semctl(first, second, third, compat_ptr(ptr)); + + case MSGSND: + return compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); + case MSGRCV: + return compat_sys_msgrcv(first, second, fifth, third, version, compat_ptr(ptr)); + case MSGGET: + return sys_msgget((key_t) first, second); + case MSGCTL: + return compat_sys_msgctl(first, second, compat_ptr(ptr)); + + case SHMAT: + return compat_sys_shmat(first, second, third, version, compat_ptr(ptr)); + break; + case SHMDT: + return sys_shmdt(compat_ptr(ptr)); + case SHMGET: + return sys_shmget(first, (unsigned)second, third); + case SHMCTL: + return compat_sys_shmctl(first, second, compat_ptr(ptr)); + + default: + return -ENOSYS; + } + return -EINVAL; +} + +asmlinkage long +compat_sys_wait4 (compat_pid_t pid, compat_uint_t * stat_addr, int options, + struct compat_rusage *ru); + +asmlinkage long +sys32_waitpid (int pid, unsigned int *stat_addr, int options) +{ + return compat_sys_wait4(pid, stat_addr, options, NULL); +} + +static unsigned int +ia32_peek (struct task_struct *child, unsigned long addr, unsigned int *val) +{ + size_t copied; + unsigned int ret; + + copied = access_process_vm(child, addr, val, sizeof(*val), 0); + return (copied != sizeof(ret)) ? -EIO : 0; +} + +static unsigned int +ia32_poke (struct task_struct *child, unsigned long addr, unsigned int val) +{ + + if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val)) + return -EIO; + return 0; +} + +/* + * The order in which registers are stored in the ptrace regs structure + */ +#define PT_EBX 0 +#define PT_ECX 1 +#define PT_EDX 2 +#define PT_ESI 3 +#define PT_EDI 4 +#define PT_EBP 5 +#define PT_EAX 6 +#define PT_DS 7 +#define PT_ES 8 +#define PT_FS 9 +#define PT_GS 10 +#define PT_ORIG_EAX 11 +#define PT_EIP 12 +#define PT_CS 13 +#define PT_EFL 14 +#define PT_UESP 15 +#define PT_SS 16 + +static unsigned int +getreg (struct task_struct *child, int regno) +{ + struct pt_regs *child_regs; + + child_regs = ia64_task_regs(child); + switch (regno / sizeof(int)) { + case PT_EBX: return child_regs->r11; + case PT_ECX: return child_regs->r9; + case PT_EDX: return child_regs->r10; + case PT_ESI: return child_regs->r14; + case PT_EDI: return child_regs->r15; + case PT_EBP: return child_regs->r13; + case PT_EAX: return child_regs->r8; + case PT_ORIG_EAX: return child_regs->r1; /* see dispatch_to_ia32_handler() */ + case PT_EIP: return child_regs->cr_iip; + case PT_UESP: return child_regs->r12; + case PT_EFL: return child->thread.eflag; + case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS: + return __USER_DS; + case PT_CS: return __USER_CS; + default: + printk(KERN_ERR "ia32.getreg(): unknown register %d\n", regno); + break; + } + return 0; +} + +static void +putreg (struct task_struct *child, int regno, unsigned int value) +{ + struct pt_regs *child_regs; + + child_regs = ia64_task_regs(child); + switch (regno / sizeof(int)) { + case PT_EBX: child_regs->r11 = value; break; + case PT_ECX: child_regs->r9 = value; break; + case PT_EDX: child_regs->r10 = value; break; + case PT_ESI: child_regs->r14 = value; break; + case PT_EDI: child_regs->r15 = value; break; + case PT_EBP: child_regs->r13 = value; break; + case PT_EAX: child_regs->r8 = value; break; + case PT_ORIG_EAX: child_regs->r1 = value; break; + case PT_EIP: child_regs->cr_iip = value; break; + case PT_UESP: child_regs->r12 = value; break; + case PT_EFL: child->thread.eflag = value; break; + case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS: + if (value != __USER_DS) + printk(KERN_ERR + "ia32.putreg: attempt to set invalid segment register %d = %x\n", + regno, value); + break; + case PT_CS: + if (value != __USER_CS) + printk(KERN_ERR + "ia32.putreg: attempt to to set invalid segment register %d = %x\n", + regno, value); + break; + default: + printk(KERN_ERR "ia32.putreg: unknown register %d\n", regno); + break; + } +} + +static void +put_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp, + struct switch_stack *swp, int tos) +{ + struct _fpreg_ia32 *f; + char buf[32]; + + f = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15); + if ((regno += tos) >= 8) + regno -= 8; + switch (regno) { + case 0: + ia64f2ia32f(f, &ptp->f8); + break; + case 1: + ia64f2ia32f(f, &ptp->f9); + break; + case 2: + ia64f2ia32f(f, &ptp->f10); + break; + case 3: + ia64f2ia32f(f, &ptp->f11); + break; + case 4: + case 5: + case 6: + case 7: + ia64f2ia32f(f, &swp->f12 + (regno - 4)); + break; + } + copy_to_user(reg, f, sizeof(*reg)); +} + +static void +get_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp, + struct switch_stack *swp, int tos) +{ + + if ((regno += tos) >= 8) + regno -= 8; + switch (regno) { + case 0: + copy_from_user(&ptp->f8, reg, sizeof(*reg)); + break; + case 1: + copy_from_user(&ptp->f9, reg, sizeof(*reg)); + break; + case 2: + copy_from_user(&ptp->f10, reg, sizeof(*reg)); + break; + case 3: + copy_from_user(&ptp->f11, reg, sizeof(*reg)); + break; + case 4: + case 5: + case 6: + case 7: + copy_from_user(&swp->f12 + (regno - 4), reg, sizeof(*reg)); + break; + } + return; +} + +int +save_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save) +{ + struct switch_stack *swp; + struct pt_regs *ptp; + int i, tos; + + if (!access_ok(VERIFY_WRITE, save, sizeof(*save))) + return -EFAULT; + + __put_user(tsk->thread.fcr & 0xffff, &save->cwd); + __put_user(tsk->thread.fsr & 0xffff, &save->swd); + __put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd); + __put_user(tsk->thread.fir, &save->fip); + __put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs); + __put_user(tsk->thread.fdr, &save->foo); + __put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos); + + /* + * Stack frames start with 16-bytes of temp space + */ + swp = (struct switch_stack *)(tsk->thread.ksp + 16); + ptp = ia64_task_regs(tsk); + tos = (tsk->thread.fsr >> 11) & 7; + for (i = 0; i < 8; i++) + put_fpreg(i, &save->st_space[i], ptp, swp, tos); + return 0; +} + +static int +restore_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save) +{ + struct switch_stack *swp; + struct pt_regs *ptp; + int i, tos; + unsigned int fsrlo, fsrhi, num32; + + if (!access_ok(VERIFY_READ, save, sizeof(*save))) + return(-EFAULT); + + __get_user(num32, (unsigned int __user *)&save->cwd); + tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f); + __get_user(fsrlo, (unsigned int __user *)&save->swd); + __get_user(fsrhi, (unsigned int __user *)&save->twd); + num32 = (fsrhi << 16) | fsrlo; + tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffff)) | num32; + __get_user(num32, (unsigned int __user *)&save->fip); + tsk->thread.fir = (tsk->thread.fir & (~0xffffffff)) | num32; + __get_user(num32, (unsigned int __user *)&save->foo); + tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffff)) | num32; + + /* + * Stack frames start with 16-bytes of temp space + */ + swp = (struct switch_stack *)(tsk->thread.ksp + 16); + ptp = ia64_task_regs(tsk); + tos = (tsk->thread.fsr >> 11) & 7; + for (i = 0; i < 8; i++) + get_fpreg(i, &save->st_space[i], ptp, swp, tos); + return 0; +} + +int +save_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save) +{ + struct switch_stack *swp; + struct pt_regs *ptp; + int i, tos; + unsigned long mxcsr=0; + unsigned long num128[2]; + + if (!access_ok(VERIFY_WRITE, save, sizeof(*save))) + return -EFAULT; + + __put_user(tsk->thread.fcr & 0xffff, &save->cwd); + __put_user(tsk->thread.fsr & 0xffff, &save->swd); + __put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd); + __put_user(tsk->thread.fir, &save->fip); + __put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs); + __put_user(tsk->thread.fdr, &save->foo); + __put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos); + + /* + * Stack frames start with 16-bytes of temp space + */ + swp = (struct switch_stack *)(tsk->thread.ksp + 16); + ptp = ia64_task_regs(tsk); + tos = (tsk->thread.fsr >> 11) & 7; + for (i = 0; i < 8; i++) + put_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos); + + mxcsr = ((tsk->thread.fcr>>32) & 0xff80) | ((tsk->thread.fsr>>32) & 0x3f); + __put_user(mxcsr & 0xffff, &save->mxcsr); + for (i = 0; i < 8; i++) { + memcpy(&(num128[0]), &(swp->f16) + i*2, sizeof(unsigned long)); + memcpy(&(num128[1]), &(swp->f17) + i*2, sizeof(unsigned long)); + copy_to_user(&save->xmm_space[0] + 4*i, num128, sizeof(struct _xmmreg_ia32)); + } + return 0; +} + +static int +restore_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save) +{ + struct switch_stack *swp; + struct pt_regs *ptp; + int i, tos; + unsigned int fsrlo, fsrhi, num32; + int mxcsr; + unsigned long num64; + unsigned long num128[2]; + + if (!access_ok(VERIFY_READ, save, sizeof(*save))) + return(-EFAULT); + + __get_user(num32, (unsigned int __user *)&save->cwd); + tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f); + __get_user(fsrlo, (unsigned int __user *)&save->swd); + __get_user(fsrhi, (unsigned int __user *)&save->twd); + num32 = (fsrhi << 16) | fsrlo; + tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffff)) | num32; + __get_user(num32, (unsigned int __user *)&save->fip); + tsk->thread.fir = (tsk->thread.fir & (~0xffffffff)) | num32; + __get_user(num32, (unsigned int __user *)&save->foo); + tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffff)) | num32; + + /* + * Stack frames start with 16-bytes of temp space + */ + swp = (struct switch_stack *)(tsk->thread.ksp + 16); + ptp = ia64_task_regs(tsk); + tos = (tsk->thread.fsr >> 11) & 7; + for (i = 0; i < 8; i++) + get_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos); + + __get_user(mxcsr, (unsigned int __user *)&save->mxcsr); + num64 = mxcsr & 0xff10; + tsk->thread.fcr = (tsk->thread.fcr & (~0xff1000000000UL)) | (num64<<32); + num64 = mxcsr & 0x3f; + tsk->thread.fsr = (tsk->thread.fsr & (~0x3f00000000UL)) | (num64<<32); + + for (i = 0; i < 8; i++) { + copy_from_user(num128, &save->xmm_space[0] + 4*i, sizeof(struct _xmmreg_ia32)); + memcpy(&(swp->f16) + i*2, &(num128[0]), sizeof(unsigned long)); + memcpy(&(swp->f17) + i*2, &(num128[1]), sizeof(unsigned long)); + } + return 0; +} + +asmlinkage long +sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data) +{ + struct task_struct *child; + unsigned int value, tmp; + long i, ret; + + lock_kernel(); + if (request == PTRACE_TRACEME) { + ret = sys_ptrace(request, pid, addr, data); + goto out; + } + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + goto out; + ret = -EPERM; + if (pid == 1) /* no messing around with init! */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = sys_ptrace(request, pid, addr, data); + goto out_tsk; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_tsk; + + switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: /* read word at location addr */ + ret = ia32_peek(child, addr, &value); + if (ret == 0) + ret = put_user(value, (unsigned int __user *) compat_ptr(data)); + else + ret = -EIO; + goto out_tsk; + + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: /* write the word at location addr */ + ret = ia32_poke(child, addr, data); + goto out_tsk; + + case PTRACE_PEEKUSR: /* read word at addr in USER area */ + ret = -EIO; + if ((addr & 3) || addr > 17*sizeof(int)) + break; + + tmp = getreg(child, addr); + if (!put_user(tmp, (unsigned int __user *) compat_ptr(data))) + ret = 0; + break; + + case PTRACE_POKEUSR: /* write word at addr in USER area */ + ret = -EIO; + if ((addr & 3) || addr > 17*sizeof(int)) + break; + + putreg(child, addr, data); + ret = 0; + break; + + case IA32_PTRACE_GETREGS: + if (!access_ok(VERIFY_WRITE, compat_ptr(data), 17*sizeof(int))) { + ret = -EIO; + break; + } + for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) { + put_user(getreg(child, i), (unsigned int __user *) compat_ptr(data)); + data += sizeof(int); + } + ret = 0; + break; + + case IA32_PTRACE_SETREGS: + if (!access_ok(VERIFY_READ, compat_ptr(data), 17*sizeof(int))) { + ret = -EIO; + break; + } + for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) { + get_user(tmp, (unsigned int __user *) compat_ptr(data)); + putreg(child, i, tmp); + data += sizeof(int); + } + ret = 0; + break; + + case IA32_PTRACE_GETFPREGS: + ret = save_ia32_fpstate(child, (struct ia32_user_i387_struct __user *) + compat_ptr(data)); + break; + + case IA32_PTRACE_GETFPXREGS: + ret = save_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *) + compat_ptr(data)); + break; + + case IA32_PTRACE_SETFPREGS: + ret = restore_ia32_fpstate(child, (struct ia32_user_i387_struct __user *) + compat_ptr(data)); + break; + + case IA32_PTRACE_SETFPXREGS: + ret = restore_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *) + compat_ptr(data)); + break; + + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned int __user *) compat_ptr(data)); + break; + + case PTRACE_SYSCALL: /* continue, stop after next syscall */ + case PTRACE_CONT: /* restart after signal. */ + case PTRACE_KILL: + case PTRACE_SINGLESTEP: /* execute chile for one instruction */ + case PTRACE_DETACH: /* detach a process */ + ret = sys_ptrace(request, pid, addr, data); + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + + } + out_tsk: + put_task_struct(child); + out: + unlock_kernel(); + return ret; +} + +typedef struct { + unsigned int ss_sp; + unsigned int ss_flags; + unsigned int ss_size; +} ia32_stack_t; + +asmlinkage long +sys32_sigaltstack (ia32_stack_t __user *uss32, ia32_stack_t __user *uoss32, + long arg2, long arg3, long arg4, long arg5, long arg6, + long arg7, struct pt_regs pt) +{ + stack_t uss, uoss; + ia32_stack_t buf32; + int ret; + mm_segment_t old_fs = get_fs(); + + if (uss32) { + if (copy_from_user(&buf32, uss32, sizeof(ia32_stack_t))) + return -EFAULT; + uss.ss_sp = (void __user *) (long) buf32.ss_sp; + uss.ss_flags = buf32.ss_flags; + /* MINSIGSTKSZ is different for ia32 vs ia64. We lie here to pass the + check and set it to the user requested value later */ + if ((buf32.ss_flags != SS_DISABLE) && (buf32.ss_size < MINSIGSTKSZ_IA32)) { + ret = -ENOMEM; + goto out; + } + uss.ss_size = MINSIGSTKSZ; + } + set_fs(KERNEL_DS); + ret = do_sigaltstack(uss32 ? (stack_t __user *) &uss : NULL, + (stack_t __user *) &uoss, pt.r12); + current->sas_ss_size = buf32.ss_size; + set_fs(old_fs); +out: + if (ret < 0) + return(ret); + if (uoss32) { + buf32.ss_sp = (long __user) uoss.ss_sp; + buf32.ss_flags = uoss.ss_flags; + buf32.ss_size = uoss.ss_size; + if (copy_to_user(uoss32, &buf32, sizeof(ia32_stack_t))) + return -EFAULT; + } + return ret; +} + +asmlinkage int +sys32_pause (void) +{ + current->state = TASK_INTERRUPTIBLE; + schedule(); + return -ERESTARTNOHAND; +} + +asmlinkage int +sys32_msync (unsigned int start, unsigned int len, int flags) +{ + unsigned int addr; + + if (OFFSET4K(start)) + return -EINVAL; + addr = PAGE_START(start); + return sys_msync(addr, len + (start - addr), flags); +} + +struct sysctl32 { + unsigned int name; + int nlen; + unsigned int oldval; + unsigned int oldlenp; + unsigned int newval; + unsigned int newlen; + unsigned int __unused[4]; +}; + +#ifdef CONFIG_SYSCTL +asmlinkage long +sys32_sysctl (struct sysctl32 __user *args) +{ + struct sysctl32 a32; + mm_segment_t old_fs = get_fs (); + void __user *oldvalp, *newvalp; + size_t oldlen; + int __user *namep; + long ret; + + if (copy_from_user(&a32, args, sizeof(a32))) + return -EFAULT; + + /* + * We need to pre-validate these because we have to disable address checking + * before calling do_sysctl() because of OLDLEN but we can't run the risk of the + * user specifying bad addresses here. Well, since we're dealing with 32 bit + * addresses, we KNOW that access_ok() will always succeed, so this is an + * expensive NOP, but so what... + */ + namep = (int __user *) compat_ptr(a32.name); + oldvalp = compat_ptr(a32.oldval); + newvalp = compat_ptr(a32.newval); + + if ((oldvalp && get_user(oldlen, (int __user *) compat_ptr(a32.oldlenp))) + || !access_ok(VERIFY_WRITE, namep, 0) + || !access_ok(VERIFY_WRITE, oldvalp, 0) + || !access_ok(VERIFY_WRITE, newvalp, 0)) + return -EFAULT; + + set_fs(KERNEL_DS); + lock_kernel(); + ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *) &oldlen, + newvalp, (size_t) a32.newlen); + unlock_kernel(); + set_fs(old_fs); + + if (oldvalp && put_user (oldlen, (int __user *) compat_ptr(a32.oldlenp))) + return -EFAULT; + + return ret; +} +#endif + +asmlinkage long +sys32_newuname (struct new_utsname __user *name) +{ + int ret = sys_newuname(name); + + if (!ret) + if (copy_to_user(name->machine, "i686\0\0\0", 8)) + ret = -EFAULT; + return ret; +} + +asmlinkage long +sys32_getresuid16 (u16 __user *ruid, u16 __user *euid, u16 __user *suid) +{ + uid_t a, b, c; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_getresuid((uid_t __user *) &a, (uid_t __user *) &b, (uid_t __user *) &c); + set_fs(old_fs); + + if (put_user(a, ruid) || put_user(b, euid) || put_user(c, suid)) + return -EFAULT; + return ret; +} + +asmlinkage long +sys32_getresgid16 (u16 __user *rgid, u16 __user *egid, u16 __user *sgid) +{ + gid_t a, b, c; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_getresgid((gid_t __user *) &a, (gid_t __user *) &b, (gid_t __user *) &c); + set_fs(old_fs); + + if (ret) + return ret; + + return put_user(a, rgid) | put_user(b, egid) | put_user(c, sgid); +} + +asmlinkage long +sys32_lseek (unsigned int fd, int offset, unsigned int whence) +{ + /* Sign-extension of "offset" is important here... */ + return sys_lseek(fd, offset, whence); +} + +static int +groups16_to_user(short __user *grouplist, struct group_info *group_info) +{ + int i; + short group; + + for (i = 0; i < group_info->ngroups; i++) { + group = (short)GROUP_AT(group_info, i); + if (put_user(group, grouplist+i)) + return -EFAULT; + } + + return 0; +} + +static int +groups16_from_user(struct group_info *group_info, short __user *grouplist) +{ + int i; + short group; + + for (i = 0; i < group_info->ngroups; i++) { + if (get_user(group, grouplist+i)) + return -EFAULT; + GROUP_AT(group_info, i) = (gid_t)group; + } + + return 0; +} + +asmlinkage long +sys32_getgroups16 (int gidsetsize, short __user *grouplist) +{ + int i; + + if (gidsetsize < 0) + return -EINVAL; + + get_group_info(current->group_info); + i = current->group_info->ngroups; + if (gidsetsize) { + if (i > gidsetsize) { + i = -EINVAL; + goto out; + } + if (groups16_to_user(grouplist, current->group_info)) { + i = -EFAULT; + goto out; + } + } +out: + put_group_info(current->group_info); + return i; +} + +asmlinkage long +sys32_setgroups16 (int gidsetsize, short __user *grouplist) +{ + struct group_info *group_info; + int retval; + + if (!capable(CAP_SETGID)) + return -EPERM; + if ((unsigned)gidsetsize > NGROUPS_MAX) + return -EINVAL; + + group_info = groups_alloc(gidsetsize); + if (!group_info) + return -ENOMEM; + retval = groups16_from_user(group_info, grouplist); + if (retval) { + put_group_info(group_info); + return retval; + } + + retval = set_current_groups(group_info); + put_group_info(group_info); + + return retval; +} + +asmlinkage long +sys32_truncate64 (unsigned int path, unsigned int len_lo, unsigned int len_hi) +{ + return sys_truncate(compat_ptr(path), ((unsigned long) len_hi << 32) | len_lo); +} + +asmlinkage long +sys32_ftruncate64 (int fd, unsigned int len_lo, unsigned int len_hi) +{ + return sys_ftruncate(fd, ((unsigned long) len_hi << 32) | len_lo); +} + +static int +putstat64 (struct stat64 __user *ubuf, struct kstat *kbuf) +{ + int err; + u64 hdev; + + if (clear_user(ubuf, sizeof(*ubuf))) + return -EFAULT; + + hdev = huge_encode_dev(kbuf->dev); + err = __put_user(hdev, (u32 __user*)&ubuf->st_dev); + err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_dev) + 1); + err |= __put_user(kbuf->ino, &ubuf->__st_ino); + err |= __put_user(kbuf->ino, &ubuf->st_ino_lo); + err |= __put_user(kbuf->ino >> 32, &ubuf->st_ino_hi); + err |= __put_user(kbuf->mode, &ubuf->st_mode); + err |= __put_user(kbuf->nlink, &ubuf->st_nlink); + err |= __put_user(kbuf->uid, &ubuf->st_uid); + err |= __put_user(kbuf->gid, &ubuf->st_gid); + hdev = huge_encode_dev(kbuf->rdev); + err = __put_user(hdev, (u32 __user*)&ubuf->st_rdev); + err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_rdev) + 1); + err |= __put_user(kbuf->size, &ubuf->st_size_lo); + err |= __put_user((kbuf->size >> 32), &ubuf->st_size_hi); + err |= __put_user(kbuf->atime.tv_sec, &ubuf->st_atime); + err |= __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec); + err |= __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime); + err |= __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec); + err |= __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime); + err |= __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec); + err |= __put_user(kbuf->blksize, &ubuf->st_blksize); + err |= __put_user(kbuf->blocks, &ubuf->st_blocks); + return err; +} + +asmlinkage long +sys32_stat64 (char __user *filename, struct stat64 __user *statbuf) +{ + struct kstat s; + long ret = vfs_stat(filename, &s); + if (!ret) + ret = putstat64(statbuf, &s); + return ret; +} + +asmlinkage long +sys32_lstat64 (char __user *filename, struct stat64 __user *statbuf) +{ + struct kstat s; + long ret = vfs_lstat(filename, &s); + if (!ret) + ret = putstat64(statbuf, &s); + return ret; +} + +asmlinkage long +sys32_fstat64 (unsigned int fd, struct stat64 __user *statbuf) +{ + struct kstat s; + long ret = vfs_fstat(fd, &s); + if (!ret) + ret = putstat64(statbuf, &s); + return ret; +} + +struct sysinfo32 { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + u16 procs; + u16 pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[8]; +}; + +asmlinkage long +sys32_sysinfo (struct sysinfo32 __user *info) +{ + struct sysinfo s; + long ret, err; + int bitcount = 0; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_sysinfo((struct sysinfo __user *) &s); + set_fs(old_fs); + /* Check to see if any memory value is too large for 32-bit and + * scale down if needed. + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(*info))) + return -EFAULT; + + err = __put_user(s.uptime, &info->uptime); + err |= __put_user(s.loads[0], &info->loads[0]); + err |= __put_user(s.loads[1], &info->loads[1]); + err |= __put_user(s.loads[2], &info->loads[2]); + err |= __put_user(s.totalram, &info->totalram); + err |= __put_user(s.freeram, &info->freeram); + err |= __put_user(s.sharedram, &info->sharedram); + err |= __put_user(s.bufferram, &info->bufferram); + err |= __put_user(s.totalswap, &info->totalswap); + err |= __put_user(s.freeswap, &info->freeswap); + err |= __put_user(s.procs, &info->procs); + err |= __put_user (s.totalhigh, &info->totalhigh); + err |= __put_user (s.freehigh, &info->freehigh); + err |= __put_user (s.mem_unit, &info->mem_unit); + if (err) + return -EFAULT; + return ret; +} + +asmlinkage long +sys32_sched_rr_get_interval (pid_t pid, struct compat_timespec __user *interval) +{ + mm_segment_t old_fs = get_fs(); + struct timespec t; + long ret; + + set_fs(KERNEL_DS); + ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t); + set_fs(old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +asmlinkage long +sys32_pread (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi) +{ + return sys_pread64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo); +} + +asmlinkage long +sys32_pwrite (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi) +{ + return sys_pwrite64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo); +} + +asmlinkage long +sys32_sendfile (int out_fd, int in_fd, int __user *offset, unsigned int count) +{ + mm_segment_t old_fs = get_fs(); + long ret; + off_t of; + + if (offset && get_user(of, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count); + set_fs(old_fs); + + if (!ret && offset && put_user(of, offset)) + return -EFAULT; + + return ret; +} + +asmlinkage long +sys32_personality (unsigned int personality) +{ + long ret; + + if (current->personality == PER_LINUX32 && personality == PER_LINUX) + personality = PER_LINUX32; + ret = sys_personality(personality); + if (ret == PER_LINUX32) + ret = PER_LINUX; + return ret; +} + +asmlinkage unsigned long +sys32_brk (unsigned int brk) +{ + unsigned long ret, obrk; + struct mm_struct *mm = current->mm; + + obrk = mm->brk; + ret = sys_brk(brk); + if (ret < obrk) + clear_user(compat_ptr(ret), PAGE_ALIGN(ret) - ret); + return ret; +} + +/* + * Exactly like fs/open.c:sys_open(), except that it doesn't set the O_LARGEFILE flag. + */ +asmlinkage long +sys32_open (const char __user * filename, int flags, int mode) +{ + char * tmp; + int fd, error; + + tmp = getname(filename); + fd = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + fd = get_unused_fd(); + if (fd >= 0) { + struct file *f = filp_open(tmp, flags, mode); + error = PTR_ERR(f); + if (IS_ERR(f)) + goto out_error; + fd_install(fd, f); + } +out: + putname(tmp); + } + return fd; + +out_error: + put_unused_fd(fd); + fd = error; + goto out; +} + +/* Structure for ia32 emulation on ia64 */ +struct epoll_event32 +{ + u32 events; + u32 data[2]; +}; + +asmlinkage long +sys32_epoll_ctl(int epfd, int op, int fd, struct epoll_event32 __user *event) +{ + mm_segment_t old_fs = get_fs(); + struct epoll_event event64; + int error; + u32 data_halfword; + + if (!access_ok(VERIFY_READ, event, sizeof(struct epoll_event32))) + return -EFAULT; + + __get_user(event64.events, &event->events); + __get_user(data_halfword, &event->data[0]); + event64.data = data_halfword; + __get_user(data_halfword, &event->data[1]); + event64.data |= (u64)data_halfword << 32; + + set_fs(KERNEL_DS); + error = sys_epoll_ctl(epfd, op, fd, (struct epoll_event __user *) &event64); + set_fs(old_fs); + + return error; +} + +asmlinkage long +sys32_epoll_wait(int epfd, struct epoll_event32 __user * events, int maxevents, + int timeout) +{ + struct epoll_event *events64 = NULL; + mm_segment_t old_fs = get_fs(); + int error, numevents, size; + int evt_idx; + int do_free_pages = 0; + + if (maxevents <= 0) { + return -EINVAL; + } + + /* Verify that the area passed by the user is writeable */ + if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event32))) + return -EFAULT; + + /* + * Allocate space for the intermediate copy. If the space needed + * is large enough to cause kmalloc to fail, then try again with + * __get_free_pages. + */ + size = maxevents * sizeof(struct epoll_event); + events64 = kmalloc(size, GFP_KERNEL); + if (events64 == NULL) { + events64 = (struct epoll_event *) + __get_free_pages(GFP_KERNEL, get_order(size)); + if (events64 == NULL) + return -ENOMEM; + do_free_pages = 1; + } + + /* Do the system call */ + set_fs(KERNEL_DS); /* copy_to/from_user should work on kernel mem*/ + numevents = sys_epoll_wait(epfd, (struct epoll_event __user *) events64, + maxevents, timeout); + set_fs(old_fs); + + /* Don't modify userspace memory if we're returning an error */ + if (numevents > 0) { + /* Translate the 64-bit structures back into the 32-bit + structures */ + for (evt_idx = 0; evt_idx < numevents; evt_idx++) { + __put_user(events64[evt_idx].events, + &events[evt_idx].events); + __put_user((u32)events64[evt_idx].data, + &events[evt_idx].data[0]); + __put_user((u32)(events64[evt_idx].data >> 32), + &events[evt_idx].data[1]); + } + } + + if (do_free_pages) + free_pages((unsigned long) events64, get_order(size)); + else + kfree(events64); + return numevents; +} + +/* + * Get a yet unused TLS descriptor index. + */ +static int +get_free_idx (void) +{ + struct thread_struct *t = ¤t->thread; + int idx; + + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (desc_empty(t->tls_array + idx)) + return idx + GDT_ENTRY_TLS_MIN; + return -ESRCH; +} + +/* + * Set a given TLS descriptor: + */ +asmlinkage int +sys32_set_thread_area (struct ia32_user_desc __user *u_info) +{ + struct thread_struct *t = ¤t->thread; + struct ia32_user_desc info; + struct desc_struct *desc; + int cpu, idx; + + if (copy_from_user(&info, u_info, sizeof(info))) + return -EFAULT; + idx = info.entry_number; + + /* + * index -1 means the kernel should try to find and allocate an empty descriptor: + */ + if (idx == -1) { + idx = get_free_idx(); + if (idx < 0) + return idx; + if (put_user(idx, &u_info->entry_number)) + return -EFAULT; + } + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; + + cpu = smp_processor_id(); + + if (LDT_empty(&info)) { + desc->a = 0; + desc->b = 0; + } else { + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + } + load_TLS(t, cpu); + return 0; +} + +/* + * Get the current Thread-Local Storage area: + */ + +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & 0x0000ffff) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & 0x0ffff) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> 22) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) +#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) +#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) + +asmlinkage int +sys32_get_thread_area (struct ia32_user_desc __user *u_info) +{ + struct ia32_user_desc info; + struct desc_struct *desc; + int idx; + + if (get_user(idx, &u_info->entry_number)) + return -EFAULT; + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; + + info.entry_number = idx; + info.base_addr = GET_BASE(desc); + info.limit = GET_LIMIT(desc); + info.seg_32bit = GET_32BIT(desc); + info.contents = GET_CONTENTS(desc); + info.read_exec_only = !GET_WRITABLE(desc); + info.limit_in_pages = GET_LIMIT_PAGES(desc); + info.seg_not_present = !GET_PRESENT(desc); + info.useable = GET_USEABLE(desc); + + if (copy_to_user(u_info, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +asmlinkage long +sys32_timer_create(u32 clock, struct compat_sigevent __user *se32, timer_t __user *timer_id) +{ + struct sigevent se; + mm_segment_t oldfs; + timer_t t; + long err; + + if (se32 == NULL) + return sys_timer_create(clock, NULL, timer_id); + + if (get_compat_sigevent(&se, se32)) + return -EFAULT; + + if (!access_ok(VERIFY_WRITE,timer_id,sizeof(timer_t))) + return -EFAULT; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + err = sys_timer_create(clock, (struct sigevent __user *) &se, (timer_t __user *) &t); + set_fs(oldfs); + + if (!err) + err = __put_user (t, timer_id); + + return err; +} + +long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, + __u32 len_low, __u32 len_high, int advice) +{ + return sys_fadvise64_64(fd, + (((u64)offset_high)<<32) | offset_low, + (((u64)len_high)<<32) | len_low, + advice); +} + +#ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */ + +asmlinkage long sys32_setreuid(compat_uid_t ruid, compat_uid_t euid) +{ + uid_t sruid, seuid; + + sruid = (ruid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)ruid); + seuid = (euid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)euid); + return sys_setreuid(sruid, seuid); +} + +asmlinkage long +sys32_setresuid(compat_uid_t ruid, compat_uid_t euid, + compat_uid_t suid) +{ + uid_t sruid, seuid, ssuid; + + sruid = (ruid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)ruid); + seuid = (euid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)euid); + ssuid = (suid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)suid); + return sys_setresuid(sruid, seuid, ssuid); +} + +asmlinkage long +sys32_setregid(compat_gid_t rgid, compat_gid_t egid) +{ + gid_t srgid, segid; + + srgid = (rgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)rgid); + segid = (egid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)egid); + return sys_setregid(srgid, segid); +} + +asmlinkage long +sys32_setresgid(compat_gid_t rgid, compat_gid_t egid, + compat_gid_t sgid) +{ + gid_t srgid, segid, ssgid; + + srgid = (rgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)rgid); + segid = (egid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)egid); + ssgid = (sgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)sgid); + return sys_setresgid(srgid, segid, ssgid); +} + +/* Handle adjtimex compatibility. */ + +struct timex32 { + u32 modes; + s32 offset, freq, maxerror, esterror; + s32 status, constant, precision, tolerance; + struct compat_timeval time; + s32 tick; + s32 ppsfreq, jitter, shift, stabil; + s32 jitcnt, calcnt, errcnt, stbcnt; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; +}; + +extern int do_adjtimex(struct timex *); + +asmlinkage long +sys32_adjtimex(struct timex32 *utp) +{ + struct timex txc; + int ret; + + memset(&txc, 0, sizeof(struct timex)); + + if(get_user(txc.modes, &utp->modes) || + __get_user(txc.offset, &utp->offset) || + __get_user(txc.freq, &utp->freq) || + __get_user(txc.maxerror, &utp->maxerror) || + __get_user(txc.esterror, &utp->esterror) || + __get_user(txc.status, &utp->status) || + __get_user(txc.constant, &utp->constant) || + __get_user(txc.precision, &utp->precision) || + __get_user(txc.tolerance, &utp->tolerance) || + __get_user(txc.time.tv_sec, &utp->time.tv_sec) || + __get_user(txc.time.tv_usec, &utp->time.tv_usec) || + __get_user(txc.tick, &utp->tick) || + __get_user(txc.ppsfreq, &utp->ppsfreq) || + __get_user(txc.jitter, &utp->jitter) || + __get_user(txc.shift, &utp->shift) || + __get_user(txc.stabil, &utp->stabil) || + __get_user(txc.jitcnt, &utp->jitcnt) || + __get_user(txc.calcnt, &utp->calcnt) || + __get_user(txc.errcnt, &utp->errcnt) || + __get_user(txc.stbcnt, &utp->stbcnt)) + return -EFAULT; + + ret = do_adjtimex(&txc); + + if(put_user(txc.modes, &utp->modes) || + __put_user(txc.offset, &utp->offset) || + __put_user(txc.freq, &utp->freq) || + __put_user(txc.maxerror, &utp->maxerror) || + __put_user(txc.esterror, &utp->esterror) || + __put_user(txc.status, &utp->status) || + __put_user(txc.constant, &utp->constant) || + __put_user(txc.precision, &utp->precision) || + __put_user(txc.tolerance, &utp->tolerance) || + __put_user(txc.time.tv_sec, &utp->time.tv_sec) || + __put_user(txc.time.tv_usec, &utp->time.tv_usec) || + __put_user(txc.tick, &utp->tick) || + __put_user(txc.ppsfreq, &utp->ppsfreq) || + __put_user(txc.jitter, &utp->jitter) || + __put_user(txc.shift, &utp->shift) || + __put_user(txc.stabil, &utp->stabil) || + __put_user(txc.jitcnt, &utp->jitcnt) || + __put_user(txc.calcnt, &utp->calcnt) || + __put_user(txc.errcnt, &utp->errcnt) || + __put_user(txc.stbcnt, &utp->stbcnt)) + ret = -EFAULT; + + return ret; +} +#endif /* NOTYET */ diff --git a/arch/ia64/install.sh b/arch/ia64/install.sh new file mode 100644 index 000000000000..929e780026d1 --- /dev/null +++ b/arch/ia64/install.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# +# arch/ia64/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# +# "make install" script for ia64 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +# User may have a custom install script + +if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi +if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi + +# Default install - same as make zlilo + +if [ -f $4/vmlinuz ]; then + mv $4/vmlinuz $4/vmlinuz.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/vmlinuz +cp $3 $4/System.map + +test -x /usr/sbin/elilo && /usr/sbin/elilo diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile new file mode 100644 index 000000000000..c1a02bbc252c --- /dev/null +++ b/arch/ia64/kernel/Makefile @@ -0,0 +1,52 @@ +# +# Makefile for the linux kernel. +# + +extra-y := head.o init_task.o vmlinux.lds + +obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ + irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ + salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ + unwind.o mca.o mca_asm.o topology.o + +obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o +obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o +obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o +obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o +obj-$(CONFIG_IA64_PALINFO) += palinfo.o +obj-$(CONFIG_IOSAPIC) += iosapic.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o +obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o +obj-$(CONFIG_IA64_CYCLONE) += cyclone.o +obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o +mca_recovery-y += mca_drv.o mca_drv_asm.o + +# The gate DSO image is built using a special linker script. +targets += gate.so gate-syms.o + +extra-y += gate.so gate-syms.o gate.lds gate.o + +# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. +CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 + +CPPFLAGS_gate.lds := -P -C -U$(ARCH) + +quiet_cmd_gate = GATE $@ + cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@ + +GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 +$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE + $(call if_changed,gate) + +$(obj)/built-in.o: $(obj)/gate-syms.o +$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o + +GATECFLAGS_gate-syms.o = -r +$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE + $(call if_changed,gate) + +# gate-data.o contains the gate DSO image as data in section .data.gate. +# We must build gate.so before we can assemble it. +# Note: kbuild does not track this dependency due to usage of .incbin +$(obj)/gate-data.o: $(obj)/gate.so diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c new file mode 100644 index 000000000000..2623df5e2633 --- /dev/null +++ b/arch/ia64/kernel/acpi-ext.c @@ -0,0 +1,100 @@ +/* + * arch/ia64/kernel/acpi-ext.c + * + * Copyright (C) 2003 Hewlett-Packard + * Copyright (C) Alex Williamson + * Copyright (C) Bjorn Helgaas + * + * Vendor specific extensions to ACPI. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/acpi.h> +#include <linux/efi.h> + +#include <asm/acpi-ext.h> + +struct acpi_vendor_descriptor { + u8 guid_id; + efi_guid_t guid; +}; + +struct acpi_vendor_info { + struct acpi_vendor_descriptor *descriptor; + u8 *data; + u32 length; +}; + +acpi_status +acpi_vendor_resource_match(struct acpi_resource *resource, void *context) +{ + struct acpi_vendor_info *info = (struct acpi_vendor_info *) context; + struct acpi_resource_vendor *vendor; + struct acpi_vendor_descriptor *descriptor; + u32 length; + + if (resource->id != ACPI_RSTYPE_VENDOR) + return AE_OK; + + vendor = (struct acpi_resource_vendor *) &resource->data; + descriptor = (struct acpi_vendor_descriptor *) vendor->reserved; + if (vendor->length <= sizeof(*info->descriptor) || + descriptor->guid_id != info->descriptor->guid_id || + efi_guidcmp(descriptor->guid, info->descriptor->guid)) + return AE_OK; + + length = vendor->length - sizeof(struct acpi_vendor_descriptor); + info->data = acpi_os_allocate(length); + if (!info->data) + return AE_NO_MEMORY; + + memcpy(info->data, vendor->reserved + sizeof(struct acpi_vendor_descriptor), length); + info->length = length; + return AE_CTRL_TERMINATE; +} + +acpi_status +acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor *id, + u8 **data, u32 *length) +{ + struct acpi_vendor_info info; + + info.descriptor = id; + info.data = NULL; + + acpi_walk_resources(obj, METHOD_NAME__CRS, acpi_vendor_resource_match, &info); + if (!info.data) + return AE_NOT_FOUND; + + *data = info.data; + *length = info.length; + return AE_OK; +} + +struct acpi_vendor_descriptor hp_ccsr_descriptor = { + .guid_id = 2, + .guid = EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, 0xf6, 0x4a, 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad) +}; + +acpi_status +hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length) +{ + acpi_status status; + u8 *data; + u32 length; + + status = acpi_find_vendor_resource(obj, &hp_ccsr_descriptor, &data, &length); + + if (ACPI_FAILURE(status) || length != 16) + return AE_NOT_FOUND; + + memcpy(csr_base, data, sizeof(*csr_base)); + memcpy(csr_length, data + 8, sizeof(*csr_length)); + acpi_os_free(data); + + return AE_OK; +} + +EXPORT_SYMBOL(hp_acpi_csr_space); diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c new file mode 100644 index 000000000000..a8e99c56a768 --- /dev/null +++ b/arch/ia64/kernel/acpi.c @@ -0,0 +1,841 @@ +/* + * acpi.c - Architecture-Specific Low-Level ACPI Support + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2000 Intel Corp. + * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com> + * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> + * Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com> + * Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com> + * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/irq.h> +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/mmzone.h> +#include <linux/nodemask.h> +#include <asm/io.h> +#include <asm/iosapic.h> +#include <asm/machvec.h> +#include <asm/page.h> +#include <asm/system.h> +#include <asm/numa.h> +#include <asm/sal.h> +#include <asm/cyclone.h> + +#define BAD_MADT_ENTRY(entry, end) ( \ + (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ + ((acpi_table_entry_header *)entry)->length != sizeof(*entry)) + +#define PREFIX "ACPI: " + +void (*pm_idle) (void); +EXPORT_SYMBOL(pm_idle); +void (*pm_power_off) (void); +EXPORT_SYMBOL(pm_power_off); + +unsigned char acpi_kbd_controller_present = 1; +unsigned char acpi_legacy_devices; + +#define MAX_SAPICS 256 +u16 ia64_acpiid_to_sapicid[MAX_SAPICS] = + { [0 ... MAX_SAPICS - 1] = -1 }; +EXPORT_SYMBOL(ia64_acpiid_to_sapicid); + +const char * +acpi_get_sysname (void) +{ +#ifdef CONFIG_IA64_GENERIC + unsigned long rsdp_phys; + struct acpi20_table_rsdp *rsdp; + struct acpi_table_xsdt *xsdt; + struct acpi_table_header *hdr; + + rsdp_phys = acpi_find_rsdp(); + if (!rsdp_phys) { + printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n"); + return "dig"; + } + + rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys); + if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) { + printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); + return "dig"; + } + + xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address); + hdr = &xsdt->header; + if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) { + printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); + return "dig"; + } + + if (!strcmp(hdr->oem_id, "HP")) { + return "hpzx1"; + } + else if (!strcmp(hdr->oem_id, "SGI")) { + return "sn2"; + } + + return "dig"; +#else +# if defined (CONFIG_IA64_HP_SIM) + return "hpsim"; +# elif defined (CONFIG_IA64_HP_ZX1) + return "hpzx1"; +# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB) + return "hpzx1_swiotlb"; +# elif defined (CONFIG_IA64_SGI_SN2) + return "sn2"; +# elif defined (CONFIG_IA64_DIG) + return "dig"; +# else +# error Unknown platform. Fix acpi.c. +# endif +#endif +} + +#ifdef CONFIG_ACPI_BOOT + +#define ACPI_MAX_PLATFORM_INTERRUPTS 256 + +/* Array to record platform interrupt vectors for generic interrupt routing. */ +int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = { + [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1 +}; + +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; + +/* + * Interrupt routing API for device drivers. Provides interrupt vector for + * a generic platform event. Currently only CPEI is implemented. + */ +int +acpi_request_vector (u32 int_type) +{ + int vector = -1; + + if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) { + /* corrected platform error interrupt */ + vector = platform_intr_list[int_type]; + } else + printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n"); + return vector; +} + +char * +__acpi_map_table (unsigned long phys_addr, unsigned long size) +{ + return __va(phys_addr); +} + +/* -------------------------------------------------------------------------- + Boot-time Table Parsing + -------------------------------------------------------------------------- */ + +static int total_cpus __initdata; +static int available_cpus __initdata; +struct acpi_table_madt * acpi_madt __initdata; +static u8 has_8259; + + +static int __init +acpi_parse_lapic_addr_ovr ( + acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_lapic_addr_ovr *lapic; + + lapic = (struct acpi_table_lapic_addr_ovr *) header; + + if (BAD_MADT_ENTRY(lapic, end)) + return -EINVAL; + + if (lapic->address) { + iounmap(ipi_base_addr); + ipi_base_addr = ioremap(lapic->address, 0); + } + return 0; +} + + +static int __init +acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_lsapic *lsapic; + + lsapic = (struct acpi_table_lsapic *) header; + + if (BAD_MADT_ENTRY(lsapic, end)) + return -EINVAL; + + if (lsapic->flags.enabled) { +#ifdef CONFIG_SMP + smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid; +#endif + ia64_acpiid_to_sapicid[lsapic->acpi_id] = (lsapic->id << 8) | lsapic->eid; + ++available_cpus; + } + + total_cpus++; + return 0; +} + + +static int __init +acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_lapic_nmi *lacpi_nmi; + + lacpi_nmi = (struct acpi_table_lapic_nmi*) header; + + if (BAD_MADT_ENTRY(lacpi_nmi, end)) + return -EINVAL; + + /* TBD: Support lapic_nmi entries */ + return 0; +} + + +static int __init +acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_iosapic *iosapic; + + iosapic = (struct acpi_table_iosapic *) header; + + if (BAD_MADT_ENTRY(iosapic, end)) + return -EINVAL; + + iosapic_init(iosapic->address, iosapic->global_irq_base); + + return 0; +} + + +static int __init +acpi_parse_plat_int_src ( + acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_plat_int_src *plintsrc; + int vector; + + plintsrc = (struct acpi_table_plat_int_src *) header; + + if (BAD_MADT_ENTRY(plintsrc, end)) + return -EINVAL; + + /* + * Get vector assignment for this interrupt, set attributes, + * and program the IOSAPIC routing table. + */ + vector = iosapic_register_platform_intr(plintsrc->type, + plintsrc->global_irq, + plintsrc->iosapic_vector, + plintsrc->eid, + plintsrc->id, + (plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + (plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); + + platform_intr_list[plintsrc->type] = vector; + return 0; +} + + +static int __init +acpi_parse_int_src_ovr ( + acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_int_src_ovr *p; + + p = (struct acpi_table_int_src_ovr *) header; + + if (BAD_MADT_ENTRY(p, end)) + return -EINVAL; + + iosapic_override_isa_irq(p->bus_irq, p->global_irq, + (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + (p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); + return 0; +} + + +static int __init +acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_nmi_src *nmi_src; + + nmi_src = (struct acpi_table_nmi_src*) header; + + if (BAD_MADT_ENTRY(nmi_src, end)) + return -EINVAL; + + /* TBD: Support nimsrc entries */ + return 0; +} + +static void __init +acpi_madt_oem_check (char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && + (!strncmp(oem_table_id, "SERMOW", 6))) { + + /* + * Unfortunately ITC_DRIFT is not yet part of the + * official SAL spec, so the ITC_DRIFT bit is not + * set by the BIOS on this hardware. + */ + sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; + + cyclone_setup(); + } +} + +static int __init +acpi_parse_madt (unsigned long phys_addr, unsigned long size) +{ + if (!phys_addr || !size) + return -EINVAL; + + acpi_madt = (struct acpi_table_madt *) __va(phys_addr); + + /* remember the value for reference after free_initmem() */ +#ifdef CONFIG_ITANIUM + has_8259 = 1; /* Firmware on old Itanium systems is broken */ +#else + has_8259 = acpi_madt->flags.pcat_compat; +#endif + iosapic_system_init(has_8259); + + /* Get base address of IPI Message Block */ + + if (acpi_madt->lapic_address) + ipi_base_addr = ioremap(acpi_madt->lapic_address, 0); + + printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr); + + acpi_madt_oem_check(acpi_madt->header.oem_id, + acpi_madt->header.oem_table_id); + + return 0; +} + + +#ifdef CONFIG_ACPI_NUMA + +#undef SLIT_DEBUG + +#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32) + +static int __initdata srat_num_cpus; /* number of cpus */ +static u32 __devinitdata pxm_flag[PXM_FLAG_LEN]; +#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) +#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) +/* maps to convert between proximity domain and logical node ID */ +int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS]; +int __initdata nid_to_pxm_map[MAX_NUMNODES]; +static struct acpi_table_slit __initdata *slit_table; + +/* + * ACPI 2.0 SLIT (System Locality Information Table) + * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf + */ +void __init +acpi_numa_slit_init (struct acpi_table_slit *slit) +{ + u32 len; + + len = sizeof(struct acpi_table_header) + 8 + + slit->localities * slit->localities; + if (slit->header.length != len) { + printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", + len, slit->header.length); + memset(numa_slit, 10, sizeof(numa_slit)); + return; + } + slit_table = slit; +} + +void __init +acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa) +{ + /* record this node in proximity bitmap */ + pxm_bit_set(pa->proximity_domain); + + node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid); + /* nid should be overridden as logical node id later */ + node_cpuid[srat_num_cpus].nid = pa->proximity_domain; + srat_num_cpus++; +} + +void __init +acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma) +{ + unsigned long paddr, size; + u8 pxm; + struct node_memblk_s *p, *q, *pend; + + pxm = ma->proximity_domain; + + /* fill node memory chunk structure */ + paddr = ma->base_addr_hi; + paddr = (paddr << 32) | ma->base_addr_lo; + size = ma->length_hi; + size = (size << 32) | ma->length_lo; + + /* Ignore disabled entries */ + if (!ma->flags.enabled) + return; + + /* record this node in proximity bitmap */ + pxm_bit_set(pxm); + + /* Insertion sort based on base address */ + pend = &node_memblk[num_node_memblks]; + for (p = &node_memblk[0]; p < pend; p++) { + if (paddr < p->start_paddr) + break; + } + if (p < pend) { + for (q = pend - 1; q >= p; q--) + *(q + 1) = *q; + } + p->start_paddr = paddr; + p->size = size; + p->nid = pxm; + num_node_memblks++; +} + +void __init +acpi_numa_arch_fixup (void) +{ + int i, j, node_from, node_to; + + /* If there's no SRAT, fix the phys_id and mark node 0 online */ + if (srat_num_cpus == 0) { + node_set_online(0); + node_cpuid[0].phys_id = hard_smp_processor_id(); + return; + } + + /* + * MCD - This can probably be dropped now. No need for pxm ID to node ID + * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES. + */ + /* calculate total number of nodes in system from PXM bitmap */ + memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); + memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); + nodes_clear(node_online_map); + for (i = 0; i < MAX_PXM_DOMAINS; i++) { + if (pxm_bit_test(i)) { + int nid = num_online_nodes(); + pxm_to_nid_map[i] = nid; + nid_to_pxm_map[nid] = i; + node_set_online(nid); + } + } + + /* set logical node id in memory chunk structure */ + for (i = 0; i < num_node_memblks; i++) + node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid]; + + /* assign memory bank numbers for each chunk on each node */ + for_each_online_node(i) { + int bank; + + bank = 0; + for (j = 0; j < num_node_memblks; j++) + if (node_memblk[j].nid == i) + node_memblk[j].bank = bank++; + } + + /* set logical node id in cpu structure */ + for (i = 0; i < srat_num_cpus; i++) + node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid]; + + printk(KERN_INFO "Number of logical nodes in system = %d\n", num_online_nodes()); + printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks); + + if (!slit_table) return; + memset(numa_slit, -1, sizeof(numa_slit)); + for (i=0; i<slit_table->localities; i++) { + if (!pxm_bit_test(i)) + continue; + node_from = pxm_to_nid_map[i]; + for (j=0; j<slit_table->localities; j++) { + if (!pxm_bit_test(j)) + continue; + node_to = pxm_to_nid_map[j]; + node_distance(node_from, node_to) = + slit_table->entry[i*slit_table->localities + j]; + } + } + +#ifdef SLIT_DEBUG + printk("ACPI 2.0 SLIT locality table:\n"); + for_each_online_node(i) { + for_each_online_node(j) + printk("%03d ", node_distance(i,j)); + printk("\n"); + } +#endif +} +#endif /* CONFIG_ACPI_NUMA */ + +unsigned int +acpi_register_gsi (u32 gsi, int edge_level, int active_high_low) +{ + if (has_8259 && gsi < 16) + return isa_irq_to_vector(gsi); + + return iosapic_register_intr(gsi, + (active_high_low == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + (edge_level == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); +} +EXPORT_SYMBOL(acpi_register_gsi); + +#ifdef CONFIG_ACPI_DEALLOCATE_IRQ +void +acpi_unregister_gsi (u32 gsi) +{ + iosapic_unregister_intr(gsi); +} +EXPORT_SYMBOL(acpi_unregister_gsi); +#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */ + +static int __init +acpi_parse_fadt (unsigned long phys_addr, unsigned long size) +{ + struct acpi_table_header *fadt_header; + struct fadt_descriptor_rev2 *fadt; + + if (!phys_addr || !size) + return -EINVAL; + + fadt_header = (struct acpi_table_header *) __va(phys_addr); + if (fadt_header->revision != 3) + return -ENODEV; /* Only deal with ACPI 2.0 FADT */ + + fadt = (struct fadt_descriptor_rev2 *) fadt_header; + + if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER)) + acpi_kbd_controller_present = 0; + + if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES) + acpi_legacy_devices = 1; + + acpi_register_gsi(fadt->sci_int, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); + return 0; +} + + +unsigned long __init +acpi_find_rsdp (void) +{ + unsigned long rsdp_phys = 0; + + if (efi.acpi20) + rsdp_phys = __pa(efi.acpi20); + else if (efi.acpi) + printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n"); + return rsdp_phys; +} + + +int __init +acpi_boot_init (void) +{ + + /* + * MADT + * ---- + * Parse the Multiple APIC Description Table (MADT), if exists. + * Note that this table provides platform SMP configuration + * information -- the successor to MPS tables. + */ + + if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) { + printk(KERN_ERR PREFIX "Can't find MADT\n"); + goto skip_madt; + } + + /* Local APIC */ + + if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1) + printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n"); + + if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); + + /* I/O APIC */ + + if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) + printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n"); + + /* System-Level Interrupt Routing */ + + if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0) + printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); + skip_madt: + + /* + * FADT says whether a legacy keyboard controller is present. + * The FADT also contains an SCI_INT line, by which the system + * gets interrupts such as power and sleep buttons. If it's not + * on a Legacy interrupt, it needs to be setup. + */ + if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1) + printk(KERN_ERR PREFIX "Can't find FADT\n"); + +#ifdef CONFIG_SMP + if (available_cpus == 0) { + printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); + printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id()); + smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id(); + available_cpus = 1; /* We've got at least one of these, no? */ + } + smp_boot_data.cpu_count = available_cpus; + + smp_build_cpu_map(); +# ifdef CONFIG_ACPI_NUMA + if (srat_num_cpus == 0) { + int cpu, i = 1; + for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) + if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id()) + node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu]; + } + build_cpu_to_node_map(); +# endif +#endif + /* Make boot-up look pretty */ + printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); + return 0; +} + +int +acpi_gsi_to_irq (u32 gsi, unsigned int *irq) +{ + int vector; + + if (has_8259 && gsi < 16) + *irq = isa_irq_to_vector(gsi); + else { + vector = gsi_to_vector(gsi); + if (vector == -1) + return -1; + + *irq = vector; + } + return 0; +} + +/* + * ACPI based hotplug CPU support + */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU +static +int +acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) +{ +#ifdef CONFIG_ACPI_NUMA + int pxm_id; + + pxm_id = acpi_get_pxm(handle); + + /* + * Assuming that the container driver would have set the proximity + * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag + */ + node_cpuid[cpu].nid = (pxm_id < 0) ? 0: + pxm_to_nid_map[pxm_id]; + + node_cpuid[cpu].phys_id = physid; +#endif + return(0); +} + + +int +acpi_map_lsapic(acpi_handle handle, int *pcpu) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *obj; + struct acpi_table_lsapic *lsapic; + cpumask_t tmp_map; + long physid; + int cpu; + + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) + return -EINVAL; + + if (!buffer.length || !buffer.pointer) + return -EINVAL; + + obj = buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length < sizeof(*lsapic)) { + acpi_os_free(buffer.pointer); + return -EINVAL; + } + + lsapic = (struct acpi_table_lsapic *)obj->buffer.pointer; + + if ((lsapic->header.type != ACPI_MADT_LSAPIC) || + (!lsapic->flags.enabled)) { + acpi_os_free(buffer.pointer); + return -EINVAL; + } + + physid = ((lsapic->id <<8) | (lsapic->eid)); + + acpi_os_free(buffer.pointer); + buffer.length = ACPI_ALLOCATE_BUFFER; + buffer.pointer = NULL; + + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + if(cpu >= NR_CPUS) + return -EINVAL; + + acpi_map_cpu2node(handle, cpu, physid); + + cpu_set(cpu, cpu_present_map); + ia64_cpu_to_sapicid[cpu] = physid; + ia64_acpiid_to_sapicid[lsapic->acpi_id] = ia64_cpu_to_sapicid[cpu]; + + *pcpu = cpu; + return(0); +} +EXPORT_SYMBOL(acpi_map_lsapic); + + +int +acpi_unmap_lsapic(int cpu) +{ + int i; + + for (i=0; i<MAX_SAPICS; i++) { + if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) { + ia64_acpiid_to_sapicid[i] = -1; + break; + } + } + ia64_cpu_to_sapicid[cpu] = -1; + cpu_clear(cpu,cpu_present_map); + +#ifdef CONFIG_ACPI_NUMA + /* NUMA specific cleanup's */ +#endif + + return(0); +} +EXPORT_SYMBOL(acpi_unmap_lsapic); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + + +#ifdef CONFIG_ACPI_NUMA +acpi_status __init +acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *obj; + struct acpi_table_iosapic *iosapic; + unsigned int gsi_base; + int node; + + /* Only care about objects w/ a method that returns the MADT */ + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) + return AE_OK; + + if (!buffer.length || !buffer.pointer) + return AE_OK; + + obj = buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length < sizeof(*iosapic)) { + acpi_os_free(buffer.pointer); + return AE_OK; + } + + iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer; + + if (iosapic->header.type != ACPI_MADT_IOSAPIC) { + acpi_os_free(buffer.pointer); + return AE_OK; + } + + gsi_base = iosapic->global_irq_base; + + acpi_os_free(buffer.pointer); + buffer.length = ACPI_ALLOCATE_BUFFER; + buffer.pointer = NULL; + + /* + * OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell + * us which node to associate this with. + */ + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) + return AE_OK; + + if (!buffer.length || !buffer.pointer) + return AE_OK; + + obj = buffer.pointer; + + if (obj->type != ACPI_TYPE_INTEGER || + obj->integer.value >= MAX_PXM_DOMAINS) { + acpi_os_free(buffer.pointer); + return AE_OK; + } + + node = pxm_to_nid_map[obj->integer.value]; + acpi_os_free(buffer.pointer); + + if (node >= MAX_NUMNODES || !node_online(node) || + cpus_empty(node_to_cpumask(node))) + return AE_OK; + + /* We know a gsi to node mapping! */ + map_iosapic_to_node(gsi_base, node); + return AE_OK; +} +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_ACPI_BOOT */ diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c new file mode 100644 index 000000000000..7d1ae2982c53 --- /dev/null +++ b/arch/ia64/kernel/asm-offsets.c @@ -0,0 +1,239 @@ +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + */ + +#include <linux/config.h> + +#include <linux/sched.h> + +#include <asm-ia64/processor.h> +#include <asm-ia64/ptrace.h> +#include <asm-ia64/siginfo.h> +#include <asm-ia64/sigcontext.h> +#include <asm-ia64/mca.h> + +#include "../kernel/sigframe.h" + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +void foo(void) +{ + DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct)); + DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info)); + DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs)); + DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack)); + DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo)); + DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64)); + DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); + DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); + + BLANK(); + + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); + + BLANK(); + + DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); + DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); + DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); + DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); + DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); + DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); + DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); + DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); + DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); + DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); + DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); + + BLANK(); + + DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock)); + + BLANK(); + + DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct, + group_stop_count)); + DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending)); + + BLANK(); + + DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6)); + DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7)); + DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd)); + DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd)); + DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8)); + DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9)); + DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10)); + DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11)); + DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr)); + DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip)); + DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs)); + DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat)); + DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs)); + DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc)); + DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat)); + + DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore)); + DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr)); + DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0)); + DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs)); + DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1)); + DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12)); + DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13)); + DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr)); + DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15)); + DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14)); + DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2)); + DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3)); + DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16)); + DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17)); + DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18)); + DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19)); + DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20)); + DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21)); + DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22)); + DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23)); + DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24)); + DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25)); + DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26)); + DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27)); + DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28)); + DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29)); + DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30)); + DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31)); + DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv)); + DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6)); + DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7)); + DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8)); + DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9)); + DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10)); + DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11)); + + BLANK(); + + DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat)); + DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr)); + DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2)); + DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3)); + DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4)); + DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5)); + DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12)); + DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13)); + DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14)); + DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15)); + DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16)); + DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17)); + DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18)); + DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19)); + DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20)); + DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21)); + DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22)); + DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23)); + DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24)); + DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25)); + DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26)); + DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27)); + DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28)); + DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29)); + DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30)); + DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31)); + DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4)); + DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5)); + DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6)); + DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7)); + DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0)); + DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1)); + DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2)); + DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3)); + DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4)); + DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5)); + DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs)); + DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc)); + DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat)); + DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat)); + DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore)); + DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr)); + + BLANK(); + + DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip)); + DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp)); + DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr)); + DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat)); + DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat)); + DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0])); + DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm)); + DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags)); + DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6])); + DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr)); + DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12])); + DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base)); + DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs)); + + BLANK(); + + DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal)); + + BLANK(); + + DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0)); + DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1)); + DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2)); + DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler)); + DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc)); + BLANK(); + /* for assembly files which can't include sched.h: */ + DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); + DEFINE(IA64_CLONE_VM, CLONE_VM); + + BLANK(); + DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, + offsetof (struct cpuinfo_ia64, nsec_per_cyc)); + DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_base)); + DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_count)); + DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_stride)); + BLANK(); + DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, + offsetof (struct timespec, tv_nsec)); + + DEFINE(CLONE_SETTLS_BIT, 19); +#if CLONE_SETTLS != (1<<19) +# error "CLONE_SETTLS_BIT incorrect, please fix" +#endif + + BLANK(); + DEFINE(IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, + offsetof (struct ia64_mca_cpu, proc_state_dump)); + DEFINE(IA64_MCA_CPU_STACK_OFFSET, + offsetof (struct ia64_mca_cpu, stack)); + DEFINE(IA64_MCA_CPU_STACKFRAME_OFFSET, + offsetof (struct ia64_mca_cpu, stackframe)); + DEFINE(IA64_MCA_CPU_RBSTORE_OFFSET, + offsetof (struct ia64_mca_cpu, rbstore)); + DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, + offsetof (struct ia64_mca_cpu, init_stack)); + BLANK(); + /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */ + DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr)); + DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source)); + DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift)); + DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc)); + DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset)); + DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle)); + DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter)); + DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter)); + DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask)); + DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU); + DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64); + DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32); + DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec)); +} diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c new file mode 100644 index 000000000000..0b286ca164f9 --- /dev/null +++ b/arch/ia64/kernel/brl_emu.c @@ -0,0 +1,234 @@ +/* + * Emulation of the "brl" instruction for IA64 processors that + * don't support it in hardware. + * Author: Stephan Zeisset, Intel Corp. <Stephan.Zeisset@intel.com> + * + * 02/22/02 D. Mosberger Clear si_flgs, si_isr, and si_imm to avoid + * leaking kernel bits. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <asm/processor.h> + +extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5; + +struct illegal_op_return { + unsigned long fkt, arg1, arg2, arg3; +}; + +/* + * The unimplemented bits of a virtual address must be set + * to the value of the most significant implemented bit. + * unimpl_va_mask includes all unimplemented bits and + * the most significant implemented bit, so the result + * of an and operation with the mask must be all 0's + * or all 1's for the address to be valid. + */ +#define unimplemented_virtual_address(va) ( \ + ((va) & local_cpu_data->unimpl_va_mask) != 0 && \ + ((va) & local_cpu_data->unimpl_va_mask) != local_cpu_data->unimpl_va_mask \ +) + +/* + * The unimplemented bits of a physical address must be 0. + * unimpl_pa_mask includes all unimplemented bits, so the result + * of an and operation with the mask must be all 0's for the + * address to be valid. + */ +#define unimplemented_physical_address(pa) ( \ + ((pa) & local_cpu_data->unimpl_pa_mask) != 0 \ +) + +/* + * Handle an illegal operation fault that was caused by an + * unimplemented "brl" instruction. + * If we are not successful (e.g because the illegal operation + * wasn't caused by a "brl" after all), we return -1. + * If we are successful, we return either 0 or the address + * of a "fixup" function for manipulating preserved register + * state. + */ + +struct illegal_op_return +ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec) +{ + unsigned long bundle[2]; + unsigned long opcode, btype, qp, offset, cpl; + unsigned long next_ip; + struct siginfo siginfo; + struct illegal_op_return rv; + long tmp_taken, unimplemented_address; + + rv.fkt = (unsigned long) -1; + + /* + * Decode the instruction bundle. + */ + + if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle))) + return rv; + + next_ip = (unsigned long) regs->cr_iip + 16; + + /* "brl" must be in slot 2. */ + if (ia64_psr(regs)->ri != 1) return rv; + + /* Must be "mlx" template */ + if ((bundle[0] & 0x1e) != 0x4) return rv; + + opcode = (bundle[1] >> 60); + btype = ((bundle[1] >> 29) & 0x7); + qp = ((bundle[1] >> 23) & 0x3f); + offset = ((bundle[1] & 0x0800000000000000L) << 4) + | ((bundle[1] & 0x00fffff000000000L) >> 32) + | ((bundle[1] & 0x00000000007fffffL) << 40) + | ((bundle[0] & 0xffff000000000000L) >> 24); + + tmp_taken = regs->pr & (1L << qp); + + switch(opcode) { + + case 0xC: + /* + * Long Branch. + */ + if (btype != 0) return rv; + rv.fkt = 0; + if (!(tmp_taken)) { + /* + * Qualifying predicate is 0. + * Skip instruction. + */ + regs->cr_iip = next_ip; + ia64_psr(regs)->ri = 0; + return rv; + } + break; + + case 0xD: + /* + * Long Call. + */ + rv.fkt = 0; + if (!(tmp_taken)) { + /* + * Qualifying predicate is 0. + * Skip instruction. + */ + regs->cr_iip = next_ip; + ia64_psr(regs)->ri = 0; + return rv; + } + + /* + * BR[btype] = IP+16 + */ + switch(btype) { + case 0: + regs->b0 = next_ip; + break; + case 1: + rv.fkt = (unsigned long) &ia64_set_b1; + break; + case 2: + rv.fkt = (unsigned long) &ia64_set_b2; + break; + case 3: + rv.fkt = (unsigned long) &ia64_set_b3; + break; + case 4: + rv.fkt = (unsigned long) &ia64_set_b4; + break; + case 5: + rv.fkt = (unsigned long) &ia64_set_b5; + break; + case 6: + regs->b6 = next_ip; + break; + case 7: + regs->b7 = next_ip; + break; + } + rv.arg1 = next_ip; + + /* + * AR[PFS].pfm = CFM + * AR[PFS].pec = AR[EC] + * AR[PFS].ppl = PSR.cpl + */ + cpl = ia64_psr(regs)->cpl; + regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff) + | (ar_ec << 52) | (cpl << 62)); + + /* + * CFM.sof -= CFM.sol + * CFM.sol = 0 + * CFM.sor = 0 + * CFM.rrb.gr = 0 + * CFM.rrb.fr = 0 + * CFM.rrb.pr = 0 + */ + regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f) + - ((regs->cr_ifs >> 7) & 0x7f)); + + break; + + default: + /* + * Unknown opcode. + */ + return rv; + + } + + regs->cr_iip += offset; + ia64_psr(regs)->ri = 0; + + if (ia64_psr(regs)->it == 0) + unimplemented_address = unimplemented_physical_address(regs->cr_iip); + else + unimplemented_address = unimplemented_virtual_address(regs->cr_iip); + + if (unimplemented_address) { + /* + * The target address contains unimplemented bits. + */ + printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n"); + siginfo.si_signo = SIGILL; + siginfo.si_errno = 0; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_imm = 0; + siginfo.si_code = ILL_BADIADDR; + force_sig_info(SIGILL, &siginfo, current); + } else if (ia64_psr(regs)->tb) { + /* + * Branch Tracing is enabled. + * Force a taken branch signal. + */ + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = 0; + siginfo.si_code = TRAP_BRANCH; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_addr = 0; + siginfo.si_imm = 0; + force_sig_info(SIGTRAP, &siginfo, current); + } else if (ia64_psr(regs)->ss) { + /* + * Single Step is enabled. + * Force a trace signal. + */ + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = 0; + siginfo.si_code = TRAP_TRACE; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_addr = 0; + siginfo.si_imm = 0; + force_sig_info(SIGTRAP, &siginfo, current); + } + return rv; +} diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c new file mode 100644 index 000000000000..768c7e46957c --- /dev/null +++ b/arch/ia64/kernel/cyclone.c @@ -0,0 +1,109 @@ +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/time.h> +#include <linux/errno.h> +#include <asm/io.h> + +/* IBM Summit (EXA) Cyclone counter code*/ +#define CYCLONE_CBAR_ADDR 0xFEB00CD0 +#define CYCLONE_PMCC_OFFSET 0x51A0 +#define CYCLONE_MPMC_OFFSET 0x51D0 +#define CYCLONE_MPCS_OFFSET 0x51A8 +#define CYCLONE_TIMER_FREQ 100000000 + +int use_cyclone; +void __init cyclone_setup(void) +{ + use_cyclone = 1; +} + + +struct time_interpolator cyclone_interpolator = { + .source = TIME_SOURCE_MMIO64, + .shift = 16, + .frequency = CYCLONE_TIMER_FREQ, + .drift = -100, + .mask = (1LL << 40) - 1 +}; + +int __init init_cyclone_clock(void) +{ + u64* reg; + u64 base; /* saved cyclone base address */ + u64 offset; /* offset from pageaddr to cyclone_timer register */ + int i; + u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ + + if (!use_cyclone) + return -ENODEV; + + printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); + + /* find base address */ + offset = (CYCLONE_CBAR_ADDR); + reg = (u64*)ioremap_nocache(offset, sizeof(u64)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); + use_cyclone = 0; + return -ENODEV; + } + base = readq(reg); + if(!base){ + printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); + use_cyclone = 0; + return -ENODEV; + } + iounmap(reg); + + /* setup PMCC */ + offset = (base + CYCLONE_PMCC_OFFSET); + reg = (u64*)ioremap_nocache(offset, sizeof(u64)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); + use_cyclone = 0; + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* setup MPCS */ + offset = (base + CYCLONE_MPCS_OFFSET); + reg = (u64*)ioremap_nocache(offset, sizeof(u64)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); + use_cyclone = 0; + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* map in cyclone_timer */ + offset = (base + CYCLONE_MPMC_OFFSET); + cyclone_timer = (u32*)ioremap_nocache(offset, sizeof(u32)); + if(!cyclone_timer){ + printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); + use_cyclone = 0; + return -ENODEV; + } + + /*quick test to make sure its ticking*/ + for(i=0; i<3; i++){ + u32 old = readl(cyclone_timer); + int stall = 100; + while(stall--) barrier(); + if(readl(cyclone_timer) == old){ + printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); + iounmap(cyclone_timer); + cyclone_timer = 0; + use_cyclone = 0; + return -ENODEV; + } + } + /* initialize last tick */ + cyclone_interpolator.addr = cyclone_timer; + register_time_interpolator(&cyclone_interpolator); + + return 0; +} + +__initcall(init_cyclone_clock); diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c new file mode 100644 index 000000000000..fe532c970438 --- /dev/null +++ b/arch/ia64/kernel/domain.c @@ -0,0 +1,382 @@ +/* + * arch/ia64/kernel/domain.c + * Architecture specific sched-domains builder. + * + * Copyright (C) 2004 Jesse Barnes + * Copyright (C) 2004 Silicon Graphics, Inc. + */ + +#include <linux/sched.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/topology.h> +#include <linux/nodemask.h> + +#define SD_NODES_PER_DOMAIN 6 + +#ifdef CONFIG_NUMA +/** + * find_next_best_node - find the next node to include in a sched_domain + * @node: node whose sched_domain we're building + * @used_nodes: nodes already in the sched_domain + * + * Find the next node to include in a given scheduling domain. Simply + * finds the closest node not already in the @used_nodes map. + * + * Should use nodemask_t. + */ +static int __devinit find_next_best_node(int node, unsigned long *used_nodes) +{ + int i, n, val, min_val, best_node = 0; + + min_val = INT_MAX; + + for (i = 0; i < MAX_NUMNODES; i++) { + /* Start at @node */ + n = (node + i) % MAX_NUMNODES; + + if (!nr_cpus_node(n)) + continue; + + /* Skip already used nodes */ + if (test_bit(n, used_nodes)) + continue; + + /* Simple min distance search */ + val = node_distance(node, n); + + if (val < min_val) { + min_val = val; + best_node = n; + } + } + + set_bit(best_node, used_nodes); + return best_node; +} + +/** + * sched_domain_node_span - get a cpumask for a node's sched_domain + * @node: node whose cpumask we're constructing + * @size: number of nodes to include in this span + * + * Given a node, construct a good cpumask for its sched_domain to span. It + * should be one that prevents unnecessary balancing, but also spreads tasks + * out optimally. + */ +static cpumask_t __devinit sched_domain_node_span(int node) +{ + int i; + cpumask_t span, nodemask; + DECLARE_BITMAP(used_nodes, MAX_NUMNODES); + + cpus_clear(span); + bitmap_zero(used_nodes, MAX_NUMNODES); + + nodemask = node_to_cpumask(node); + cpus_or(span, span, nodemask); + set_bit(node, used_nodes); + + for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { + int next_node = find_next_best_node(node, used_nodes); + nodemask = node_to_cpumask(next_node); + cpus_or(span, span, nodemask); + } + + return span; +} +#endif + +/* + * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we + * can switch it on easily if needed. + */ +#ifdef CONFIG_SCHED_SMT +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static struct sched_group sched_group_cpus[NR_CPUS]; +static int __devinit cpu_to_cpu_group(int cpu) +{ + return cpu; +} +#endif + +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static struct sched_group sched_group_phys[NR_CPUS]; +static int __devinit cpu_to_phys_group(int cpu) +{ +#ifdef CONFIG_SCHED_SMT + return first_cpu(cpu_sibling_map[cpu]); +#else + return cpu; +#endif +} + +#ifdef CONFIG_NUMA +/* + * The init_sched_build_groups can't handle what we want to do with node + * groups, so roll our own. Now each node has its own list of groups which + * gets dynamically allocated. + */ +static DEFINE_PER_CPU(struct sched_domain, node_domains); +static struct sched_group *sched_group_nodes[MAX_NUMNODES]; + +static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); +static struct sched_group sched_group_allnodes[MAX_NUMNODES]; + +static int __devinit cpu_to_allnodes_group(int cpu) +{ + return cpu_to_node(cpu); +} +#endif + +/* + * Set up scheduler domains and groups. Callers must hold the hotplug lock. + */ +void __devinit arch_init_sched_domains(void) +{ + int i; + cpumask_t cpu_default_map; + + /* + * Setup mask for cpus without special case scheduling requirements. + * For now this just excludes isolated cpus, but could be used to + * exclude other special cases in the future. + */ + cpus_complement(cpu_default_map, cpu_isolated_map); + cpus_and(cpu_default_map, cpu_default_map, cpu_online_map); + + /* + * Set up domains. Isolated domains just stay on the dummy domain. + */ + for_each_cpu_mask(i, cpu_default_map) { + int group; + struct sched_domain *sd = NULL, *p; + cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); + + cpus_and(nodemask, nodemask, cpu_default_map); + +#ifdef CONFIG_NUMA + if (num_online_cpus() + > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { + sd = &per_cpu(allnodes_domains, i); + *sd = SD_ALLNODES_INIT; + sd->span = cpu_default_map; + group = cpu_to_allnodes_group(i); + sd->groups = &sched_group_allnodes[group]; + p = sd; + } else + p = NULL; + + sd = &per_cpu(node_domains, i); + *sd = SD_NODE_INIT; + sd->span = sched_domain_node_span(cpu_to_node(i)); + sd->parent = p; + cpus_and(sd->span, sd->span, cpu_default_map); +#endif + + p = sd; + sd = &per_cpu(phys_domains, i); + group = cpu_to_phys_group(i); + *sd = SD_CPU_INIT; + sd->span = nodemask; + sd->parent = p; + sd->groups = &sched_group_phys[group]; + +#ifdef CONFIG_SCHED_SMT + p = sd; + sd = &per_cpu(cpu_domains, i); + group = cpu_to_cpu_group(i); + *sd = SD_SIBLING_INIT; + sd->span = cpu_sibling_map[i]; + cpus_and(sd->span, sd->span, cpu_default_map); + sd->parent = p; + sd->groups = &sched_group_cpus[group]; +#endif + } + +#ifdef CONFIG_SCHED_SMT + /* Set up CPU (sibling) groups */ + for_each_cpu_mask(i, cpu_default_map) { + cpumask_t this_sibling_map = cpu_sibling_map[i]; + cpus_and(this_sibling_map, this_sibling_map, cpu_default_map); + if (i != first_cpu(this_sibling_map)) + continue; + + init_sched_build_groups(sched_group_cpus, this_sibling_map, + &cpu_to_cpu_group); + } +#endif + + /* Set up physical groups */ + for (i = 0; i < MAX_NUMNODES; i++) { + cpumask_t nodemask = node_to_cpumask(i); + + cpus_and(nodemask, nodemask, cpu_default_map); + if (cpus_empty(nodemask)) + continue; + + init_sched_build_groups(sched_group_phys, nodemask, + &cpu_to_phys_group); + } + +#ifdef CONFIG_NUMA + init_sched_build_groups(sched_group_allnodes, cpu_default_map, + &cpu_to_allnodes_group); + + for (i = 0; i < MAX_NUMNODES; i++) { + /* Set up node groups */ + struct sched_group *sg, *prev; + cpumask_t nodemask = node_to_cpumask(i); + cpumask_t domainspan; + cpumask_t covered = CPU_MASK_NONE; + int j; + + cpus_and(nodemask, nodemask, cpu_default_map); + if (cpus_empty(nodemask)) + continue; + + domainspan = sched_domain_node_span(i); + cpus_and(domainspan, domainspan, cpu_default_map); + + sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); + sched_group_nodes[i] = sg; + for_each_cpu_mask(j, nodemask) { + struct sched_domain *sd; + sd = &per_cpu(node_domains, j); + sd->groups = sg; + if (sd->groups == NULL) { + /* Turn off balancing if we have no groups */ + sd->flags = 0; + } + } + if (!sg) { + printk(KERN_WARNING + "Can not alloc domain group for node %d\n", i); + continue; + } + sg->cpu_power = 0; + sg->cpumask = nodemask; + cpus_or(covered, covered, nodemask); + prev = sg; + + for (j = 0; j < MAX_NUMNODES; j++) { + cpumask_t tmp, notcovered; + int n = (i + j) % MAX_NUMNODES; + + cpus_complement(notcovered, covered); + cpus_and(tmp, notcovered, cpu_default_map); + cpus_and(tmp, tmp, domainspan); + if (cpus_empty(tmp)) + break; + + nodemask = node_to_cpumask(n); + cpus_and(tmp, tmp, nodemask); + if (cpus_empty(tmp)) + continue; + + sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); + if (!sg) { + printk(KERN_WARNING + "Can not alloc domain group for node %d\n", j); + break; + } + sg->cpu_power = 0; + sg->cpumask = tmp; + cpus_or(covered, covered, tmp); + prev->next = sg; + prev = sg; + } + prev->next = sched_group_nodes[i]; + } +#endif + + /* Calculate CPU power for physical packages and nodes */ + for_each_cpu_mask(i, cpu_default_map) { + int power; + struct sched_domain *sd; +#ifdef CONFIG_SCHED_SMT + sd = &per_cpu(cpu_domains, i); + power = SCHED_LOAD_SCALE; + sd->groups->cpu_power = power; +#endif + + sd = &per_cpu(phys_domains, i); + power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * + (cpus_weight(sd->groups->cpumask)-1) / 10; + sd->groups->cpu_power = power; + +#ifdef CONFIG_NUMA + sd = &per_cpu(allnodes_domains, i); + if (sd->groups) { + power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * + (cpus_weight(sd->groups->cpumask)-1) / 10; + sd->groups->cpu_power = power; + } +#endif + } + +#ifdef CONFIG_NUMA + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *sg = sched_group_nodes[i]; + int j; + + if (sg == NULL) + continue; +next_sg: + for_each_cpu_mask(j, sg->cpumask) { + struct sched_domain *sd; + int power; + + sd = &per_cpu(phys_domains, j); + if (j != first_cpu(sd->groups->cpumask)) { + /* + * Only add "power" once for each + * physical package. + */ + continue; + } + power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * + (cpus_weight(sd->groups->cpumask)-1) / 10; + + sg->cpu_power += power; + } + sg = sg->next; + if (sg != sched_group_nodes[i]) + goto next_sg; + } +#endif + + /* Attach the domains */ + for_each_online_cpu(i) { + struct sched_domain *sd; +#ifdef CONFIG_SCHED_SMT + sd = &per_cpu(cpu_domains, i); +#else + sd = &per_cpu(phys_domains, i); +#endif + cpu_attach_domain(sd, i); + } +} + +void __devinit arch_destroy_sched_domains(void) +{ +#ifdef CONFIG_NUMA + int i; + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *oldsg, *sg = sched_group_nodes[i]; + if (sg == NULL) + continue; + sg = sg->next; +next_sg: + oldsg = sg; + sg = sg->next; + kfree(oldsg); + if (oldsg != sched_group_nodes[i]) + goto next_sg; + sched_group_nodes[i] = NULL; + } +#endif +} + diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c new file mode 100644 index 000000000000..4a3b1aac43e7 --- /dev/null +++ b/arch/ia64/kernel/efi.c @@ -0,0 +1,832 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999-2003 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: <goutham.rao@intel.com> + * Skip non-WB memory and ignore empty memory ranges. + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/time.h> +#include <linux/efi.h> + +#include <asm/io.h> +#include <asm/kregs.h> +#include <asm/meminit.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/mca.h> + +#define EFI_DEBUG 0 + +extern efi_status_t efi_call_phys (void *, ...); + +struct efi efi; +EXPORT_SYMBOL(efi); +static efi_runtime_services_t *runtime; +static unsigned long mem_limit = ~0UL, max_addr = ~0UL; + +#define efi_call_virt(f, args...) (*(f))(args) + +#define STUB_GET_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_time_cap_t *atc = NULL; \ + efi_status_t ret; \ + \ + if (tc) \ + atc = adjust_arg(tc); \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_SET_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_set_time (efi_time_t *tm) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), adjust_arg(tm)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \ + adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_time_t *atm = NULL; \ + efi_status_t ret; \ + \ + if (tm) \ + atm = adjust_arg(tm); \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \ + enabled, atm); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_VARIABLE(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \ + unsigned long *data_size, void *data) \ +{ \ + struct ia64_fpreg fr[6]; \ + u32 *aattr = NULL; \ + efi_status_t ret; \ + \ + if (attr) \ + aattr = adjust_arg(attr); \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable), \ + adjust_arg(name), adjust_arg(vendor), aattr, \ + adjust_arg(data_size), adjust_arg(data)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_next_variable_t *) __va(runtime->get_next_variable), \ + adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_SET_VARIABLE(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, unsigned long attr, \ + unsigned long data_size, void *data) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_set_variable_t *) __va(runtime->set_variable), \ + adjust_arg(name), adjust_arg(vendor), attr, data_size, \ + adjust_arg(data)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_next_high_mono_count (u32 *count) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_next_high_mono_count_t *) \ + __va(runtime->get_next_high_mono_count), adjust_arg(count)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_RESET_SYSTEM(prefix, adjust_arg) \ +static void \ +prefix##_reset_system (int reset_type, efi_status_t status, \ + unsigned long data_size, efi_char16_t *data) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_char16_t *adata = NULL; \ + \ + if (data) \ + adata = adjust_arg(data); \ + \ + ia64_save_scratch_fpregs(fr); \ + efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system), \ + reset_type, status, data_size, adata); \ + /* should not return, but just in case... */ \ + ia64_load_scratch_fpregs(fr); \ +} + +#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg)) + +STUB_GET_TIME(phys, phys_ptr) +STUB_SET_TIME(phys, phys_ptr) +STUB_GET_WAKEUP_TIME(phys, phys_ptr) +STUB_SET_WAKEUP_TIME(phys, phys_ptr) +STUB_GET_VARIABLE(phys, phys_ptr) +STUB_GET_NEXT_VARIABLE(phys, phys_ptr) +STUB_SET_VARIABLE(phys, phys_ptr) +STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr) +STUB_RESET_SYSTEM(phys, phys_ptr) + +#define id(arg) arg + +STUB_GET_TIME(virt, id) +STUB_SET_TIME(virt, id) +STUB_GET_WAKEUP_TIME(virt, id) +STUB_SET_WAKEUP_TIME(virt, id) +STUB_GET_VARIABLE(virt, id) +STUB_GET_NEXT_VARIABLE(virt, id) +STUB_SET_VARIABLE(virt, id) +STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id) +STUB_RESET_SYSTEM(virt, id) + +void +efi_gettimeofday (struct timespec *ts) +{ + efi_time_t tm; + + memset(ts, 0, sizeof(ts)); + if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) + return; + + ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second); + ts->tv_nsec = tm.nanosecond; +} + +static int +is_available_memory (efi_memory_desc_t *md) +{ + if (!(md->attribute & EFI_MEMORY_WB)) + return 0; + + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + return 1; + } + return 0; +} + +/* + * Trim descriptor MD so its starts at address START_ADDR. If the descriptor covers + * memory that is normally available to the kernel, issue a warning that some memory + * is being ignored. + */ +static void +trim_bottom (efi_memory_desc_t *md, u64 start_addr) +{ + u64 num_skipped_pages; + + if (md->phys_addr >= start_addr || !md->num_pages) + return; + + num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT; + if (num_skipped_pages > md->num_pages) + num_skipped_pages = md->num_pages; + + if (is_available_memory(md)) + printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " + "at 0x%lx\n", __FUNCTION__, + (num_skipped_pages << EFI_PAGE_SHIFT) >> 10, + md->phys_addr, start_addr - IA64_GRANULE_SIZE); + /* + * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory + * descriptor list to become unsorted. In such a case, md->num_pages will be + * zero, so the Right Thing will happen. + */ + md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT; + md->num_pages -= num_skipped_pages; +} + +static void +trim_top (efi_memory_desc_t *md, u64 end_addr) +{ + u64 num_dropped_pages, md_end_addr; + + md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + + if (md_end_addr <= end_addr || !md->num_pages) + return; + + num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT; + if (num_dropped_pages > md->num_pages) + num_dropped_pages = md->num_pages; + + if (is_available_memory(md)) + printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " + "at 0x%lx\n", __FUNCTION__, + (num_dropped_pages << EFI_PAGE_SHIFT) >> 10, + md->phys_addr, end_addr); + md->num_pages -= num_dropped_pages; +} + +/* + * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that + * has memory that is available for OS use. + */ +void +efi_memmap_walk (efi_freemem_callback_t callback, void *arg) +{ + int prev_valid = 0; + struct range { + u64 start; + u64 end; + } prev, curr; + void *efi_map_start, *efi_map_end, *p, *q; + efi_memory_desc_t *md, *check_md; + u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0; + unsigned long total_mem = 0; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + /* skip over non-WB memory descriptors; that's all we're interested in... */ + if (!(md->attribute & EFI_MEMORY_WB)) + continue; + + /* + * granule_addr is the base of md's first granule. + * [granule_addr - first_non_wb_addr) is guaranteed to + * be contiguous WB memory. + */ + granule_addr = GRANULEROUNDDOWN(md->phys_addr); + first_non_wb_addr = max(first_non_wb_addr, granule_addr); + + if (first_non_wb_addr < md->phys_addr) { + trim_bottom(md, granule_addr + IA64_GRANULE_SIZE); + granule_addr = GRANULEROUNDDOWN(md->phys_addr); + first_non_wb_addr = max(first_non_wb_addr, granule_addr); + } + + for (q = p; q < efi_map_end; q += efi_desc_size) { + check_md = q; + + if ((check_md->attribute & EFI_MEMORY_WB) && + (check_md->phys_addr == first_non_wb_addr)) + first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT; + else + break; /* non-WB or hole */ + } + + last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr); + if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) + trim_top(md, last_granule_addr); + + if (is_available_memory(md)) { + if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) { + if (md->phys_addr >= max_addr) + continue; + md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT; + first_non_wb_addr = max_addr; + } + + if (total_mem >= mem_limit) + continue; + + if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) { + unsigned long limit_addr = md->phys_addr; + + limit_addr += mem_limit - total_mem; + limit_addr = GRANULEROUNDDOWN(limit_addr); + + if (md->phys_addr > limit_addr) + continue; + + md->num_pages = (limit_addr - md->phys_addr) >> + EFI_PAGE_SHIFT; + first_non_wb_addr = max_addr = md->phys_addr + + (md->num_pages << EFI_PAGE_SHIFT); + } + total_mem += (md->num_pages << EFI_PAGE_SHIFT); + + if (md->num_pages == 0) + continue; + + curr.start = PAGE_OFFSET + md->phys_addr; + curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); + + if (!prev_valid) { + prev = curr; + prev_valid = 1; + } else { + if (curr.start < prev.start) + printk(KERN_ERR "Oops: EFI memory table not ordered!\n"); + + if (prev.end == curr.start) { + /* merge two consecutive memory ranges */ + prev.end = curr.end; + } else { + start = PAGE_ALIGN(prev.start); + end = prev.end & PAGE_MASK; + if ((end > start) && (*callback)(start, end, arg) < 0) + return; + prev = curr; + } + } + } + } + if (prev_valid) { + start = PAGE_ALIGN(prev.start); + end = prev.end & PAGE_MASK; + if (end > start) + (*callback)(start, end, arg); + } +} + +/* + * Look for the PAL_CODE region reported by EFI and maps it using an + * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor + * Abstraction Layer chapter 11 in ADAG + */ + +void * +efi_get_pal_addr (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + int pal_code_count = 0; + u64 vaddr, mask; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->type != EFI_PAL_CODE) + continue; + + if (++pal_code_count > 1) { + printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n", + md->phys_addr); + continue; + } + /* + * The only ITLB entry in region 7 that is used is the one installed by + * __start(). That entry covers a 64MB range. + */ + mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1); + vaddr = PAGE_OFFSET + md->phys_addr; + + /* + * We must check that the PAL mapping won't overlap with the kernel + * mapping. + * + * PAL code is guaranteed to be aligned on a power of 2 between 4k and + * 256KB and that only one ITR is needed to map it. This implies that the + * PAL code is always aligned on its size, i.e., the closest matching page + * size supported by the TLB. Therefore PAL code is guaranteed never to + * cross a 64MB unless it is bigger than 64MB (very unlikely!). So for + * now the following test is enough to determine whether or not we need a + * dedicated ITR for the PAL code. + */ + if ((vaddr & mask) == (KERNEL_START & mask)) { + printk(KERN_INFO "%s: no need to install ITR for PAL code\n", + __FUNCTION__); + continue; + } + + if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE) + panic("Woah! PAL code size bigger than a granule!"); + +#if EFI_DEBUG + mask = ~((1 << IA64_GRANULE_SHIFT) - 1); + + printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n", + smp_processor_id(), md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE); +#endif + return __va(md->phys_addr); + } + printk(KERN_WARNING "%s: no PAL-code memory-descriptor found", + __FUNCTION__); + return NULL; +} + +void +efi_map_pal_code (void) +{ + void *pal_vaddr = efi_get_pal_addr (); + u64 psr; + + if (!pal_vaddr) + return; + + /* + * Cannot write to CRx with PSR.ic=1 + */ + psr = ia64_clear_ic(); + ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr), + pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), + IA64_GRANULE_SHIFT); + ia64_set_psr(psr); /* restore psr */ + ia64_srlz_i(); +} + +void __init +efi_init (void) +{ + void *efi_map_start, *efi_map_end; + efi_config_table_t *config_tables; + efi_char16_t *c16; + u64 efi_desc_size; + char *cp, *end, vendor[100] = "unknown"; + extern char saved_command_line[]; + int i; + + /* it's too early to be able to use the standard kernel command line support... */ + for (cp = saved_command_line; *cp; ) { + if (memcmp(cp, "mem=", 4) == 0) { + cp += 4; + mem_limit = memparse(cp, &end); + if (end != cp) + break; + cp = end; + } else if (memcmp(cp, "max_addr=", 9) == 0) { + cp += 9; + max_addr = GRANULEROUNDDOWN(memparse(cp, &end)); + if (end != cp) + break; + cp = end; + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } + if (max_addr != ~0UL) + printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20); + + efi.systab = __va(ia64_boot_param->efi_systab); + + /* + * Verify the EFI Table + */ + if (efi.systab == NULL) + panic("Woah! Can't find EFI system table.\n"); + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + panic("Woah! EFI system table signature incorrect\n"); + if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) + printk(KERN_WARNING "Warning: EFI system table major version mismatch: " + "got %d.%02d, expected %d.%02d\n", + efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, + EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff); + + config_tables = __va(efi.systab->tables); + + /* Show what we know for posterity */ + c16 = __va(efi.systab->fw_vendor); + if (c16) { + for (i = 0;i < (int) sizeof(vendor) && *c16; ++i) + vendor[i] = *c16++; + vendor[i] = '\0'; + } + + printk(KERN_INFO "EFI v%u.%.02u by %s:", + efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); + + for (i = 0; i < (int) efi.systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { + efi.mps = __va(config_tables[i].table); + printk(" MPS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { + efi.acpi20 = __va(config_tables[i].table); + printk(" ACPI 2.0=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { + efi.acpi = __va(config_tables[i].table); + printk(" ACPI=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { + efi.smbios = __va(config_tables[i].table); + printk(" SMBIOS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) { + efi.sal_systab = __va(config_tables[i].table); + printk(" SALsystab=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { + efi.hcdp = __va(config_tables[i].table); + printk(" HCDP=0x%lx", config_tables[i].table); + } + } + printk("\n"); + + runtime = __va(efi.systab->runtime); + efi.get_time = phys_get_time; + efi.set_time = phys_set_time; + efi.get_wakeup_time = phys_get_wakeup_time; + efi.set_wakeup_time = phys_set_wakeup_time; + efi.get_variable = phys_get_variable; + efi.get_next_variable = phys_get_next_variable; + efi.set_variable = phys_set_variable; + efi.get_next_high_mono_count = phys_get_next_high_mono_count; + efi.reset_system = phys_reset_system; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + +#if EFI_DEBUG + /* print EFI memory map: */ + { + efi_memory_desc_t *md; + void *p; + + for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { + md = p; + printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", + i, md->type, md->attribute, md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + md->num_pages >> (20 - EFI_PAGE_SHIFT)); + } + } +#endif + + efi_map_pal_code(); + efi_enter_virtual_mode(); +} + +void +efi_enter_virtual_mode (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + efi_status_t status; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->attribute & EFI_MEMORY_RUNTIME) { + /* + * Some descriptors have multiple bits set, so the order of + * the tests is relevant. + */ + if (md->attribute & EFI_MEMORY_WB) { + md->virt_addr = (u64) __va(md->phys_addr); + } else if (md->attribute & EFI_MEMORY_UC) { + md->virt_addr = (u64) ioremap(md->phys_addr, 0); + } else if (md->attribute & EFI_MEMORY_WC) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P + | _PAGE_D + | _PAGE_MA_WC + | _PAGE_PL_0 + | _PAGE_AR_RW)); +#else + printk(KERN_INFO "EFI_MEMORY_WC mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } else if (md->attribute & EFI_MEMORY_WT) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P + | _PAGE_D | _PAGE_MA_WT + | _PAGE_PL_0 + | _PAGE_AR_RW)); +#else + printk(KERN_INFO "EFI_MEMORY_WT mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } + } + } + + status = efi_call_phys(__va(runtime->set_virtual_address_map), + ia64_boot_param->efi_memmap_size, + efi_desc_size, ia64_boot_param->efi_memdesc_version, + ia64_boot_param->efi_memmap); + if (status != EFI_SUCCESS) { + printk(KERN_WARNING "warning: unable to switch EFI into virtual mode " + "(status=%lu)\n", status); + return; + } + + /* + * Now that EFI is in virtual mode, we call the EFI functions more efficiently: + */ + efi.get_time = virt_get_time; + efi.set_time = virt_set_time; + efi.get_wakeup_time = virt_get_wakeup_time; + efi.set_wakeup_time = virt_set_wakeup_time; + efi.get_variable = virt_get_variable; + efi.get_next_variable = virt_get_next_variable; + efi.set_variable = virt_set_variable; + efi.get_next_high_mono_count = virt_get_next_high_mono_count; + efi.reset_system = virt_reset_system; +} + +/* + * Walk the EFI memory map looking for the I/O port range. There can only be one entry of + * this type, other I/O port ranges should be described via ACPI. + */ +u64 +efi_get_iobase (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) { + if (md->attribute & EFI_MEMORY_UC) + return md->phys_addr; + } + } + return 0; +} + +u32 +efi_mem_type (unsigned long phys_addr) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) + return md->type; + } + return 0; +} + +u64 +efi_mem_attributes (unsigned long phys_addr) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) + return md->attribute; + } + return 0; +} +EXPORT_SYMBOL(efi_mem_attributes); + +int +valid_phys_addr_range (unsigned long phys_addr, unsigned long *size) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) { + if (!(md->attribute & EFI_MEMORY_WB)) + return 0; + + if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr) + *size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr; + return 1; + } + } + return 0; +} + +int __init +efi_uart_console_only(void) +{ + efi_status_t status; + char *s, name[] = "ConOut"; + efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID; + efi_char16_t *utf16, name_utf16[32]; + unsigned char data[1024]; + unsigned long size = sizeof(data); + struct efi_generic_dev_path *hdr, *end_addr; + int uart = 0; + + /* Convert to UTF-16 */ + utf16 = name_utf16; + s = name; + while (*s) + *utf16++ = *s++ & 0x7f; + *utf16 = 0; + + status = efi.get_variable(name_utf16, &guid, NULL, &size, data); + if (status != EFI_SUCCESS) { + printk(KERN_ERR "No EFI %s variable?\n", name); + return 0; + } + + hdr = (struct efi_generic_dev_path *) data; + end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size); + while (hdr < end_addr) { + if (hdr->type == EFI_DEV_MSG && + hdr->sub_type == EFI_DEV_MSG_UART) + uart = 1; + else if (hdr->type == EFI_DEV_END_PATH || + hdr->type == EFI_DEV_END_PATH2) { + if (!uart) + return 0; + if (hdr->sub_type == EFI_DEV_END_ENTIRE) + return 1; + uart = 0; + } + hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length); + } + printk(KERN_ERR "Malformed %s value\n", name); + return 0; +} diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S new file mode 100644 index 000000000000..5a7fe70212a9 --- /dev/null +++ b/arch/ia64/kernel/efi_stub.S @@ -0,0 +1,86 @@ +/* + * EFI call stub. + * + * Copyright (C) 1999-2001 Hewlett-Packard Co + * David Mosberger <davidm@hpl.hp.com> + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. We need this because we can't call SetVirtualMap() until + * the kernel has booted far enough to allow allocation of struct vma_struct + * entries (which we would need to map stuff with memory attributes other + * than uncached or writeback...). Since the GetTime() service gets called + * earlier than that, we need to be able to make physical mode EFI calls from + * the kernel. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include <asm/processor.h> +#include <asm/asmmacro.h> + +/* + * Inputs: + * in0 = address of function descriptor of EFI routine to call + * in1..in7 = arguments to routine + * + * Outputs: + * r8 = EFI_STATUS returned by called function + */ + +GLOBAL_ENTRY(efi_call_phys) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,7,7,0 + ld8 r2=[in0],8 // load EFI function's entry point + mov loc0=rp + .body + ;; + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + ;; + ld8 gp=[in0] // load EFI function's global pointer + movl r16=PSR_BITS_TO_CLEAR + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + br.call.sptk.many rp=ia64_switch_mode_phys +.ret0: mov out4=in5 + mov out0=in1 + mov out1=in2 + mov out2=in3 + mov out3=in4 + mov out5=in6 + mov out6=in7 + mov loc5=r19 + mov loc6=r20 + br.call.sptk.many rp=b6 // call the EFI function +.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 + mov r19=loc5 + mov r20=loc6 + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode +.ret2: mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc1 + mov rp=loc0 + mov gp=loc2 + br.ret.sptk.many rp +END(efi_call_phys) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S new file mode 100644 index 000000000000..0272c010a3ba --- /dev/null +++ b/arch/ia64/kernel/entry.S @@ -0,0 +1,1587 @@ +/* + * ia64/kernel/entry.S + * + * Kernel entry points. + * + * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999, 2002-2003 + * Asit Mallick <Asit.K.Mallick@intel.com> + * Don Dugger <Don.Dugger@intel.com> + * Suresh Siddha <suresh.b.siddha@intel.com> + * Fenghua Yu <fenghua.yu@intel.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ +/* + * ia64_switch_to now places correct virtual mapping in in TR2 for + * kernel stack. This allows us to handle interrupts without changing + * to physical mode. + * + * Jonathan Nicklin <nicklin@missioncriticallinux.com> + * Patrick O'Rourke <orourke@missioncriticallinux.com> + * 11/07/2000 + */ +/* + * Global (preserved) predicate usage on syscall entry/exit path: + * + * pKStk: See entry.h. + * pUStk: See entry.h. + * pSys: See entry.h. + * pNonSys: !pSys + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/cache.h> +#include <asm/errno.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/percpu.h> +#include <asm/processor.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> + +#include "minstate.h" + + /* + * execve() is special because in case of success, we need to + * setup a null register window frame. + */ +ENTRY(ia64_execve) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,4,0 + mov loc0=rp + .body + mov out0=in0 // filename + ;; // stop bit between alloc and call + mov out1=in1 // argv + mov out2=in2 // envp + add out3=16,sp // regs + br.call.sptk.many rp=sys_execve +.ret0: +#ifdef CONFIG_IA32_SUPPORT + /* + * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers + * from pt_regs. + */ + adds r16=PT(CR_IPSR)+16,sp + ;; + ld8 r16=[r16] +#endif + cmp4.ge p6,p7=r8,r0 + mov ar.pfs=loc1 // restore ar.pfs + sxt4 r8=r8 // return 64-bit result + ;; + stf.spill [sp]=f0 +(p6) cmp.ne pKStk,pUStk=r0,r0 // a successful execve() lands us in user-mode... + mov rp=loc0 +(p6) mov ar.pfs=r0 // clear ar.pfs on success +(p7) br.ret.sptk.many rp + + /* + * In theory, we'd have to zap this state only to prevent leaking of + * security sensitive state (e.g., if current->mm->dumpable is zero). However, + * this executes in less than 20 cycles even on Itanium, so it's not worth + * optimizing for...). + */ + mov ar.unat=0; mov ar.lc=0 + mov r4=0; mov f2=f0; mov b1=r0 + mov r5=0; mov f3=f0; mov b2=r0 + mov r6=0; mov f4=f0; mov b3=r0 + mov r7=0; mov f5=f0; mov b4=r0 + ldf.fill f12=[sp]; mov f13=f0; mov b5=r0 + ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0 + ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0 + ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0 + ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0 + ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0 + ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0 +#ifdef CONFIG_IA32_SUPPORT + tbit.nz p6,p0=r16, IA64_PSR_IS_BIT + movl loc0=ia64_ret_from_ia32_execve + ;; +(p6) mov rp=loc0 +#endif + br.ret.sptk.many rp +END(ia64_execve) + +/* + * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr, + * u64 tls) + */ +GLOBAL_ENTRY(sys_clone2) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc r16=ar.pfs,8,2,6,0 + DO_SAVE_SWITCH_STACK + adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp + mov loc0=rp + mov loc1=r16 // save ar.pfs across do_fork + .body + mov out1=in1 + mov out3=in2 + tbit.nz p6,p0=in0,CLONE_SETTLS_BIT + mov out4=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID + ;; +(p6) st8 [r2]=in5 // store TLS in r16 for copy_thread() + mov out5=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID + adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = ®s + mov out0=in0 // out0 = clone_flags + br.call.sptk.many rp=do_fork +.ret1: .restore sp + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(sys_clone2) + +/* + * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls) + * Deprecated. Use sys_clone2() instead. + */ +GLOBAL_ENTRY(sys_clone) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc r16=ar.pfs,8,2,6,0 + DO_SAVE_SWITCH_STACK + adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp + mov loc0=rp + mov loc1=r16 // save ar.pfs across do_fork + .body + mov out1=in1 + mov out3=16 // stacksize (compensates for 16-byte scratch area) + tbit.nz p6,p0=in0,CLONE_SETTLS_BIT + mov out4=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID + ;; +(p6) st8 [r2]=in4 // store TLS in r13 (tp) + mov out5=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID + adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = ®s + mov out0=in0 // out0 = clone_flags + br.call.sptk.many rp=do_fork +.ret2: .restore sp + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(sys_clone) + +/* + * prev_task <- ia64_switch_to(struct task_struct *next) + * With Ingo's new scheduler, interrupts are disabled when this routine gets + * called. The code starting at .map relies on this. The rest of the code + * doesn't care about the interrupt masking status. + */ +GLOBAL_ENTRY(ia64_switch_to) + .prologue + alloc r16=ar.pfs,1,0,0,0 + DO_SAVE_SWITCH_STACK + .body + + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + movl r25=init_task + mov r27=IA64_KR(CURRENT_STACK) + adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 + dep r20=0,in0,61,3 // physical address of "next" + ;; + st8 [r22]=sp // save kernel stack pointer of old task + shr.u r26=r20,IA64_GRANULE_SHIFT + cmp.eq p7,p6=r25,in0 + ;; + /* + * If we've already mapped this task's page, we can skip doing it again. + */ +(p6) cmp.eq p7,p6=r26,r27 +(p6) br.cond.dpnt .map + ;; +.done: +(p6) ssm psr.ic // if we had to map, reenable the psr.ic bit FIRST!!! + ;; +(p6) srlz.d + ld8 sp=[r21] // load kernel stack pointer of new task + mov IA64_KR(CURRENT)=in0 // update "current" application register + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer + ;; + DO_LOAD_SWITCH_STACK + +#ifdef CONFIG_SMP + sync.i // ensure "fc"s done by this CPU are visible on other CPUs +#endif + br.ret.sptk.many rp // boogie on out in new context + +.map: + rsm psr.ic // interrupts (psr.i) are already disabled here + movl r25=PAGE_KERNEL + ;; + srlz.d + or r23=r25,r20 // construct PA | page properties + mov r25=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r25 + mov cr.ifa=in0 // VA of next task... + ;; + mov r25=IA64_TR_CURRENT_STACK + mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped... + ;; + itr.d dtr[r25]=r23 // wire in new mapping... + br.cond.sptk .done +END(ia64_switch_to) + +/* + * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This + * means that we may get an interrupt with "sp" pointing to the new kernel stack while + * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc, + * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a + * problem. Also, we don't need to specify unwind information for preserved registers + * that are not modified in save_switch_stack as the right unwind information is already + * specified at the call-site of save_switch_stack. + */ + +/* + * save_switch_stack: + * - r16 holds ar.pfs + * - b7 holds address to return to + * - rp (b0) holds return address to save + */ +GLOBAL_ENTRY(save_switch_stack) + .prologue + .altrp b7 + flushrs // flush dirty regs to backing store (must be first in insn group) + .save @priunat,r17 + mov r17=ar.unat // preserve caller's + .body +#ifdef CONFIG_ITANIUM + adds r2=16+128,sp + adds r3=16+64,sp + adds r14=SW(R4)+16,sp + ;; + st8.spill [r14]=r4,16 // spill r4 + lfetch.fault.excl.nt1 [r3],128 + ;; + lfetch.fault.excl.nt1 [r2],128 + lfetch.fault.excl.nt1 [r3],128 + ;; + lfetch.fault.excl [r2] + lfetch.fault.excl [r3] + adds r15=SW(R5)+16,sp +#else + add r2=16+3*128,sp + add r3=16,sp + add r14=SW(R4)+16,sp + ;; + st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0 + lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010 + ;; + lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090 + lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190 + ;; + lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110 + lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210 + adds r15=SW(R5)+16,sp +#endif + ;; + st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5 + mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0 + add r2=SW(F2)+16,sp // r2 = &sw->f2 + ;; + st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6 + mov.m r18=ar.fpsr // preserve fpsr + add r3=SW(F3)+16,sp // r3 = &sw->f3 + ;; + stf.spill [r2]=f2,32 + mov.m r19=ar.rnat + mov r21=b0 + + stf.spill [r3]=f3,32 + st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7 + mov r22=b1 + ;; + // since we're done with the spills, read and save ar.unat: + mov.m r29=ar.unat + mov.m r20=ar.bspstore + mov r23=b2 + stf.spill [r2]=f4,32 + stf.spill [r3]=f5,32 + mov r24=b3 + ;; + st8 [r14]=r21,SW(B1)-SW(B0) // save b0 + st8 [r15]=r23,SW(B3)-SW(B2) // save b2 + mov r25=b4 + mov r26=b5 + ;; + st8 [r14]=r22,SW(B4)-SW(B1) // save b1 + st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3 + mov r21=ar.lc // I-unit + stf.spill [r2]=f12,32 + stf.spill [r3]=f13,32 + ;; + st8 [r14]=r25,SW(B5)-SW(B4) // save b4 + st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs + stf.spill [r2]=f14,32 + stf.spill [r3]=f15,32 + ;; + st8 [r14]=r26 // save b5 + st8 [r15]=r21 // save ar.lc + stf.spill [r2]=f16,32 + stf.spill [r3]=f17,32 + ;; + stf.spill [r2]=f18,32 + stf.spill [r3]=f19,32 + ;; + stf.spill [r2]=f20,32 + stf.spill [r3]=f21,32 + ;; + stf.spill [r2]=f22,32 + stf.spill [r3]=f23,32 + ;; + stf.spill [r2]=f24,32 + stf.spill [r3]=f25,32 + ;; + stf.spill [r2]=f26,32 + stf.spill [r3]=f27,32 + ;; + stf.spill [r2]=f28,32 + stf.spill [r3]=f29,32 + ;; + stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30) + stf.spill [r3]=f31,SW(PR)-SW(F31) + add r14=SW(CALLER_UNAT)+16,sp + ;; + st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat + st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat + mov r21=pr + ;; + st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat + st8 [r3]=r21 // save predicate registers + ;; + st8 [r2]=r20 // save ar.bspstore + st8 [r14]=r18 // save fpsr + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + br.cond.sptk.many b7 +END(save_switch_stack) + +/* + * load_switch_stack: + * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK) + * - b7 holds address to return to + * - must not touch r8-r11 + */ +ENTRY(load_switch_stack) + .prologue + .altrp b7 + + .body + lfetch.fault.nt1 [sp] + adds r2=SW(AR_BSPSTORE)+16,sp + adds r3=SW(AR_UNAT)+16,sp + mov ar.rsc=0 // put RSE into enforced lazy mode + adds r14=SW(CALLER_UNAT)+16,sp + adds r15=SW(AR_FPSR)+16,sp + ;; + ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore + ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat + ;; + ld8 r21=[r2],16 // restore b0 + ld8 r22=[r3],16 // restore b1 + ;; + ld8 r23=[r2],16 // restore b2 + ld8 r24=[r3],16 // restore b3 + ;; + ld8 r25=[r2],16 // restore b4 + ld8 r26=[r3],16 // restore b5 + ;; + ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs + ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc + ;; + ld8 r28=[r2] // restore pr + ld8 r30=[r3] // restore rnat + ;; + ld8 r18=[r14],16 // restore caller's unat + ld8 r19=[r15],24 // restore fpsr + ;; + ldf.fill f2=[r14],32 + ldf.fill f3=[r15],32 + ;; + ldf.fill f4=[r14],32 + ldf.fill f5=[r15],32 + ;; + ldf.fill f12=[r14],32 + ldf.fill f13=[r15],32 + ;; + ldf.fill f14=[r14],32 + ldf.fill f15=[r15],32 + ;; + ldf.fill f16=[r14],32 + ldf.fill f17=[r15],32 + ;; + ldf.fill f18=[r14],32 + ldf.fill f19=[r15],32 + mov b0=r21 + ;; + ldf.fill f20=[r14],32 + ldf.fill f21=[r15],32 + mov b1=r22 + ;; + ldf.fill f22=[r14],32 + ldf.fill f23=[r15],32 + mov b2=r23 + ;; + mov ar.bspstore=r27 + mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7 + mov b3=r24 + ;; + ldf.fill f24=[r14],32 + ldf.fill f25=[r15],32 + mov b4=r25 + ;; + ldf.fill f26=[r14],32 + ldf.fill f27=[r15],32 + mov b5=r26 + ;; + ldf.fill f28=[r14],32 + ldf.fill f29=[r15],32 + mov ar.pfs=r16 + ;; + ldf.fill f30=[r14],32 + ldf.fill f31=[r15],24 + mov ar.lc=r17 + ;; + ld8.fill r4=[r14],16 + ld8.fill r5=[r15],16 + mov pr=r28,-1 + ;; + ld8.fill r6=[r14],16 + ld8.fill r7=[r15],16 + + mov ar.unat=r18 // restore caller's unat + mov ar.rnat=r30 // must restore after bspstore but before rsc! + mov ar.fpsr=r19 // restore fpsr + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + br.cond.sptk.many b7 +END(load_switch_stack) + +GLOBAL_ENTRY(__ia64_syscall) + .regstk 6,0,0,0 + mov r15=in5 // put syscall number in place + break __BREAK_SYSCALL + movl r2=errno + cmp.eq p6,p7=-1,r10 + ;; +(p6) st4 [r2]=r8 +(p6) mov r8=-1 + br.ret.sptk.many rp +END(__ia64_syscall) + +GLOBAL_ENTRY(execve) + mov r15=__NR_execve // put syscall number in place + break __BREAK_SYSCALL + br.ret.sptk.many rp +END(execve) + +GLOBAL_ENTRY(clone) + mov r15=__NR_clone // put syscall number in place + break __BREAK_SYSCALL + br.ret.sptk.many rp +END(clone) + + /* + * Invoke a system call, but do some tracing before and after the call. + * We MUST preserve the current register frame throughout this routine + * because some system calls (such as ia64_execve) directly + * manipulate ar.pfs. + */ +GLOBAL_ENTRY(ia64_trace_syscall) + PT_REGS_UNWIND_INFO(0) + /* + * We need to preserve the scratch registers f6-f11 in case the system + * call is sigreturn. + */ + adds r16=PT(F6)+16,sp + adds r17=PT(F7)+16,sp + ;; + stf.spill [r16]=f6,32 + stf.spill [r17]=f7,32 + ;; + stf.spill [r16]=f8,32 + stf.spill [r17]=f9,32 + ;; + stf.spill [r16]=f10 + stf.spill [r17]=f11 + br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args + adds r16=PT(F6)+16,sp + adds r17=PT(F7)+16,sp + ;; + ldf.fill f6=[r16],32 + ldf.fill f7=[r17],32 + ;; + ldf.fill f8=[r16],32 + ldf.fill f9=[r17],32 + ;; + ldf.fill f10=[r16] + ldf.fill f11=[r17] + // the syscall number may have changed, so re-load it and re-calculate the + // syscall entry-point: + adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #) + ;; + ld8 r15=[r15] + mov r3=NR_syscalls - 1 + ;; + adds r15=-1024,r15 + movl r16=sys_call_table + ;; + shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) + cmp.leu p6,p7=r15,r3 + ;; +(p6) ld8 r20=[r20] // load address of syscall entry point +(p7) movl r20=sys_ni_syscall + ;; + mov b6=r20 + br.call.sptk.many rp=b6 // do the syscall +.strace_check_retval: + cmp.lt p6,p0=r8,r0 // syscall failed? + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk strace_error // syscall failed -> + ;; // avoid RAW on r10 +.strace_save_retval: +.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 +.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 + br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value +.ret3: br.cond.sptk .work_pending_syscall_end + +strace_error: + ld8 r3=[r2] // load pt_regs.r8 + sub r9=0,r8 // negate return value to get errno value + ;; + cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? + adds r3=16,r2 // r3=&pt_regs.r10 + ;; +(p6) mov r10=-1 +(p6) mov r8=r9 + br.cond.sptk .strace_save_retval +END(ia64_trace_syscall) + + /* + * When traced and returning from sigreturn, we invoke syscall_trace but then + * go straight to ia64_leave_kernel rather than ia64_leave_syscall. + */ +GLOBAL_ENTRY(ia64_strace_leave_kernel) + PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 + br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value +} +.ret4: br.cond.sptk ia64_leave_kernel +END(ia64_strace_leave_kernel) + +GLOBAL_ENTRY(ia64_ret_from_clone) + PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 + /* + * We need to call schedule_tail() to complete the scheduling process. + * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the + * address of the previously executing task. + */ + br.call.sptk.many rp=ia64_invoke_schedule_tail +} +.ret8: + adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; + ld4 r2=[r2] + ;; + mov r8=0 + and r2=_TIF_SYSCALL_TRACEAUDIT,r2 + ;; + cmp.ne p6,p0=r2,r0 +(p6) br.cond.spnt .strace_check_retval + ;; // added stop bits to prevent r8 dependency +END(ia64_ret_from_clone) + // fall through +GLOBAL_ENTRY(ia64_ret_from_syscall) + PT_REGS_UNWIND_INFO(0) + cmp.ge p6,p7=r8,r0 // syscall executed successfully? + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + mov r10=r0 // clear error indication in r10 +(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure +END(ia64_ret_from_syscall) + // fall through +/* + * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't + * need to switch to bank 0 and doesn't restore the scratch registers. + * To avoid leaking kernel bits, the scratch registers are set to + * the following known-to-be-safe values: + * + * r1: restored (global pointer) + * r2: cleared + * r3: 1 (when returning to user-level) + * r8-r11: restored (syscall return value(s)) + * r12: restored (user-level stack pointer) + * r13: restored (user-level thread pointer) + * r14: cleared + * r15: restored (syscall #) + * r16-r17: cleared + * r18: user-level b6 + * r19: cleared + * r20: user-level ar.fpsr + * r21: user-level b0 + * r22: cleared + * r23: user-level ar.bspstore + * r24: user-level ar.rnat + * r25: user-level ar.unat + * r26: user-level ar.pfs + * r27: user-level ar.rsc + * r28: user-level ip + * r29: user-level psr + * r30: user-level cfm + * r31: user-level pr + * f6-f11: cleared + * pr: restored (user-level pr) + * b0: restored (user-level rp) + * b6: restored + * b7: cleared + * ar.unat: restored (user-level ar.unat) + * ar.pfs: restored (user-level ar.pfs) + * ar.rsc: restored (user-level ar.rsc) + * ar.rnat: restored (user-level ar.rnat) + * ar.bspstore: restored (user-level ar.bspstore) + * ar.fpsr: restored (user-level ar.fpsr) + * ar.ccv: cleared + * ar.csd: cleared + * ar.ssd: cleared + */ +ENTRY(ia64_leave_syscall) + PT_REGS_UNWIND_INFO(0) + /* + * work.need_resched etc. mustn't get changed by this CPU before it returns to + * user- or fsys-mode, hence we disable interrupts early on. + * + * p6 controls whether current_thread_info()->flags needs to be check for + * extra work. We always check for extra work when returning to user-level. + * With CONFIG_PREEMPT, we also check for extra work when the preempt_count + * is 0. After extra work processing has been completed, execution + * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check + * needs to be redone. + */ +#ifdef CONFIG_PREEMPT + rsm psr.i // disable interrupts + cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall +(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 + ;; + .pred.rel.mutex pUStk,pKStk +(pKStk) ld4 r21=[r20] // r21 <- preempt_count +(pUStk) mov r21=0 // r21 <- 0 + ;; + cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) +#else /* !CONFIG_PREEMPT */ +(pUStk) rsm psr.i + cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +#endif +.work_processed_syscall: + adds r2=PT(LOADRS)+16,r12 + adds r3=PT(AR_BSPSTORE)+16,r12 + adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r18] // load current_thread_info()->flags + ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" + mov b7=r0 // clear b7 + ;; + ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) + ld8 r18=[r2],PT(R9)-PT(B6) // load b6 +(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? + ;; + mov r16=ar.bsp // M2 get existing backing store pointer +(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? +(p6) br.cond.spnt .work_pending_syscall + ;; + // start restoring the state saved on the kernel stack (struct pt_regs): + ld8 r9=[r2],PT(CR_IPSR)-PT(R9) + ld8 r11=[r3],PT(CR_IIP)-PT(R11) + mov f6=f0 // clear f6 + ;; + invala // M0|1 invalidate ALAT + rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection + mov f9=f0 // clear f9 + + ld8 r29=[r2],16 // load cr.ipsr + ld8 r28=[r3],16 // load cr.iip + mov f8=f0 // clear f8 + ;; + ld8 r30=[r2],16 // M0|1 load cr.ifs + mov.m ar.ssd=r0 // M2 clear ar.ssd + cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r25=[r3],16 // M0|1 load ar.unat + mov.m ar.csd=r0 // M2 clear ar.csd + mov r22=r0 // clear r22 + ;; + ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs +(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled + mov f10=f0 // clear f10 + ;; + ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0 + ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc + mov f11=f0 // clear f11 + ;; + ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage) + ld8 r31=[r3],PT(R1)-PT(PR) // load predicates +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; + ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr + ld8.fill r1=[r3],16 // load r1 +(pUStk) mov r17=1 + ;; + srlz.d // M0 ensure interruption collection is off + ld8.fill r13=[r3],16 + mov f7=f0 // clear f7 + ;; + ld8.fill r12=[r2] // restore r12 (sp) + ld8.fill r15=[r3] // restore r15 + addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 + ;; +(pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8 +(pUStk) st1 [r14]=r17 + mov b6=r18 // I0 restore b6 + ;; + mov r14=r0 // clear r14 + shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition +(pKStk) br.cond.dpnt.many skip_rbs_switch + + mov.m ar.ccv=r0 // clear ar.ccv +(pNonSys) br.cond.dpnt.many dont_preserve_current_frame + br.cond.sptk.many rbs_switch +END(ia64_leave_syscall) + +#ifdef CONFIG_IA32_SUPPORT +GLOBAL_ENTRY(ia64_ret_from_ia32_execve) + PT_REGS_UNWIND_INFO(0) + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 + ;; + .mem.offset 0,0 + st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit + .mem.offset 8,0 + st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit +END(ia64_ret_from_ia32_execve_syscall) + // fall through +#endif /* CONFIG_IA32_SUPPORT */ +GLOBAL_ENTRY(ia64_leave_kernel) + PT_REGS_UNWIND_INFO(0) + /* + * work.need_resched etc. mustn't get changed by this CPU before it returns to + * user- or fsys-mode, hence we disable interrupts early on. + * + * p6 controls whether current_thread_info()->flags needs to be check for + * extra work. We always check for extra work when returning to user-level. + * With CONFIG_PREEMPT, we also check for extra work when the preempt_count + * is 0. After extra work processing has been completed, execution + * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check + * needs to be redone. + */ +#ifdef CONFIG_PREEMPT + rsm psr.i // disable interrupts + cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel +(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 + ;; + .pred.rel.mutex pUStk,pKStk +(pKStk) ld4 r21=[r20] // r21 <- preempt_count +(pUStk) mov r21=0 // r21 <- 0 + ;; + cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) +#else +(pUStk) rsm psr.i + cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +#endif +.work_processed_kernel: + adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r17] // load current_thread_info()->flags + adds r21=PT(PR)+16,r12 + ;; + + lfetch [r21],PT(CR_IPSR)-PT(PR) + adds r2=PT(B6)+16,r12 + adds r3=PT(R16)+16,r12 + ;; + lfetch [r21] + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + + ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) + adds r30=PT(AR_CCV)+16,r12 +(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? + ;; + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv +(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? + ;; + ld8 r29=[r2],16 // load b7 + ld8 r30=[r3],16 // load ar.csd +(p6) br.cond.spnt .work_pending + ;; + ld8 r31=[r2],16 // load ar.ssd + ld8.fill r8=[r3],16 + ;; + ld8.fill r9=[r2],16 + ld8.fill r10=[r3],PT(R17)-PT(R10) + ;; + ld8.fill r11=[r2],PT(R18)-PT(R11) + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + ld8.fill r22=[r2],24 + ld8.fill r23=[r3],24 + mov b6=r28 + ;; + ld8.fill r25=[r2],16 + ld8.fill r26=[r3],16 + mov b7=r29 + ;; + ld8.fill r27=[r2],16 + ld8.fill r28=[r3],16 + ;; + ld8.fill r29=[r2],16 + ld8.fill r30=[r3],24 + ;; + ld8.fill r31=[r2],PT(F9)-PT(R31) + adds r3=PT(F10)-PT(F6),r3 + ;; + ldf.fill f9=[r2],PT(F6)-PT(F9) + ldf.fill f10=[r3],PT(F8)-PT(F10) + ;; + ldf.fill f6=[r2],PT(F7)-PT(F6) + ;; + ldf.fill f7=[r2],PT(F11)-PT(F7) + ldf.fill f8=[r3],32 + ;; + srlz.i // ensure interruption collection is off + mov ar.ccv=r15 + ;; + ldf.fill f11=[r2] + bsw.0 // switch back to bank 0 (no stop bit required beforehand...) + ;; +(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) + adds r16=PT(CR_IPSR)+16,r12 + adds r17=PT(CR_IIP)+16,r12 + +(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled + nop.i 0 + nop.i 0 + ;; + ld8 r29=[r16],16 // load cr.ipsr + ld8 r28=[r17],16 // load cr.iip + ;; + ld8 r30=[r16],16 // load cr.ifs + ld8 r25=[r17],16 // load ar.unat + ;; + ld8 r26=[r16],16 // load ar.pfs + ld8 r27=[r17],16 // load ar.rsc + cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r24=[r16],16 // load ar.rnat (may be garbage) + ld8 r23=[r17],16 // load ar.bspstore (may be garbage) + ;; + ld8 r31=[r16],16 // load predicates + ld8 r21=[r17],16 // load b0 + ;; + ld8 r19=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 +(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 + ;; + ld8 r20=[r16],16 // ar.fpsr + ld8.fill r15=[r17],16 + ;; + ld8.fill r14=[r16],16 + ld8.fill r2=[r17] +(pUStk) mov r17=1 + ;; + ld8.fill r3=[r16] +(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack + shr.u r18=r19,16 // get byte size of existing "dirty" partition + ;; + mov r16=ar.bsp // get existing backing store pointer + addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 + ;; + ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 +(pKStk) br.cond.dpnt skip_rbs_switch + + /* + * Restore user backing store. + * + * NOTE: alloc, loadrs, and cover can't be predicated. + */ +(pNonSys) br.cond.dpnt dont_preserve_current_frame + +rbs_switch: + cover // add current frame into dirty partition and set cr.ifs + ;; + mov r19=ar.bsp // get new backing store pointer + sub r16=r16,r18 // krbs = old bsp - size of dirty partition + cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs + ;; + sub r19=r19,r16 // calculate total byte size of dirty partition + add r18=64,r18 // don't force in0-in7 into memory... + ;; + shl r19=r19,16 // shift size of dirty partition into loadrs position + ;; +dont_preserve_current_frame: + /* + * To prevent leaking bits between the kernel and user-space, + * we must clear the stacked registers in the "invalid" partition here. + * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium, + * 5 registers/cycle on McKinley). + */ +# define pRecurse p6 +# define pReturn p7 +#ifdef CONFIG_ITANIUM +# define Nregs 10 +#else +# define Nregs 14 +#endif + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r19 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r17 + mov in1=0 + ;; + TEXT_ALIGN(32) +rse_clear_invalid: +#ifdef CONFIG_ITANIUM + // cycle 0 + { .mii + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 +}{ .mfb + add out1=1,in1 // increment recursion count + nop.f 0 + nop.b 0 // can't do br.call here because of alloc (WAW on CFM) + ;; +}{ .mfi // cycle 1 + mov loc1=0 + nop.f 0 + mov loc2=0 +}{ .mib + mov loc3=0 + mov loc4=0 +(pRecurse) br.call.sptk.many b0=rse_clear_invalid + +}{ .mfi // cycle 2 + mov loc5=0 + nop.f 0 + cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret +}{ .mib + mov loc6=0 + mov loc7=0 +(pReturn) br.ret.sptk.many b0 +} +#else /* !CONFIG_ITANIUM */ + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 +(pRecurse) br.call.sptk.few b0=rse_clear_invalid + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 +(pReturn) br.ret.sptk.many b0 +#endif /* !CONFIG_ITANIUM */ +# undef pRecurse +# undef pReturn + ;; + alloc r17=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; +skip_rbs_switch: + mov ar.unat=r25 // M2 +(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22 +(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise + ;; +(pUStk) mov ar.bspstore=r23 // M2 +(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp +(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise + ;; + mov cr.ipsr=r29 // M2 + mov ar.pfs=r26 // I0 +(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise + +(p9) mov cr.ifs=r30 // M2 + mov b0=r21 // I0 +(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise + + mov ar.fpsr=r20 // M2 + mov cr.iip=r28 // M2 + nop 0 + ;; +(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode + nop 0 +(pLvSys)mov r2=r0 + + mov ar.rsc=r27 // M2 + mov pr=r31,-1 // I0 + rfi // B + + /* + * On entry: + * r20 = ¤t->thread_info->pre_count (if CONFIG_PREEMPT) + * r31 = current->thread_info->flags + * On exit: + * p6 = TRUE if work-pending-check needs to be redone + */ +.work_pending_syscall: + add r2=-8,r2 + add r3=-8,r3 + ;; + st8 [r2]=r8 + st8 [r3]=r10 +.work_pending: + tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context? +(p6) br.cond.sptk.few .sigdelayed + ;; + tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? +(p6) br.cond.sptk.few .notify +#ifdef CONFIG_PREEMPT +(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1 + ;; +(pKStk) st4 [r20]=r21 + ssm psr.i // enable interrupts +#endif + br.call.spnt.many rp=schedule +.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 + rsm psr.i // disable interrupts + ;; +#ifdef CONFIG_PREEMPT +(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 + ;; +(pKStk) st4 [r20]=r0 // preempt_count() <- 0 +#endif +(pLvSys)br.cond.sptk.few .work_pending_syscall_end + br.cond.sptk.many .work_processed_kernel // re-check + +.notify: +(pUStk) br.call.spnt.many rp=notify_resume_user +.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 +(pLvSys)br.cond.sptk.few .work_pending_syscall_end + br.cond.sptk.many .work_processed_kernel // don't re-check + +// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where +// it could not be delivered. Deliver it now. The signal might be for us and +// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed +// signal. + +.sigdelayed: + br.call.sptk.many rp=do_sigdelayed + cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check +(pLvSys)br.cond.sptk.few .work_pending_syscall_end + br.cond.sptk.many .work_processed_kernel // re-check + +.work_pending_syscall_end: + adds r2=PT(R8)+16,r12 + adds r3=PT(R10)+16,r12 + ;; + ld8 r8=[r2] + ld8 r10=[r3] + br.cond.sptk.many .work_processed_syscall // re-check + +END(ia64_leave_kernel) + +ENTRY(handle_syscall_error) + /* + * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could + * lead us to mistake a negative return value as a failed syscall. Those syscall + * must deposit a non-zero value in pt_regs.r8 to indicate an error. If + * pt_regs.r8 is zero, we assume that the call completed successfully. + */ + PT_REGS_UNWIND_INFO(0) + ld8 r3=[r2] // load pt_regs.r8 + ;; + cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? + ;; +(p7) mov r10=-1 +(p7) sub r8=0,r8 // negate return value to get errno + br.cond.sptk ia64_leave_syscall +END(handle_syscall_error) + + /* + * Invoke schedule_tail(task) while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ +GLOBAL_ENTRY(ia64_invoke_schedule_tail) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,1,0 + mov loc0=rp + mov out0=r8 // Address of previous task + ;; + br.call.sptk.many rp=schedule_tail +.ret11: mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(ia64_invoke_schedule_tail) + + /* + * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to + * be set up by the caller. We declare 8 input registers so the system call + * args get preserved, in case we need to restart a system call. + */ +ENTRY(notify_resume_user) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! + mov r9=ar.unat + mov loc0=rp // save return address + mov out0=0 // there is no "oldset" + adds out1=8,sp // out1=&sigscratch->ar_pfs +(pSys) mov out2=1 // out2==1 => we're in a syscall + ;; +(pNonSys) mov out2=0 // out2==0 => not a syscall + .fframe 16 + .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) + st8 [sp]=r9,-16 // allocate space for ar.unat and save it + st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch + .body + br.call.sptk.many rp=do_notify_resume_user +.ret15: .restore sp + adds sp=16,sp // pop scratch stack space + ;; + ld8 r9=[sp] // load new unat from sigscratch->scratch_unat + mov rp=loc0 + ;; + mov ar.unat=r9 + mov ar.pfs=loc1 + br.ret.sptk.many rp +END(notify_resume_user) + +GLOBAL_ENTRY(sys_rt_sigsuspend) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! + mov r9=ar.unat + mov loc0=rp // save return address + mov out0=in0 // mask + mov out1=in1 // sigsetsize + adds out2=8,sp // out2=&sigscratch->ar_pfs + ;; + .fframe 16 + .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) + st8 [sp]=r9,-16 // allocate space for ar.unat and save it + st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch + .body + br.call.sptk.many rp=ia64_rt_sigsuspend +.ret17: .restore sp + adds sp=16,sp // pop scratch stack space + ;; + ld8 r9=[sp] // load new unat from sw->caller_unat + mov rp=loc0 + ;; + mov ar.unat=r9 + mov ar.pfs=loc1 + br.ret.sptk.many rp +END(sys_rt_sigsuspend) + +ENTRY(sys_rt_sigreturn) + PT_REGS_UNWIND_INFO(0) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + alloc r2=ar.pfs,8,0,1,0 + .prologue + PT_REGS_SAVES(16) + adds sp=-16,sp + .body + cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall... + ;; + /* + * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined + * syscall-entry path does not save them we save them here instead. Note: we + * don't need to save any other registers that are not saved by the stream-lined + * syscall path, because restore_sigcontext() restores them. + */ + adds r16=PT(F6)+32,sp + adds r17=PT(F7)+32,sp + ;; + stf.spill [r16]=f6,32 + stf.spill [r17]=f7,32 + ;; + stf.spill [r16]=f8,32 + stf.spill [r17]=f9,32 + ;; + stf.spill [r16]=f10 + stf.spill [r17]=f11 + adds out0=16,sp // out0 = &sigscratch + br.call.sptk.many rp=ia64_rt_sigreturn +.ret19: .restore sp 0 + adds sp=16,sp + ;; + ld8 r9=[sp] // load new ar.unat + mov.sptk b7=r8,ia64_leave_kernel + ;; + mov ar.unat=r9 + br.many b7 +END(sys_rt_sigreturn) + +GLOBAL_ENTRY(ia64_prepare_handle_unaligned) + .prologue + /* + * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 + */ + mov r16=r0 + DO_SAVE_SWITCH_STACK + br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt +.ret21: .body + DO_LOAD_SWITCH_STACK + br.cond.sptk.many rp // goes to ia64_leave_kernel +END(ia64_prepare_handle_unaligned) + + // + // unw_init_running(void (*callback)(info, arg), void *arg) + // +# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15) + +GLOBAL_ENTRY(unw_init_running) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc loc1=ar.pfs,2,3,3,0 + ;; + ld8 loc2=[in0],8 + mov loc0=rp + mov r16=loc1 + DO_SAVE_SWITCH_STACK + .body + + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE + SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE) + adds sp=-EXTRA_FRAME_SIZE,sp + .body + ;; + adds out0=16,sp // &info + mov out1=r13 // current + adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack + br.call.sptk.many rp=unw_init_frame_info +1: adds out0=16,sp // &info + mov b6=loc2 + mov loc2=gp // save gp across indirect function call + ;; + ld8 gp=[in0] + mov out1=in1 // arg + br.call.sptk.many rp=b6 // invoke the callback function +1: mov gp=loc2 // restore gp + + // For now, we don't allow changing registers from within + // unw_init_running; if we ever want to allow that, we'd + // have to do a load_switch_stack here: + .restore sp + adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp + + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(unw_init_running) + + .rodata + .align 8 + .globl sys_call_table +sys_call_table: + data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S. + data8 sys_exit // 1025 + data8 sys_read + data8 sys_write + data8 sys_open + data8 sys_close + data8 sys_creat // 1030 + data8 sys_link + data8 sys_unlink + data8 ia64_execve + data8 sys_chdir + data8 sys_fchdir // 1035 + data8 sys_utimes + data8 sys_mknod + data8 sys_chmod + data8 sys_chown + data8 sys_lseek // 1040 + data8 sys_getpid + data8 sys_getppid + data8 sys_mount + data8 sys_umount + data8 sys_setuid // 1045 + data8 sys_getuid + data8 sys_geteuid + data8 sys_ptrace + data8 sys_access + data8 sys_sync // 1050 + data8 sys_fsync + data8 sys_fdatasync + data8 sys_kill + data8 sys_rename + data8 sys_mkdir // 1055 + data8 sys_rmdir + data8 sys_dup + data8 sys_pipe + data8 sys_times + data8 ia64_brk // 1060 + data8 sys_setgid + data8 sys_getgid + data8 sys_getegid + data8 sys_acct + data8 sys_ioctl // 1065 + data8 sys_fcntl + data8 sys_umask + data8 sys_chroot + data8 sys_ustat + data8 sys_dup2 // 1070 + data8 sys_setreuid + data8 sys_setregid + data8 sys_getresuid + data8 sys_setresuid + data8 sys_getresgid // 1075 + data8 sys_setresgid + data8 sys_getgroups + data8 sys_setgroups + data8 sys_getpgid + data8 sys_setpgid // 1080 + data8 sys_setsid + data8 sys_getsid + data8 sys_sethostname + data8 sys_setrlimit + data8 sys_getrlimit // 1085 + data8 sys_getrusage + data8 sys_gettimeofday + data8 sys_settimeofday + data8 sys_select + data8 sys_poll // 1090 + data8 sys_symlink + data8 sys_readlink + data8 sys_uselib + data8 sys_swapon + data8 sys_swapoff // 1095 + data8 sys_reboot + data8 sys_truncate + data8 sys_ftruncate + data8 sys_fchmod + data8 sys_fchown // 1100 + data8 ia64_getpriority + data8 sys_setpriority + data8 sys_statfs + data8 sys_fstatfs + data8 sys_gettid // 1105 + data8 sys_semget + data8 sys_semop + data8 sys_semctl + data8 sys_msgget + data8 sys_msgsnd // 1110 + data8 sys_msgrcv + data8 sys_msgctl + data8 sys_shmget + data8 ia64_shmat + data8 sys_shmdt // 1115 + data8 sys_shmctl + data8 sys_syslog + data8 sys_setitimer + data8 sys_getitimer + data8 sys_ni_syscall // 1120 /* was: ia64_oldstat */ + data8 sys_ni_syscall /* was: ia64_oldlstat */ + data8 sys_ni_syscall /* was: ia64_oldfstat */ + data8 sys_vhangup + data8 sys_lchown + data8 sys_remap_file_pages // 1125 + data8 sys_wait4 + data8 sys_sysinfo + data8 sys_clone + data8 sys_setdomainname + data8 sys_newuname // 1130 + data8 sys_adjtimex + data8 sys_ni_syscall /* was: ia64_create_module */ + data8 sys_init_module + data8 sys_delete_module + data8 sys_ni_syscall // 1135 /* was: sys_get_kernel_syms */ + data8 sys_ni_syscall /* was: sys_query_module */ + data8 sys_quotactl + data8 sys_bdflush + data8 sys_sysfs + data8 sys_personality // 1140 + data8 sys_ni_syscall // sys_afs_syscall + data8 sys_setfsuid + data8 sys_setfsgid + data8 sys_getdents + data8 sys_flock // 1145 + data8 sys_readv + data8 sys_writev + data8 sys_pread64 + data8 sys_pwrite64 + data8 sys_sysctl // 1150 + data8 sys_mmap + data8 sys_munmap + data8 sys_mlock + data8 sys_mlockall + data8 sys_mprotect // 1155 + data8 ia64_mremap + data8 sys_msync + data8 sys_munlock + data8 sys_munlockall + data8 sys_sched_getparam // 1160 + data8 sys_sched_setparam + data8 sys_sched_getscheduler + data8 sys_sched_setscheduler + data8 sys_sched_yield + data8 sys_sched_get_priority_max // 1165 + data8 sys_sched_get_priority_min + data8 sys_sched_rr_get_interval + data8 sys_nanosleep + data8 sys_nfsservctl + data8 sys_prctl // 1170 + data8 sys_getpagesize + data8 sys_mmap2 + data8 sys_pciconfig_read + data8 sys_pciconfig_write + data8 sys_perfmonctl // 1175 + data8 sys_sigaltstack + data8 sys_rt_sigaction + data8 sys_rt_sigpending + data8 sys_rt_sigprocmask + data8 sys_rt_sigqueueinfo // 1180 + data8 sys_rt_sigreturn + data8 sys_rt_sigsuspend + data8 sys_rt_sigtimedwait + data8 sys_getcwd + data8 sys_capget // 1185 + data8 sys_capset + data8 sys_sendfile64 + data8 sys_ni_syscall // sys_getpmsg (STREAMS) + data8 sys_ni_syscall // sys_putpmsg (STREAMS) + data8 sys_socket // 1190 + data8 sys_bind + data8 sys_connect + data8 sys_listen + data8 sys_accept + data8 sys_getsockname // 1195 + data8 sys_getpeername + data8 sys_socketpair + data8 sys_send + data8 sys_sendto + data8 sys_recv // 1200 + data8 sys_recvfrom + data8 sys_shutdown + data8 sys_setsockopt + data8 sys_getsockopt + data8 sys_sendmsg // 1205 + data8 sys_recvmsg + data8 sys_pivot_root + data8 sys_mincore + data8 sys_madvise + data8 sys_newstat // 1210 + data8 sys_newlstat + data8 sys_newfstat + data8 sys_clone2 + data8 sys_getdents64 + data8 sys_getunwind // 1215 + data8 sys_readahead + data8 sys_setxattr + data8 sys_lsetxattr + data8 sys_fsetxattr + data8 sys_getxattr // 1220 + data8 sys_lgetxattr + data8 sys_fgetxattr + data8 sys_listxattr + data8 sys_llistxattr + data8 sys_flistxattr // 1225 + data8 sys_removexattr + data8 sys_lremovexattr + data8 sys_fremovexattr + data8 sys_tkill + data8 sys_futex // 1230 + data8 sys_sched_setaffinity + data8 sys_sched_getaffinity + data8 sys_set_tid_address + data8 sys_fadvise64_64 + data8 sys_tgkill // 1235 + data8 sys_exit_group + data8 sys_lookup_dcookie + data8 sys_io_setup + data8 sys_io_destroy + data8 sys_io_getevents // 1240 + data8 sys_io_submit + data8 sys_io_cancel + data8 sys_epoll_create + data8 sys_epoll_ctl + data8 sys_epoll_wait // 1245 + data8 sys_restart_syscall + data8 sys_semtimedop + data8 sys_timer_create + data8 sys_timer_settime + data8 sys_timer_gettime // 1250 + data8 sys_timer_getoverrun + data8 sys_timer_delete + data8 sys_clock_settime + data8 sys_clock_gettime + data8 sys_clock_getres // 1255 + data8 sys_clock_nanosleep + data8 sys_fstatfs64 + data8 sys_statfs64 + data8 sys_mbind + data8 sys_get_mempolicy // 1260 + data8 sys_set_mempolicy + data8 sys_mq_open + data8 sys_mq_unlink + data8 sys_mq_timedsend + data8 sys_mq_timedreceive // 1265 + data8 sys_mq_notify + data8 sys_mq_getsetattr + data8 sys_ni_syscall // reserved for kexec_load + data8 sys_ni_syscall // reserved for vserver + data8 sys_waitid // 1270 + data8 sys_add_key + data8 sys_request_key + data8 sys_keyctl + data8 sys_ni_syscall + data8 sys_ni_syscall // 1275 + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + + .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h new file mode 100644 index 000000000000..6d4ecec989b5 --- /dev/null +++ b/arch/ia64/kernel/entry.h @@ -0,0 +1,82 @@ +#include <linux/config.h> + +/* + * Preserved registers that are shared between code in ivt.S and + * entry.S. Be careful not to step on these! + */ +#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */ +#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */ +#define PRED_USER_STACK 3 /* returning to user-stacks? */ +#define PRED_SYSCALL 4 /* inside a system call? */ +#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */ + +#ifdef __ASSEMBLY__ +# define PASTE2(x,y) x##y +# define PASTE(x,y) PASTE2(x,y) + +# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL) +# define pKStk PASTE(p,PRED_KERNEL_STACK) +# define pUStk PASTE(p,PRED_USER_STACK) +# define pSys PASTE(p,PRED_SYSCALL) +# define pNonSys PASTE(p,PRED_NON_SYSCALL) +#endif + +#define PT(f) (IA64_PT_REGS_##f##_OFFSET) +#define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET) + +#define PT_REGS_SAVES(off) \ + .unwabi 3, 'i'; \ + .fframe IA64_PT_REGS_SIZE+16+(off); \ + .spillsp rp, PT(CR_IIP)+16+(off); \ + .spillsp ar.pfs, PT(CR_IFS)+16+(off); \ + .spillsp ar.unat, PT(AR_UNAT)+16+(off); \ + .spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \ + .spillsp pr, PT(PR)+16+(off); + +#define PT_REGS_UNWIND_INFO(off) \ + .prologue; \ + PT_REGS_SAVES(off); \ + .body + +#define SWITCH_STACK_SAVES(off) \ + .savesp ar.unat,SW(CALLER_UNAT)+16+(off); \ + .savesp ar.fpsr,SW(AR_FPSR)+16+(off); \ + .spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \ + .spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \ + .spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \ + .spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \ + .spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \ + .spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \ + .spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \ + .spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \ + .spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \ + .spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \ + .spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \ + .spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \ + .spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \ + .spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \ + .spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \ + .spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \ + .spillsp @priunat,SW(AR_UNAT)+16+(off); \ + .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \ + .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \ + .spillsp pr,SW(PR)+16+(off)) + +#define DO_SAVE_SWITCH_STACK \ + movl r28=1f; \ + ;; \ + .fframe IA64_SWITCH_STACK_SIZE; \ + adds sp=-IA64_SWITCH_STACK_SIZE,sp; \ + mov.ret.sptk b7=r28,1f; \ + SWITCH_STACK_SAVES(0); \ + br.cond.sptk.many save_switch_stack; \ +1: + +#define DO_LOAD_SWITCH_STACK \ + movl r28=1f; \ + ;; \ + invala; \ + mov.ret.sptk b7=r28,1f; \ + br.cond.sptk.many load_switch_stack; \ +1: .restore sp; \ + adds sp=IA64_SWITCH_STACK_SIZE,sp diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S new file mode 100644 index 000000000000..0d8650f7fce7 --- /dev/null +++ b/arch/ia64/kernel/fsys.S @@ -0,0 +1,884 @@ +/* + * This file contains the light-weight system call handlers (fsyscall-handlers). + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 25-Sep-03 davidm Implement fsys_rt_sigprocmask(). + * 18-Feb-03 louisk Implement fsys_gettimeofday(). + * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more, + * probably broke it along the way... ;-) + * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make + * it capable of using memory based clocks without falling back to C code. + */ + +#include <asm/asmmacro.h> +#include <asm/errno.h> +#include <asm/offsets.h> +#include <asm/percpu.h> +#include <asm/thread_info.h> +#include <asm/sal.h> +#include <asm/signal.h> +#include <asm/system.h> +#include <asm/unistd.h> + +#include "entry.h" + +/* + * See Documentation/ia64/fsys.txt for details on fsyscalls. + * + * On entry to an fsyscall handler: + * r10 = 0 (i.e., defaults to "successful syscall return") + * r11 = saved ar.pfs (a user-level value) + * r15 = system call number + * r16 = "current" task pointer (in normal kernel-mode, this is in r13) + * r32-r39 = system call arguments + * b6 = return address (a user-level value) + * ar.pfs = previous frame-state (a user-level value) + * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) + * all other registers may contain values passed in from user-mode + * + * On return from an fsyscall handler: + * r11 = saved ar.pfs (as passed into the fsyscall handler) + * r15 = system call number (as passed into the fsyscall handler) + * r32-r39 = system call arguments (as passed into the fsyscall handler) + * b6 = return address (as passed into the fsyscall handler) + * ar.pfs = previous frame-state (as passed into the fsyscall handler) + */ + +ENTRY(fsys_ni_syscall) + .prologue + .altrp b6 + .body + mov r8=ENOSYS + mov r10=-1 + FSYS_RETURN +END(fsys_ni_syscall) + +ENTRY(fsys_getpid) + .prologue + .altrp b6 + .body + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + ld4 r9=[r9] + add r8=IA64_TASK_TGID_OFFSET,r16 + ;; + and r9=TIF_ALLWORK_MASK,r9 + ld4 r8=[r8] // r8 = current->tgid + ;; + cmp.ne p8,p0=0,r9 +(p8) br.spnt.many fsys_fallback_syscall + FSYS_RETURN +END(fsys_getpid) + +ENTRY(fsys_getppid) + .prologue + .altrp b6 + .body + add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 + ;; + ld8 r17=[r17] // r17 = current->group_leader + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + + ld4 r9=[r9] + add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = ¤t->group_leader->real_parent + ;; + and r9=TIF_ALLWORK_MASK,r9 + +1: ld8 r18=[r17] // r18 = current->group_leader->real_parent + ;; + cmp.ne p8,p0=0,r9 + add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = ¤t->group_leader->real_parent->tgid + ;; + + /* + * The .acq is needed to ensure that the read of tgid has returned its data before + * we re-check "real_parent". + */ + ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid +#ifdef CONFIG_SMP + /* + * Re-read current->group_leader->real_parent. + */ + ld8 r19=[r17] // r19 = current->group_leader->real_parent +(p8) br.spnt.many fsys_fallback_syscall + ;; + cmp.ne p6,p0=r18,r19 // did real_parent change? + mov r19=0 // i must not leak kernel bits... +(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check + ;; + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... +#else + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... + mov r19=0 // i must not leak kernel bits... +#endif + FSYS_RETURN +END(fsys_getppid) + +ENTRY(fsys_set_tid_address) + .prologue + .altrp b6 + .body + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + ld4 r9=[r9] + tnat.z p6,p7=r32 // check argument register for being NaT + ;; + and r9=TIF_ALLWORK_MASK,r9 + add r8=IA64_TASK_PID_OFFSET,r16 + add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 + ;; + ld4 r8=[r8] + cmp.ne p8,p0=0,r9 + mov r17=-1 + ;; +(p6) st8 [r18]=r32 +(p7) st8 [r18]=r17 +(p8) br.spnt.many fsys_fallback_syscall + ;; + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... + FSYS_RETURN +END(fsys_set_tid_address) + +/* + * Ensure that the time interpolator structure is compatible with the asm code + */ +#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \ + || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4 +#error fsys_gettimeofday incompatible with changes to struct time_interpolator +#endif +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_DIVIDE_BY_1000 0x4000 +#define CLOCK_ADD_MONOTONIC 0x8000 + +ENTRY(fsys_gettimeofday) + .prologue + .altrp b6 + .body + mov r31 = r32 + tnat.nz p6,p0 = r33 // guard against NaT argument +(p6) br.cond.spnt.few .fail_einval + mov r30 = CLOCK_DIVIDE_BY_1000 + ;; +.gettime: + // Register map + // Incoming r31 = pointer to address where to place result + // r30 = flags determining how time is processed + // r2,r3 = temp r4-r7 preserved + // r8 = result nanoseconds + // r9 = result seconds + // r10 = temporary storage for clock difference + // r11 = preserved: saved ar.pfs + // r12 = preserved: memory stack + // r13 = preserved: thread pointer + // r14 = address of mask / mask + // r15 = preserved: system call number + // r16 = preserved: current task pointer + // r17 = wall to monotonic use + // r18 = time_interpolator->offset + // r19 = address of wall_to_monotonic + // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address + // r21 = shift factor + // r22 = address of time interpolator->last_counter + // r23 = address of time_interpolator->last_cycle + // r24 = adress of time_interpolator->offset + // r25 = last_cycle value + // r26 = last_counter value + // r27 = pointer to xtime + // r28 = sequence number at the beginning of critcal section + // r29 = address of seqlock + // r30 = time processing flags / memory address + // r31 = pointer to result + // Predicates + // p6,p7 short term use + // p8 = timesource ar.itc + // p9 = timesource mmio64 + // p10 = timesource mmio32 + // p11 = timesource not to be handled by asm code + // p12 = memory time source ( = p9 | p10) + // p13 = do cmpxchg with time_interpolator_last_cycle + // p14 = Divide by 1000 + // p15 = Add monotonic + // + // Note that instructions are optimized for McKinley. McKinley can process two + // bundles simultaneously and therefore we continuously try to feed the CPU + // two bundles and then a stop. + tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure + mov pr = r30,0xc000 // Set predicates according to function + add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 + movl r20 = time_interpolator + ;; + ld8 r20 = [r20] // get pointer to time_interpolator structure + movl r29 = xtime_lock + ld4 r2 = [r2] // process work pending flags + movl r27 = xtime + ;; // only one bundle here + ld8 r21 = [r20] // first quad with control information + and r2 = TIF_ALLWORK_MASK,r2 +(p6) br.cond.spnt.few .fail_einval // deferred branch + ;; + add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20 + extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc + extr r8 = r21,0,16 // time_interpolator->source + cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled +(p6) br.cond.spnt.many fsys_fallback_syscall + ;; + cmp.eq p8,p12 = 0,r8 // Check for cpu timer + cmp.eq p9,p0 = 1,r8 // MMIO64 ? + extr r2 = r21,24,8 // time_interpolator->jitter + cmp.eq p10,p0 = 2,r8 // MMIO32 ? + cmp.ltu p11,p0 = 2,r8 // function or other clock +(p11) br.cond.spnt.many fsys_fallback_syscall + ;; + setf.sig f7 = r3 // Setup for scaling of counter +(p15) movl r19 = wall_to_monotonic +(p12) ld8 r30 = [r10] + cmp.ne p13,p0 = r2,r0 // need jitter compensation? + extr r21 = r21,16,8 // shift factor + ;; +.time_redo: + .pred.rel.mutex p8,p9,p10 + ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes +(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! + add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20 +(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues.. +(p10) ld4 r2 = [r30] // readw(ti->address) +(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20 + ;; // could be removed by moving the last add upward + ld8 r26 = [r22] // time_interpolator->last_counter +(p13) ld8 r25 = [r23] // time interpolator->last_cycle + add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20 +(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET + ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET + add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20 + ;; + ld8 r18 = [r24] // time_interpolator->offset + ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec +(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) + ;; + ld8 r14 = [r14] // time_interpolator->mask +(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared + sub r10 = r2,r26 // current_counter - last_counter + ;; +(p6) sub r10 = r25,r26 // time we got was less than last_cycle +(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg + ;; + and r10 = r10,r14 // Apply mask + ;; + setf.sig f8 = r10 + nop.i 123 + ;; +(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv +EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time + xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) +(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs + ;; +(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET +(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo + // simulate tbit.nz.or p7,p0 = r28,0 + and r28 = ~1,r28 // Make sequence even to force retry if odd + getf.sig r2 = f8 + mf + add r8 = r8,r18 // Add time interpolator offset + ;; + ld4 r10 = [r29] // xtime_lock.sequence +(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs + shr.u r2 = r2,r21 + ;; // overloaded 3 bundles! + // End critical section. + add r8 = r8,r2 // Add xtime.nsecs + cmp4.ne.or p7,p0 = r28,r10 +(p7) br.cond.dpnt.few .time_redo // sequence number changed ? + // Now r8=tv->tv_nsec and r9=tv->tv_sec + mov r10 = r0 + movl r2 = 1000000000 + add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 +(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack + ;; +.time_normalize: + mov r21 = r8 + cmp.ge p6,p0 = r8,r2 +(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time + ;; +(p14) setf.sig f8 = r20 +(p6) sub r8 = r8,r2 +(p6) add r9 = 1,r9 // two nops before the branch. +(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod +(p6) br.cond.dpnt.few .time_normalize + ;; + // Divided by 8 though shift. Now divide by 125 + // The compiler was able to do that with a multiply + // and a shift and we do the same +EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles +(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it... + ;; + mov r8 = r0 +(p14) getf.sig r2 = f8 + ;; +(p14) shr.u r21 = r2, 4 + ;; +EX(.fail_efault, st8 [r31] = r9) +EX(.fail_efault, st8 [r23] = r21) + FSYS_RETURN +.fail_einval: + mov r8 = EINVAL + mov r10 = -1 + FSYS_RETURN +.fail_efault: + mov r8 = EFAULT + mov r10 = -1 + FSYS_RETURN +END(fsys_gettimeofday) + +ENTRY(fsys_clock_gettime) + .prologue + .altrp b6 + .body + cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 + // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC +(p6) br.spnt.few fsys_fallback_syscall + mov r31 = r33 + shl r30 = r32,15 + br.many .gettime +END(fsys_clock_gettime) + +/* + * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize). + */ +#if _NSIG_WORDS != 1 +# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1. +#endif +ENTRY(fsys_rt_sigprocmask) + .prologue + .altrp b6 + .body + + add r2=IA64_TASK_BLOCKED_OFFSET,r16 + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + cmp4.ltu p6,p0=SIG_SETMASK,r32 + + cmp.ne p15,p0=r0,r34 // oset != NULL? + tnat.nz p8,p0=r34 + add r31=IA64_TASK_SIGHAND_OFFSET,r16 + ;; + ld8 r3=[r2] // read/prefetch current->blocked + ld4 r9=[r9] + tnat.nz.or p6,p0=r35 + + cmp.ne.or p6,p0=_NSIG_WORDS*8,r35 + tnat.nz.or p6,p0=r32 +(p6) br.spnt.few .fail_einval // fail with EINVAL + ;; +#ifdef CONFIG_SMP + ld8 r31=[r31] // r31 <- current->sighand +#endif + and r9=TIF_ALLWORK_MASK,r9 + tnat.nz.or p8,p0=r33 + ;; + cmp.ne p7,p0=0,r9 + cmp.eq p6,p0=r0,r33 // set == NULL? + add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock +(p8) br.spnt.few .fail_efault // fail with EFAULT +(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work... +(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask + + /* Argh, we actually have to do some work and _update_ the signal mask: */ + +EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set +EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set + mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1)) + ;; + + rsm psr.i // mask interrupt delivery + mov ar.ccv=0 + andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP + +#ifdef CONFIG_SMP + mov r17=1 + ;; + cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock + mov r8=EINVAL // default to EINVAL + ;; + ld8 r3=[r2] // re-read current->blocked now that we hold the lock + cmp4.ne p6,p0=r18,r0 +(p6) br.cond.spnt.many .lock_contention + ;; +#else + ld8 r3=[r2] // re-read current->blocked now that we hold the lock + mov r8=EINVAL // default to EINVAL +#endif + add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16 + add r19=IA64_TASK_SIGNAL_OFFSET,r16 + cmp4.eq p6,p0=SIG_BLOCK,r32 + ;; + ld8 r19=[r19] // r19 <- current->signal + cmp4.eq p7,p0=SIG_UNBLOCK,r32 + cmp4.eq p8,p0=SIG_SETMASK,r32 + ;; + ld8 r18=[r18] // r18 <- current->pending.signal + .pred.rel.mutex p6,p7,p8 +(p6) or r14=r3,r14 // SIG_BLOCK +(p7) andcm r14=r3,r14 // SIG_UNBLOCK + +(p8) mov r14=r14 // SIG_SETMASK +(p6) mov r8=0 // clear error code + // recalc_sigpending() + add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19 + + add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19 + ;; + ld4 r17=[r17] // r17 <- current->signal->group_stop_count +(p7) mov r8=0 // clear error code + + ld8 r19=[r19] // r19 <- current->signal->shared_pending + ;; + cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)? +(p8) mov r8=0 // clear error code + + or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending + ;; + // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked: + andcm r18=r18,r14 + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + +(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending + mov r19=0 // i must not leak kernel bits... +(p6) br.cond.dpnt.many .sig_pending + ;; + +1: ld4 r17=[r9] // r17 <- current->thread_info->flags + ;; + mov ar.ccv=r17 + and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING) + ;; + + st8 [r2]=r14 // update current->blocked with new mask + cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18 + ;; + cmp.ne p6,p0=r17,r14 // update failed? +(p6) br.cond.spnt.few 1b // yes -> retry + +#ifdef CONFIG_SMP + st4.rel [r31]=r0 // release the lock +#endif + ssm psr.i + ;; + + srlz.d // ensure psr.i is set again + mov r18=0 // i must not leak kernel bits... + +.store_mask: +EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset +EX(.fail_efault, (p15) st8 [r34]=r3) + mov r2=0 // i must not leak kernel bits... + mov r3=0 // i must not leak kernel bits... + mov r8=0 // return 0 + mov r9=0 // i must not leak kernel bits... + mov r14=0 // i must not leak kernel bits... + mov r17=0 // i must not leak kernel bits... + mov r31=0 // i must not leak kernel bits... + FSYS_RETURN + +.sig_pending: +#ifdef CONFIG_SMP + st4.rel [r31]=r0 // release the lock +#endif + ssm psr.i + ;; + srlz.d + br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall + +#ifdef CONFIG_SMP +.lock_contention: + /* Rather than spinning here, fall back on doing a heavy-weight syscall. */ + ssm psr.i + ;; + srlz.d + br.sptk.many fsys_fallback_syscall +#endif +END(fsys_rt_sigprocmask) + +ENTRY(fsys_fallback_syscall) + .prologue + .altrp b6 + .body + /* + * We only get here from light-weight syscall handlers. Thus, we already + * know that r15 contains a valid syscall number. No need to re-check. + */ + adds r17=-1024,r15 + movl r14=sys_call_table + ;; + rsm psr.i + shladd r18=r17,3,r14 + ;; + ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point + mov r29=psr // read psr (12 cyc load latency) + mov r27=ar.rsc + mov r21=ar.fpsr + mov r26=ar.pfs +END(fsys_fallback_syscall) + /* FALL THROUGH */ +GLOBAL_ENTRY(fsys_bubble_down) + .prologue + .altrp b6 + .body + /* + * We get here for syscalls that don't have a lightweight handler. For those, we + * need to bubble down into the kernel and that requires setting up a minimal + * pt_regs structure, and initializing the CPU state more or less as if an + * interruption had occurred. To make syscall-restarts work, we setup pt_regs + * such that cr_iip points to the second instruction in syscall_via_break. + * Decrementing the IP hence will restart the syscall via break and not + * decrementing IP will return us to the caller, as usual. Note that we preserve + * the value of psr.pp rather than initializing it from dcr.pp. This makes it + * possible to distinguish fsyscall execution from other privileged execution. + * + * On entry: + * - normal fsyscall handler register usage, except that we also have: + * - r18: address of syscall entry point + * - r21: ar.fpsr + * - r26: ar.pfs + * - r27: ar.rsc + * - r29: psr + */ +# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \ + | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \ + | IA64_PSR_IC) + /* + * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have + * to synthesize. + */ +# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \ + | IA64_PSR_BN | IA64_PSR_I) + + invala + movl r8=PSR_ONE_BITS + + mov r25=ar.unat // save ar.unat (5 cyc) + movl r9=PSR_PRESERVED_BITS + + mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0 + movl r28=__kernel_syscall_via_break + ;; + mov r23=ar.bspstore // save ar.bspstore (12 cyc) + mov r31=pr // save pr (2 cyc) + mov r20=r1 // save caller's gp in r20 + ;; + mov r2=r16 // copy current task addr to addl-addressable register + and r9=r9,r29 + mov r19=b6 // save b6 (2 cyc) + ;; + mov psr.l=r9 // slam the door (17 cyc to srlz.i) + or r29=r8,r29 // construct cr.ipsr value to save + addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS + ;; + // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks + // we may be reading ar.itc after writing to psr.l. Avoid that message with + // this directive: + dv_serialize_data + mov.m r24=ar.rnat // read ar.rnat (5 cyc lat) + lfetch.fault.excl.nt1 [r22] + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2 + + // ensure previous insn group is issued before we stall for srlz.i: + ;; + srlz.i // ensure new psr.l has been established + ///////////////////////////////////////////////////////////////////////////// + ////////// from this point on, execution is not interruptible anymore + ///////////////////////////////////////////////////////////////////////////// + addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack + cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1 + ;; + st1 [r16]=r0 // clear current->thread.on_ustack flag + mov ar.bspstore=r22 // switch to kernel RBS + mov b6=r18 // copy syscall entry-point to b6 (7 cyc) + add r3=TI_FLAGS+IA64_TASK_SIZE,r2 + ;; + ld4 r3=[r3] // r2 = current_thread_info()->flags + mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc) + mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0 + br.call.sptk.many b7=ia64_syscall_setup + ;; + ssm psr.i + movl r2=ia64_ret_from_syscall + ;; + mov rp=r2 // set the real return addr + tbit.z p8,p0=r3,TIF_SYSCALL_TRACE + ;; +(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8 +(p8) br.call.sptk.many b6=b6 // ignore this return addr + br.cond.sptk ia64_trace_syscall +END(fsys_bubble_down) + + .rodata + .align 8 + .globl fsyscall_table + + data8 fsys_bubble_down +fsyscall_table: + data8 fsys_ni_syscall + data8 0 // exit // 1025 + data8 0 // read + data8 0 // write + data8 0 // open + data8 0 // close + data8 0 // creat // 1030 + data8 0 // link + data8 0 // unlink + data8 0 // execve + data8 0 // chdir + data8 0 // fchdir // 1035 + data8 0 // utimes + data8 0 // mknod + data8 0 // chmod + data8 0 // chown + data8 0 // lseek // 1040 + data8 fsys_getpid // getpid + data8 fsys_getppid // getppid + data8 0 // mount + data8 0 // umount + data8 0 // setuid // 1045 + data8 0 // getuid + data8 0 // geteuid + data8 0 // ptrace + data8 0 // access + data8 0 // sync // 1050 + data8 0 // fsync + data8 0 // fdatasync + data8 0 // kill + data8 0 // rename + data8 0 // mkdir // 1055 + data8 0 // rmdir + data8 0 // dup + data8 0 // pipe + data8 0 // times + data8 0 // brk // 1060 + data8 0 // setgid + data8 0 // getgid + data8 0 // getegid + data8 0 // acct + data8 0 // ioctl // 1065 + data8 0 // fcntl + data8 0 // umask + data8 0 // chroot + data8 0 // ustat + data8 0 // dup2 // 1070 + data8 0 // setreuid + data8 0 // setregid + data8 0 // getresuid + data8 0 // setresuid + data8 0 // getresgid // 1075 + data8 0 // setresgid + data8 0 // getgroups + data8 0 // setgroups + data8 0 // getpgid + data8 0 // setpgid // 1080 + data8 0 // setsid + data8 0 // getsid + data8 0 // sethostname + data8 0 // setrlimit + data8 0 // getrlimit // 1085 + data8 0 // getrusage + data8 fsys_gettimeofday // gettimeofday + data8 0 // settimeofday + data8 0 // select + data8 0 // poll // 1090 + data8 0 // symlink + data8 0 // readlink + data8 0 // uselib + data8 0 // swapon + data8 0 // swapoff // 1095 + data8 0 // reboot + data8 0 // truncate + data8 0 // ftruncate + data8 0 // fchmod + data8 0 // fchown // 1100 + data8 0 // getpriority + data8 0 // setpriority + data8 0 // statfs + data8 0 // fstatfs + data8 0 // gettid // 1105 + data8 0 // semget + data8 0 // semop + data8 0 // semctl + data8 0 // msgget + data8 0 // msgsnd // 1110 + data8 0 // msgrcv + data8 0 // msgctl + data8 0 // shmget + data8 0 // shmat + data8 0 // shmdt // 1115 + data8 0 // shmctl + data8 0 // syslog + data8 0 // setitimer + data8 0 // getitimer + data8 0 // 1120 + data8 0 + data8 0 + data8 0 // vhangup + data8 0 // lchown + data8 0 // remap_file_pages // 1125 + data8 0 // wait4 + data8 0 // sysinfo + data8 0 // clone + data8 0 // setdomainname + data8 0 // newuname // 1130 + data8 0 // adjtimex + data8 0 + data8 0 // init_module + data8 0 // delete_module + data8 0 // 1135 + data8 0 + data8 0 // quotactl + data8 0 // bdflush + data8 0 // sysfs + data8 0 // personality // 1140 + data8 0 // afs_syscall + data8 0 // setfsuid + data8 0 // setfsgid + data8 0 // getdents + data8 0 // flock // 1145 + data8 0 // readv + data8 0 // writev + data8 0 // pread64 + data8 0 // pwrite64 + data8 0 // sysctl // 1150 + data8 0 // mmap + data8 0 // munmap + data8 0 // mlock + data8 0 // mlockall + data8 0 // mprotect // 1155 + data8 0 // mremap + data8 0 // msync + data8 0 // munlock + data8 0 // munlockall + data8 0 // sched_getparam // 1160 + data8 0 // sched_setparam + data8 0 // sched_getscheduler + data8 0 // sched_setscheduler + data8 0 // sched_yield + data8 0 // sched_get_priority_max // 1165 + data8 0 // sched_get_priority_min + data8 0 // sched_rr_get_interval + data8 0 // nanosleep + data8 0 // nfsservctl + data8 0 // prctl // 1170 + data8 0 // getpagesize + data8 0 // mmap2 + data8 0 // pciconfig_read + data8 0 // pciconfig_write + data8 0 // perfmonctl // 1175 + data8 0 // sigaltstack + data8 0 // rt_sigaction + data8 0 // rt_sigpending + data8 fsys_rt_sigprocmask // rt_sigprocmask + data8 0 // rt_sigqueueinfo // 1180 + data8 0 // rt_sigreturn + data8 0 // rt_sigsuspend + data8 0 // rt_sigtimedwait + data8 0 // getcwd + data8 0 // capget // 1185 + data8 0 // capset + data8 0 // sendfile + data8 0 + data8 0 + data8 0 // socket // 1190 + data8 0 // bind + data8 0 // connect + data8 0 // listen + data8 0 // accept + data8 0 // getsockname // 1195 + data8 0 // getpeername + data8 0 // socketpair + data8 0 // send + data8 0 // sendto + data8 0 // recv // 1200 + data8 0 // recvfrom + data8 0 // shutdown + data8 0 // setsockopt + data8 0 // getsockopt + data8 0 // sendmsg // 1205 + data8 0 // recvmsg + data8 0 // pivot_root + data8 0 // mincore + data8 0 // madvise + data8 0 // newstat // 1210 + data8 0 // newlstat + data8 0 // newfstat + data8 0 // clone2 + data8 0 // getdents64 + data8 0 // getunwind // 1215 + data8 0 // readahead + data8 0 // setxattr + data8 0 // lsetxattr + data8 0 // fsetxattr + data8 0 // getxattr // 1220 + data8 0 // lgetxattr + data8 0 // fgetxattr + data8 0 // listxattr + data8 0 // llistxattr + data8 0 // flistxattr // 1225 + data8 0 // removexattr + data8 0 // lremovexattr + data8 0 // fremovexattr + data8 0 // tkill + data8 0 // futex // 1230 + data8 0 // sched_setaffinity + data8 0 // sched_getaffinity + data8 fsys_set_tid_address // set_tid_address + data8 0 // fadvise64_64 + data8 0 // tgkill // 1235 + data8 0 // exit_group + data8 0 // lookup_dcookie + data8 0 // io_setup + data8 0 // io_destroy + data8 0 // io_getevents // 1240 + data8 0 // io_submit + data8 0 // io_cancel + data8 0 // epoll_create + data8 0 // epoll_ctl + data8 0 // epoll_wait // 1245 + data8 0 // restart_syscall + data8 0 // semtimedop + data8 0 // timer_create + data8 0 // timer_settime + data8 0 // timer_gettime // 1250 + data8 0 // timer_getoverrun + data8 0 // timer_delete + data8 0 // clock_settime + data8 fsys_clock_gettime // clock_gettime + data8 0 // clock_getres // 1255 + data8 0 // clock_nanosleep + data8 0 // fstatfs64 + data8 0 // statfs64 + data8 0 + data8 0 // 1260 + data8 0 + data8 0 // mq_open + data8 0 // mq_unlink + data8 0 // mq_timedsend + data8 0 // mq_timedreceive // 1265 + data8 0 // mq_notify + data8 0 // mq_getsetattr + data8 0 // kexec_load + data8 0 + data8 0 // 1270 + data8 0 + data8 0 + data8 0 + data8 0 + data8 0 // 1275 + data8 0 + data8 0 + data8 0 + data8 0 + + .org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S new file mode 100644 index 000000000000..258c0a3238fb --- /dev/null +++ b/arch/ia64/kernel/gate-data.S @@ -0,0 +1,3 @@ + .section .data.gate, "aw" + + .incbin "arch/ia64/kernel/gate.so" diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S new file mode 100644 index 000000000000..facf75acdc85 --- /dev/null +++ b/arch/ia64/kernel/gate.S @@ -0,0 +1,372 @@ +/* + * This file contains the code that gets mapped at the upper end of each task's text + * region. For now, it contains the signal trampoline code only. + * + * Copyright (C) 1999-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/errno.h> +#include <asm/offsets.h> +#include <asm/sigcontext.h> +#include <asm/system.h> +#include <asm/unistd.h> + +/* + * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, + * complications with the linker (which likes to create PLT stubs for branches + * to targets outside the shared object) and to avoid multi-phase kernel builds, we + * simply create minimalistic "patch lists" in special ELF sections. + */ + .section ".data.patch.fsyscall_table", "a" + .previous +#define LOAD_FSYSCALL_TABLE(reg) \ +[1:] movl reg=0; \ + .xdata4 ".data.patch.fsyscall_table", 1b-. + + .section ".data.patch.brl_fsys_bubble_down", "a" + .previous +#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ +[1:](pr)brl.cond.sptk 0; \ + .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-. + +GLOBAL_ENTRY(__kernel_syscall_via_break) + .prologue + .altrp b6 + .body + /* + * Note: for (fast) syscall restart to work, the break instruction must be + * the first one in the bundle addressed by syscall_via_break. + */ +{ .mib + break 0x100000 + nop.i 0 + br.ret.sptk.many b6 +} +END(__kernel_syscall_via_break) + +/* + * On entry: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * b6 = return address + * On exit: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * all other "scratch" registers: undefined + * all "preserved" registers: same as on entry + */ + +GLOBAL_ENTRY(__kernel_syscall_via_epc) + .prologue + .altrp b6 + .body +{ + /* + * Note: the kernel cannot assume that the first two instructions in this + * bundle get executed. The remaining code must be safe even if + * they do not get executed. + */ + adds r17=-1024,r15 + mov r10=0 // default to successful syscall execution + epc +} + ;; + rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be" + LOAD_FSYSCALL_TABLE(r14) + + mov r16=IA64_KR(CURRENT) // 12 cycle read latency + tnat.nz p10,p9=r15 + mov r19=NR_syscalls-1 + ;; + shladd r18=r17,3,r14 + + srlz.d + cmp.ne p8,p0=r0,r0 // p8 <- FALSE + /* Note: if r17 is a NaT, p6 will be set to zero. */ + cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)? + ;; +(p6) ld8 r18=[r18] + mov r21=ar.fpsr + add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry + ;; +(p6) mov b7=r18 +(p6) tbit.z p8,p0=r18,0 +(p8) br.dptk.many b7 + +(p6) rsm psr.i + mov r27=ar.rsc + mov r26=ar.pfs + ;; + mov r29=psr // read psr (12 cyc load latency) +/* + * brl.cond doesn't work as intended because the linker would convert this branch + * into a branch to a PLT. Perhaps there will be a way to avoid this with some + * future version of the linker. In the meantime, we just use an indirect branch + * instead. + */ +#ifdef CONFIG_ITANIUM +(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down + ;; +(p6) mov b7=r14 +(p6) br.sptk.many b7 +#else + BRL_COND_FSYS_BUBBLE_DOWN(p6) +#endif + + mov r10=-1 +(p10) mov r8=EINVAL +(p9) mov r8=ENOSYS + FSYS_RETURN +END(__kernel_syscall_via_epc) + +# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) +# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) +# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) +# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET) +# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET) + +# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET +# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET +# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET +# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET +# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET +# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET +# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET +# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET +# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET +# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET +# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET +# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET +# define base0 r2 +# define base1 r3 + /* + * When we get here, the memory stack looks like this: + * + * +===============================+ + * | | + * // struct sigframe // + * | | + * +-------------------------------+ <-- sp+16 + * | 16 byte of scratch | + * | space | + * +-------------------------------+ <-- sp + * + * The register stack looks _exactly_ the way it looked at the time the signal + * occurred. In other words, we're treading on a potential mine-field: each + * incoming general register may be a NaT value (including sp, in which case the + * process ends up dying with a SIGSEGV). + * + * The first thing need to do is a cover to get the registers onto the backing + * store. Once that is done, we invoke the signal handler which may modify some + * of the machine state. After returning from the signal handler, we return + * control to the previous context by executing a sigreturn system call. A signal + * handler may call the rt_sigreturn() function to directly return to a given + * sigcontext. However, the user-level sigreturn() needs to do much more than + * calling the rt_sigreturn() system call as it needs to unwind the stack to + * restore preserved registers that may have been saved on the signal handler's + * call stack. + */ + +#define SIGTRAMP_SAVES \ + .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \ + .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \ + .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \ + .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \ + .savesp pr, PR_OFF+SIGCONTEXT_OFF; \ + .savesp rp, RP_OFF+SIGCONTEXT_OFF; \ + .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \ + .vframesp SP_OFF+SIGCONTEXT_OFF + +GLOBAL_ENTRY(__kernel_sigtramp) + // describe the state that is active when we get here: + .prologue + SIGTRAMP_SAVES + .body + + .label_state 1 + + adds base0=SIGHANDLER_OFF,sp + adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp + br.call.sptk.many rp=1f +1: + ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel + ld8 r15=[base1] // get address of new RBS base (or NULL) + cover // push args in interrupted frame onto backing store + ;; + cmp.ne p1,p0=r15,r0 // do we need to switch rbs? (note: pr is saved by kernel) + mov.m r9=ar.bsp // fetch ar.bsp + .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF +(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20) +back_from_setup_rbs: + alloc r8=ar.pfs,0,0,3,0 + ld8 out0=[base0],16 // load arg0 (signum) + adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1 + ;; + ld8 out1=[base1] // load arg1 (siginfop) + ld8 r10=[r17],8 // get signal handler entry point + ;; + ld8 out2=[base0] // load arg2 (sigcontextp) + ld8 gp=[r17] // get signal handler's global pointer + adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF + st8 [base0]=r9 // save sc_ar_bsp + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + ;; + stf.spill [base0]=f6,32 + stf.spill [base1]=f7,32 + ;; + stf.spill [base0]=f8,32 + stf.spill [base1]=f9,32 + mov b6=r10 + ;; + stf.spill [base0]=f10,32 + stf.spill [base1]=f11,32 + ;; + stf.spill [base0]=f12,32 + stf.spill [base1]=f13,32 + ;; + stf.spill [base0]=f14,32 + stf.spill [base1]=f15,32 + br.call.sptk.many rp=b6 // call the signal handler +.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r15=[base0] // fetch sc_ar_bsp + mov r14=ar.bsp + ;; + cmp.ne p1,p0=r14,r15 // do we need to restore the rbs? +(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7) + ;; +back_from_restore_rbs: + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + ;; + ldf.fill f6=[base0],32 + ldf.fill f7=[base1],32 + ;; + ldf.fill f8=[base0],32 + ldf.fill f9=[base1],32 + ;; + ldf.fill f10=[base0],32 + ldf.fill f11=[base1],32 + ;; + ldf.fill f12=[base0],32 + ldf.fill f13=[base1],32 + ;; + ldf.fill f14=[base0],32 + ldf.fill f15=[base1],32 + mov r15=__NR_rt_sigreturn + .restore sp // pop .prologue + break __BREAK_SYSCALL + + .prologue + SIGTRAMP_SAVES +setup_rbs: + mov ar.rsc=0 // put RSE into enforced lazy mode + ;; + .save ar.rnat, r19 + mov r19=ar.rnat // save RNaT before switching backing store area + adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp + + mov r18=ar.bspstore + mov ar.bspstore=r15 // switch over to new register backing store area + ;; + + .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF + st8 [r14]=r19 // save sc_ar_rnat + .body + mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16 + adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp + ;; + invala + sub r15=r16,r15 + extr.u r20=r18,3,6 + ;; + mov ar.rsc=0xf // set RSE into eager mode, pl 3 + cmp.eq p8,p0=63,r20 + shl r15=r15,16 + ;; + st8 [r14]=r15 // save sc_loadrs +(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now + .restore sp // pop .prologue + br.cond.sptk back_from_setup_rbs + + .prologue + SIGTRAMP_SAVES + .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF + .body +restore_rbs: + // On input: + // r14 = bsp1 (bsp at the time of return from signal handler) + // r15 = bsp0 (bsp at the time the signal occurred) + // + // Here, we need to calculate bspstore0, the value that ar.bspstore needs + // to be set to, based on bsp0 and the size of the dirty partition on + // the alternate stack (sc_loadrs >> 16). This can be done with the + // following algorithm: + // + // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1)); + // + // This is what the code below does. + // + alloc r2=ar.pfs,0,0,0,0 // alloc null frame + adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp + adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r17=[r16] + ld8 r16=[r18] // get new rnat + extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0) + ;; + mov ar.rsc=r17 // put RSE into enforced lazy mode + shr.u r17=r17,16 + ;; + sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16) + shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19) + ;; + loadrs // restore dirty partition + extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1) + ;; + add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19) + ;; + shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40 + ;; + sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1) + movl r17=0x8208208208208209 + ;; + add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1) + setf.sig f7=r17 + cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)? + ;; +(p7) adds r18=-62,r18 // delta -= 62 + ;; + setf.sig f6=r18 + ;; + xmpy.h f6=f6,f7 + ;; + getf.sig r17=f6 + ;; + add r17=r17,r18 + shr r18=r18,63 + ;; + shr r17=r17,5 + ;; + sub r17=r17,r18 // r17 = delta/63 + ;; + add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1) + ;; + shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1)) + ;; + mov ar.bspstore=r15 // switch back to old register backing store area + ;; + mov ar.rnat=r16 // restore RNaT + mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) + // invala not necessary as that will happen when returning to user-mode + br.cond.sptk back_from_restore_rbs +END(__kernel_sigtramp) diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S new file mode 100644 index 000000000000..e1e4aba9ecd0 --- /dev/null +++ b/arch/ia64/kernel/gate.lds.S @@ -0,0 +1,95 @@ +/* + * Linker script for gate DSO. The gate pages are an ELF shared object prelinked to its + * virtual address, with only one read-only segment and one execute-only segment (both fit + * in one page). This script controls its layout. + */ + +#include <linux/config.h> + +#include <asm/system.h> + +SECTIONS +{ + . = GATE_ADDR + SIZEOF_HEADERS; + + .hash : { *(.hash) } :readable + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .dynamic : { *(.dynamic) } :readable :dynamic + + /* + * This linker script is used both with -r and with -shared. For the layouts to match, + * we need to skip more than enough space for the dynamic symbol table et al. If this + * amount is insufficient, ld -shared will barf. Just increase it here. + */ + . = GATE_ADDR + 0x500; + + .data.patch : { + __start_gate_mckinley_e9_patchlist = .; + *(.data.patch.mckinley_e9) + __end_gate_mckinley_e9_patchlist = .; + + __start_gate_vtop_patchlist = .; + *(.data.patch.vtop) + __end_gate_vtop_patchlist = .; + + __start_gate_fsyscall_patchlist = .; + *(.data.patch.fsyscall_table) + __end_gate_fsyscall_patchlist = .; + + __start_gate_brl_fsys_bubble_down_patchlist = .; + *(.data.patch.brl_fsys_bubble_down) + __end_gate_brl_fsys_bubble_down_patchlist = .; + } :readable + .IA_64.unwind_info : { *(.IA_64.unwind_info*) } + .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind +#ifdef HAVE_BUGGY_SEGREL + .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable +#else + . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1)); + .text : { *(.text) *(.text.*) } :epc +#endif + + /DISCARD/ : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(__ex_table) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */ +#ifndef HAVE_BUGGY_SEGREL + epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */ +#endif + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */ +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.5 { + global: + __kernel_syscall_via_break; + __kernel_syscall_via_epc; + __kernel_sigtramp; + + local: *; + }; +} + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S new file mode 100644 index 000000000000..105c7fec8c6d --- /dev/null +++ b/arch/ia64/kernel/head.S @@ -0,0 +1,996 @@ +/* + * Here is where the ball gets rolling as far as the kernel is concerned. + * When control is transferred to _start, the bootload has already + * loaded us to the correct address. All that's left to do here is + * to set up the kernel's global pointer and jump to the kernel + * entry point. + * + * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> + * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com> + * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/fpu.h> +#include <asm/kregs.h> +#include <asm/mmu_context.h> +#include <asm/offsets.h> +#include <asm/pal.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> + + .section __special_page_section,"ax" + + .global empty_zero_page +empty_zero_page: + .skip PAGE_SIZE + + .global swapper_pg_dir +swapper_pg_dir: + .skip PAGE_SIZE + + .rodata +halt_msg: + stringz "Halting kernel\n" + + .text + + .global start_ap + + /* + * Start the kernel. When the bootloader passes control to _start(), r28 + * points to the address of the boot parameter area. Execution reaches + * here in physical mode. + */ +GLOBAL_ENTRY(_start) +start_ap: + .prologue + .save rp, r0 // terminate unwind chain with a NULL rp + .body + + rsm psr.i | psr.ic + ;; + srlz.i + ;; + /* + * Initialize kernel region registers: + * rr[0]: VHPT enabled, page size = PAGE_SHIFT + * rr[1]: VHPT enabled, page size = PAGE_SHIFT + * rr[2]: VHPT enabled, page size = PAGE_SHIFT + * rr[3]: VHPT enabled, page size = PAGE_SHIFT + * rr[4]: VHPT enabled, page size = PAGE_SHIFT + * rr[5]: VHPT enabled, page size = PAGE_SHIFT + * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT + * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT + * We initialize all of them to prevent inadvertently assuming + * something about the state of address translation early in boot. + */ + mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r7=(0<<61) + mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r9=(1<<61) + mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r11=(2<<61) + mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r13=(3<<61) + mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r15=(4<<61) + mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r17=(5<<61) + mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) + movl r19=(6<<61) + mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) + movl r21=(7<<61) + ;; + mov rr[r7]=r6 + mov rr[r9]=r8 + mov rr[r11]=r10 + mov rr[r13]=r12 + mov rr[r15]=r14 + mov rr[r17]=r16 + mov rr[r19]=r18 + mov rr[r21]=r20 + ;; + /* + * Now pin mappings into the TLB for kernel text and data + */ + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + movl r17=KERNEL_START + ;; + mov cr.itir=r18 + mov cr.ifa=r17 + mov r16=IA64_TR_KERNEL + mov r3=ip + movl r18=PAGE_KERNEL + ;; + dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT + ;; + or r18=r2,r18 + ;; + srlz.i + ;; + itr.i itr[r16]=r18 + ;; + itr.d dtr[r16]=r18 + ;; + srlz.i + + /* + * Switch into virtual mode: + */ + movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ + |IA64_PSR_DI) + ;; + mov cr.ipsr=r16 + movl r17=1f + ;; + mov cr.iip=r17 + mov cr.ifs=r0 + ;; + rfi + ;; +1: // now we are in virtual mode + + // set IVT entry point---can't access I/O ports without it + movl r3=ia64_ivt + ;; + mov cr.iva=r3 + movl r2=FPSR_DEFAULT + ;; + srlz.i + movl gp=__gp + + mov ar.fpsr=r2 + ;; + +#define isAP p2 // are we an Application Processor? +#define isBP p3 // are we the Bootstrap Processor? + +#ifdef CONFIG_SMP + /* + * Find the init_task for the currently booting CPU. At poweron, and in + * UP mode, task_for_booting_cpu is NULL. + */ + movl r3=task_for_booting_cpu + ;; + ld8 r3=[r3] + movl r2=init_task + ;; + cmp.eq isBP,isAP=r3,r0 + ;; +(isAP) mov r2=r3 +#else + movl r2=init_task + cmp.eq isBP,isAP=r0,r0 +#endif + ;; + tpa r3=r2 // r3 == phys addr of task struct + mov r16=-1 +(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it + + // load mapping for stack (virtaddr in r2, physaddr in r3) + rsm psr.ic + movl r17=PAGE_KERNEL + ;; + srlz.d + dep r18=0,r3,0,12 + ;; + or r18=r17,r18 + dep r2=-1,r3,61,3 // IMVA of task + ;; + mov r17=rr[r2] + shr.u r16=r3,IA64_GRANULE_SHIFT + ;; + dep r17=0,r17,8,24 + ;; + mov cr.itir=r17 + mov cr.ifa=r2 + + mov r19=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r19]=r18 + ;; + ssm psr.ic + srlz.d + ;; + +.load_current: + // load the "current" pointer (r13) and ar.k6 with the current task + mov IA64_KR(CURRENT)=r2 // virtual address + mov IA64_KR(CURRENT_STACK)=r16 + mov r13=r2 + /* + * Reserve space at the top of the stack for "struct pt_regs". Kernel threads + * don't store interesting values in that structure, but the space still needs + * to be there because time-critical stuff such as the context switching can + * be implemented more efficiently (for example, __switch_to() + * always sets the psr.dfh bit of the task it is switching to). + */ + addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 + addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE + mov ar.rsc=0 // place RSE in enforced lazy mode + ;; + loadrs // clear the dirty partition + ;; + mov ar.bspstore=r2 // establish the new RSE stack + ;; + mov ar.rsc=0x3 // place RSE in eager mode + +(isBP) dep r28=-1,r28,61,3 // make address virtual +(isBP) movl r2=ia64_boot_param + ;; +(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader + +#ifdef CONFIG_SMP +(isAP) br.call.sptk.many rp=start_secondary +.ret0: +(isAP) br.cond.sptk self +#endif + + // This is executed by the bootstrap processor (bsp) only: + +#ifdef CONFIG_IA64_FW_EMU + // initialize PAL & SAL emulator: + br.call.sptk.many rp=sys_fw_init +.ret1: +#endif + br.call.sptk.many rp=start_kernel +.ret2: addl r3=@ltoff(halt_msg),gp + ;; + alloc r2=ar.pfs,8,0,2,0 + ;; + ld8 out0=[r3] + br.call.sptk.many b0=console_print + +self: hint @pause + br.sptk.many self // endless loop +END(_start) + +GLOBAL_ENTRY(ia64_save_debug_regs) + alloc r16=ar.pfs,1,0,0,0 + mov r20=ar.lc // preserve ar.lc + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=0 + add r19=IA64_NUM_DBG_REGS*8,in0 + ;; +1: mov r16=dbr[r18] +#ifdef CONFIG_ITANIUM + ;; + srlz.d +#endif + mov r17=ibr[r18] + add r18=1,r18 + ;; + st8.nta [in0]=r16,8 + st8.nta [r19]=r17,8 + br.cloop.sptk.many 1b + ;; + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.many rp +END(ia64_save_debug_regs) + +GLOBAL_ENTRY(ia64_load_debug_regs) + alloc r16=ar.pfs,1,0,0,0 + lfetch.nta [in0] + mov r20=ar.lc // preserve ar.lc + add r19=IA64_NUM_DBG_REGS*8,in0 + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=-1 + ;; +1: ld8.nta r16=[in0],8 + ld8.nta r17=[r19],8 + add r18=1,r18 + ;; + mov dbr[r18]=r16 +#ifdef CONFIG_ITANIUM + ;; + srlz.d // Errata 132 (NoFix status) +#endif + mov ibr[r18]=r17 + br.cloop.sptk.many 1b + ;; + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.many rp +END(ia64_load_debug_regs) + +GLOBAL_ENTRY(__ia64_save_fpu) + alloc r2=ar.pfs,1,4,0,0 + adds loc0=96*16-16,in0 + adds loc1=96*16-16-128,in0 + ;; + stf.spill.nta [loc0]=f127,-256 + stf.spill.nta [loc1]=f119,-256 + ;; + stf.spill.nta [loc0]=f111,-256 + stf.spill.nta [loc1]=f103,-256 + ;; + stf.spill.nta [loc0]=f95,-256 + stf.spill.nta [loc1]=f87,-256 + ;; + stf.spill.nta [loc0]=f79,-256 + stf.spill.nta [loc1]=f71,-256 + ;; + stf.spill.nta [loc0]=f63,-256 + stf.spill.nta [loc1]=f55,-256 + adds loc2=96*16-32,in0 + ;; + stf.spill.nta [loc0]=f47,-256 + stf.spill.nta [loc1]=f39,-256 + adds loc3=96*16-32-128,in0 + ;; + stf.spill.nta [loc2]=f126,-256 + stf.spill.nta [loc3]=f118,-256 + ;; + stf.spill.nta [loc2]=f110,-256 + stf.spill.nta [loc3]=f102,-256 + ;; + stf.spill.nta [loc2]=f94,-256 + stf.spill.nta [loc3]=f86,-256 + ;; + stf.spill.nta [loc2]=f78,-256 + stf.spill.nta [loc3]=f70,-256 + ;; + stf.spill.nta [loc2]=f62,-256 + stf.spill.nta [loc3]=f54,-256 + adds loc0=96*16-48,in0 + ;; + stf.spill.nta [loc2]=f46,-256 + stf.spill.nta [loc3]=f38,-256 + adds loc1=96*16-48-128,in0 + ;; + stf.spill.nta [loc0]=f125,-256 + stf.spill.nta [loc1]=f117,-256 + ;; + stf.spill.nta [loc0]=f109,-256 + stf.spill.nta [loc1]=f101,-256 + ;; + stf.spill.nta [loc0]=f93,-256 + stf.spill.nta [loc1]=f85,-256 + ;; + stf.spill.nta [loc0]=f77,-256 + stf.spill.nta [loc1]=f69,-256 + ;; + stf.spill.nta [loc0]=f61,-256 + stf.spill.nta [loc1]=f53,-256 + adds loc2=96*16-64,in0 + ;; + stf.spill.nta [loc0]=f45,-256 + stf.spill.nta [loc1]=f37,-256 + adds loc3=96*16-64-128,in0 + ;; + stf.spill.nta [loc2]=f124,-256 + stf.spill.nta [loc3]=f116,-256 + ;; + stf.spill.nta [loc2]=f108,-256 + stf.spill.nta [loc3]=f100,-256 + ;; + stf.spill.nta [loc2]=f92,-256 + stf.spill.nta [loc3]=f84,-256 + ;; + stf.spill.nta [loc2]=f76,-256 + stf.spill.nta [loc3]=f68,-256 + ;; + stf.spill.nta [loc2]=f60,-256 + stf.spill.nta [loc3]=f52,-256 + adds loc0=96*16-80,in0 + ;; + stf.spill.nta [loc2]=f44,-256 + stf.spill.nta [loc3]=f36,-256 + adds loc1=96*16-80-128,in0 + ;; + stf.spill.nta [loc0]=f123,-256 + stf.spill.nta [loc1]=f115,-256 + ;; + stf.spill.nta [loc0]=f107,-256 + stf.spill.nta [loc1]=f99,-256 + ;; + stf.spill.nta [loc0]=f91,-256 + stf.spill.nta [loc1]=f83,-256 + ;; + stf.spill.nta [loc0]=f75,-256 + stf.spill.nta [loc1]=f67,-256 + ;; + stf.spill.nta [loc0]=f59,-256 + stf.spill.nta [loc1]=f51,-256 + adds loc2=96*16-96,in0 + ;; + stf.spill.nta [loc0]=f43,-256 + stf.spill.nta [loc1]=f35,-256 + adds loc3=96*16-96-128,in0 + ;; + stf.spill.nta [loc2]=f122,-256 + stf.spill.nta [loc3]=f114,-256 + ;; + stf.spill.nta [loc2]=f106,-256 + stf.spill.nta [loc3]=f98,-256 + ;; + stf.spill.nta [loc2]=f90,-256 + stf.spill.nta [loc3]=f82,-256 + ;; + stf.spill.nta [loc2]=f74,-256 + stf.spill.nta [loc3]=f66,-256 + ;; + stf.spill.nta [loc2]=f58,-256 + stf.spill.nta [loc3]=f50,-256 + adds loc0=96*16-112,in0 + ;; + stf.spill.nta [loc2]=f42,-256 + stf.spill.nta [loc3]=f34,-256 + adds loc1=96*16-112-128,in0 + ;; + stf.spill.nta [loc0]=f121,-256 + stf.spill.nta [loc1]=f113,-256 + ;; + stf.spill.nta [loc0]=f105,-256 + stf.spill.nta [loc1]=f97,-256 + ;; + stf.spill.nta [loc0]=f89,-256 + stf.spill.nta [loc1]=f81,-256 + ;; + stf.spill.nta [loc0]=f73,-256 + stf.spill.nta [loc1]=f65,-256 + ;; + stf.spill.nta [loc0]=f57,-256 + stf.spill.nta [loc1]=f49,-256 + adds loc2=96*16-128,in0 + ;; + stf.spill.nta [loc0]=f41,-256 + stf.spill.nta [loc1]=f33,-256 + adds loc3=96*16-128-128,in0 + ;; + stf.spill.nta [loc2]=f120,-256 + stf.spill.nta [loc3]=f112,-256 + ;; + stf.spill.nta [loc2]=f104,-256 + stf.spill.nta [loc3]=f96,-256 + ;; + stf.spill.nta [loc2]=f88,-256 + stf.spill.nta [loc3]=f80,-256 + ;; + stf.spill.nta [loc2]=f72,-256 + stf.spill.nta [loc3]=f64,-256 + ;; + stf.spill.nta [loc2]=f56,-256 + stf.spill.nta [loc3]=f48,-256 + ;; + stf.spill.nta [loc2]=f40 + stf.spill.nta [loc3]=f32 + br.ret.sptk.many rp +END(__ia64_save_fpu) + +GLOBAL_ENTRY(__ia64_load_fpu) + alloc r2=ar.pfs,1,2,0,0 + adds r3=128,in0 + adds r14=256,in0 + adds r15=384,in0 + mov loc0=512 + mov loc1=-1024+16 + ;; + ldf.fill.nta f32=[in0],loc0 + ldf.fill.nta f40=[ r3],loc0 + ldf.fill.nta f48=[r14],loc0 + ldf.fill.nta f56=[r15],loc0 + ;; + ldf.fill.nta f64=[in0],loc0 + ldf.fill.nta f72=[ r3],loc0 + ldf.fill.nta f80=[r14],loc0 + ldf.fill.nta f88=[r15],loc0 + ;; + ldf.fill.nta f96=[in0],loc1 + ldf.fill.nta f104=[ r3],loc1 + ldf.fill.nta f112=[r14],loc1 + ldf.fill.nta f120=[r15],loc1 + ;; + ldf.fill.nta f33=[in0],loc0 + ldf.fill.nta f41=[ r3],loc0 + ldf.fill.nta f49=[r14],loc0 + ldf.fill.nta f57=[r15],loc0 + ;; + ldf.fill.nta f65=[in0],loc0 + ldf.fill.nta f73=[ r3],loc0 + ldf.fill.nta f81=[r14],loc0 + ldf.fill.nta f89=[r15],loc0 + ;; + ldf.fill.nta f97=[in0],loc1 + ldf.fill.nta f105=[ r3],loc1 + ldf.fill.nta f113=[r14],loc1 + ldf.fill.nta f121=[r15],loc1 + ;; + ldf.fill.nta f34=[in0],loc0 + ldf.fill.nta f42=[ r3],loc0 + ldf.fill.nta f50=[r14],loc0 + ldf.fill.nta f58=[r15],loc0 + ;; + ldf.fill.nta f66=[in0],loc0 + ldf.fill.nta f74=[ r3],loc0 + ldf.fill.nta f82=[r14],loc0 + ldf.fill.nta f90=[r15],loc0 + ;; + ldf.fill.nta f98=[in0],loc1 + ldf.fill.nta f106=[ r3],loc1 + ldf.fill.nta f114=[r14],loc1 + ldf.fill.nta f122=[r15],loc1 + ;; + ldf.fill.nta f35=[in0],loc0 + ldf.fill.nta f43=[ r3],loc0 + ldf.fill.nta f51=[r14],loc0 + ldf.fill.nta f59=[r15],loc0 + ;; + ldf.fill.nta f67=[in0],loc0 + ldf.fill.nta f75=[ r3],loc0 + ldf.fill.nta f83=[r14],loc0 + ldf.fill.nta f91=[r15],loc0 + ;; + ldf.fill.nta f99=[in0],loc1 + ldf.fill.nta f107=[ r3],loc1 + ldf.fill.nta f115=[r14],loc1 + ldf.fill.nta f123=[r15],loc1 + ;; + ldf.fill.nta f36=[in0],loc0 + ldf.fill.nta f44=[ r3],loc0 + ldf.fill.nta f52=[r14],loc0 + ldf.fill.nta f60=[r15],loc0 + ;; + ldf.fill.nta f68=[in0],loc0 + ldf.fill.nta f76=[ r3],loc0 + ldf.fill.nta f84=[r14],loc0 + ldf.fill.nta f92=[r15],loc0 + ;; + ldf.fill.nta f100=[in0],loc1 + ldf.fill.nta f108=[ r3],loc1 + ldf.fill.nta f116=[r14],loc1 + ldf.fill.nta f124=[r15],loc1 + ;; + ldf.fill.nta f37=[in0],loc0 + ldf.fill.nta f45=[ r3],loc0 + ldf.fill.nta f53=[r14],loc0 + ldf.fill.nta f61=[r15],loc0 + ;; + ldf.fill.nta f69=[in0],loc0 + ldf.fill.nta f77=[ r3],loc0 + ldf.fill.nta f85=[r14],loc0 + ldf.fill.nta f93=[r15],loc0 + ;; + ldf.fill.nta f101=[in0],loc1 + ldf.fill.nta f109=[ r3],loc1 + ldf.fill.nta f117=[r14],loc1 + ldf.fill.nta f125=[r15],loc1 + ;; + ldf.fill.nta f38 =[in0],loc0 + ldf.fill.nta f46 =[ r3],loc0 + ldf.fill.nta f54 =[r14],loc0 + ldf.fill.nta f62 =[r15],loc0 + ;; + ldf.fill.nta f70 =[in0],loc0 + ldf.fill.nta f78 =[ r3],loc0 + ldf.fill.nta f86 =[r14],loc0 + ldf.fill.nta f94 =[r15],loc0 + ;; + ldf.fill.nta f102=[in0],loc1 + ldf.fill.nta f110=[ r3],loc1 + ldf.fill.nta f118=[r14],loc1 + ldf.fill.nta f126=[r15],loc1 + ;; + ldf.fill.nta f39 =[in0],loc0 + ldf.fill.nta f47 =[ r3],loc0 + ldf.fill.nta f55 =[r14],loc0 + ldf.fill.nta f63 =[r15],loc0 + ;; + ldf.fill.nta f71 =[in0],loc0 + ldf.fill.nta f79 =[ r3],loc0 + ldf.fill.nta f87 =[r14],loc0 + ldf.fill.nta f95 =[r15],loc0 + ;; + ldf.fill.nta f103=[in0] + ldf.fill.nta f111=[ r3] + ldf.fill.nta f119=[r14] + ldf.fill.nta f127=[r15] + br.ret.sptk.many rp +END(__ia64_load_fpu) + +GLOBAL_ENTRY(__ia64_init_fpu) + stf.spill [sp]=f0 // M3 + mov f32=f0 // F + nop.b 0 + + ldfps f33,f34=[sp] // M0 + ldfps f35,f36=[sp] // M1 + mov f37=f0 // F + ;; + + setf.s f38=r0 // M2 + setf.s f39=r0 // M3 + mov f40=f0 // F + + ldfps f41,f42=[sp] // M0 + ldfps f43,f44=[sp] // M1 + mov f45=f0 // F + + setf.s f46=r0 // M2 + setf.s f47=r0 // M3 + mov f48=f0 // F + + ldfps f49,f50=[sp] // M0 + ldfps f51,f52=[sp] // M1 + mov f53=f0 // F + + setf.s f54=r0 // M2 + setf.s f55=r0 // M3 + mov f56=f0 // F + + ldfps f57,f58=[sp] // M0 + ldfps f59,f60=[sp] // M1 + mov f61=f0 // F + + setf.s f62=r0 // M2 + setf.s f63=r0 // M3 + mov f64=f0 // F + + ldfps f65,f66=[sp] // M0 + ldfps f67,f68=[sp] // M1 + mov f69=f0 // F + + setf.s f70=r0 // M2 + setf.s f71=r0 // M3 + mov f72=f0 // F + + ldfps f73,f74=[sp] // M0 + ldfps f75,f76=[sp] // M1 + mov f77=f0 // F + + setf.s f78=r0 // M2 + setf.s f79=r0 // M3 + mov f80=f0 // F + + ldfps f81,f82=[sp] // M0 + ldfps f83,f84=[sp] // M1 + mov f85=f0 // F + + setf.s f86=r0 // M2 + setf.s f87=r0 // M3 + mov f88=f0 // F + + /* + * When the instructions are cached, it would be faster to initialize + * the remaining registers with simply mov instructions (F-unit). + * This gets the time down to ~29 cycles. However, this would use up + * 33 bundles, whereas continuing with the above pattern yields + * 10 bundles and ~30 cycles. + */ + + ldfps f89,f90=[sp] // M0 + ldfps f91,f92=[sp] // M1 + mov f93=f0 // F + + setf.s f94=r0 // M2 + setf.s f95=r0 // M3 + mov f96=f0 // F + + ldfps f97,f98=[sp] // M0 + ldfps f99,f100=[sp] // M1 + mov f101=f0 // F + + setf.s f102=r0 // M2 + setf.s f103=r0 // M3 + mov f104=f0 // F + + ldfps f105,f106=[sp] // M0 + ldfps f107,f108=[sp] // M1 + mov f109=f0 // F + + setf.s f110=r0 // M2 + setf.s f111=r0 // M3 + mov f112=f0 // F + + ldfps f113,f114=[sp] // M0 + ldfps f115,f116=[sp] // M1 + mov f117=f0 // F + + setf.s f118=r0 // M2 + setf.s f119=r0 // M3 + mov f120=f0 // F + + ldfps f121,f122=[sp] // M0 + ldfps f123,f124=[sp] // M1 + mov f125=f0 // F + + setf.s f126=r0 // M2 + setf.s f127=r0 // M3 + br.ret.sptk.many rp // F +END(__ia64_init_fpu) + +/* + * Switch execution mode from virtual to physical + * + * Inputs: + * r16 = new psr to establish + * Output: + * r19 = old virtual address of ar.bsp + * r20 = old virtual address of sp + * + * Note: RSE must already be in enforced lazy mode + */ +GLOBAL_ENTRY(ia64_switch_mode_phys) + { + alloc r2=ar.pfs,0,0,0,0 + rsm psr.i | psr.ic // disable interrupts and interrupt collection + mov r15=ip + } + ;; + { + flushrs // must be first insn in group + srlz.i + } + ;; + mov cr.ipsr=r16 // set new PSR + add r3=1f-ia64_switch_mode_phys,r15 + + mov r19=ar.bsp + mov r20=sp + mov r14=rp // get return address into a general register + ;; + + // going to physical mode, use tpa to translate virt->phys + tpa r17=r19 + tpa r3=r3 + tpa sp=sp + tpa r14=r14 + ;; + + mov r18=ar.rnat // save ar.rnat + mov ar.bspstore=r17 // this steps on ar.rnat + mov cr.iip=r3 + mov cr.ifs=r0 + ;; + mov ar.rnat=r18 // restore ar.rnat + rfi // must be last insn in group + ;; +1: mov rp=r14 + br.ret.sptk.many rp +END(ia64_switch_mode_phys) + +/* + * Switch execution mode from physical to virtual + * + * Inputs: + * r16 = new psr to establish + * r19 = new bspstore to establish + * r20 = new sp to establish + * + * Note: RSE must already be in enforced lazy mode + */ +GLOBAL_ENTRY(ia64_switch_mode_virt) + { + alloc r2=ar.pfs,0,0,0,0 + rsm psr.i | psr.ic // disable interrupts and interrupt collection + mov r15=ip + } + ;; + { + flushrs // must be first insn in group + srlz.i + } + ;; + mov cr.ipsr=r16 // set new PSR + add r3=1f-ia64_switch_mode_virt,r15 + + mov r14=rp // get return address into a general register + ;; + + // going to virtual + // - for code addresses, set upper bits of addr to KERNEL_START + // - for stack addresses, copy from input argument + movl r18=KERNEL_START + dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT + dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT + mov sp=r20 + ;; + or r3=r3,r18 + or r14=r14,r18 + ;; + + mov r18=ar.rnat // save ar.rnat + mov ar.bspstore=r19 // this steps on ar.rnat + mov cr.iip=r3 + mov cr.ifs=r0 + ;; + mov ar.rnat=r18 // restore ar.rnat + rfi // must be last insn in group + ;; +1: mov rp=r14 + br.ret.sptk.many rp +END(ia64_switch_mode_virt) + +GLOBAL_ENTRY(ia64_delay_loop) + .prologue +{ nop 0 // work around GAS unwind info generation bug... + .save ar.lc,r2 + mov r2=ar.lc + .body + ;; + mov ar.lc=r32 +} + ;; + // force loop to be 32-byte aligned (GAS bug means we cannot use .align + // inside function body without corrupting unwind info). +{ nop 0 } +1: br.cloop.sptk.few 1b + ;; + mov ar.lc=r2 + br.ret.sptk.many rp +END(ia64_delay_loop) + +/* + * Return a CPU-local timestamp in nano-seconds. This timestamp is + * NOT synchronized across CPUs its return value must never be + * compared against the values returned on another CPU. The usage in + * kernel/sched.c ensures that. + * + * The return-value of sched_clock() is NOT supposed to wrap-around. + * If it did, it would cause some scheduling hiccups (at the worst). + * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even + * that would happen only once every 5+ years. + * + * The code below basically calculates: + * + * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT + * + * except that the multiplication and the shift are done with 128-bit + * intermediate precision so that we can produce a full 64-bit result. + */ +GLOBAL_ENTRY(sched_clock) + addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 + mov.m r9=ar.itc // fetch cycle-counter (35 cyc) + ;; + ldf8 f8=[r8] + ;; + setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8... + ;; + xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) + xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product + ;; + getf.sig r8=f10 // (5 cyc) + getf.sig r9=f11 + ;; + shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT + br.ret.sptk.many rp +END(sched_clock) + +GLOBAL_ENTRY(start_kernel_thread) + .prologue + .save rp, r0 // this is the end of the call-chain + .body + alloc r2 = ar.pfs, 0, 0, 2, 0 + mov out0 = r9 + mov out1 = r11;; + br.call.sptk.many rp = kernel_thread_helper;; + mov out0 = r8 + br.call.sptk.many rp = sys_exit;; +1: br.sptk.few 1b // not reached +END(start_kernel_thread) + +#ifdef CONFIG_IA64_BRL_EMU + +/* + * Assembly routines used by brl_emu.c to set preserved register state. + */ + +#define SET_REG(reg) \ + GLOBAL_ENTRY(ia64_set_##reg); \ + alloc r16=ar.pfs,1,0,0,0; \ + mov reg=r32; \ + ;; \ + br.ret.sptk.many rp; \ + END(ia64_set_##reg) + +SET_REG(b1); +SET_REG(b2); +SET_REG(b3); +SET_REG(b4); +SET_REG(b5); + +#endif /* CONFIG_IA64_BRL_EMU */ + +#ifdef CONFIG_SMP + /* + * This routine handles spinlock contention. It uses a non-standard calling + * convention to avoid converting leaf routines into interior routines. Because + * of this special convention, there are several restrictions: + * + * - do not use gp relative variables, this code is called from the kernel + * and from modules, r1 is undefined. + * - do not use stacked registers, the caller owns them. + * - do not use the scratch stack space, the caller owns it. + * - do not use any registers other than the ones listed below + * + * Inputs: + * ar.pfs - saved CFM of caller + * ar.ccv - 0 (and available for use) + * r27 - flags from spin_lock_irqsave or 0. Must be preserved. + * r28 - available for use. + * r29 - available for use. + * r30 - available for use. + * r31 - address of lock, available for use. + * b6 - return address + * p14 - available for use. + * p15 - used to track flag status. + * + * If you patch this code to use more registers, do not forget to update + * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h. + */ + +#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) + +GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4) + .prologue + .save ar.pfs, r0 // this code effectively has a zero frame size + .save rp, r28 + .body + nop 0 + tbit.nz p15,p0=r27,IA64_PSR_I_BIT + .restore sp // pop existing prologue after next insn + mov b6 = r28 + .prologue + .save ar.pfs, r0 + .altrp b6 + .body + ;; +(p15) ssm psr.i // reenable interrupts if they were on + // DavidM says that srlz.d is slow and is not required in this case +.wait: + // exponential backoff, kdb, lockmeter etc. go in here + hint @pause + ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word + nop 0 + ;; + cmp4.ne p14,p0=r30,r0 +(p14) br.cond.sptk.few .wait +(p15) rsm psr.i // disable interrupts if we reenabled them + br.cond.sptk.few b6 // lock is now free, try to acquire + .global ia64_spinlock_contention_pre3_4_end // for kernprof +ia64_spinlock_contention_pre3_4_end: +END(ia64_spinlock_contention_pre3_4) + +#else + +GLOBAL_ENTRY(ia64_spinlock_contention) + .prologue + .altrp b6 + .body + tbit.nz p15,p0=r27,IA64_PSR_I_BIT + ;; +.wait: +(p15) ssm psr.i // reenable interrupts if they were on + // DavidM says that srlz.d is slow and is not required in this case +.wait2: + // exponential backoff, kdb, lockmeter etc. go in here + hint @pause + ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word + ;; + cmp4.ne p14,p0=r30,r0 + mov r30 = 1 +(p14) br.cond.sptk.few .wait2 +(p15) rsm psr.i // disable interrupts if we reenabled them + ;; + cmpxchg4.acq r30=[r31], r30, ar.ccv + ;; + cmp4.ne p14,p0=r0,r30 +(p14) br.cond.sptk.few .wait + + br.ret.sptk.many b6 // lock is now taken +END(ia64_spinlock_contention) + +#endif + +#endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c new file mode 100644 index 000000000000..7bbf019c9867 --- /dev/null +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -0,0 +1,127 @@ +/* + * Architecture-specific kernel symbols + * + * Don't put any exports here unless it's defined in an assembler file. + * All other exports should be put directly after the definition. + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/string.h> +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memchr); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memscan); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strncat); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strstr); +EXPORT_SYMBOL(strpbrk); + +#include <asm/checksum.h> +EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */ + +#include <asm/semaphore.h> +EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__down_trylock); +EXPORT_SYMBOL(__up); + +#include <asm/page.h> +EXPORT_SYMBOL(clear_page); + +#ifdef CONFIG_VIRTUAL_MEM_MAP +#include <linux/bootmem.h> +EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */ +#endif + +#include <asm/processor.h> +EXPORT_SYMBOL(per_cpu__cpu_info); +#ifdef CONFIG_SMP +EXPORT_SYMBOL(per_cpu__local_per_cpu_offset); +#endif + +#include <asm/uaccess.h> +EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(__do_clear_user); +EXPORT_SYMBOL(__strlen_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); + +#include <asm/unistd.h> +EXPORT_SYMBOL(__ia64_syscall); + +/* from arch/ia64/lib */ +extern void __divsi3(void); +extern void __udivsi3(void); +extern void __modsi3(void); +extern void __umodsi3(void); +extern void __divdi3(void); +extern void __udivdi3(void); +extern void __moddi3(void); +extern void __umoddi3(void); + +EXPORT_SYMBOL(__divsi3); +EXPORT_SYMBOL(__udivsi3); +EXPORT_SYMBOL(__modsi3); +EXPORT_SYMBOL(__umodsi3); +EXPORT_SYMBOL(__divdi3); +EXPORT_SYMBOL(__udivdi3); +EXPORT_SYMBOL(__moddi3); +EXPORT_SYMBOL(__umoddi3); + +#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE) +extern void xor_ia64_2(void); +extern void xor_ia64_3(void); +extern void xor_ia64_4(void); +extern void xor_ia64_5(void); + +EXPORT_SYMBOL(xor_ia64_2); +EXPORT_SYMBOL(xor_ia64_3); +EXPORT_SYMBOL(xor_ia64_4); +EXPORT_SYMBOL(xor_ia64_5); +#endif + +#include <asm/pal.h> +EXPORT_SYMBOL(ia64_pal_call_phys_stacked); +EXPORT_SYMBOL(ia64_pal_call_phys_static); +EXPORT_SYMBOL(ia64_pal_call_stacked); +EXPORT_SYMBOL(ia64_pal_call_static); +EXPORT_SYMBOL(ia64_load_scratch_fpregs); +EXPORT_SYMBOL(ia64_save_scratch_fpregs); + +#include <asm/unwind.h> +EXPORT_SYMBOL(unw_init_running); + +#ifdef ASM_SUPPORTED +# ifdef CONFIG_SMP +# if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +/* + * This is not a normal routine and we don't want a function descriptor for it, so we use + * a fake declaration here. + */ +extern char ia64_spinlock_contention_pre3_4; +EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4); +# else +/* + * This is not a normal routine and we don't want a function descriptor for it, so we use + * a fake declaration here. + */ +extern char ia64_spinlock_contention; +EXPORT_SYMBOL(ia64_spinlock_contention); +# endif +# endif +#endif + +extern char ia64_ivt[]; +EXPORT_SYMBOL(ia64_ivt); diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c new file mode 100644 index 000000000000..b69c397ed1bf --- /dev/null +++ b/arch/ia64/kernel/init_task.c @@ -0,0 +1,46 @@ +/* + * This is where we statically allocate and initialize the initial + * task. + * + * Copyright (C) 1999, 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init_task.h> +#include <linux/mqueue.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* + * Initial task structure. + * + * We need to make sure that this is properly aligned due to the way process stacks are + * handled. This is done by having a special ".data.init_task" section... + */ +#define init_thread_info init_task_mem.s.thread_info + +union { + struct { + struct task_struct task; + struct thread_info thread_info; + } s; + unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; +} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{ + .task = INIT_TASK(init_task_mem.s.task), + .thread_info = INIT_THREAD_INFO(init_task_mem.s.task) +}}; + +EXPORT_SYMBOL(init_task); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c new file mode 100644 index 000000000000..c15be5c38f56 --- /dev/null +++ b/arch/ia64/kernel/iosapic.c @@ -0,0 +1,827 @@ +/* + * I/O SAPIC support. + * + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com> + * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com> + * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + * + * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code. + * In particular, we now have separate handlers for edge + * and level triggered interrupts. + * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation + * PCI to vector mapping, shared PCI interrupts. + * 00/10/27 D. Mosberger Document things a bit more to make them more understandable. + * Clean up much of the old IOSAPIC cruft. + * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for + * ACPI S5(SoftOff) support. + * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT + * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in + * iosapic_set_affinity(), initializations for + * /proc/irq/#/smp_affinity + * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. + * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq + * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping + * error + * 02/07/29 T. Kochi Allocate interrupt vectors dynamically + * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.) + * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code. + * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. + * Remove iosapic_address & gsi_base from external interfaces. + * Rationalize __init/__devinit attributes. + * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004 + * Updated to work with irq migration necessary for CPU Hotplug + */ +/* + * Here is what the interrupt logic between a PCI device and the kernel looks like: + * + * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The + * device is uniquely identified by its bus--, and slot-number (the function + * number does not matter here because all functions share the same interrupt + * lines). + * + * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller. + * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level + * triggered and use the same polarity). Each interrupt line has a unique Global + * System Interrupt (GSI) number which can be calculated as the sum of the controller's + * base GSI number and the IOSAPIC pin number to which the line connects. + * + * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the IOSAPIC pin + * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU. + * + * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as + * architecture-independent interrupt handling mechanism in Linux. As an + * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number + * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and + * IRQ. A platform can implement platform_irq_to_vector(irq) and + * platform_local_vector_to_irq(vector) APIs to differentiate the mapping. + * Please see also include/asm-ia64/hw_irq.h for those APIs. + * + * To sum up, there are three levels of mappings involved: + * + * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ + * + * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts. + * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this + * source code. + */ +#include <linux/config.h> + +#include <linux/acpi.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/string.h> + +#include <asm/delay.h> +#include <asm/hw_irq.h> +#include <asm/io.h> +#include <asm/iosapic.h> +#include <asm/machvec.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> + + +#undef DEBUG_INTERRUPT_ROUTING + +#ifdef DEBUG_INTERRUPT_ROUTING +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +static DEFINE_SPINLOCK(iosapic_lock); + +/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */ + +static struct iosapic_intr_info { + char __iomem *addr; /* base address of IOSAPIC */ + u32 low32; /* current value of low word of Redirection table entry */ + unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ + char rte_index; /* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */ + unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ + unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */ + unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ + int refcnt; /* reference counter */ +} iosapic_intr_info[IA64_NUM_VECTORS]; + +static struct iosapic { + char __iomem *addr; /* base address of IOSAPIC */ + unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ + unsigned short num_rte; /* number of RTE in this IOSAPIC */ +#ifdef CONFIG_NUMA + unsigned short node; /* numa node association via pxm */ +#endif +} iosapic_lists[NR_IOSAPICS]; + +static int num_iosapic; + +static unsigned char pcat_compat __initdata; /* 8259 compatibility flag */ + + +/* + * Find an IOSAPIC associated with a GSI + */ +static inline int +find_iosapic (unsigned int gsi) +{ + int i; + + for (i = 0; i < num_iosapic; i++) { + if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte) + return i; + } + + return -1; +} + +static inline int +_gsi_to_vector (unsigned int gsi) +{ + struct iosapic_intr_info *info; + + for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info) + if (info->gsi_base + info->rte_index == gsi) + return info - iosapic_intr_info; + return -1; +} + +/* + * Translate GSI number to the corresponding IA-64 interrupt vector. If no + * entry exists, return -1. + */ +inline int +gsi_to_vector (unsigned int gsi) +{ + return _gsi_to_vector(gsi); +} + +int +gsi_to_irq (unsigned int gsi) +{ + /* + * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq + * numbers... + */ + return _gsi_to_vector(gsi); +} + +static void +set_rte (unsigned int vector, unsigned int dest, int mask) +{ + unsigned long pol, trigger, dmode; + u32 low32, high32; + char __iomem *addr; + int rte_index; + char redir; + + DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); + + rte_index = iosapic_intr_info[vector].rte_index; + if (rte_index < 0) + return; /* not an IOSAPIC interrupt */ + + addr = iosapic_intr_info[vector].addr; + pol = iosapic_intr_info[vector].polarity; + trigger = iosapic_intr_info[vector].trigger; + dmode = iosapic_intr_info[vector].dmode; + vector &= (~IA64_IRQ_REDIRECTED); + + redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; + +#ifdef CONFIG_SMP + { + unsigned int irq; + + for (irq = 0; irq < NR_IRQS; ++irq) + if (irq_to_vector(irq) == vector) { + set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); + break; + } + } +#endif + + low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | + (trigger << IOSAPIC_TRIGGER_SHIFT) | + (dmode << IOSAPIC_DELIVERY_SHIFT) | + ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) | + vector); + + /* dest contains both id and eid */ + high32 = (dest << IOSAPIC_DEST_SHIFT); + + iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_intr_info[vector].low32 = low32; +} + +static void +nop (unsigned int vector) +{ + /* do nothing... */ +} + +static void +mask_irq (unsigned int irq) +{ + unsigned long flags; + char __iomem *addr; + u32 low32; + int rte_index; + ia64_vector vec = irq_to_vector(irq); + + addr = iosapic_intr_info[vec].addr; + rte_index = iosapic_intr_info[vec].rte_index; + + if (rte_index < 0) + return; /* not an IOSAPIC interrupt! */ + + spin_lock_irqsave(&iosapic_lock, flags); + { + /* set only the mask bit */ + low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + } + spin_unlock_irqrestore(&iosapic_lock, flags); +} + +static void +unmask_irq (unsigned int irq) +{ + unsigned long flags; + char __iomem *addr; + u32 low32; + int rte_index; + ia64_vector vec = irq_to_vector(irq); + + addr = iosapic_intr_info[vec].addr; + rte_index = iosapic_intr_info[vec].rte_index; + if (rte_index < 0) + return; /* not an IOSAPIC interrupt! */ + + spin_lock_irqsave(&iosapic_lock, flags); + { + low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + } + spin_unlock_irqrestore(&iosapic_lock, flags); +} + + +static void +iosapic_set_affinity (unsigned int irq, cpumask_t mask) +{ +#ifdef CONFIG_SMP + unsigned long flags; + u32 high32, low32; + int dest, rte_index; + char __iomem *addr; + int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; + ia64_vector vec; + + irq &= (~IA64_IRQ_REDIRECTED); + vec = irq_to_vector(irq); + + if (cpus_empty(mask)) + return; + + dest = cpu_physical_id(first_cpu(mask)); + + rte_index = iosapic_intr_info[vec].rte_index; + addr = iosapic_intr_info[vec].addr; + + if (rte_index < 0) + return; /* not an IOSAPIC interrupt */ + + set_irq_affinity_info(irq, dest, redir); + + /* dest contains both id and eid */ + high32 = dest << IOSAPIC_DEST_SHIFT; + + spin_lock_irqsave(&iosapic_lock, flags); + { + low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); + + if (redir) + /* change delivery mode to lowest priority */ + low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); + else + /* change delivery mode to fixed */ + low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); + + iosapic_intr_info[vec].low32 = low32; + iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + } + spin_unlock_irqrestore(&iosapic_lock, flags); +#endif +} + +/* + * Handlers for level-triggered interrupts. + */ + +static unsigned int +iosapic_startup_level_irq (unsigned int irq) +{ + unmask_irq(irq); + return 0; +} + +static void +iosapic_end_level_irq (unsigned int irq) +{ + ia64_vector vec = irq_to_vector(irq); + + move_irq(irq); + iosapic_eoi(iosapic_intr_info[vec].addr, vec); +} + +#define iosapic_shutdown_level_irq mask_irq +#define iosapic_enable_level_irq unmask_irq +#define iosapic_disable_level_irq mask_irq +#define iosapic_ack_level_irq nop + +struct hw_interrupt_type irq_type_iosapic_level = { + .typename = "IO-SAPIC-level", + .startup = iosapic_startup_level_irq, + .shutdown = iosapic_shutdown_level_irq, + .enable = iosapic_enable_level_irq, + .disable = iosapic_disable_level_irq, + .ack = iosapic_ack_level_irq, + .end = iosapic_end_level_irq, + .set_affinity = iosapic_set_affinity +}; + +/* + * Handlers for edge-triggered interrupts. + */ + +static unsigned int +iosapic_startup_edge_irq (unsigned int irq) +{ + unmask_irq(irq); + /* + * IOSAPIC simply drops interrupts pended while the + * corresponding pin was masked, so we can't know if an + * interrupt is pending already. Let's hope not... + */ + return 0; +} + +static void +iosapic_ack_edge_irq (unsigned int irq) +{ + irq_desc_t *idesc = irq_descp(irq); + + move_irq(irq); + /* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. This prevents IRQ storms from unhandled + * devices. + */ + if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED)) + mask_irq(irq); +} + +#define iosapic_enable_edge_irq unmask_irq +#define iosapic_disable_edge_irq nop +#define iosapic_end_edge_irq nop + +struct hw_interrupt_type irq_type_iosapic_edge = { + .typename = "IO-SAPIC-edge", + .startup = iosapic_startup_edge_irq, + .shutdown = iosapic_disable_edge_irq, + .enable = iosapic_enable_edge_irq, + .disable = iosapic_disable_edge_irq, + .ack = iosapic_ack_edge_irq, + .end = iosapic_end_edge_irq, + .set_affinity = iosapic_set_affinity +}; + +unsigned int +iosapic_version (char __iomem *addr) +{ + /* + * IOSAPIC Version Register return 32 bit structure like: + * { + * unsigned int version : 8; + * unsigned int reserved1 : 8; + * unsigned int max_redir : 8; + * unsigned int reserved2 : 8; + * } + */ + return iosapic_read(addr, IOSAPIC_VERSION); +} + +/* + * if the given vector is already owned by other, + * assign a new vector for the other and make the vector available + */ +static void __init +iosapic_reassign_vector (int vector) +{ + int new_vector; + + if (iosapic_intr_info[vector].rte_index >= 0 || iosapic_intr_info[vector].addr + || iosapic_intr_info[vector].gsi_base || iosapic_intr_info[vector].dmode + || iosapic_intr_info[vector].polarity || iosapic_intr_info[vector].trigger) + { + new_vector = assign_irq_vector(AUTO_ASSIGN); + printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector); + memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], + sizeof(struct iosapic_intr_info)); + memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); + iosapic_intr_info[vector].rte_index = -1; + } +} + +static void +register_intr (unsigned int gsi, int vector, unsigned char delivery, + unsigned long polarity, unsigned long trigger) +{ + irq_desc_t *idesc; + struct hw_interrupt_type *irq_type; + int rte_index; + int index; + unsigned long gsi_base; + void __iomem *iosapic_address; + + index = find_iosapic(gsi); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi); + return; + } + + iosapic_address = iosapic_lists[index].addr; + gsi_base = iosapic_lists[index].gsi_base; + + rte_index = gsi - gsi_base; + iosapic_intr_info[vector].rte_index = rte_index; + iosapic_intr_info[vector].polarity = polarity; + iosapic_intr_info[vector].dmode = delivery; + iosapic_intr_info[vector].addr = iosapic_address; + iosapic_intr_info[vector].gsi_base = gsi_base; + iosapic_intr_info[vector].trigger = trigger; + iosapic_intr_info[vector].refcnt++; + + if (trigger == IOSAPIC_EDGE) + irq_type = &irq_type_iosapic_edge; + else + irq_type = &irq_type_iosapic_level; + + idesc = irq_descp(vector); + if (idesc->handler != irq_type) { + if (idesc->handler != &no_irq_type) + printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", + __FUNCTION__, vector, idesc->handler->typename, irq_type->typename); + idesc->handler = irq_type; + } +} + +static unsigned int +get_target_cpu (unsigned int gsi, int vector) +{ +#ifdef CONFIG_SMP + static int cpu = -1; + + /* + * If the platform supports redirection via XTP, let it + * distribute interrupts. + */ + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + return cpu_physical_id(smp_processor_id()); + + /* + * Some interrupts (ACPI SCI, for instance) are registered + * before the BSP is marked as online. + */ + if (!cpu_online(smp_processor_id())) + return cpu_physical_id(smp_processor_id()); + +#ifdef CONFIG_NUMA + { + int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; + cpumask_t cpu_mask; + + iosapic_index = find_iosapic(gsi); + if (iosapic_index < 0 || + iosapic_lists[iosapic_index].node == MAX_NUMNODES) + goto skip_numa_setup; + + cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); + + for_each_cpu_mask(numa_cpu, cpu_mask) { + if (!cpu_online(numa_cpu)) + cpu_clear(numa_cpu, cpu_mask); + } + + num_cpus = cpus_weight(cpu_mask); + + if (!num_cpus) + goto skip_numa_setup; + + /* Use vector assigment to distribute across cpus in node */ + cpu_index = vector % num_cpus; + + for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) + numa_cpu = next_cpu(numa_cpu, cpu_mask); + + if (numa_cpu != NR_CPUS) + return cpu_physical_id(numa_cpu); + } +skip_numa_setup: +#endif + /* + * Otherwise, round-robin interrupt vectors across all the + * processors. (It'd be nice if we could be smarter in the + * case of NUMA.) + */ + do { + if (++cpu >= NR_CPUS) + cpu = 0; + } while (!cpu_online(cpu)); + + return cpu_physical_id(cpu); +#else + return cpu_physical_id(smp_processor_id()); +#endif +} + +/* + * ACPI can describe IOSAPIC interrupts via static tables and namespace + * methods. This provides an interface to register those interrupts and + * program the IOSAPIC RTE. + */ +int +iosapic_register_intr (unsigned int gsi, + unsigned long polarity, unsigned long trigger) +{ + int vector; + unsigned int dest; + unsigned long flags; + + /* + * If this GSI has already been registered (i.e., it's a + * shared interrupt, or we lost a race to register it), + * don't touch the RTE. + */ + spin_lock_irqsave(&iosapic_lock, flags); + { + vector = gsi_to_vector(gsi); + if (vector > 0) { + iosapic_intr_info[vector].refcnt++; + spin_unlock_irqrestore(&iosapic_lock, flags); + return vector; + } + + vector = assign_irq_vector(AUTO_ASSIGN); + dest = get_target_cpu(gsi, vector); + register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, + polarity, trigger); + + set_rte(vector, dest, 1); + } + spin_unlock_irqrestore(&iosapic_lock, flags); + + printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", + gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, vector); + + return vector; +} + +#ifdef CONFIG_ACPI_DEALLOCATE_IRQ +void +iosapic_unregister_intr (unsigned int gsi) +{ + unsigned long flags; + int irq, vector; + irq_desc_t *idesc; + int rte_index; + unsigned long trigger, polarity; + + /* + * If the irq associated with the gsi is not found, + * iosapic_unregister_intr() is unbalanced. We need to check + * this again after getting locks. + */ + irq = gsi_to_irq(gsi); + if (irq < 0) { + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi); + WARN_ON(1); + return; + } + vector = irq_to_vector(irq); + + idesc = irq_descp(irq); + spin_lock_irqsave(&idesc->lock, flags); + spin_lock(&iosapic_lock); + { + rte_index = iosapic_intr_info[vector].rte_index; + if (rte_index < 0) { + spin_unlock(&iosapic_lock); + spin_unlock_irqrestore(&idesc->lock, flags); + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi); + WARN_ON(1); + return; + } + + if (--iosapic_intr_info[vector].refcnt > 0) { + spin_unlock(&iosapic_lock); + spin_unlock_irqrestore(&idesc->lock, flags); + return; + } + + /* + * If interrupt handlers still exist on the irq + * associated with the gsi, don't unregister the + * interrupt. + */ + if (idesc->action) { + iosapic_intr_info[vector].refcnt++; + spin_unlock(&iosapic_lock); + spin_unlock_irqrestore(&idesc->lock, flags); + printk(KERN_WARNING "Cannot unregister GSI. IRQ %u is still in use.\n", irq); + return; + } + + /* Clear the interrupt controller descriptor. */ + idesc->handler = &no_irq_type; + + trigger = iosapic_intr_info[vector].trigger; + polarity = iosapic_intr_info[vector].polarity; + + /* Clear the interrupt information. */ + memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); + iosapic_intr_info[vector].rte_index = -1; /* mark as unused */ + } + spin_unlock(&iosapic_lock); + spin_unlock_irqrestore(&idesc->lock, flags); + + /* Free the interrupt vector */ + free_irq_vector(vector); + + printk(KERN_INFO "GSI %u (%s, %s) -> vector %d unregisterd.\n", + gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + vector); +} +#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */ + +/* + * ACPI calls this when it finds an entry for a platform interrupt. + * Note that the irq_base and IOSAPIC address must be set in iosapic_init(). + */ +int __init +iosapic_register_platform_intr (u32 int_type, unsigned int gsi, + int iosapic_vector, u16 eid, u16 id, + unsigned long polarity, unsigned long trigger) +{ + static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"}; + unsigned char delivery; + int vector, mask = 0; + unsigned int dest = ((id << 8) | eid) & 0xffff; + + switch (int_type) { + case ACPI_INTERRUPT_PMI: + vector = iosapic_vector; + /* + * since PMI vector is alloc'd by FW(ACPI) not by kernel, + * we need to make sure the vector is available + */ + iosapic_reassign_vector(vector); + delivery = IOSAPIC_PMI; + break; + case ACPI_INTERRUPT_INIT: + vector = assign_irq_vector(AUTO_ASSIGN); + delivery = IOSAPIC_INIT; + break; + case ACPI_INTERRUPT_CPEI: + vector = IA64_CPE_VECTOR; + delivery = IOSAPIC_LOWEST_PRIORITY; + mask = 1; + break; + default: + printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type); + return -1; + } + + register_intr(gsi, vector, delivery, polarity, trigger); + + printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", + int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown", + int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, vector); + + set_rte(vector, dest, mask); + return vector; +} + + +/* + * ACPI calls this when it finds an entry for a legacy ISA IRQ override. + * Note that the gsi_base and IOSAPIC address must be set in iosapic_init(). + */ +void __init +iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, + unsigned long polarity, + unsigned long trigger) +{ + int vector; + unsigned int dest = cpu_physical_id(smp_processor_id()); + + vector = isa_irq_to_vector(isa_irq); + + register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); + + DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", + isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level", + polarity == IOSAPIC_POL_HIGH ? "high" : "low", + cpu_logical_id(dest), dest, vector); + + set_rte(vector, dest, 1); +} + +void __init +iosapic_system_init (int system_pcat_compat) +{ + int vector; + + for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) + iosapic_intr_info[vector].rte_index = -1; /* mark as unused */ + + pcat_compat = system_pcat_compat; + if (pcat_compat) { + /* + * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support + * enabled. + */ + printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__); + outb(0xff, 0xA1); + outb(0xff, 0x21); + } +} + +void __init +iosapic_init (unsigned long phys_addr, unsigned int gsi_base) +{ + int num_rte; + unsigned int isa_irq, ver; + char __iomem *addr; + + addr = ioremap(phys_addr, 0); + ver = iosapic_version(addr); + + /* + * The MAX_REDIR register holds the highest input pin + * number (starting from 0). + * We add 1 so that we can use it for number of pins (= RTEs) + */ + num_rte = ((ver >> 16) & 0xff) + 1; + + iosapic_lists[num_iosapic].addr = addr; + iosapic_lists[num_iosapic].gsi_base = gsi_base; + iosapic_lists[num_iosapic].num_rte = num_rte; +#ifdef CONFIG_NUMA + iosapic_lists[num_iosapic].node = MAX_NUMNODES; +#endif + num_iosapic++; + + if ((gsi_base == 0) && pcat_compat) { + /* + * Map the legacy ISA devices into the IOSAPIC data. Some of these may + * get reprogrammed later on with data from the ACPI Interrupt Source + * Override table. + */ + for (isa_irq = 0; isa_irq < 16; ++isa_irq) + iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE); + } +} + +#ifdef CONFIG_NUMA +void __init +map_iosapic_to_node(unsigned int gsi_base, int node) +{ + int index; + + index = find_iosapic(gsi_base); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", + __FUNCTION__, gsi_base); + return; + } + iosapic_lists[index].node = node; + return; +} +#endif diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c new file mode 100644 index 000000000000..28f2aadc38d0 --- /dev/null +++ b/arch/ia64/kernel/irq.c @@ -0,0 +1,238 @@ +/* + * linux/arch/ia64/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + * + * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004 + * + * 4/14/2004: Added code to handle cpu migration and do safe irq + * migration without lossing interrupts for iosapic + * architecture. + */ + +#include <asm/delay.h> +#include <asm/uaccess.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); +} + +#ifdef CONFIG_IA64_GENERIC +unsigned int __ia64_local_vector_to_irq (ia64_vector vec) +{ + return (unsigned int) vec; +} +#endif + +/* + * Interrupt statistics: + */ + +atomic_t irq_err_count; + +/* + * /proc/interrupts printing: + */ + +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *) v, j; + struct irqaction * action; + unsigned long flags; + + if (i == 0) { + seq_printf(p, " "); + for (j=0; j<NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "CPU%d ",j); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (!action) + goto skip; + seq_printf(p, "%3d: ",i); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +#endif + seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); +skip: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } else if (i == NR_IRQS) + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + return 0; +} + +#ifdef CONFIG_SMP +/* + * This is updated when the user sets irq affinity via /proc + */ +static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; +static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)]; + +static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; + +/* + * Arch specific routine for deferred write to iosapic rte to reprogram + * intr destination. + */ +void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) +{ + pending_irq_cpumask[irq] = mask_val; +} + +void set_irq_affinity_info (unsigned int irq, int hwid, int redir) +{ + cpumask_t mask = CPU_MASK_NONE; + + cpu_set(cpu_logical_id(hwid), mask); + + if (irq < NR_IRQS) { + irq_affinity[irq] = mask; + irq_redir[irq] = (char) (redir & 0xff); + } +} + + +void move_irq(int irq) +{ + /* note - we hold desc->lock */ + cpumask_t tmp; + irq_desc_t *desc = irq_descp(irq); + int redir = test_bit(irq, pending_irq_redir); + + if (unlikely(!desc->handler->set_affinity)) + return; + + if (!cpus_empty(pending_irq_cpumask[irq])) { + cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); + if (unlikely(!cpus_empty(tmp))) { + desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0), + pending_irq_cpumask[irq]); + } + cpus_clear(pending_irq_cpumask[irq]); + } +} + + +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_HOTPLUG_CPU +unsigned int vectors_in_migration[NR_IRQS]; + +/* + * Since cpu_online_map is already updated, we just need to check for + * affinity that has zeros + */ +static void migrate_irqs(void) +{ + cpumask_t mask; + irq_desc_t *desc; + int irq, new_cpu; + + for (irq=0; irq < NR_IRQS; irq++) { + desc = irq_descp(irq); + + /* + * No handling for now. + * TBD: Implement a disable function so we can now + * tell CPU not to respond to these local intr sources. + * such as ITV,CPEI,MCA etc. + */ + if (desc->status == IRQ_PER_CPU) + continue; + + cpus_and(mask, irq_affinity[irq], cpu_online_map); + if (any_online_cpu(mask) == NR_CPUS) { + /* + * Save it for phase 2 processing + */ + vectors_in_migration[irq] = irq; + + new_cpu = any_online_cpu(cpu_online_map); + mask = cpumask_of_cpu(new_cpu); + + /* + * Al three are essential, currently WARN_ON.. maybe panic? + */ + if (desc->handler && desc->handler->disable && + desc->handler->enable && desc->handler->set_affinity) { + desc->handler->disable(irq); + desc->handler->set_affinity(irq, mask); + desc->handler->enable(irq); + } else { + WARN_ON((!(desc->handler) || !(desc->handler->disable) || + !(desc->handler->enable) || + !(desc->handler->set_affinity))); + } + } + } +} + +void fixup_irqs(void) +{ + unsigned int irq; + extern void ia64_process_pending_intr(void); + + ia64_set_itv(1<<16); + /* + * Phase 1: Locate irq's bound to this cpu and + * relocate them for cpu removal. + */ + migrate_irqs(); + + /* + * Phase 2: Perform interrupt processing for all entries reported in + * local APIC. + */ + ia64_process_pending_intr(); + + /* + * Phase 3: Now handle any interrupts not captured in local APIC. + * This is to account for cases that device interrupted during the time the + * rte was being disabled and re-programmed. + */ + for (irq=0; irq < NR_IRQS; irq++) { + if (vectors_in_migration[irq]) { + vectors_in_migration[irq]=0; + __do_IRQ(irq, NULL); + } + } + + /* + * Now let processor die. We do irq disable and max_xtp() to + * ensure there is no more interrupts routed to this processor. + * But the local timer interrupt can have 1 pending which we + * take care in timer_interrupt(). + */ + max_xtp(); + local_irq_disable(); +} +#endif diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c new file mode 100644 index 000000000000..5ba06ebe355b --- /dev/null +++ b/arch/ia64/kernel/irq_ia64.c @@ -0,0 +1,278 @@ +/* + * linux/arch/ia64/kernel/irq.c + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 6/10/99: Updated to bring in sync with x86 version to facilitate + * support for SMP and different interrupt controllers. + * + * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector + * PCI to vector allocation routine. + * 04/14/2004 Ashok Raj <ashok.raj@intel.com> + * Added CPU Hotplug handling for IPF. + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/jiffies.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/kernel_stat.h> +#include <linux/slab.h> +#include <linux/ptrace.h> +#include <linux/random.h> /* for rand_initialize_irq() */ +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/threads.h> +#include <linux/bitops.h> + +#include <asm/delay.h> +#include <asm/intrinsics.h> +#include <asm/io.h> +#include <asm/hw_irq.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/system.h> + +#ifdef CONFIG_PERFMON +# include <asm/perfmon.h> +#endif + +#define IRQ_DEBUG 0 + +/* default base addr of IPI table */ +void __iomem *ipi_base_addr = ((void __iomem *) + (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR)); + +/* + * Legacy IRQ to IA-64 vector translation table. + */ +__u8 isa_irq_to_vector_map[16] = { + /* 8259 IRQ translation, first 16 entries */ + 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, + 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21 +}; +EXPORT_SYMBOL(isa_irq_to_vector_map); + +static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)]; + +int +assign_irq_vector (int irq) +{ + int pos, vector; + again: + pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS); + vector = IA64_FIRST_DEVICE_VECTOR + pos; + if (vector > IA64_LAST_DEVICE_VECTOR) + /* XXX could look for sharable vectors instead of panic'ing... */ + panic("assign_irq_vector: out of interrupt vectors!"); + if (test_and_set_bit(pos, ia64_vector_mask)) + goto again; + return vector; +} + +void +free_irq_vector (int vector) +{ + int pos; + + if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) + return; + + pos = vector - IA64_FIRST_DEVICE_VECTOR; + if (!test_and_clear_bit(pos, ia64_vector_mask)) + printk(KERN_WARNING "%s: double free!\n", __FUNCTION__); +} + +#ifdef CONFIG_SMP +# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) +#else +# define IS_RESCHEDULE(vec) (0) +#endif +/* + * That's where the IVT branches when we get an external + * interrupt. This branches to the correct hardware IRQ handler via + * function ptr. + */ +void +ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) +{ + unsigned long saved_tpr; + +#if IRQ_DEBUG + { + unsigned long bsp, sp; + + /* + * Note: if the interrupt happened while executing in + * the context switch routine (ia64_switch_to), we may + * get a spurious stack overflow here. This is + * because the register and the memory stack are not + * switched atomically. + */ + bsp = ia64_getreg(_IA64_REG_AR_BSP); + sp = ia64_getreg(_IA64_REG_SP); + + if ((sp - bsp) < 1024) { + static unsigned char count; + static long last_time; + + if (jiffies - last_time > 5*HZ) + count = 0; + if (++count < 5) { + last_time = jiffies; + printk("ia64_handle_irq: DANGER: less than " + "1KB of free stack space!!\n" + "(bsp=0x%lx, sp=%lx)\n", bsp, sp); + } + } + } +#endif /* IRQ_DEBUG */ + + /* + * Always set TPR to limit maximum interrupt nesting depth to + * 16 (without this, it would be ~240, which could easily lead + * to kernel stack overflows). + */ + irq_enter(); + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); + while (vector != IA64_SPURIOUS_INT_VECTOR) { + if (!IS_RESCHEDULE(vector)) { + ia64_setreg(_IA64_REG_CR_TPR, vector); + ia64_srlz_d(); + + __do_IRQ(local_vector_to_irq(vector), regs); + + /* + * Disable interrupts and send EOI: + */ + local_irq_disable(); + ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); + } + ia64_eoi(); + vector = ia64_get_ivr(); + } + /* + * This must be done *after* the ia64_eoi(). For example, the keyboard softirq + * handler needs to be able to wait for further keyboard interrupts, which can't + * come through until ia64_eoi() has been done. + */ + irq_exit(); +} + +#ifdef CONFIG_HOTPLUG_CPU +/* + * This function emulates a interrupt processing when a cpu is about to be + * brought down. + */ +void ia64_process_pending_intr(void) +{ + ia64_vector vector; + unsigned long saved_tpr; + extern unsigned int vectors_in_migration[NR_IRQS]; + + vector = ia64_get_ivr(); + + irq_enter(); + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); + + /* + * Perform normal interrupt style processing + */ + while (vector != IA64_SPURIOUS_INT_VECTOR) { + if (!IS_RESCHEDULE(vector)) { + ia64_setreg(_IA64_REG_CR_TPR, vector); + ia64_srlz_d(); + + /* + * Now try calling normal ia64_handle_irq as it would have got called + * from a real intr handler. Try passing null for pt_regs, hopefully + * it will work. I hope it works!. + * Probably could shared code. + */ + vectors_in_migration[local_vector_to_irq(vector)]=0; + __do_IRQ(local_vector_to_irq(vector), NULL); + + /* + * Disable interrupts and send EOI + */ + local_irq_disable(); + ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); + } + ia64_eoi(); + vector = ia64_get_ivr(); + } + irq_exit(); +} +#endif + + +#ifdef CONFIG_SMP +extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs); + +static struct irqaction ipi_irqaction = { + .handler = handle_IPI, + .flags = SA_INTERRUPT, + .name = "IPI" +}; +#endif + +void +register_percpu_irq (ia64_vector vec, struct irqaction *action) +{ + irq_desc_t *desc; + unsigned int irq; + + for (irq = 0; irq < NR_IRQS; ++irq) + if (irq_to_vector(irq) == vec) { + desc = irq_descp(irq); + desc->status |= IRQ_PER_CPU; + desc->handler = &irq_type_ia64_lsapic; + if (action) + setup_irq(irq, action); + } +} + +void __init +init_IRQ (void) +{ + register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); +#ifdef CONFIG_SMP + register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); +#endif +#ifdef CONFIG_PERFMON + pfm_init_percpu(); +#endif + platform_irq_init(); +} + +void +ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect) +{ + void __iomem *ipi_addr; + unsigned long ipi_data; + unsigned long phys_cpu_id; + +#ifdef CONFIG_SMP + phys_cpu_id = cpu_physical_id(cpu); +#else + phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff; +#endif + + /* + * cpu number is in 8bit ID and 8bit EID + */ + + ipi_data = (delivery_mode << 8) | (vector & 0xff); + ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3)); + + writeq(ipi_data, ipi_addr); +} diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c new file mode 100644 index 000000000000..ea14e6a04409 --- /dev/null +++ b/arch/ia64/kernel/irq_lsapic.c @@ -0,0 +1,37 @@ +/* + * LSAPIC Interrupt Controller + * + * This takes care of interrupts that are generated by the CPU's + * internal Streamlined Advanced Programmable Interrupt Controller + * (LSAPIC), such as the ITC and IPI interrupts. + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <linux/sched.h> +#include <linux/irq.h> + +static unsigned int +lsapic_noop_startup (unsigned int irq) +{ + return 0; +} + +static void +lsapic_noop (unsigned int irq) +{ + /* nuthing to do... */ +} + +struct hw_interrupt_type irq_type_ia64_lsapic = { + .typename = "LSAPIC", + .startup = lsapic_noop_startup, + .shutdown = lsapic_noop, + .enable = lsapic_noop, + .disable = lsapic_noop, + .ack = lsapic_noop, + .end = lsapic_noop +}; diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S new file mode 100644 index 000000000000..d9c05d53435b --- /dev/null +++ b/arch/ia64/kernel/ivt.S @@ -0,0 +1,1619 @@ +/* + * arch/ia64/kernel/ivt.S + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger <davidm@hpl.hp.com> + * Copyright (C) 2000, 2002-2003 Intel Co + * Asit Mallick <asit.k.mallick@intel.com> + * Suresh Siddha <suresh.b.siddha@intel.com> + * Kenneth Chen <kenneth.w.chen@intel.com> + * Fenghua Yu <fenghua.yu@intel.com> + * + * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP + * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT. + */ +/* + * This file defines the interruption vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for critical + * interruptions like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + * entry offset ----/ / / / / + * entry number ---------/ / / / + * size of the entry -------------/ / / + * vector name -------------------------------------/ / + * interruptions triggering this vector ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/break.h> +#include <asm/ia32.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> +#include <asm/errno.h> + +#if 1 +# define PSR_DEFAULT_BITS psr.ac +#else +# define PSR_DEFAULT_BITS 0 +#endif + +#if 0 + /* + * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't + * needed for something else before enabling this... + */ +# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 +#else +# define DBG_FAULT(i) +#endif + +#define MINSTATE_VIRT /* needed by minstate.h */ +#include "minstate.h" + +#define FAULT(n) \ + mov r31=pr; \ + mov r19=n;; /* prepare to save predicates */ \ + br.sptk.many dispatch_to_fault_handler + + .section .text.ivt,"ax" + + .align 32768 // align on 32KB boundary + .global ia64_ivt +ia64_ivt: +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(vhpt_miss) + DBG_FAULT(0) + /* + * The VHPT vector is invoked when the TLB entry for the virtual page table + * is missing. This happens only as a result of a previous + * (the "original") TLB miss, which may either be caused by an instruction + * fetch or a data access (or non-access). + * + * What we do here is normal TLB miss handing for the _original_ miss, followed + * by inserting the TLB entry for the virtual page table page that the VHPT + * walker was attempting to access. The latter gets inserted as long + * as both L1 and L2 have valid mappings for the faulting address. + * The TLB entry for the original miss gets inserted only if + * the L3 entry indicates that the page is present. + * + * do_page_fault gets invoked in the following cases: + * - the faulting virtual address uses unimplemented address bits + * - the faulting virtual address has no L1, L2, or L3 mapping + */ + mov r16=cr.ifa // get address that caused the TLB miss +#ifdef CONFIG_HUGETLB_PAGE + movl r18=PAGE_SHIFT + mov r25=cr.itir +#endif + ;; + rsm psr.dt // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=IA64_KR(PT_BASE) // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + shr r22=r21,3 +#ifdef CONFIG_HUGETLB_PAGE + extr.u r26=r25,2,6 + ;; + cmp.ne p8,p0=r18,r26 + sub r27=r26,r18 + ;; +(p8) dep r25=r18,r25,2,6 +(p8) shr r22=r22,r27 +#endif + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? + shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + + srlz.d + LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir + + .pred.rel "mutex", p6, p7 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r22,PMD_SHIFT // shift L2 index into position + ;; + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL? + dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; +(p7) ld8 r18=[r21] // read the L3 PTE + mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss + ;; +(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? + mov r22=cr.iha // get the VHPT address that caused the TLB miss + ;; // avoid RAW on p7 +(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? + dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address + ;; +(p10) itc.i r18 // insert the instruction TLB entry +(p11) itc.d r18 // insert the data TLB entry +(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) + mov cr.ifa=r22 + +#ifdef CONFIG_HUGETLB_PAGE +(p8) mov cr.itir=r25 // change to default page-size for VHPT +#endif + + /* + * Now compute and insert the TLB entry for the virtual page table. We never + * execute in a page table page so there is no need to set the exception deferral + * bit. + */ + adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 + ;; +(p7) itc.d r24 + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + /* + * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g + * between reading the pagetable and the "itc". If so, flush the entry we + * inserted and retry. + */ + ld8 r25=[r21] // read L3 PTE again + ld8 r26=[r17] // read L2 entry again + ;; + cmp.ne p6,p7=r26,r20 // did L2 entry change + mov r27=PAGE_SHIFT<<2 + ;; +(p6) ptc.l r22,r27 // purge PTE page translation +(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change + ;; +(p6) ptc.l r16,r27 // purge translation +#endif + + mov pr=r31,-1 // restore predicate registers + rfi +END(vhpt_miss) + + .org ia64_ivt+0x400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(itlb_miss) + DBG_FAULT(1) + /* + * The ITLB handler accesses the L3 PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the L3 PTE read + * and go on normally after that. + */ + mov r16=cr.ifa // get virtual address + mov r29=b0 // save b0 + mov r31=pr // save predicates +.itlb_fault: + mov r17=cr.iha // get virtual address of L3 PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read L3 PTE + ;; + mov b0=r29 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt page_fault + ;; + itc.i r18 + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r19=[r17] // read L3 PTE again and see if same + mov r20=PAGE_SHIFT<<2 // setup page size for purge + ;; + cmp.ne p7,p0=r18,r19 + ;; +(p7) ptc.l r16,r20 +#endif + mov pr=r31,-1 + rfi +END(itlb_miss) + + .org ia64_ivt+0x0800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(dtlb_miss) + DBG_FAULT(2) + /* + * The DTLB handler accesses the L3 PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the L3 PTE read + * and go on normally after that. + */ + mov r16=cr.ifa // get virtual address + mov r29=b0 // save b0 + mov r31=pr // save predicates +dtlb_fault: + mov r17=cr.iha // get virtual address of L3 PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read L3 PTE + ;; + mov b0=r29 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt page_fault + ;; + itc.d r18 + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r19=[r17] // read L3 PTE again and see if same + mov r20=PAGE_SHIFT<<2 // setup page size for purge + ;; + cmp.ne p7,p0=r18,r19 + ;; +(p7) ptc.l r16,r20 +#endif + mov pr=r31,-1 + rfi +END(dtlb_miss) + + .org ia64_ivt+0x0c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(alt_itlb_miss) + DBG_FAULT(3) + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=PAGE_KERNEL + mov r21=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r31=pr + ;; +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r21 + ;; + cmp.gt p8,p0=6,r22 // user mode + ;; +(p8) thash r17=r16 + ;; +(p8) mov cr.iha=r17 +(p8) mov r29=b0 // save b0 +(p8) br.cond.dptk .itlb_fault +#endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + shr.u r18=r16,57 // move address bit 61 to bit 4 + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) + cmp.ne p8,p0=r0,r23 // psr.cpl != 0? + or r19=r17,r19 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p8) br.cond.spnt page_fault + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(alt_itlb_miss) + + .org ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(alt_dtlb_miss) + DBG_FAULT(4) + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=PAGE_KERNEL + mov r20=cr.isr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r21=cr.ipsr + mov r31=pr + ;; +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r21 + ;; + cmp.gt p8,p0=6,r22 // access to region 0-5 + ;; +(p8) thash r17=r16 + ;; +(p8) mov cr.iha=r17 +(p8) mov r29=b0 // save b0 +(p8) br.cond.dptk dtlb_fault +#endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl + and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field + tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? + shr.u r18=r16,57 // move address bit 61 to bit 4 + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) + cmp.ne p8,p0=r0,r23 +(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field +(p8) br.cond.spnt page_fault + + dep r21=-1,r21,IA64_PSR_ED_BIT,1 + or r19=r19,r17 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p6) mov cr.ipsr=r21 + ;; +(p7) itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(alt_dtlb_miss) + + .org ia64_ivt+0x1400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(nested_dtlb_miss) + /* + * In the absence of kernel bugs, we get here when the virtually mapped linear + * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction + * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page + * table is missing, a nested TLB miss fault is triggered and control is + * transferred to this point. When this happens, we lookup the pte for the + * faulting address by walking the page table in physical mode and return to the + * continuation point passed in register r30 (or call page_fault if the address is + * not mapped). + * + * Input: r16: faulting address + * r29: saved b0 + * r30: continuation address + * r31: saved pr + * + * Output: r17: physical address of L3 PTE of faulting address + * r29: saved b0 + * r30: continuation address + * r31: saved pr + * + * Clobbered: b0, r18, r19, r21, psr.dt (cleared) + */ + rsm psr.dt // switch to using physical data addressing + mov r19=IA64_KR(PT_BASE) // get the page table base address + shl r21=r16,3 // shift bit 60 into sign bit + ;; + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is faulting address in region 5? + shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + + srlz.d + LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir + + .pred.rel "mutex", p6, p7 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry +(p6) br.cond.spnt page_fault + mov b0=r30 + br.sptk.many b0 // return to continuation point +END(nested_dtlb_miss) + + .org ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(ikey_miss) + DBG_FAULT(6) + FAULT(6) +END(ikey_miss) + + //----------------------------------------------------------------------------------- + // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) +ENTRY(page_fault) + ssm psr.dt + ;; + srlz.i + ;; + SAVE_MIN_WITH_COVER + alloc r15=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=cr.isr + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collectin is on + ;; +(p15) ssm psr.i // restore psr.i + movl r14=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 // out2 = pointer to pt_regs + br.call.sptk.many b6=ia64_do_page_fault // ignore return address +END(page_fault) + + .org ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(dkey_miss) + DBG_FAULT(7) + FAULT(7) +END(dkey_miss) + + .org ia64_ivt+0x2000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(dirty_bit) + DBG_FAULT(8) + /* + * What we do here is to simply turn on the dirty bit in the PTE. We need to + * update both the page-table and the TLB entry. To efficiently access the PTE, + * we address it through the virtual page table. Most likely, the TLB entry for + * the relevant virtual page table page is still present in the TLB so we can + * normally do this without additional TLB misses. In case the necessary virtual + * page table TLB entry isn't present, we take a nested TLB miss hit where we look + * up the physical address of the L3 PTE and then continue at label 1 below. + */ + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault + mov r31=pr // save pr +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + ;; + cmpxchg8.acq r26=[r17],r25,ar.ccv + mov r24=PAGE_SHIFT<<2 + ;; + cmp.eq p6,p7=r26,r18 + ;; +(p6) itc.d r25 // install updated PTE + ;; + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov b0=r29 // restore b0 + mov ar.ccv=r28 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18 // install updated PTE +#endif + mov pr=r31,-1 // restore pr + rfi +END(dirty_bit) + + .org ia64_ivt+0x2400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(iaccess_bit) + DBG_FAULT(9) + // Like Entry 8, except for instruction access + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + mov r31=pr // save predicates +#ifdef CONFIG_ITANIUM + /* + * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. + */ + mov r17=cr.ipsr + ;; + mov r18=cr.iip + tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? + ;; +(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa +#endif /* CONFIG_ITANIUM */ + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault) +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_A,r18 // set the accessed bit + ;; + cmpxchg8.acq r26=[r17],r25,ar.ccv + mov r24=PAGE_SHIFT<<2 + ;; + cmp.eq p6,p7=r26,r18 + ;; +(p6) itc.i r25 // install updated PTE + ;; + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov b0=r29 // restore b0 + mov ar.ccv=r28 +#else /* !CONFIG_SMP */ + ;; +1: ld8 r18=[r17] + ;; + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.i r18 // install updated PTE +#endif /* !CONFIG_SMP */ + mov pr=r31,-1 + rfi +END(iaccess_bit) + + .org ia64_ivt+0x2800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(daccess_bit) + DBG_FAULT(10) + // Like Entry 8, except for data access + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r31=pr + mov r29=b0 // save b0 in case of nested fault) +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_A,r18 // set the dirty bit + ;; + cmpxchg8.acq r26=[r17],r25,ar.ccv + mov r24=PAGE_SHIFT<<2 + ;; + cmp.eq p6,p7=r26,r18 + ;; +(p6) itc.d r25 // install updated PTE + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + ;; + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov ar.ccv=r28 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_A,r18 // set the accessed bit + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18 // install updated PTE +#endif + mov b0=r29 // restore b0 + mov pr=r31,-1 + rfi +END(daccess_bit) + + .org ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(break_fault) + /* + * The streamlined system call entry/exit paths only save/restore the initial part + * of pt_regs. This implies that the callers of system-calls must adhere to the + * normal procedure calling conventions. + * + * Registers to be saved & restored: + * CR registers: cr.ipsr, cr.iip, cr.ifs + * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr + * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 + * Registers to be restored only: + * r8-r11: output value from the system call. + * + * During system call exit, scratch registers (including r15) are modified/cleared + * to prevent leaking bits from kernel to user level. + */ + DBG_FAULT(11) + mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat. + mov r17=cr.iim + mov r18=__IA64_BREAK_SYSCALL + mov r21=ar.fpsr + mov r29=cr.ipsr + mov r19=b6 + mov r25=ar.unat + mov r27=ar.rsc + mov r26=ar.pfs + mov r28=cr.iip + mov r31=pr // prepare to save predicates + mov r20=r1 + ;; + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 + cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so) +(p7) br.cond.spnt non_syscall + ;; + ld1 r17=[r16] // load current->thread.on_ustack flag + st1 [r16]=r0 // clear current->thread.on_ustack flag + add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT + ;; + invala + + /* adjust return address so we skip over the break instruction: */ + + extr.u r8=r29,41,2 // extract ei field from cr.ipsr + ;; + cmp.eq p6,p7=2,r8 // isr.ei==2? + mov r2=r1 // setup r2 for ia64_syscall_setup + ;; +(p6) mov r8=0 // clear ei to 0 +(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped +(p7) adds r8=1,r8 // increment ei to next slot + ;; + cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already? + dep r29=r8,r29,41,2 // insert new ei into cr.ipsr + ;; + + // switch from user to kernel RBS: + MINSTATE_START_SAVE_MIN_VIRT + br.call.sptk.many b7=ia64_syscall_setup + ;; + MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1 + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + mov r3=NR_syscalls - 1 + ;; +(p15) ssm psr.i // restore psr.i + // p10==true means out registers are more than 8 or r15's Nat is true +(p10) br.cond.spnt.many ia64_ret_from_syscall + ;; + movl r16=sys_call_table + + adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 + movl r2=ia64_ret_from_syscall + ;; + shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) + cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ? + mov rp=r2 // set the real return addr + ;; +(p6) ld8 r20=[r20] // load address of syscall entry point +(p7) movl r20=sys_ni_syscall + + add r2=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; + ld4 r2=[r2] // r2 = current_thread_info()->flags + ;; + and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit + ;; + cmp.eq p8,p0=r2,r0 + mov b6=r20 + ;; +(p8) br.call.sptk.many b6=b6 // ignore this return addr + br.cond.sptk ia64_trace_syscall + // NOT REACHED +END(break_fault) + + .org ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) +ENTRY(interrupt) + DBG_FAULT(12) + mov r31=pr // prepare to save predicates + ;; + SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 + ssm psr.ic | PSR_DEFAULT_BITS + ;; + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; + SAVE_REST + ;; + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group + mov out0=cr.ivr // pass cr.ivr as first arg + add out1=16,sp // pass pointer to pt_regs as second arg + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_handle_irq +END(interrupt) + + .org ia64_ivt+0x3400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved + DBG_FAULT(13) + FAULT(13) + + .org ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + DBG_FAULT(14) + FAULT(14) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + * + * ia64_syscall_setup() is a separate subroutine so that it can + * allocate stacked registers so it can safely demine any + * potential NaT values from the input registers. + * + * On entry: + * - executing on bank 0 or bank 1 register set (doesn't matter) + * - r1: stack pointer + * - r2: current task pointer + * - r3: preserved + * - r11: original contents (saved ar.pfs to be saved) + * - r12: original contents (sp to be saved) + * - r13: original contents (tp to be saved) + * - r15: original contents (syscall # to be saved) + * - r18: saved bsp (after switching to kernel stack) + * - r19: saved b6 + * - r20: saved r1 (gp) + * - r21: saved ar.fpsr + * - r22: kernel's register backing store base (krbs_base) + * - r23: saved ar.bspstore + * - r24: saved ar.rnat + * - r25: saved ar.unat + * - r26: saved ar.pfs + * - r27: saved ar.rsc + * - r28: saved cr.iip + * - r29: saved cr.ipsr + * - r31: saved pr + * - b0: original contents (to be saved) + * On exit: + * - executing on bank 1 registers + * - psr.ic enabled, interrupts restored + * - p10: TRUE if syscall is invoked with more than 8 out + * registers or r15's Nat is true + * - r1: kernel's gp + * - r3: preserved (same as on entry) + * - r8: -EINVAL if p10 is true + * - r12: points to kernel stack + * - r13: points to current task + * - p15: TRUE if interrupts need to be re-enabled + * - ar.fpsr: set to kernel settings + */ +GLOBAL_ENTRY(ia64_syscall_setup) +#if PT(B6) != 0 +# error This code assumes that b6 is the first field in pt_regs. +#endif + st8 [r1]=r19 // save b6 + add r16=PT(CR_IPSR),r1 // initialize first base pointer + add r17=PT(R11),r1 // initialize second base pointer + ;; + alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable + st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr + tnat.nz p8,p0=in0 + + st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 + tnat.nz p9,p0=in1 +(pKStk) mov r18=r0 // make sure r18 isn't NaT + ;; + + st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs + st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip + mov r28=b0 // save b0 (2 cyc) + ;; + + st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat + dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] +(p8) mov in0=-1 + ;; + + st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs + extr.u r11=r19,7,7 // I0 // get sol of ar.pfs + and r8=0x7f,r19 // A // get sof of ar.pfs + + st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc + tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 +(p9) mov in1=-1 + ;; + +(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 + tnat.nz p10,p0=in2 + add r11=8,r11 + ;; +(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field +(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field + tnat.nz p11,p0=in3 + ;; +(p10) mov in2=-1 + tnat.nz p12,p0=in4 // [I0] +(p11) mov in3=-1 + ;; +(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat +(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore + shl r18=r18,16 // compute ar.rsc to be used for "loadrs" + ;; + st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates + st8 [r17]=r28,PT(R1)-PT(B0) // save b0 + tnat.nz p13,p0=in5 // [I0] + ;; + st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" + st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 +(p12) mov in4=-1 + ;; + +.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 +.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 +(p13) mov in5=-1 + ;; + st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr + tnat.nz p14,p0=in6 + cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 + ;; + stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) +(p9) tnat.nz p10,p0=r15 + adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) + + st8.spill [r17]=r15 // save r15 + tnat.nz p8,p0=in7 + nop.i 0 + + mov r13=r2 // establish `current' + movl r1=__gp // establish kernel global pointer + ;; +(p14) mov in6=-1 +(p8) mov in7=-1 + nop.i 0 + + cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value +(p10) mov r8=-EINVAL + br.ret.sptk.many b7 +END(ia64_syscall_setup) + + .org ia64_ivt+0x3c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + DBG_FAULT(15) + FAULT(15) + + /* + * Squatting in this space ... + * + * This special case dispatcher for illegal operation faults allows preserved + * registers to be modified through a callback function (asm only) that is handed + * back from the fault handler in r8. Up to three arguments can be passed to the + * callback function by returning an aggregate with the callback as its first + * element, followed by the arguments. + */ +ENTRY(dispatch_illegal_op_fault) + .prologue + .body + SAVE_MIN_WITH_COVER + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in insn group + mov out0=ar.ec + ;; + SAVE_REST + PT_REGS_UNWIND_INFO(0) + ;; + br.call.sptk.many rp=ia64_illegal_op_fault +.ret0: ;; + alloc r14=ar.pfs,0,0,3,0 // must be first in insn group + mov out0=r9 + mov out1=r10 + mov out2=r11 + movl r15=ia64_leave_kernel + ;; + mov rp=r15 + mov b6=r8 + ;; + cmp.ne p6,p0=0,r8 +(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel + br.sptk.many ia64_leave_kernel +END(dispatch_illegal_op_fault) + + .org ia64_ivt+0x4000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + DBG_FAULT(16) + FAULT(16) + + .org ia64_ivt+0x4400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + DBG_FAULT(17) + FAULT(17) + +ENTRY(non_syscall) + SAVE_MIN_WITH_COVER + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + + alloc r14=ar.pfs,0,0,2,0 + mov out0=cr.iim + add out1=16,sp + adds r3=8,r2 // set up second base pointer for SAVE_REST + + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + movl r15=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r15 + ;; + br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr +END(non_syscall) + + .org ia64_ivt+0x4800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + DBG_FAULT(18) + FAULT(18) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_unaligned_handler) + SAVE_MIN_WITH_COVER + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + adds out1=16,sp + + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.sptk.many ia64_prepare_handle_unaligned +END(dispatch_unaligned_handler) + + .org ia64_ivt+0x4c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + DBG_FAULT(19) + FAULT(19) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_to_fault_handler) + /* + * Input: + * psr.ic: off + * r19: fault vector number (e.g., 24 for General Exception) + * r31: contains saved predicates (pr) + */ + SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=r15 + mov out1=cr.isr + mov out2=cr.ifa + mov out3=cr.iim + mov out4=cr.itir + ;; + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_fault +END(dispatch_to_fault_handler) + +// +// --- End of long entries, Beginning of short entries +// + + .org ia64_ivt+0x5000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) +ENTRY(page_not_present) + DBG_FAULT(20) + mov r16=cr.ifa + rsm psr.dt + /* + * The Linux page fault handler doesn't expect non-present pages to be in + * the TLB. Flush the existing entry now, so we meet that expectation. + */ + mov r17=PAGE_SHIFT<<2 + ;; + ptc.l r16,r17 + ;; + mov r31=pr + srlz.d + br.sptk.many page_fault +END(page_not_present) + + .org ia64_ivt+0x5100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) +ENTRY(key_permission) + DBG_FAULT(21) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(key_permission) + + .org ia64_ivt+0x5200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(iaccess_rights) + DBG_FAULT(22) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(iaccess_rights) + + .org ia64_ivt+0x5300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(daccess_rights) + DBG_FAULT(23) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(daccess_rights) + + .org ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(general_exception) + DBG_FAULT(24) + mov r16=cr.isr + mov r31=pr + ;; + cmp4.eq p6,p0=0,r16 +(p6) br.sptk.many dispatch_illegal_op_fault + ;; + mov r19=24 // fault number + br.sptk.many dispatch_to_fault_handler +END(general_exception) + + .org ia64_ivt+0x5500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(disabled_fp_reg) + DBG_FAULT(25) + rsm psr.dfh // ensure we can access fph + ;; + srlz.d + mov r31=pr + mov r19=25 + br.sptk.many dispatch_to_fault_handler +END(disabled_fp_reg) + + .org ia64_ivt+0x5600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(nat_consumption) + DBG_FAULT(26) + FAULT(26) +END(nat_consumption) + + .org ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(speculation_vector) + DBG_FAULT(27) + /* + * A [f]chk.[as] instruction needs to take the branch to the recovery code but + * this part of the architecture is not implemented in hardware on some CPUs, such + * as Itanium. Thus, in general we need to emulate the behavior. IIM contains + * the relative target (not yet sign extended). So after sign extending it we + * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, + * i.e., the slot to restart into. + * + * cr.imm contains zero_ext(imm21) + */ + mov r18=cr.iim + ;; + mov r17=cr.iip + shl r18=r18,43 // put sign bit in position (43=64-21) + ;; + + mov r16=cr.ipsr + shr r18=r18,39 // sign extend (39=43-4) + ;; + + add r17=r17,r18 // now add the offset + ;; + mov cr.iip=r17 + dep r16=0,r16,41,2 // clear EI + ;; + + mov cr.ipsr=r16 + ;; + + rfi // and go back +END(speculation_vector) + + .org ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + DBG_FAULT(28) + FAULT(28) + + .org ia64_ivt+0x5900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(debug_vector) + DBG_FAULT(29) + FAULT(29) +END(debug_vector) + + .org ia64_ivt+0x5a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) +ENTRY(unaligned_access) + DBG_FAULT(30) + mov r16=cr.ipsr + mov r31=pr // prepare to save predicates + ;; + br.sptk.many dispatch_unaligned_handler +END(unaligned_access) + + .org ia64_ivt+0x5b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) +ENTRY(unsupported_data_reference) + DBG_FAULT(31) + FAULT(31) +END(unsupported_data_reference) + + .org ia64_ivt+0x5c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) +ENTRY(floating_point_fault) + DBG_FAULT(32) + FAULT(32) +END(floating_point_fault) + + .org ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(floating_point_trap) + DBG_FAULT(33) + FAULT(33) +END(floating_point_trap) + + .org ia64_ivt+0x5e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(lower_privilege_trap) + DBG_FAULT(34) + FAULT(34) +END(lower_privilege_trap) + + .org ia64_ivt+0x5f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(taken_branch_trap) + DBG_FAULT(35) + FAULT(35) +END(taken_branch_trap) + + .org ia64_ivt+0x6000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(single_step_trap) + DBG_FAULT(36) + FAULT(36) +END(single_step_trap) + + .org ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Reserved + DBG_FAULT(37) + FAULT(37) + + .org ia64_ivt+0x6200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + DBG_FAULT(38) + FAULT(38) + + .org ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + DBG_FAULT(39) + FAULT(39) + + .org ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + DBG_FAULT(40) + FAULT(40) + + .org ia64_ivt+0x6500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + DBG_FAULT(41) + FAULT(41) + + .org ia64_ivt+0x6600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + DBG_FAULT(42) + FAULT(42) + + .org ia64_ivt+0x6700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6700 Entry 43 (size 16 bundles) Reserved + DBG_FAULT(43) + FAULT(43) + + .org ia64_ivt+0x6800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + DBG_FAULT(44) + FAULT(44) + + .org ia64_ivt+0x6900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(ia32_exception) + DBG_FAULT(45) + FAULT(45) +END(ia32_exception) + + .org ia64_ivt+0x6a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(ia32_intercept) + DBG_FAULT(46) +#ifdef CONFIG_IA32_SUPPORT + mov r31=pr + mov r16=cr.isr + ;; + extr.u r17=r16,16,8 // get ISR.code + mov r18=ar.eflag + mov r19=cr.iim // old eflag value + ;; + cmp.ne p6,p0=2,r17 +(p6) br.cond.spnt 1f // not a system flag fault + xor r16=r18,r19 + ;; + extr.u r17=r16,18,1 // get the eflags.ac bit + ;; + cmp.eq p6,p0=0,r17 +(p6) br.cond.spnt 1f // eflags.ac bit didn't change + ;; + mov pr=r31,-1 // restore predicate registers + rfi + +1: +#endif // CONFIG_IA32_SUPPORT + FAULT(46) +END(ia32_intercept) + + .org ia64_ivt+0x6b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) +ENTRY(ia32_interrupt) + DBG_FAULT(47) +#ifdef CONFIG_IA32_SUPPORT + mov r31=pr + br.sptk.many dispatch_to_ia32_handler +#else + FAULT(47) +#endif +END(ia32_interrupt) + + .org ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + DBG_FAULT(48) + FAULT(48) + + .org ia64_ivt+0x6d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + DBG_FAULT(49) + FAULT(49) + + .org ia64_ivt+0x6e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + DBG_FAULT(50) + FAULT(50) + + .org ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + DBG_FAULT(51) + FAULT(51) + + .org ia64_ivt+0x7000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7000 Entry 52 (size 16 bundles) Reserved + DBG_FAULT(52) + FAULT(52) + + .org ia64_ivt+0x7100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + DBG_FAULT(53) + FAULT(53) + + .org ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + DBG_FAULT(54) + FAULT(54) + + .org ia64_ivt+0x7300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + DBG_FAULT(55) + FAULT(55) + + .org ia64_ivt+0x7400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + DBG_FAULT(56) + FAULT(56) + + .org ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + DBG_FAULT(57) + FAULT(57) + + .org ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + DBG_FAULT(58) + FAULT(58) + + .org ia64_ivt+0x7700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + DBG_FAULT(59) + FAULT(59) + + .org ia64_ivt+0x7800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + DBG_FAULT(60) + FAULT(60) + + .org ia64_ivt+0x7900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + DBG_FAULT(61) + FAULT(61) + + .org ia64_ivt+0x7a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + DBG_FAULT(62) + FAULT(62) + + .org ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + DBG_FAULT(63) + FAULT(63) + + .org ia64_ivt+0x7c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + DBG_FAULT(64) + FAULT(64) + + .org ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + DBG_FAULT(65) + FAULT(65) + + .org ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + DBG_FAULT(66) + FAULT(66) + + .org ia64_ivt+0x7f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + DBG_FAULT(67) + FAULT(67) + +#ifdef CONFIG_IA32_SUPPORT + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + + // IA32 interrupt entry point + +ENTRY(dispatch_to_ia32_handler) + SAVE_MIN + ;; + mov r14=cr.isr + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i + adds r3=8,r2 // Base pointer for SAVE_REST + ;; + SAVE_REST + ;; + mov r15=0x80 + shr r14=r14,16 // Get interrupt number + ;; + cmp.ne p6,p0=r14,r15 +(p6) br.call.dpnt.many b6=non_ia32_syscall + + adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions + adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp + ;; + cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 + ld8 r8=[r14] // get r8 + ;; + st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP) + ;; + alloc r15=ar.pfs,0,0,6,0 // must first in an insn group + ;; + ld4 r8=[r14],8 // r8 == eax (syscall number) + mov r15=IA32_NR_syscalls + ;; + cmp.ltu.unc p6,p7=r8,r15 + ld4 out1=[r14],8 // r9 == ecx + ;; + ld4 out2=[r14],8 // r10 == edx + ;; + ld4 out0=[r14] // r11 == ebx + adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp + ;; + ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp + ;; + ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi + adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; + ld4 out4=[r14] // r15 == edi + movl r16=ia32_syscall_table + ;; +(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number + ld4 r2=[r2] // r2 = current_thread_info()->flags + ;; + ld8 r16=[r16] + and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit + ;; + mov b6=r16 + movl r15=ia32_ret_from_syscall + cmp.eq p8,p0=r2,r0 + ;; + mov rp=r15 +(p8) br.call.sptk.many b6=b6 + br.cond.sptk ia32_trace_syscall + +non_ia32_syscall: + alloc r15=ar.pfs,0,0,2,0 + mov out0=r14 // interrupt # + add out1=16,sp // pointer to pt_regs + ;; // avoid WAW on CFM + br.call.sptk.many rp=ia32_bad_interrupt +.ret1: movl r15=ia64_leave_kernel + ;; + mov rp=r15 + br.ret.sptk.many rp +END(dispatch_to_ia32_handler) + +#endif /* CONFIG_IA32_SUPPORT */ diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c new file mode 100644 index 000000000000..c3a04ee7f4f6 --- /dev/null +++ b/arch/ia64/kernel/machvec.c @@ -0,0 +1,70 @@ +#include <linux/config.h> +#include <linux/module.h> + +#include <asm/machvec.h> +#include <asm/system.h> + +#ifdef CONFIG_IA64_GENERIC + +#include <linux/kernel.h> +#include <linux/string.h> + +#include <asm/page.h> + +struct ia64_machine_vector ia64_mv; +EXPORT_SYMBOL(ia64_mv); + +static struct ia64_machine_vector * +lookup_machvec (const char *name) +{ + extern struct ia64_machine_vector machvec_start[]; + extern struct ia64_machine_vector machvec_end[]; + struct ia64_machine_vector *mv; + + for (mv = machvec_start; mv < machvec_end; ++mv) + if (strcmp (mv->name, name) == 0) + return mv; + + return 0; +} + +void +machvec_init (const char *name) +{ + struct ia64_machine_vector *mv; + + mv = lookup_machvec(name); + if (!mv) { + panic("generic kernel failed to find machine vector for platform %s!", name); + } + ia64_mv = *mv; + printk(KERN_INFO "booting generic kernel on platform %s\n", name); +} + +#endif /* CONFIG_IA64_GENERIC */ + +void +machvec_setup (char **arg) +{ +} +EXPORT_SYMBOL(machvec_setup); + +void +machvec_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) +{ +} +EXPORT_SYMBOL(machvec_timer_interrupt); + +void +machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir) +{ + mb(); +} +EXPORT_SYMBOL(machvec_dma_sync_single); + +void +machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir) +{ + mb(); +} +EXPORT_SYMBOL(machvec_dma_sync_sg); diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c new file mode 100644 index 000000000000..4d6c7b8f667b --- /dev/null +++ b/arch/ia64/kernel/mca.c @@ -0,0 +1,1470 @@ +/* + * File: mca.c + * Purpose: Generic MCA handling layer + * + * Updated for latest kernel + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * Copyright (C) 2002 Dell Inc. + * Copyright (C) Matt Domsch (Matt_Domsch@dell.com) + * + * Copyright (C) 2002 Intel + * Copyright (C) Jenna Hall (jenna.s.hall@intel.com) + * + * Copyright (C) 2001 Intel + * Copyright (C) Fred Lewis (frederick.v.lewis@intel.com) + * + * Copyright (C) 2000 Intel + * Copyright (C) Chuck Fleckenstein (cfleck@co.intel.com) + * + * Copyright (C) 1999, 2004 Silicon Graphics, Inc. + * Copyright (C) Vijay Chander(vijay@engr.sgi.com) + * + * 03/04/15 D. Mosberger Added INIT backtrace support. + * 02/03/25 M. Domsch GUID cleanups + * + * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs. CPU + * error flag, set SAL default return values, changed + * error record structure to linked list, added init call + * to sal_get_state_info_size(). + * + * 01/01/03 F. Lewis Added setup of CMCI and CPEI IRQs, logging of corrected + * platform errors, completed code for logging of + * corrected & uncorrected machine check errors, and + * updated for conformance with Nov. 2000 revision of the + * SAL 3.0 spec. + * 00/03/29 C. Fleckenstein Fixed PAL/SAL update issues, began MCA bug fixes, logging issues, + * added min save state dump, added INIT handler. + * + * 2003-12-08 Keith Owens <kaos@sgi.com> + * smp_call_function() must not be called from interrupt context (can + * deadlock on tasklist_lock). Use keventd to call smp_call_function(). + * + * 2004-02-01 Keith Owens <kaos@sgi.com> + * Avoid deadlock when using printk() for MCA and INIT records. + * Delete all record printing code, moved to salinfo_decode in user space. + * Mark variables and functions static where possible. + * Delete dead variables and functions. + * Reorder to remove the need for forward declarations and to consolidate + * related code. + */ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kallsyms.h> +#include <linux/smp_lock.h> +#include <linux/bootmem.h> +#include <linux/acpi.h> +#include <linux/timer.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/workqueue.h> + +#include <asm/delay.h> +#include <asm/machvec.h> +#include <asm/meminit.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/sal.h> +#include <asm/mca.h> + +#include <asm/irq.h> +#include <asm/hw_irq.h> + +#if defined(IA64_MCA_DEBUG_INFO) +# define IA64_MCA_DEBUG(fmt...) printk(fmt) +#else +# define IA64_MCA_DEBUG(fmt...) +#endif + +/* Used by mca_asm.S */ +ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state; +ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state; +u64 ia64_mca_serialize; +DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ +DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ +DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ +DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ + +unsigned long __per_cpu_mca[NR_CPUS]; + +/* In mca_asm.S */ +extern void ia64_monarch_init_handler (void); +extern void ia64_slave_init_handler (void); + +static ia64_mc_info_t ia64_mc_info; + +#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */ +#define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */ +#define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */ +#define CPE_HISTORY_LENGTH 5 +#define CMC_HISTORY_LENGTH 5 + +static struct timer_list cpe_poll_timer; +static struct timer_list cmc_poll_timer; +/* + * This variable tells whether we are currently in polling mode. + * Start with this in the wrong state so we won't play w/ timers + * before the system is ready. + */ +static int cmc_polling_enabled = 1; + +/* + * Clearing this variable prevents CPE polling from getting activated + * in mca_late_init. Use it if your system doesn't provide a CPEI, + * but encounters problems retrieving CPE logs. This should only be + * necessary for debugging. + */ +static int cpe_poll_enabled = 1; + +extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); + +static int mca_init; + +/* + * IA64_MCA log support + */ +#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ +#define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */ + +typedef struct ia64_state_log_s +{ + spinlock_t isl_lock; + int isl_index; + unsigned long isl_count; + ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ +} ia64_state_log_t; + +static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; + +#define IA64_LOG_ALLOCATE(it, size) \ + {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \ + (ia64_err_rec_t *)alloc_bootmem(size); \ + ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \ + (ia64_err_rec_t *)alloc_bootmem(size);} +#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) +#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) +#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) +#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index +#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index +#define IA64_LOG_INDEX_INC(it) \ + {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \ + ia64_state_log[it].isl_count++;} +#define IA64_LOG_INDEX_DEC(it) \ + ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index +#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])) +#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])) +#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count + +/* + * ia64_log_init + * Reset the OS ia64 log buffer + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) + * Outputs : None + */ +static void +ia64_log_init(int sal_info_type) +{ + u64 max_size = 0; + + IA64_LOG_NEXT_INDEX(sal_info_type) = 0; + IA64_LOG_LOCK_INIT(sal_info_type); + + // SAL will tell us the maximum size of any error record of this type + max_size = ia64_sal_get_state_info_size(sal_info_type); + if (!max_size) + /* alloc_bootmem() doesn't like zero-sized allocations! */ + return; + + // set up OS data structures to hold error info + IA64_LOG_ALLOCATE(sal_info_type, max_size); + memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size); + memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size); +} + +/* + * ia64_log_get + * + * Get the current MCA log from SAL and copy it into the OS log buffer. + * + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) + * irq_safe whether you can use printk at this point + * Outputs : size (total record length) + * *buffer (ptr to error record) + * + */ +static u64 +ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) +{ + sal_log_record_header_t *log_buffer; + u64 total_len = 0; + int s; + + IA64_LOG_LOCK(sal_info_type); + + /* Get the process state information */ + log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type); + + total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer); + + if (total_len) { + IA64_LOG_INDEX_INC(sal_info_type); + IA64_LOG_UNLOCK(sal_info_type); + if (irq_safe) { + IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. " + "Record length = %ld\n", __FUNCTION__, sal_info_type, total_len); + } + *buffer = (u8 *) log_buffer; + return total_len; + } else { + IA64_LOG_UNLOCK(sal_info_type); + return 0; + } +} + +/* + * ia64_mca_log_sal_error_record + * + * This function retrieves a specified error record type from SAL + * and wakes up any processes waiting for error records. + * + * Inputs : sal_info_type (Type of error record MCA/CMC/CPE/INIT) + */ +static void +ia64_mca_log_sal_error_record(int sal_info_type) +{ + u8 *buffer; + sal_log_record_header_t *rh; + u64 size; + int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT; +#ifdef IA64_MCA_DEBUG_INFO + static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" }; +#endif + + size = ia64_log_get(sal_info_type, &buffer, irq_safe); + if (!size) + return; + + salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe); + + if (irq_safe) + IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n", + smp_processor_id(), + sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN"); + + /* Clear logs from corrected errors in case there's no user-level logger */ + rh = (sal_log_record_header_t *)buffer; + if (rh->severity == sal_log_severity_corrected) + ia64_sal_clear_state_info(sal_info_type); +} + +/* + * platform dependent error handling + */ +#ifndef PLATFORM_MCA_HANDLERS + +#ifdef CONFIG_ACPI + +static int cpe_vector = -1; + +static irqreturn_t +ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) +{ + static unsigned long cpe_history[CPE_HISTORY_LENGTH]; + static int index; + static DEFINE_SPINLOCK(cpe_history_lock); + + IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", + __FUNCTION__, cpe_irq, smp_processor_id()); + + /* SAL spec states this should run w/ interrupts enabled */ + local_irq_enable(); + + /* Get the CPE error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); + + spin_lock(&cpe_history_lock); + if (!cpe_poll_enabled && cpe_vector >= 0) { + + int i, count = 1; /* we know 1 happened now */ + unsigned long now = jiffies; + + for (i = 0; i < CPE_HISTORY_LENGTH; i++) { + if (now - cpe_history[i] <= HZ) + count++; + } + + IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH); + if (count >= CPE_HISTORY_LENGTH) { + + cpe_poll_enabled = 1; + spin_unlock(&cpe_history_lock); + disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR)); + + /* + * Corrected errors will still be corrected, but + * make sure there's a log somewhere that indicates + * something is generating more than we can handle. + */ + printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n"); + + mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL); + + /* lock already released, get out now */ + return IRQ_HANDLED; + } else { + cpe_history[index++] = now; + if (index == CPE_HISTORY_LENGTH) + index = 0; + } + } + spin_unlock(&cpe_history_lock); + return IRQ_HANDLED; +} + +#endif /* CONFIG_ACPI */ + +static void +show_min_state (pal_min_state_area_t *minstate) +{ + u64 iip = minstate->pmsa_iip + ((struct ia64_psr *)(&minstate->pmsa_ipsr))->ri; + u64 xip = minstate->pmsa_xip + ((struct ia64_psr *)(&minstate->pmsa_xpsr))->ri; + + printk("NaT bits\t%016lx\n", minstate->pmsa_nat_bits); + printk("pr\t\t%016lx\n", minstate->pmsa_pr); + printk("b0\t\t%016lx ", minstate->pmsa_br0); print_symbol("%s\n", minstate->pmsa_br0); + printk("ar.rsc\t\t%016lx\n", minstate->pmsa_rsc); + printk("cr.iip\t\t%016lx ", iip); print_symbol("%s\n", iip); + printk("cr.ipsr\t\t%016lx\n", minstate->pmsa_ipsr); + printk("cr.ifs\t\t%016lx\n", minstate->pmsa_ifs); + printk("xip\t\t%016lx ", xip); print_symbol("%s\n", xip); + printk("xpsr\t\t%016lx\n", minstate->pmsa_xpsr); + printk("xfs\t\t%016lx\n", minstate->pmsa_xfs); + printk("b1\t\t%016lx ", minstate->pmsa_br1); + print_symbol("%s\n", minstate->pmsa_br1); + + printk("\nstatic registers r0-r15:\n"); + printk(" r0- 3 %016lx %016lx %016lx %016lx\n", + 0UL, minstate->pmsa_gr[0], minstate->pmsa_gr[1], minstate->pmsa_gr[2]); + printk(" r4- 7 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_gr[3], minstate->pmsa_gr[4], + minstate->pmsa_gr[5], minstate->pmsa_gr[6]); + printk(" r8-11 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_gr[7], minstate->pmsa_gr[8], + minstate->pmsa_gr[9], minstate->pmsa_gr[10]); + printk("r12-15 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_gr[11], minstate->pmsa_gr[12], + minstate->pmsa_gr[13], minstate->pmsa_gr[14]); + + printk("\nbank 0:\n"); + printk("r16-19 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_bank0_gr[0], minstate->pmsa_bank0_gr[1], + minstate->pmsa_bank0_gr[2], minstate->pmsa_bank0_gr[3]); + printk("r20-23 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_bank0_gr[4], minstate->pmsa_bank0_gr[5], + minstate->pmsa_bank0_gr[6], minstate->pmsa_bank0_gr[7]); + printk("r24-27 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_bank0_gr[8], minstate->pmsa_bank0_gr[9], + minstate->pmsa_bank0_gr[10], minstate->pmsa_bank0_gr[11]); + printk("r28-31 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_bank0_gr[12], minstate->pmsa_bank0_gr[13], + minstate->pmsa_bank0_gr[14], minstate->pmsa_bank0_gr[15]); + + printk("\nbank 1:\n"); + printk("r16-19 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_bank1_gr[0], minstate->pmsa_bank1_gr[1], + minstate->pmsa_bank1_gr[2], minstate->pmsa_bank1_gr[3]); + printk("r20-23 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_bank1_gr[4], minstate->pmsa_bank1_gr[5], + minstate->pmsa_bank1_gr[6], minstate->pmsa_bank1_gr[7]); + printk("r24-27 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_bank1_gr[8], minstate->pmsa_bank1_gr[9], + minstate->pmsa_bank1_gr[10], minstate->pmsa_bank1_gr[11]); + printk("r28-31 %016lx %016lx %016lx %016lx\n", + minstate->pmsa_bank1_gr[12], minstate->pmsa_bank1_gr[13], + minstate->pmsa_bank1_gr[14], minstate->pmsa_bank1_gr[15]); +} + +static void +fetch_min_state (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw) +{ + u64 *dst_banked, *src_banked, bit, shift, nat_bits; + int i; + + /* + * First, update the pt-regs and switch-stack structures with the contents stored + * in the min-state area: + */ + if (((struct ia64_psr *) &ms->pmsa_ipsr)->ic == 0) { + pt->cr_ipsr = ms->pmsa_xpsr; + pt->cr_iip = ms->pmsa_xip; + pt->cr_ifs = ms->pmsa_xfs; + } else { + pt->cr_ipsr = ms->pmsa_ipsr; + pt->cr_iip = ms->pmsa_iip; + pt->cr_ifs = ms->pmsa_ifs; + } + pt->ar_rsc = ms->pmsa_rsc; + pt->pr = ms->pmsa_pr; + pt->r1 = ms->pmsa_gr[0]; + pt->r2 = ms->pmsa_gr[1]; + pt->r3 = ms->pmsa_gr[2]; + sw->r4 = ms->pmsa_gr[3]; + sw->r5 = ms->pmsa_gr[4]; + sw->r6 = ms->pmsa_gr[5]; + sw->r7 = ms->pmsa_gr[6]; + pt->r8 = ms->pmsa_gr[7]; + pt->r9 = ms->pmsa_gr[8]; + pt->r10 = ms->pmsa_gr[9]; + pt->r11 = ms->pmsa_gr[10]; + pt->r12 = ms->pmsa_gr[11]; + pt->r13 = ms->pmsa_gr[12]; + pt->r14 = ms->pmsa_gr[13]; + pt->r15 = ms->pmsa_gr[14]; + dst_banked = &pt->r16; /* r16-r31 are contiguous in struct pt_regs */ + src_banked = ms->pmsa_bank1_gr; + for (i = 0; i < 16; ++i) + dst_banked[i] = src_banked[i]; + pt->b0 = ms->pmsa_br0; + sw->b1 = ms->pmsa_br1; + + /* construct the NaT bits for the pt-regs structure: */ +# define PUT_NAT_BIT(dst, addr) \ + do { \ + bit = nat_bits & 1; nat_bits >>= 1; \ + shift = ((unsigned long) addr >> 3) & 0x3f; \ + dst = ((dst) & ~(1UL << shift)) | (bit << shift); \ + } while (0) + + /* Rotate the saved NaT bits such that bit 0 corresponds to pmsa_gr[0]: */ + shift = ((unsigned long) &ms->pmsa_gr[0] >> 3) & 0x3f; + nat_bits = (ms->pmsa_nat_bits >> shift) | (ms->pmsa_nat_bits << (64 - shift)); + + PUT_NAT_BIT(sw->caller_unat, &pt->r1); + PUT_NAT_BIT(sw->caller_unat, &pt->r2); + PUT_NAT_BIT(sw->caller_unat, &pt->r3); + PUT_NAT_BIT(sw->ar_unat, &sw->r4); + PUT_NAT_BIT(sw->ar_unat, &sw->r5); + PUT_NAT_BIT(sw->ar_unat, &sw->r6); + PUT_NAT_BIT(sw->ar_unat, &sw->r7); + PUT_NAT_BIT(sw->caller_unat, &pt->r8); PUT_NAT_BIT(sw->caller_unat, &pt->r9); + PUT_NAT_BIT(sw->caller_unat, &pt->r10); PUT_NAT_BIT(sw->caller_unat, &pt->r11); + PUT_NAT_BIT(sw->caller_unat, &pt->r12); PUT_NAT_BIT(sw->caller_unat, &pt->r13); + PUT_NAT_BIT(sw->caller_unat, &pt->r14); PUT_NAT_BIT(sw->caller_unat, &pt->r15); + nat_bits >>= 16; /* skip over bank0 NaT bits */ + PUT_NAT_BIT(sw->caller_unat, &pt->r16); PUT_NAT_BIT(sw->caller_unat, &pt->r17); + PUT_NAT_BIT(sw->caller_unat, &pt->r18); PUT_NAT_BIT(sw->caller_unat, &pt->r19); + PUT_NAT_BIT(sw->caller_unat, &pt->r20); PUT_NAT_BIT(sw->caller_unat, &pt->r21); + PUT_NAT_BIT(sw->caller_unat, &pt->r22); PUT_NAT_BIT(sw->caller_unat, &pt->r23); + PUT_NAT_BIT(sw->caller_unat, &pt->r24); PUT_NAT_BIT(sw->caller_unat, &pt->r25); + PUT_NAT_BIT(sw->caller_unat, &pt->r26); PUT_NAT_BIT(sw->caller_unat, &pt->r27); + PUT_NAT_BIT(sw->caller_unat, &pt->r28); PUT_NAT_BIT(sw->caller_unat, &pt->r29); + PUT_NAT_BIT(sw->caller_unat, &pt->r30); PUT_NAT_BIT(sw->caller_unat, &pt->r31); +} + +static void +init_handler_platform (pal_min_state_area_t *ms, + struct pt_regs *pt, struct switch_stack *sw) +{ + struct unw_frame_info info; + + /* if a kernel debugger is available call it here else just dump the registers */ + + /* + * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be + * generated via the BMC's command-line interface, but since the console is on the + * same serial line, the user will need some time to switch out of the BMC before + * the dump begins. + */ + printk("Delaying for 5 seconds...\n"); + udelay(5*1000000); + show_min_state(ms); + + printk("Backtrace of current task (pid %d, %s)\n", current->pid, current->comm); + fetch_min_state(ms, pt, sw); + unw_init_from_interruption(&info, current, pt, sw); + ia64_do_show_stack(&info, NULL); + +#ifdef CONFIG_SMP + /* read_trylock() would be handy... */ + if (!tasklist_lock.write_lock) + read_lock(&tasklist_lock); +#endif + { + struct task_struct *g, *t; + do_each_thread (g, t) { + if (t == current) + continue; + + printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); + show_stack(t, NULL); + } while_each_thread (g, t); + } +#ifdef CONFIG_SMP + if (!tasklist_lock.write_lock) + read_unlock(&tasklist_lock); +#endif + + printk("\nINIT dump complete. Please reboot now.\n"); + while (1); /* hang city if no debugger */ +} + +#ifdef CONFIG_ACPI +/* + * ia64_mca_register_cpev + * + * Register the corrected platform error vector with SAL. + * + * Inputs + * cpev Corrected Platform Error Vector number + * + * Outputs + * None + */ +static void +ia64_mca_register_cpev (int cpev) +{ + /* Register the CPE interrupt vector with SAL */ + struct ia64_sal_retval isrv; + + isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0); + if (isrv.status) { + printk(KERN_ERR "Failed to register Corrected Platform " + "Error interrupt vector with SAL (status %ld)\n", isrv.status); + return; + } + + IA64_MCA_DEBUG("%s: corrected platform error " + "vector %#x registered\n", __FUNCTION__, cpev); +} +#endif /* CONFIG_ACPI */ + +#endif /* PLATFORM_MCA_HANDLERS */ + +/* + * ia64_mca_cmc_vector_setup + * + * Setup the corrected machine check vector register in the processor. + * (The interrupt is masked on boot. ia64_mca_late_init unmask this.) + * This function is invoked on a per-processor basis. + * + * Inputs + * None + * + * Outputs + * None + */ +void +ia64_mca_cmc_vector_setup (void) +{ + cmcv_reg_t cmcv; + + cmcv.cmcv_regval = 0; + cmcv.cmcv_mask = 1; /* Mask/disable interrupt at first */ + cmcv.cmcv_vector = IA64_CMC_VECTOR; + ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); + + IA64_MCA_DEBUG("%s: CPU %d corrected " + "machine check vector %#x registered.\n", + __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR); + + IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n", + __FUNCTION__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV)); +} + +/* + * ia64_mca_cmc_vector_disable + * + * Mask the corrected machine check vector register in the processor. + * This function is invoked on a per-processor basis. + * + * Inputs + * dummy(unused) + * + * Outputs + * None + */ +static void +ia64_mca_cmc_vector_disable (void *dummy) +{ + cmcv_reg_t cmcv; + + cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); + + cmcv.cmcv_mask = 1; /* Mask/disable interrupt */ + ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); + + IA64_MCA_DEBUG("%s: CPU %d corrected " + "machine check vector %#x disabled.\n", + __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector); +} + +/* + * ia64_mca_cmc_vector_enable + * + * Unmask the corrected machine check vector register in the processor. + * This function is invoked on a per-processor basis. + * + * Inputs + * dummy(unused) + * + * Outputs + * None + */ +static void +ia64_mca_cmc_vector_enable (void *dummy) +{ + cmcv_reg_t cmcv; + + cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); + + cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */ + ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); + + IA64_MCA_DEBUG("%s: CPU %d corrected " + "machine check vector %#x enabled.\n", + __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector); +} + +/* + * ia64_mca_cmc_vector_disable_keventd + * + * Called via keventd (smp_call_function() is not safe in interrupt context) to + * disable the cmc interrupt vector. + */ +static void +ia64_mca_cmc_vector_disable_keventd(void *unused) +{ + on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0); +} + +/* + * ia64_mca_cmc_vector_enable_keventd + * + * Called via keventd (smp_call_function() is not safe in interrupt context) to + * enable the cmc interrupt vector. + */ +static void +ia64_mca_cmc_vector_enable_keventd(void *unused) +{ + on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0); +} + +/* + * ia64_mca_wakeup_ipi_wait + * + * Wait for the inter-cpu interrupt to be sent by the + * monarch processor once it is done with handling the + * MCA. + * + * Inputs : None + * Outputs : None + */ +static void +ia64_mca_wakeup_ipi_wait(void) +{ + int irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6); + int irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f); + u64 irr = 0; + + do { + switch(irr_num) { + case 0: + irr = ia64_getreg(_IA64_REG_CR_IRR0); + break; + case 1: + irr = ia64_getreg(_IA64_REG_CR_IRR1); + break; + case 2: + irr = ia64_getreg(_IA64_REG_CR_IRR2); + break; + case 3: + irr = ia64_getreg(_IA64_REG_CR_IRR3); + break; + } + cpu_relax(); + } while (!(irr & (1UL << irr_bit))) ; +} + +/* + * ia64_mca_wakeup + * + * Send an inter-cpu interrupt to wake-up a particular cpu + * and mark that cpu to be out of rendez. + * + * Inputs : cpuid + * Outputs : None + */ +static void +ia64_mca_wakeup(int cpu) +{ + platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0); + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + +} + +/* + * ia64_mca_wakeup_all + * + * Wakeup all the cpus which have rendez'ed previously. + * + * Inputs : None + * Outputs : None + */ +static void +ia64_mca_wakeup_all(void) +{ + int cpu; + + /* Clear the Rendez checkin flag for all cpus */ + for(cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu)) + continue; + if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) + ia64_mca_wakeup(cpu); + } + +} + +/* + * ia64_mca_rendez_interrupt_handler + * + * This is handler used to put slave processors into spinloop + * while the monarch processor does the mca handling and later + * wake each slave up once the monarch is done. + * + * Inputs : None + * Outputs : None + */ +static irqreturn_t +ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) +{ + unsigned long flags; + int cpu = smp_processor_id(); + + /* Mask all interrupts */ + local_irq_save(flags); + + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; + /* Register with the SAL monarch that the slave has + * reached SAL + */ + ia64_sal_mc_rendez(); + + /* Wait for the wakeup IPI from the monarch + * This waiting is done by polling on the wakeup-interrupt + * vector bit in the processor's IRRs + */ + ia64_mca_wakeup_ipi_wait(); + + /* Enable all interrupts */ + local_irq_restore(flags); + return IRQ_HANDLED; +} + +/* + * ia64_mca_wakeup_int_handler + * + * The interrupt handler for processing the inter-cpu interrupt to the + * slave cpu which was spinning in the rendez loop. + * Since this spinning is done by turning off the interrupts and + * polling on the wakeup-interrupt bit in the IRR, there is + * nothing useful to be done in the handler. + * + * Inputs : wakeup_irq (Wakeup-interrupt bit) + * arg (Interrupt handler specific argument) + * ptregs (Exception frame at the time of the interrupt) + * Outputs : None + * + */ +static irqreturn_t +ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs) +{ + return IRQ_HANDLED; +} + +/* + * ia64_return_to_sal_check + * + * This is function called before going back from the OS_MCA handler + * to the OS_MCA dispatch code which finally takes the control back + * to the SAL. + * The main purpose of this routine is to setup the OS_MCA to SAL + * return state which can be used by the OS_MCA dispatch code + * just before going back to SAL. + * + * Inputs : None + * Outputs : None + */ + +static void +ia64_return_to_sal_check(int recover) +{ + + /* Copy over some relevant stuff from the sal_to_os_mca_handoff + * so that it can be used at the time of os_mca_to_sal_handoff + */ + ia64_os_to_sal_handoff_state.imots_sal_gp = + ia64_sal_to_os_handoff_state.imsto_sal_gp; + + ia64_os_to_sal_handoff_state.imots_sal_check_ra = + ia64_sal_to_os_handoff_state.imsto_sal_check_ra; + + if (recover) + ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED; + else + ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT; + + /* Default = tell SAL to return to same context */ + ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT; + + ia64_os_to_sal_handoff_state.imots_new_min_state = + (u64 *)ia64_sal_to_os_handoff_state.pal_min_state; + +} + +/* Function pointer for extra MCA recovery */ +int (*ia64_mca_ucmc_extension) + (void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*) + = NULL; + +int +ia64_reg_MCA_extension(void *fn) +{ + if (ia64_mca_ucmc_extension) + return 1; + + ia64_mca_ucmc_extension = fn; + return 0; +} + +void +ia64_unreg_MCA_extension(void) +{ + if (ia64_mca_ucmc_extension) + ia64_mca_ucmc_extension = NULL; +} + +EXPORT_SYMBOL(ia64_reg_MCA_extension); +EXPORT_SYMBOL(ia64_unreg_MCA_extension); + +/* + * ia64_mca_ucmc_handler + * + * This is uncorrectable machine check handler called from OS_MCA + * dispatch code which is in turn called from SAL_CHECK(). + * This is the place where the core of OS MCA handling is done. + * Right now the logs are extracted and displayed in a well-defined + * format. This handler code is supposed to be run only on the + * monarch processor. Once the monarch is done with MCA handling + * further MCA logging is enabled by clearing logs. + * Monarch also has the duty of sending wakeup-IPIs to pull the + * slave processors out of rendezvous spinloop. + * + * Inputs : None + * Outputs : None + */ +void +ia64_mca_ucmc_handler(void) +{ + pal_processor_state_info_t *psp = (pal_processor_state_info_t *) + &ia64_sal_to_os_handoff_state.proc_state_param; + int recover; + + /* Get the MCA error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); + + /* TLB error is only exist in this SAL error record */ + recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) + /* other error recovery */ + || (ia64_mca_ucmc_extension + && ia64_mca_ucmc_extension( + IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), + &ia64_sal_to_os_handoff_state, + &ia64_os_to_sal_handoff_state)); + + if (recover) { + sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA); + rh->severity = sal_log_severity_corrected; + ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); + } + /* + * Wakeup all the processors which are spinning in the rendezvous + * loop. + */ + ia64_mca_wakeup_all(); + + /* Return to SAL */ + ia64_return_to_sal_check(recover); +} + +static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL); +static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL); + +/* + * ia64_mca_cmc_int_handler + * + * This is corrected machine check interrupt handler. + * Right now the logs are extracted and displayed in a well-defined + * format. + * + * Inputs + * interrupt number + * client data arg ptr + * saved registers ptr + * + * Outputs + * None + */ +static irqreturn_t +ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) +{ + static unsigned long cmc_history[CMC_HISTORY_LENGTH]; + static int index; + static DEFINE_SPINLOCK(cmc_history_lock); + + IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", + __FUNCTION__, cmc_irq, smp_processor_id()); + + /* SAL spec states this should run w/ interrupts enabled */ + local_irq_enable(); + + /* Get the CMC error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); + + spin_lock(&cmc_history_lock); + if (!cmc_polling_enabled) { + int i, count = 1; /* we know 1 happened now */ + unsigned long now = jiffies; + + for (i = 0; i < CMC_HISTORY_LENGTH; i++) { + if (now - cmc_history[i] <= HZ) + count++; + } + + IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH); + if (count >= CMC_HISTORY_LENGTH) { + + cmc_polling_enabled = 1; + spin_unlock(&cmc_history_lock); + schedule_work(&cmc_disable_work); + + /* + * Corrected errors will still be corrected, but + * make sure there's a log somewhere that indicates + * something is generating more than we can handle. + */ + printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n"); + + mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); + + /* lock already released, get out now */ + return IRQ_HANDLED; + } else { + cmc_history[index++] = now; + if (index == CMC_HISTORY_LENGTH) + index = 0; + } + } + spin_unlock(&cmc_history_lock); + return IRQ_HANDLED; +} + +/* + * ia64_mca_cmc_int_caller + * + * Triggered by sw interrupt from CMC polling routine. Calls + * real interrupt handler and either triggers a sw interrupt + * on the next cpu or does cleanup at the end. + * + * Inputs + * interrupt number + * client data arg ptr + * saved registers ptr + * Outputs + * handled + */ +static irqreturn_t +ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs) +{ + static int start_count = -1; + unsigned int cpuid; + + cpuid = smp_processor_id(); + + /* If first cpu, update count */ + if (start_count == -1) + start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC); + + ia64_mca_cmc_int_handler(cmc_irq, arg, ptregs); + + for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); + + if (cpuid < NR_CPUS) { + platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); + } else { + /* If no log record, switch out of polling mode */ + if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) { + + printk(KERN_WARNING "Returning to interrupt driven CMC handler\n"); + schedule_work(&cmc_enable_work); + cmc_polling_enabled = 0; + + } else { + + mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); + } + + start_count = -1; + } + + return IRQ_HANDLED; +} + +/* + * ia64_mca_cmc_poll + * + * Poll for Corrected Machine Checks (CMCs) + * + * Inputs : dummy(unused) + * Outputs : None + * + */ +static void +ia64_mca_cmc_poll (unsigned long dummy) +{ + /* Trigger a CMC interrupt cascade */ + platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); +} + +/* + * ia64_mca_cpe_int_caller + * + * Triggered by sw interrupt from CPE polling routine. Calls + * real interrupt handler and either triggers a sw interrupt + * on the next cpu or does cleanup at the end. + * + * Inputs + * interrupt number + * client data arg ptr + * saved registers ptr + * Outputs + * handled + */ +#ifdef CONFIG_ACPI + +static irqreturn_t +ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs) +{ + static int start_count = -1; + static int poll_time = MIN_CPE_POLL_INTERVAL; + unsigned int cpuid; + + cpuid = smp_processor_id(); + + /* If first cpu, update count */ + if (start_count == -1) + start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE); + + ia64_mca_cpe_int_handler(cpe_irq, arg, ptregs); + + for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); + + if (cpuid < NR_CPUS) { + platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); + } else { + /* + * If a log was recorded, increase our polling frequency, + * otherwise, backoff or return to interrupt mode. + */ + if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) { + poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2); + } else if (cpe_vector < 0) { + poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2); + } else { + poll_time = MIN_CPE_POLL_INTERVAL; + + printk(KERN_WARNING "Returning to interrupt driven CPE handler\n"); + enable_irq(local_vector_to_irq(IA64_CPE_VECTOR)); + cpe_poll_enabled = 0; + } + + if (cpe_poll_enabled) + mod_timer(&cpe_poll_timer, jiffies + poll_time); + start_count = -1; + } + + return IRQ_HANDLED; +} + +#endif /* CONFIG_ACPI */ + +/* + * ia64_mca_cpe_poll + * + * Poll for Corrected Platform Errors (CPEs), trigger interrupt + * on first cpu, from there it will trickle through all the cpus. + * + * Inputs : dummy(unused) + * Outputs : None + * + */ +static void +ia64_mca_cpe_poll (unsigned long dummy) +{ + /* Trigger a CPE interrupt cascade */ + platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); +} + +/* + * C portion of the OS INIT handler + * + * Called from ia64_monarch_init_handler + * + * Inputs: pointer to pt_regs where processor info was saved. + * + * Returns: + * 0 if SAL must warm boot the System + * 1 if SAL must return to interrupted context using PAL_MC_RESUME + * + */ +void +ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw) +{ + pal_min_state_area_t *ms; + + oops_in_progress = 1; /* avoid deadlock in printk, but it makes recovery dodgy */ + console_loglevel = 15; /* make sure printks make it to console */ + + printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n", + ia64_sal_to_os_handoff_state.proc_state_param); + + /* + * Address of minstate area provided by PAL is physical, + * uncacheable (bit 63 set). Convert to Linux virtual + * address in region 6. + */ + ms = (pal_min_state_area_t *)(ia64_sal_to_os_handoff_state.pal_min_state | (6ul<<61)); + + init_handler_platform(ms, pt, sw); /* call platform specific routines */ +} + +static int __init +ia64_mca_disable_cpe_polling(char *str) +{ + cpe_poll_enabled = 0; + return 1; +} + +__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); + +static struct irqaction cmci_irqaction = { + .handler = ia64_mca_cmc_int_handler, + .flags = SA_INTERRUPT, + .name = "cmc_hndlr" +}; + +static struct irqaction cmcp_irqaction = { + .handler = ia64_mca_cmc_int_caller, + .flags = SA_INTERRUPT, + .name = "cmc_poll" +}; + +static struct irqaction mca_rdzv_irqaction = { + .handler = ia64_mca_rendez_int_handler, + .flags = SA_INTERRUPT, + .name = "mca_rdzv" +}; + +static struct irqaction mca_wkup_irqaction = { + .handler = ia64_mca_wakeup_int_handler, + .flags = SA_INTERRUPT, + .name = "mca_wkup" +}; + +#ifdef CONFIG_ACPI +static struct irqaction mca_cpe_irqaction = { + .handler = ia64_mca_cpe_int_handler, + .flags = SA_INTERRUPT, + .name = "cpe_hndlr" +}; + +static struct irqaction mca_cpep_irqaction = { + .handler = ia64_mca_cpe_int_caller, + .flags = SA_INTERRUPT, + .name = "cpe_poll" +}; +#endif /* CONFIG_ACPI */ + +/* Do per-CPU MCA-related initialization. */ + +void __devinit +ia64_mca_cpu_init(void *cpu_data) +{ + void *pal_vaddr; + + if (smp_processor_id() == 0) { + void *mca_data; + int cpu; + + mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) + * NR_CPUS); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + __per_cpu_mca[cpu] = __pa(mca_data); + mca_data += sizeof(struct ia64_mca_cpu); + } + } + + /* + * The MCA info structure was allocated earlier and its + * physical address saved in __per_cpu_mca[cpu]. Copy that + * address * to ia64_mca_data so we can access it as a per-CPU + * variable. + */ + __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()]; + + /* + * Stash away a copy of the PTE needed to map the per-CPU page. + * We may need it during MCA recovery. + */ + __get_cpu_var(ia64_mca_per_cpu_pte) = + pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL)); + + /* + * Also, stash away a copy of the PAL address and the PTE + * needed to map it. + */ + pal_vaddr = efi_get_pal_addr(); + if (!pal_vaddr) + return; + __get_cpu_var(ia64_mca_pal_base) = + GRANULEROUNDDOWN((unsigned long) pal_vaddr); + __get_cpu_var(ia64_mca_pal_pte) = pte_val(mk_pte_phys(__pa(pal_vaddr), + PAGE_KERNEL)); +} + +/* + * ia64_mca_init + * + * Do all the system level mca specific initialization. + * + * 1. Register spinloop and wakeup request interrupt vectors + * + * 2. Register OS_MCA handler entry point + * + * 3. Register OS_INIT handler entry point + * + * 4. Initialize MCA/CMC/INIT related log buffers maintained by the OS. + * + * Note that this initialization is done very early before some kernel + * services are available. + * + * Inputs : None + * + * Outputs : None + */ +void __init +ia64_mca_init(void) +{ + ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler; + ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler; + ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; + int i; + s64 rc; + struct ia64_sal_retval isrv; + u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ + + IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__); + + /* Clear the Rendez checkin flag for all cpus */ + for(i = 0 ; i < NR_CPUS; i++) + ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + + /* + * Register the rendezvous spinloop and wakeup mechanism with SAL + */ + + /* Register the rendezvous interrupt vector with SAL */ + while (1) { + isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_RENDEZ_VECTOR, + timeout, + SAL_MC_PARAM_RZ_ALWAYS); + rc = isrv.status; + if (rc == 0) + break; + if (rc == -2) { + printk(KERN_INFO "Increasing MCA rendezvous timeout from " + "%ld to %ld milliseconds\n", timeout, isrv.v0); + timeout = isrv.v0; + continue; + } + printk(KERN_ERR "Failed to register rendezvous interrupt " + "with SAL (status %ld)\n", rc); + return; + } + + /* Register the wakeup interrupt vector with SAL */ + isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_WAKEUP_VECTOR, + 0, 0); + rc = isrv.status; + if (rc) { + printk(KERN_ERR "Failed to register wakeup interrupt with SAL " + "(status %ld)\n", rc); + return; + } + + IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __FUNCTION__); + + ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp); + /* + * XXX - disable SAL checksum by setting size to 0; should be + * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch); + */ + ia64_mc_info.imi_mca_handler_size = 0; + + /* Register the os mca handler with SAL */ + if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, + ia64_mc_info.imi_mca_handler, + ia64_tpa(mca_hldlr_ptr->gp), + ia64_mc_info.imi_mca_handler_size, + 0, 0, 0))) + { + printk(KERN_ERR "Failed to register OS MCA handler with SAL " + "(status %ld)\n", rc); + return; + } + + IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __FUNCTION__, + ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp)); + + /* + * XXX - disable SAL checksum by setting size to 0, should be + * size of the actual init handler in mca_asm.S. + */ + ia64_mc_info.imi_monarch_init_handler = ia64_tpa(mon_init_ptr->fp); + ia64_mc_info.imi_monarch_init_handler_size = 0; + ia64_mc_info.imi_slave_init_handler = ia64_tpa(slave_init_ptr->fp); + ia64_mc_info.imi_slave_init_handler_size = 0; + + IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__, + ia64_mc_info.imi_monarch_init_handler); + + /* Register the os init handler with SAL */ + if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, + ia64_mc_info.imi_monarch_init_handler, + ia64_tpa(ia64_getreg(_IA64_REG_GP)), + ia64_mc_info.imi_monarch_init_handler_size, + ia64_mc_info.imi_slave_init_handler, + ia64_tpa(ia64_getreg(_IA64_REG_GP)), + ia64_mc_info.imi_slave_init_handler_size))) + { + printk(KERN_ERR "Failed to register m/s INIT handlers with SAL " + "(status %ld)\n", rc); + return; + } + + IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__); + + /* + * Configure the CMCI/P vector and handler. Interrupts for CMC are + * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c). + */ + register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction); + register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction); + ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */ + + /* Setup the MCA rendezvous interrupt vector */ + register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction); + + /* Setup the MCA wakeup interrupt vector */ + register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction); + +#ifdef CONFIG_ACPI + /* Setup the CPEI/P vector and handler */ + cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); + register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); +#endif + + /* Initialize the areas set aside by the OS to buffer the + * platform/processor error states for MCA/INIT/CMC + * handling. + */ + ia64_log_init(SAL_INFO_TYPE_MCA); + ia64_log_init(SAL_INFO_TYPE_INIT); + ia64_log_init(SAL_INFO_TYPE_CMC); + ia64_log_init(SAL_INFO_TYPE_CPE); + + mca_init = 1; + printk(KERN_INFO "MCA related initialization done\n"); +} + +/* + * ia64_mca_late_init + * + * Opportunity to setup things that require initialization later + * than ia64_mca_init. Setup a timer to poll for CPEs if the + * platform doesn't support an interrupt driven mechanism. + * + * Inputs : None + * Outputs : Status + */ +static int __init +ia64_mca_late_init(void) +{ + if (!mca_init) + return 0; + + /* Setup the CMCI/P vector and handler */ + init_timer(&cmc_poll_timer); + cmc_poll_timer.function = ia64_mca_cmc_poll; + + /* Unmask/enable the vector */ + cmc_polling_enabled = 0; + schedule_work(&cmc_enable_work); + + IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __FUNCTION__); + +#ifdef CONFIG_ACPI + /* Setup the CPEI/P vector and handler */ + init_timer(&cpe_poll_timer); + cpe_poll_timer.function = ia64_mca_cpe_poll; + + { + irq_desc_t *desc; + unsigned int irq; + + if (cpe_vector >= 0) { + /* If platform supports CPEI, enable the irq. */ + cpe_poll_enabled = 0; + for (irq = 0; irq < NR_IRQS; ++irq) + if (irq_to_vector(irq) == cpe_vector) { + desc = irq_descp(irq); + desc->status |= IRQ_PER_CPU; + setup_irq(irq, &mca_cpe_irqaction); + } + ia64_mca_register_cpev(cpe_vector); + IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); + } else { + /* If platform doesn't support CPEI, get the timer going. */ + if (cpe_poll_enabled) { + ia64_mca_cpe_poll(0UL); + IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__); + } + } + } +#endif + + return 0; +} + +device_initcall(ia64_mca_late_init); diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S new file mode 100644 index 000000000000..cf3f8014f9ad --- /dev/null +++ b/arch/ia64/kernel/mca_asm.S @@ -0,0 +1,928 @@ +// +// assembly portion of the IA64 MCA handling +// +// Mods by cfleck to integrate into kernel build +// 00/03/15 davidm Added various stop bits to get a clean compile +// +// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp +// kstack, switch modes, jump to C INIT handler +// +// 02/01/04 J.Hall <jenna.s.hall@intel.com> +// Before entering virtual mode code: +// 1. Check for TLB CPU error +// 2. Restore current thread pointer to kr6 +// 3. Move stack ptr 16 bytes to conform to C calling convention +// +// 04/11/12 Russ Anderson <rja@sgi.com> +// Added per cpu MCA/INIT stack save areas. +// +#include <linux/config.h> +#include <linux/threads.h> + +#include <asm/asmmacro.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/mca_asm.h> +#include <asm/mca.h> + +/* + * When we get a machine check, the kernel stack pointer is no longer + * valid, so we need to set a new stack pointer. + */ +#define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */ + +/* + * Needed for return context to SAL + */ +#define IA64_MCA_SAME_CONTEXT 0 +#define IA64_MCA_COLD_BOOT -2 + +#include "minstate.h" + +/* + * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec) + * 1. GR1 = OS GP + * 2. GR8 = PAL_PROC physical address + * 3. GR9 = SAL_PROC physical address + * 4. GR10 = SAL GP (physical) + * 5. GR11 = Rendez state + * 6. GR12 = Return address to location within SAL_CHECK + */ +#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \ + LOAD_PHYSICAL(p0, _tmp, ia64_sal_to_os_handoff_state);; \ + st8 [_tmp]=r1,0x08;; \ + st8 [_tmp]=r8,0x08;; \ + st8 [_tmp]=r9,0x08;; \ + st8 [_tmp]=r10,0x08;; \ + st8 [_tmp]=r11,0x08;; \ + st8 [_tmp]=r12,0x08;; \ + st8 [_tmp]=r17,0x08;; \ + st8 [_tmp]=r18,0x08 + +/* + * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec) + * (p6) is executed if we never entered virtual mode (TLB error) + * (p7) is executed if we entered virtual mode as expected (normal case) + * 1. GR8 = OS_MCA return status + * 2. GR9 = SAL GP (physical) + * 3. GR10 = 0/1 returning same/new context + * 4. GR22 = New min state save area pointer + * returns ptr to SAL rtn save loc in _tmp + */ +#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \ + movl _tmp=ia64_os_to_sal_handoff_state;; \ + DATA_VA_TO_PA(_tmp);; \ + ld8 r8=[_tmp],0x08;; \ + ld8 r9=[_tmp],0x08;; \ + ld8 r10=[_tmp],0x08;; \ + ld8 r22=[_tmp],0x08;; + // now _tmp is pointing to SAL rtn save location + +/* + * COLD_BOOT_HANDOFF_STATE() sets ia64_mca_os_to_sal_state + * imots_os_status=IA64_MCA_COLD_BOOT + * imots_sal_gp=SAL GP + * imots_context=IA64_MCA_SAME_CONTEXT + * imots_new_min_state=Min state save area pointer + * imots_sal_check_ra=Return address to location within SAL_CHECK + * + */ +#define COLD_BOOT_HANDOFF_STATE(sal_to_os_handoff,os_to_sal_handoff,tmp)\ + movl tmp=IA64_MCA_COLD_BOOT; \ + movl sal_to_os_handoff=__pa(ia64_sal_to_os_handoff_state); \ + movl os_to_sal_handoff=__pa(ia64_os_to_sal_handoff_state);; \ + st8 [os_to_sal_handoff]=tmp,8;; \ + ld8 tmp=[sal_to_os_handoff],48;; \ + st8 [os_to_sal_handoff]=tmp,8;; \ + movl tmp=IA64_MCA_SAME_CONTEXT;; \ + st8 [os_to_sal_handoff]=tmp,8;; \ + ld8 tmp=[sal_to_os_handoff],-8;; \ + st8 [os_to_sal_handoff]=tmp,8;; \ + ld8 tmp=[sal_to_os_handoff];; \ + st8 [os_to_sal_handoff]=tmp;; + +#define GET_IA64_MCA_DATA(reg) \ + GET_THIS_PADDR(reg, ia64_mca_data) \ + ;; \ + ld8 reg=[reg] + + .global ia64_os_mca_dispatch + .global ia64_os_mca_dispatch_end + .global ia64_sal_to_os_handoff_state + .global ia64_os_to_sal_handoff_state + + .text + .align 16 + +ia64_os_mca_dispatch: + + // Serialize all MCA processing + mov r3=1;; + LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);; +ia64_os_mca_spin: + xchg8 r4=[r2],r3;; + cmp.ne p6,p0=r4,r0 +(p6) br ia64_os_mca_spin + + // Save the SAL to OS MCA handoff state as defined + // by SAL SPEC 3.0 + // NOTE : The order in which the state gets saved + // is dependent on the way the C-structure + // for ia64_mca_sal_to_os_state_t has been + // defined in include/asm/mca.h + SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) + ;; + + // LOG PROCESSOR STATE INFO FROM HERE ON.. +begin_os_mca_dump: + br ia64_os_mca_proc_state_dump;; + +ia64_os_mca_done_dump: + + LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56) + ;; + ld8 r18=[r16] // Get processor state parameter on existing PALE_CHECK. + ;; + tbit.nz p6,p7=r18,60 +(p7) br.spnt done_tlb_purge_and_reload + + // The following code purges TC and TR entries. Then reload all TC entries. + // Purge percpu data TC entries. +begin_tlb_purge_and_reload: + +#define O(member) IA64_CPUINFO_##member##_OFFSET + + GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=0 + ;; + adds r20=-1,r20 + ;; +#undef O + +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + ;; + br.sptk.few 2b +4: + srlz.i // srlz.i implies srlz.d + ;; + + // Now purge addresses formerly mapped by TR registers + // 1. Purge ITR&DTR for kernel. + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + srlz.d + ;; + // 2. Purge DTR for PERCPU data. + movl r16=PERCPU_ADDR + mov r18=PERCPU_PAGE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.d + ;; + // 3. Purge ITR for PAL code. + GET_THIS_PADDR(r2, ia64_mca_pal_base) + ;; + ld8 r16=[r2] + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + // 4. Purge DTR for stack. + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + // Finally reload the TR registers. + // 1. Reload DTR/ITR registers for kernel. + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + movl r17=KERNEL_START + ;; + mov cr.itir=r18 + mov cr.ifa=r17 + mov r16=IA64_TR_KERNEL + mov r19=ip + movl r18=PAGE_KERNEL + ;; + dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT + ;; + or r18=r17,r18 + ;; + itr.i itr[r16]=r18 + ;; + itr.d dtr[r16]=r18 + ;; + srlz.i + srlz.d + ;; + // 2. Reload DTR register for PERCPU data. + GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte) + ;; + movl r16=PERCPU_ADDR // vaddr + movl r18=PERCPU_PAGE_SHIFT<<2 + ;; + mov cr.itir=r18 + mov cr.ifa=r16 + ;; + ld8 r18=[r2] // load per-CPU PTE + mov r16=IA64_TR_PERCPU_DATA; + ;; + itr.d dtr[r16]=r18 + ;; + srlz.d + ;; + // 3. Reload ITR for PAL code. + GET_THIS_PADDR(r2, ia64_mca_pal_pte) + ;; + ld8 r18=[r2] // load PAL PTE + ;; + GET_THIS_PADDR(r2, ia64_mca_pal_base) + ;; + ld8 r16=[r2] // load PAL vaddr + mov r19=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r19 + mov cr.ifa=r16 + mov r20=IA64_TR_PALCODE + ;; + itr.i itr[r20]=r18 + ;; + srlz.i + ;; + // 4. Reload DTR for stack. + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r18=r19,r16 + movl r20=PAGE_KERNEL + ;; + add r16=r20,r16 + mov r19=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r19 + mov cr.ifa=r18 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r16 + ;; + srlz.d + ;; + br.sptk.many done_tlb_purge_and_reload +err: + COLD_BOOT_HANDOFF_STATE(r20,r21,r22) + br.sptk.many ia64_os_mca_done_restore + +done_tlb_purge_and_reload: + + // Setup new stack frame for OS_MCA handling + GET_IA64_MCA_DATA(r2) + ;; + add r3 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2 + add r2 = IA64_MCA_CPU_RBSTORE_OFFSET, r2 + ;; + rse_switch_context(r6,r3,r2);; // RSC management in this new context + + GET_IA64_MCA_DATA(r2) + ;; + add r2 = IA64_MCA_CPU_STACK_OFFSET+IA64_MCA_STACK_SIZE-16, r2 + ;; + mov r12=r2 // establish new stack-pointer + + // Enter virtual mode from physical mode + VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) +ia64_os_mca_virtual_begin: + + // Call virtual mode handler + movl r2=ia64_mca_ucmc_handler;; + mov b6=r2;; + br.call.sptk.many b0=b6;; +.ret0: + // Revert back to physical mode before going back to SAL + PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4) +ia64_os_mca_virtual_end: + + // restore the original stack frame here + GET_IA64_MCA_DATA(r2) + ;; + add r2 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2 + ;; + movl r4=IA64_PSR_MC + ;; + rse_return_context(r4,r3,r2) // switch from interrupt context for RSE + + // let us restore all the registers from our PSI structure + mov r8=gp + ;; +begin_os_mca_restore: + br ia64_os_mca_proc_state_restore;; + +ia64_os_mca_done_restore: + OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);; + // branch back to SALE_CHECK + ld8 r3=[r2];; + mov b0=r3;; // SAL_CHECK return address + + // release lock + movl r3=ia64_mca_serialize;; + DATA_VA_TO_PA(r3);; + st8.rel [r3]=r0 + + br b0 + ;; +ia64_os_mca_dispatch_end: +//EndMain////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_os_mca_proc_state_dump() +// +// Stub Description: +// +// This stub dumps the processor state during MCHK to a data area +// +//-- + +ia64_os_mca_proc_state_dump: +// Save bank 1 GRs 16-31 which will be used by c-language code when we switch +// to virtual addressing mode. + GET_IA64_MCA_DATA(r2) + ;; + add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2 + ;; +// save ar.NaT + mov r5=ar.unat // ar.unat + +// save banked GRs 16-31 along with NaT bits + bsw.1;; + st8.spill [r2]=r16,8;; + st8.spill [r2]=r17,8;; + st8.spill [r2]=r18,8;; + st8.spill [r2]=r19,8;; + st8.spill [r2]=r20,8;; + st8.spill [r2]=r21,8;; + st8.spill [r2]=r22,8;; + st8.spill [r2]=r23,8;; + st8.spill [r2]=r24,8;; + st8.spill [r2]=r25,8;; + st8.spill [r2]=r26,8;; + st8.spill [r2]=r27,8;; + st8.spill [r2]=r28,8;; + st8.spill [r2]=r29,8;; + st8.spill [r2]=r30,8;; + st8.spill [r2]=r31,8;; + + mov r4=ar.unat;; + st8 [r2]=r4,8 // save User NaT bits for r16-r31 + mov ar.unat=r5 // restore original unat + bsw.0;; + +//save BRs + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r4 + + mov r3=b0 + mov r5=b1 + mov r7=b2;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=b3 + mov r5=b4 + mov r7=b5;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=b6 + mov r5=b7;; + st8 [r2]=r3,2*8 + st8 [r4]=r5,2*8;; + +cSaveCRs: +// save CRs + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r4 + + mov r3=cr.dcr + mov r5=cr.itm + mov r7=cr.iva;; + + st8 [r2]=r3,8*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; // 48 byte rements + + mov r3=cr.pta;; + st8 [r2]=r3,8*8;; // 64 byte rements + +// if PSR.ic=0, reading interruption registers causes an illegal operation fault + mov r3=psr;; + tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test +(p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc. +begin_skip_intr_regs: +(p6) br SkipIntrRegs;; + + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r6 + + mov r3=cr.ipsr + mov r5=cr.isr + mov r7=r0;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr.iip + mov r5=cr.ifa + mov r7=cr.itir;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr.iipa + mov r5=cr.ifs + mov r7=cr.iim;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr25;; // cr.iha + st8 [r2]=r3,160;; // 160 byte rement + +SkipIntrRegs: + st8 [r2]=r0,152;; // another 152 byte . + + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r6 + + mov r3=cr.lid +// mov r5=cr.ivr // cr.ivr, don't read it + mov r7=cr.tpr;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=r0 // cr.eoi => cr67 + mov r5=r0 // cr.irr0 => cr68 + mov r7=r0;; // cr.irr1 => cr69 + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=r0 // cr.irr2 => cr70 + mov r5=r0 // cr.irr3 => cr71 + mov r7=cr.itv;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr.pmv + mov r5=cr.cmcv;; + st8 [r2]=r3,7*8 + st8 [r4]=r5,7*8;; + + mov r3=r0 // cr.lrr0 => cr80 + mov r5=r0;; // cr.lrr1 => cr81 + st8 [r2]=r3,23*8 + st8 [r4]=r5,23*8;; + + adds r2=25*8,r2;; + +cSaveARs: +// save ARs + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r6 + + mov r3=ar.k0 + mov r5=ar.k1 + mov r7=ar.k2;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=ar.k3 + mov r5=ar.k4 + mov r7=ar.k5;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=ar.k6 + mov r5=ar.k7 + mov r7=r0;; // ar.kr8 + st8 [r2]=r3,10*8 + st8 [r4]=r5,10*8 + st8 [r6]=r7,10*8;; // rement by 72 bytes + + mov r3=ar.rsc + mov ar.rsc=r0 // put RSE in enforced lazy mode + mov r5=ar.bsp + ;; + mov r7=ar.bspstore;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=ar.rnat;; + st8 [r2]=r3,8*13 // increment by 13x8 bytes + + mov r3=ar.ccv;; + st8 [r2]=r3,8*4 + + mov r3=ar.unat;; + st8 [r2]=r3,8*4 + + mov r3=ar.fpsr;; + st8 [r2]=r3,8*4 + + mov r3=ar.itc;; + st8 [r2]=r3,160 // 160 + + mov r3=ar.pfs;; + st8 [r2]=r3,8 + + mov r3=ar.lc;; + st8 [r2]=r3,8 + + mov r3=ar.ec;; + st8 [r2]=r3 + add r2=8*62,r2 //padding + +// save RRs + mov ar.lc=0x08-1 + movl r4=0x00;; + +cStRR: + dep.z r5=r4,61,3;; + mov r3=rr[r5];; + st8 [r2]=r3,8 + add r4=1,r4 + br.cloop.sptk.few cStRR + ;; +end_os_mca_dump: + br ia64_os_mca_done_dump;; + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_os_mca_proc_state_restore() +// +// Stub Description: +// +// This is a stub to restore the saved processor state during MCHK +// +//-- + +ia64_os_mca_proc_state_restore: + +// Restore bank1 GR16-31 + GET_IA64_MCA_DATA(r2) + ;; + add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2 + +restore_GRs: // restore bank-1 GRs 16-31 + bsw.1;; + add r3=16*8,r2;; // to get to NaT of GR 16-31 + ld8 r3=[r3];; + mov ar.unat=r3;; // first restore NaT + + ld8.fill r16=[r2],8;; + ld8.fill r17=[r2],8;; + ld8.fill r18=[r2],8;; + ld8.fill r19=[r2],8;; + ld8.fill r20=[r2],8;; + ld8.fill r21=[r2],8;; + ld8.fill r22=[r2],8;; + ld8.fill r23=[r2],8;; + ld8.fill r24=[r2],8;; + ld8.fill r25=[r2],8;; + ld8.fill r26=[r2],8;; + ld8.fill r27=[r2],8;; + ld8.fill r28=[r2],8;; + ld8.fill r29=[r2],8;; + ld8.fill r30=[r2],8;; + ld8.fill r31=[r2],8;; + + ld8 r3=[r2],8;; // increment to skip NaT + bsw.0;; + +restore_BRs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov b0=r3 + mov b1=r5 + mov b2=r7;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov b3=r3 + mov b4=r5 + mov b5=r7;; + + ld8 r3=[r2],2*8 + ld8 r5=[r4],2*8;; + mov b6=r3 + mov b7=r5;; + +restore_CRs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],8*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; // 48 byte increments + mov cr.dcr=r3 + mov cr.itm=r5 + mov cr.iva=r7;; + + ld8 r3=[r2],8*8;; // 64 byte increments +// mov cr.pta=r3 + + +// if PSR.ic=1, reading interruption registers causes an illegal operation fault + mov r3=psr;; + tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test +(p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc. + +begin_rskip_intr_regs: +(p6) br rSkipIntrRegs;; + + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr.ipsr=r3 +// mov cr.isr=r5 // cr.isr is read only + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr.iip=r3 + mov cr.ifa=r5 + mov cr.itir=r7;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr.iipa=r3 + mov cr.ifs=r5 + mov cr.iim=r7 + + ld8 r3=[r2],160;; // 160 byte increment + mov cr.iha=r3 + +rSkipIntrRegs: + ld8 r3=[r2],152;; // another 152 byte inc. + + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r6 + + ld8 r3=[r2],8*3 + ld8 r5=[r4],8*3 + ld8 r7=[r6],8*3;; + mov cr.lid=r3 +// mov cr.ivr=r5 // cr.ivr is read only + mov cr.tpr=r7;; + + ld8 r3=[r2],8*3 + ld8 r5=[r4],8*3 + ld8 r7=[r6],8*3;; +// mov cr.eoi=r3 +// mov cr.irr0=r5 // cr.irr0 is read only +// mov cr.irr1=r7;; // cr.irr1 is read only + + ld8 r3=[r2],8*3 + ld8 r5=[r4],8*3 + ld8 r7=[r6],8*3;; +// mov cr.irr2=r3 // cr.irr2 is read only +// mov cr.irr3=r5 // cr.irr3 is read only + mov cr.itv=r7;; + + ld8 r3=[r2],8*7 + ld8 r5=[r4],8*7;; + mov cr.pmv=r3 + mov cr.cmcv=r5;; + + ld8 r3=[r2],8*23 + ld8 r5=[r4],8*23;; + adds r2=8*23,r2 + adds r4=8*23,r4;; +// mov cr.lrr0=r3 +// mov cr.lrr1=r5 + + adds r2=8*2,r2;; + +restore_ARs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov ar.k0=r3 + mov ar.k1=r5 + mov ar.k2=r7;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov ar.k3=r3 + mov ar.k4=r5 + mov ar.k5=r7;; + + ld8 r3=[r2],10*8 + ld8 r5=[r4],10*8 + ld8 r7=[r6],10*8;; + mov ar.k6=r3 + mov ar.k7=r5 + ;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; +// mov ar.rsc=r3 +// mov ar.bsp=r5 // ar.bsp is read only + mov ar.rsc=r0 // make sure that RSE is in enforced lazy mode + ;; + mov ar.bspstore=r7;; + + ld8 r9=[r2],8*13;; + mov ar.rnat=r9 + + mov ar.rsc=r3 + ld8 r3=[r2],8*4;; + mov ar.ccv=r3 + + ld8 r3=[r2],8*4;; + mov ar.unat=r3 + + ld8 r3=[r2],8*4;; + mov ar.fpsr=r3 + + ld8 r3=[r2],160;; // 160 +// mov ar.itc=r3 + + ld8 r3=[r2],8;; + mov ar.pfs=r3 + + ld8 r3=[r2],8;; + mov ar.lc=r3 + + ld8 r3=[r2];; + mov ar.ec=r3 + add r2=8*62,r2;; // padding + +restore_RRs: + mov r5=ar.lc + mov ar.lc=0x08-1 + movl r4=0x00;; +cStRRr: + dep.z r7=r4,61,3 + ld8 r3=[r2],8;; + mov rr[r7]=r3 // what are its access previledges? + add r4=1,r4 + br.cloop.sptk.few cStRRr + ;; + mov ar.lc=r5 + ;; +end_os_mca_restore: + br ia64_os_mca_done_restore;; + +//EndStub////////////////////////////////////////////////////////////////////// + + +// ok, the issue here is that we need to save state information so +// it can be useable by the kernel debugger and show regs routines. +// In order to do this, our best bet is save the current state (plus +// the state information obtain from the MIN_STATE_AREA) into a pt_regs +// format. This way we can pass it on in a useable format. +// + +// +// SAL to OS entry point for INIT on the monarch processor +// This has been defined for registration purposes with SAL +// as a part of ia64_mca_init. +// +// When we get here, the following registers have been +// set by the SAL for our use +// +// 1. GR1 = OS INIT GP +// 2. GR8 = PAL_PROC physical address +// 3. GR9 = SAL_PROC physical address +// 4. GR10 = SAL GP (physical) +// 5. GR11 = Init Reason +// 0 = Received INIT for event other than crash dump switch +// 1 = Received wakeup at the end of an OS_MCA corrected machine check +// 2 = Received INIT dude to CrashDump switch assertion +// +// 6. GR12 = Return address to location within SAL_INIT procedure + + +GLOBAL_ENTRY(ia64_monarch_init_handler) + .prologue + // stash the information the SAL passed to os + SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) + ;; + SAVE_MIN_WITH_COVER + ;; + mov r8=cr.ifa + mov r9=cr.isr + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + +// ok, enough should be saved at this point to be dangerous, and supply +// information for a dump +// We need to switch to Virtual mode before hitting the C functions. + + movl r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN + mov r3=psr // get the current psr, minimum enabled at this point + ;; + or r2=r2,r3 + ;; + movl r3=IVirtual_Switch + ;; + mov cr.iip=r3 // short return to set the appropriate bits + mov cr.ipsr=r2 // need to do an rfi to set appropriate bits + ;; + rfi + ;; +IVirtual_Switch: + // + // We should now be running virtual + // + // Let's call the C handler to get the rest of the state info + // + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + ;; + adds out0=16,sp // out0 = pointer to pt_regs + ;; + DO_SAVE_SWITCH_STACK + .body + adds out1=16,sp // out0 = pointer to switch_stack + + br.call.sptk.many rp=ia64_init_handler +.ret1: + +return_from_init: + br.sptk return_from_init +END(ia64_monarch_init_handler) + +// +// SAL to OS entry point for INIT on the slave processor +// This has been defined for registration purposes with SAL +// as a part of ia64_mca_init. +// + +GLOBAL_ENTRY(ia64_slave_init_handler) +1: br.sptk 1b +END(ia64_slave_init_handler) diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c new file mode 100644 index 000000000000..ab478172c349 --- /dev/null +++ b/arch/ia64/kernel/mca_drv.c @@ -0,0 +1,639 @@ +/* + * File: mca_drv.c + * Purpose: Generic MCA handling layer + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) + */ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kallsyms.h> +#include <linux/smp_lock.h> +#include <linux/bootmem.h> +#include <linux/acpi.h> +#include <linux/timer.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/workqueue.h> +#include <linux/mm.h> + +#include <asm/delay.h> +#include <asm/machvec.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/sal.h> +#include <asm/mca.h> + +#include <asm/irq.h> +#include <asm/hw_irq.h> + +#include "mca_drv.h" + +/* max size of SAL error record (default) */ +static int sal_rec_max = 10000; + +/* from mca.c */ +static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state; +static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state; + +/* from mca_drv_asm.S */ +extern void *mca_handler_bhhook(void); + +static DEFINE_SPINLOCK(mca_bh_lock); + +typedef enum { + MCA_IS_LOCAL = 0, + MCA_IS_GLOBAL = 1 +} mca_type_t; + +#define MAX_PAGE_ISOLATE 1024 + +static struct page *page_isolate[MAX_PAGE_ISOLATE]; +static int num_page_isolate = 0; + +typedef enum { + ISOLATE_NG = 0, + ISOLATE_OK = 1 +} isolate_status_t; + +/* + * This pool keeps pointers to the section part of SAL error record + */ +static struct { + slidx_list_t *buffer; /* section pointer list pool */ + int cur_idx; /* Current index of section pointer list pool */ + int max_idx; /* Maximum index of section pointer list pool */ +} slidx_pool; + +/** + * mca_page_isolate - isolate a poisoned page in order not to use it later + * @paddr: poisoned memory location + * + * Return value: + * ISOLATE_OK / ISOLATE_NG + */ + +static isolate_status_t +mca_page_isolate(unsigned long paddr) +{ + int i; + struct page *p; + + /* whether physical address is valid or not */ + if ( !ia64_phys_addr_valid(paddr) ) + return ISOLATE_NG; + + /* convert physical address to physical page number */ + p = pfn_to_page(paddr>>PAGE_SHIFT); + + /* check whether a page number have been already registered or not */ + for( i = 0; i < num_page_isolate; i++ ) + if( page_isolate[i] == p ) + return ISOLATE_OK; /* already listed */ + + /* limitation check */ + if( num_page_isolate == MAX_PAGE_ISOLATE ) + return ISOLATE_NG; + + /* kick pages having attribute 'SLAB' or 'Reserved' */ + if( PageSlab(p) || PageReserved(p) ) + return ISOLATE_NG; + + /* add attribute 'Reserved' and register the page */ + SetPageReserved(p); + page_isolate[num_page_isolate++] = p; + + return ISOLATE_OK; +} + +/** + * mca_hanlder_bh - Kill the process which occurred memory read error + * @paddr: poisoned address received from MCA Handler + */ + +void +mca_handler_bh(unsigned long paddr) +{ + printk(KERN_DEBUG "OS_MCA: process [pid: %d](%s) encounters MCA.\n", + current->pid, current->comm); + + spin_lock(&mca_bh_lock); + if (mca_page_isolate(paddr) == ISOLATE_OK) { + printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); + } else { + printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr); + } + spin_unlock(&mca_bh_lock); + + /* This process is about to be killed itself */ + force_sig(SIGKILL, current); + schedule(); +} + +/** + * mca_make_peidx - Make index of processor error section + * @slpi: pointer to record of processor error section + * @peidx: pointer to index of processor error section + */ + +static void +mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx) +{ + /* + * calculate the start address of + * "struct cpuid_info" and "sal_processor_static_info_t". + */ + u64 total_check_num = slpi->valid.num_cache_check + + slpi->valid.num_tlb_check + + slpi->valid.num_bus_check + + slpi->valid.num_reg_file_check + + slpi->valid.num_ms_check; + u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num + + sizeof(sal_log_processor_info_t); + u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info); + + peidx_head(peidx) = slpi; + peidx_mid(peidx) = (struct sal_cpuid_info *) + (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL); + peidx_bottom(peidx) = (sal_processor_static_info_t *) + (slpi->valid.psi_static_struct ? + ((char*)slpi + head_size + mid_size) : NULL); +} + +/** + * mca_make_slidx - Make index of SAL error record + * @buffer: pointer to SAL error record + * @slidx: pointer to index of SAL error record + * + * Return value: + * 1 if record has platform error / 0 if not + */ +#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \ + { slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \ + hl->hdr = ptr; \ + list_add(&hl->list, &(sect)); \ + slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; } + +static int +mca_make_slidx(void *buffer, slidx_table_t *slidx) +{ + int platform_err = 0; + int record_len = ((sal_log_record_header_t*)buffer)->len; + u32 ercd_pos; + int sects; + sal_log_section_hdr_t *sp; + + /* + * Initialize index referring current record + */ + INIT_LIST_HEAD(&(slidx->proc_err)); + INIT_LIST_HEAD(&(slidx->mem_dev_err)); + INIT_LIST_HEAD(&(slidx->sel_dev_err)); + INIT_LIST_HEAD(&(slidx->pci_bus_err)); + INIT_LIST_HEAD(&(slidx->smbios_dev_err)); + INIT_LIST_HEAD(&(slidx->pci_comp_err)); + INIT_LIST_HEAD(&(slidx->plat_specific_err)); + INIT_LIST_HEAD(&(slidx->host_ctlr_err)); + INIT_LIST_HEAD(&(slidx->plat_bus_err)); + INIT_LIST_HEAD(&(slidx->unsupported)); + + /* + * Extract a Record Header + */ + slidx->header = buffer; + + /* + * Extract each section records + * (arranged from "int ia64_log_platform_info_print()") + */ + for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0; + ercd_pos < record_len; ercd_pos += sp->len, sects++) { + sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos); + if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) { + LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp); + } else { + LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp); + } + } + slidx->n_sections = sects; + + return platform_err; +} + +/** + * init_record_index_pools - Initialize pool of lists for SAL record index + * + * Return value: + * 0 on Success / -ENOMEM on Failure + */ +static int +init_record_index_pools(void) +{ + int i; + int rec_max_size; /* Maximum size of SAL error records */ + int sect_min_size; /* Minimum size of SAL error sections */ + /* minimum size table of each section */ + static int sal_log_sect_min_sizes[] = { + sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t), + sizeof(sal_log_mem_dev_err_info_t), + sizeof(sal_log_sel_dev_err_info_t), + sizeof(sal_log_pci_bus_err_info_t), + sizeof(sal_log_smbios_dev_err_info_t), + sizeof(sal_log_pci_comp_err_info_t), + sizeof(sal_log_plat_specific_err_info_t), + sizeof(sal_log_host_ctlr_err_info_t), + sizeof(sal_log_plat_bus_err_info_t), + }; + + /* + * MCA handler cannot allocate new memory on flight, + * so we preallocate enough memory to handle a SAL record. + * + * Initialize a handling set of slidx_pool: + * 1. Pick up the max size of SAL error records + * 2. Pick up the min size of SAL error sections + * 3. Allocate the pool as enough to 2 SAL records + * (now we can estimate the maxinum of section in a record.) + */ + + /* - 1 - */ + rec_max_size = sal_rec_max; + + /* - 2 - */ + sect_min_size = sal_log_sect_min_sizes[0]; + for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++) + if (sect_min_size > sal_log_sect_min_sizes[i]) + sect_min_size = sal_log_sect_min_sizes[i]; + + /* - 3 - */ + slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1; + slidx_pool.buffer = (slidx_list_t *) kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL); + + return slidx_pool.buffer ? 0 : -ENOMEM; +} + + +/***************************************************************************** + * Recovery functions * + *****************************************************************************/ + +/** + * is_mca_global - Check whether this MCA is global or not + * @peidx: pointer of index of processor error section + * @pbci: pointer to pal_bus_check_info_t + * + * Return value: + * MCA_IS_LOCAL / MCA_IS_GLOBAL + */ + +static mca_type_t +is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci) +{ + pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); + + /* + * PAL can request a rendezvous, if the MCA has a global scope. + * If "rz_always" flag is set, SAL requests MCA rendezvous + * in spite of global MCA. + * Therefore it is local MCA when rendezvous has not been requested. + * Failed to rendezvous, the system must be down. + */ + switch (sal_to_os_handoff_state->imsto_rendez_state) { + case -1: /* SAL rendezvous unsuccessful */ + return MCA_IS_GLOBAL; + case 0: /* SAL rendezvous not required */ + return MCA_IS_LOCAL; + case 1: /* SAL rendezvous successful int */ + case 2: /* SAL rendezvous successful int with init */ + default: + break; + } + + /* + * If One or more Cache/TLB/Reg_File/Uarch_Check is here, + * it would be a local MCA. (i.e. processor internal error) + */ + if (psp->tc || psp->cc || psp->rc || psp->uc) + return MCA_IS_LOCAL; + + /* + * Bus_Check structure with Bus_Check.ib (internal bus error) flag set + * would be a global MCA. (e.g. a system bus address parity error) + */ + if (!pbci || pbci->ib) + return MCA_IS_GLOBAL; + + /* + * Bus_Check structure with Bus_Check.eb (external bus error) flag set + * could be either a local MCA or a global MCA. + * + * Referring Bus_Check.bsi: + * 0: Unknown/unclassified + * 1: BERR# + * 2: BINIT# + * 3: Hard Fail + * (FIXME: Are these SGI specific or generic bsi values?) + */ + if (pbci->eb) + switch (pbci->bsi) { + case 0: + /* e.g. a load from poisoned memory */ + return MCA_IS_LOCAL; + case 1: + case 2: + case 3: + return MCA_IS_GLOBAL; + } + + return MCA_IS_GLOBAL; +} + +/** + * recover_from_read_error - Try to recover the errors which type are "read"s. + * @slidx: pointer of index of SAL error record + * @peidx: pointer of index of processor error section + * @pbci: pointer of pal_bus_check_info + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) +{ + sal_log_mod_error_info_t *smei; + pal_min_state_area_t *pmsa; + struct ia64_psr *psr1, *psr2; + ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; + + /* Is target address valid? */ + if (!pbci->tv) + return 0; + + /* + * cpu read or memory-mapped io read + * + * offending process affected process OS MCA do + * kernel mode kernel mode down system + * kernel mode user mode kill the process + * user mode kernel mode down system (*) + * user mode user mode kill the process + * + * (*) You could terminate offending user-mode process + * if (pbci->pv && pbci->pl != 0) *and* if you sure + * the process not have any locks of kernel. + */ + + psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); + + /* + * Check the privilege level of interrupted context. + * If it is user-mode, then terminate affected process. + */ + if (psr1->cpl != 0) { + smei = peidx_bus_check(peidx, 0); + if (smei->valid.target_identifier) { + /* + * setup for resume to bottom half of MCA, + * "mca_handler_bhhook" + */ + pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61)); + /* pass to bhhook as 1st argument (gr8) */ + pmsa->pmsa_gr[8-1] = smei->target_identifier; + /* set interrupted return address (but no use) */ + pmsa->pmsa_br0 = pmsa->pmsa_iip; + /* change resume address to bottom half */ + pmsa->pmsa_iip = mca_hdlr_bh->fp; + pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; + /* set cpl with kernel mode */ + psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; + psr2->cpl = 0; + psr2->ri = 0; + + return 1; + } + + } + + return 0; +} + +/** + * recover_from_platform_error - Recover from platform error. + * @slidx: pointer of index of SAL error record + * @peidx: pointer of index of processor error section + * @pbci: pointer of pal_bus_check_info + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) +{ + int status = 0; + pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); + + if (psp->bc && pbci->eb && pbci->bsi == 0) { + switch(pbci->type) { + case 1: /* partial read */ + case 3: /* full line(cpu) read */ + case 9: /* I/O space read */ + status = recover_from_read_error(slidx, peidx, pbci); + break; + case 0: /* unknown */ + case 2: /* partial write */ + case 4: /* full line write */ + case 5: /* implicit or explicit write-back operation */ + case 6: /* snoop probe */ + case 7: /* incoming or outgoing ptc.g */ + case 8: /* write coalescing transactions */ + case 10: /* I/O space write */ + case 11: /* inter-processor interrupt message(IPI) */ + case 12: /* interrupt acknowledge or external task priority cycle */ + default: + break; + } + } + + return status; +} + +/** + * recover_from_processor_error + * @platform: whether there are some platform error section or not + * @slidx: pointer of index of SAL error record + * @peidx: pointer of index of processor error section + * @pbci: pointer of pal_bus_check_info + * + * Return value: + * 1 on Success / 0 on Failure + */ +/* + * Later we try to recover when below all conditions are satisfied. + * 1. Only one processor error section is exist. + * 2. BUS_CHECK is exist and the others are not exist.(Except TLB_CHECK) + * 3. The entry of BUS_CHECK_INFO is 1. + * 4. "External bus error" flag is set and the others are not set. + */ + +static int +recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) +{ + pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); + + /* + * We cannot recover errors with other than bus_check. + */ + if (psp->cc || psp->rc || psp->uc) + return 0; + + /* + * If there is no bus error, record is weird but we need not to recover. + */ + if (psp->bc == 0 || pbci == NULL) + return 1; + + /* + * Sorry, we cannot handle so many. + */ + if (peidx_bus_check_num(peidx) > 1) + return 0; + /* + * Well, here is only one bus error. + */ + if (pbci->ib || pbci->cc) + return 0; + if (pbci->eb && pbci->bsi > 0) + return 0; + if (psp->ci == 0) + return 0; + + /* + * This is a local MCA and estimated as recoverble external bus error. + * (e.g. a load from poisoned memory) + * This means "there are some platform errors". + */ + if (platform) + return recover_from_platform_error(slidx, peidx, pbci); + /* + * On account of strange SAL error record, we cannot recover. + */ + return 0; +} + +/** + * mca_try_to_recover - Try to recover from MCA + * @rec: pointer to a SAL error record + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +mca_try_to_recover(void *rec, + ia64_mca_sal_to_os_state_t *sal_to_os_state, + ia64_mca_os_to_sal_state_t *os_to_sal_state) +{ + int platform_err; + int n_proc_err; + slidx_table_t slidx; + peidx_table_t peidx; + pal_bus_check_info_t pbci; + + /* handoff state from/to mca.c */ + sal_to_os_handoff_state = sal_to_os_state; + os_to_sal_handoff_state = os_to_sal_state; + + /* Make index of SAL error record */ + platform_err = mca_make_slidx(rec, &slidx); + + /* Count processor error sections */ + n_proc_err = slidx_count(&slidx, proc_err); + + /* Now, OS can recover when there is one processor error section */ + if (n_proc_err > 1) + return 0; + else if (n_proc_err == 0) { + /* Weird SAL record ... We need not to recover */ + + return 1; + } + + /* Make index of processor error section */ + mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx); + + /* Extract Processor BUS_CHECK[0] */ + *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0); + + /* Check whether MCA is global or not */ + if (is_mca_global(&peidx, &pbci)) + return 0; + + /* Try to recover a processor error */ + return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci); +} + +/* + * ============================================================================= + */ + +int __init mca_external_handler_init(void) +{ + if (init_record_index_pools()) + return -ENOMEM; + + /* register external mca handlers */ + if (ia64_reg_MCA_extension(mca_try_to_recover)){ + printk(KERN_ERR "ia64_reg_MCA_extension failed.\n"); + kfree(slidx_pool.buffer); + return -EFAULT; + } + return 0; +} + +void __exit mca_external_handler_exit(void) +{ + /* unregister external mca handlers */ + ia64_unreg_MCA_extension(); + kfree(slidx_pool.buffer); +} + +module_init(mca_external_handler_init); +module_exit(mca_external_handler_exit); + +module_param(sal_rec_max, int, 0644); +MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record"); + +MODULE_DESCRIPTION("ia64 platform dependent mca handler driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h new file mode 100644 index 000000000000..0227b761f2c4 --- /dev/null +++ b/arch/ia64/kernel/mca_drv.h @@ -0,0 +1,113 @@ +/* + * File: mca_drv.h + * Purpose: Define helpers for Generic MCA handling + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) + */ +/* + * Processor error section: + * + * +-sal_log_processor_info_t *info-------------+ + * | sal_log_section_hdr_t header; | + * | ... | + * | sal_log_mod_error_info_t info[0]; | + * +-+----------------+-------------------------+ + * | CACHE_CHECK | ^ num_cache_check v + * +----------------+ + * | TLB_CHECK | ^ num_tlb_check v + * +----------------+ + * | BUS_CHECK | ^ num_bus_check v + * +----------------+ + * | REG_FILE_CHECK | ^ num_reg_file_check v + * +----------------+ + * | MS_CHECK | ^ num_ms_check v + * +-struct cpuid_info *id----------------------+ + * | regs[5]; | + * | reserved; | + * +-sal_processor_static_info_t *regs----------+ + * | valid; | + * | ... | + * | fr[128]; | + * +--------------------------------------------+ + */ + +/* peidx: index of processor error section */ +typedef struct peidx_table { + sal_log_processor_info_t *info; + struct sal_cpuid_info *id; + sal_processor_static_info_t *regs; +} peidx_table_t; + +#define peidx_head(p) (((p)->info)) +#define peidx_mid(p) (((p)->id)) +#define peidx_bottom(p) (((p)->regs)) + +#define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter)) +#define peidx_field_valid(p) (&(peidx_head(p)->valid)) +#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area)) + +#define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check) +#define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check) +#define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check) +#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check) +#define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check) + +#define peidx_cache_check_idx(p, n) (n) +#define peidx_tlb_check_idx(p, n) (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n) +#define peidx_bus_check_idx(p, n) (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n) +#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n) +#define peidx_ms_check_idx(p, n) (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n) + +#define peidx_mod_error_info(p, name, n) \ +({ int __idx = peidx_##name##_idx(p, n); \ + sal_log_mod_error_info_t *__ret = NULL; \ + if (peidx_##name##_num(p) > n) /*BUG*/ \ + __ret = &(peidx_head(p)->info[__idx]); \ + __ret; }) + +#define peidx_cache_check(p, n) peidx_mod_error_info(p, cache_check, n) +#define peidx_tlb_check(p, n) peidx_mod_error_info(p, tlb_check, n) +#define peidx_bus_check(p, n) peidx_mod_error_info(p, bus_check, n) +#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n) +#define peidx_ms_check(p, n) peidx_mod_error_info(p, ms_check, n) + +#define peidx_check_info(proc, name, n) \ +({ \ + sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\ + u64 __temp = __info && __info->valid.check_info \ + ? __info->check_info : 0; \ + __temp; }) + +/* slidx: index of SAL log error record */ + +typedef struct slidx_list { + struct list_head list; + sal_log_section_hdr_t *hdr; +} slidx_list_t; + +typedef struct slidx_table { + sal_log_record_header_t *header; + int n_sections; /* # of section headers */ + struct list_head proc_err; + struct list_head mem_dev_err; + struct list_head sel_dev_err; + struct list_head pci_bus_err; + struct list_head smbios_dev_err; + struct list_head pci_comp_err; + struct list_head plat_specific_err; + struct list_head host_ctlr_err; + struct list_head plat_bus_err; + struct list_head unsupported; /* list of unsupported sections */ +} slidx_table_t; + +#define slidx_foreach_entry(pos, head) \ + list_for_each_entry(pos, head, list) +#define slidx_first_entry(head) \ + (((head)->next != (head)) ? list_entry((head)->next, typeof(slidx_list_t), list) : NULL) +#define slidx_count(slidx, sec) \ +({ int __count = 0; \ + slidx_list_t *__pos; \ + slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\ + __count; }) + diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S new file mode 100644 index 000000000000..bcfa05acc561 --- /dev/null +++ b/arch/ia64/kernel/mca_drv_asm.S @@ -0,0 +1,45 @@ +/* + * File: mca_drv_asm.S + * Purpose: Assembly portion of Generic MCA handling + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) + */ +#include <linux/config.h> +#include <linux/threads.h> + +#include <asm/asmmacro.h> +#include <asm/processor.h> + +GLOBAL_ENTRY(mca_handler_bhhook) + invala // clear RSE ? + ;; // + cover // + ;; // + clrrrb // + ;; + alloc r16=ar.pfs,0,2,1,0 // make a new frame + ;; + mov r13=IA64_KR(CURRENT) // current task pointer + ;; + adds r12=IA64_TASK_THREAD_KSP_OFFSET,r13 + ;; + ld8 r12=[r12] // stack pointer + ;; + mov loc0=r16 + movl loc1=mca_handler_bh // recovery C function + ;; + mov out0=r8 // poisoned address + mov b6=loc1 + ;; + mov loc1=rp + ;; + br.call.sptk.many rp=b6 // not return ... + ;; + mov ar.pfs=loc0 + mov rp=loc1 + ;; + mov r8=r0 + br.ret.sptk.many rp + ;; +END(mca_handler_bhhook) diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h new file mode 100644 index 000000000000..1dbc7b2497c9 --- /dev/null +++ b/arch/ia64/kernel/minstate.h @@ -0,0 +1,251 @@ +#include <linux/config.h> + +#include <asm/cache.h> + +#include "entry.h" + +/* + * For ivt.s we want to access the stack virtually so we don't have to disable translation + * on interrupts. + * + * On entry: + * r1: pointer to current task (ar.k6) + */ +#define MINSTATE_START_SAVE_MIN_VIRT \ +(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ + ;; \ +(pUStk) mov.m r24=ar.rnat; \ +(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ +(pKStk) mov r1=sp; /* get sp */ \ + ;; \ +(pUStk) lfetch.fault.excl.nt1 [r22]; \ +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ + ;; \ +(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ +(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ + ;; \ +(pUStk) mov r18=ar.bsp; \ +(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ + +#define MINSTATE_END_SAVE_MIN_VIRT \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ + ;; + +/* + * For mca_asm.S we want to access the stack physically since the state is saved before we + * go virtual and don't want to destroy the iip or ipsr. + */ +#define MINSTATE_START_SAVE_MIN_PHYS \ +(pKStk) mov r3=IA64_KR(PER_CPU_DATA);; \ +(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \ +(pKStk) ld8 r3 = [r3];; \ +(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \ +(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \ +(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ +(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ + ;; \ +(pUStk) mov r24=ar.rnat; \ +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ +(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \ + ;; \ +(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ + ;; \ +(pUStk) mov r18=ar.bsp; \ +(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + +#define MINSTATE_END_SAVE_MIN_PHYS \ + dep r12=-1,r12,61,3; /* make sp a kernel virtual address */ \ + ;; + +#ifdef MINSTATE_VIRT +# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT) +# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT +# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT +#endif + +#ifdef MINSTATE_PHYS +# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; tpa reg=reg +# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS +# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS +#endif + +/* + * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ +#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ + MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ + mov r27=ar.rsc; /* M */ \ + mov r20=r1; /* A */ \ + mov r25=ar.unat; /* M */ \ + mov r29=cr.ipsr; /* M */ \ + mov r26=ar.pfs; /* I */ \ + mov r28=cr.iip; /* M */ \ + mov r21=ar.fpsr; /* M */ \ + COVER; /* B;; (or nothing) */ \ + ;; \ + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ + ;; \ + ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \ + st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \ + adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + SAVE_IFS; \ + cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \ + ;; \ + MINSTATE_START_SAVE_MIN \ + adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ + adds r16=PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16]=r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ + mov r29=b0 \ + ;; \ + adds r16=PT(R8),r1; /* initialize first base pointer */ \ + adds r17=PT(R9),r1; /* initialize second base pointer */ \ +(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r8,16; \ +.mem.offset 8,0; st8.spill [r17]=r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r10,24; \ +.mem.offset 8,0; st8.spill [r17]=r11,24; \ + ;; \ + st8 [r16]=r28,16; /* save cr.iip */ \ + st8 [r17]=r30,16; /* save cr.ifs */ \ +(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ + mov r8=ar.ccv; \ + mov r9=ar.csd; \ + mov r10=ar.ssd; \ + movl r11=FPSR_DEFAULT; /* L-unit */ \ + ;; \ + st8 [r16]=r25,16; /* save ar.unat */ \ + st8 [r17]=r26,16; /* save ar.pfs */ \ + shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ + ;; \ + st8 [r16]=r27,16; /* save ar.rsc */ \ +(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \ +(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \ + ;; /* avoid RAW on r16 & r17 */ \ +(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \ + st8 [r17]=r31,16; /* save predicates */ \ +(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \ + ;; \ + st8 [r16]=r29,16; /* save b0 */ \ + st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ + cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ +.mem.offset 8,0; st8.spill [r17]=r12,16; \ + adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r13,16; \ +.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \ + mov r13=IA64_KR(CURRENT); /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r15,16; \ +.mem.offset 8,0; st8.spill [r17]=r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r2,16; \ +.mem.offset 8,0; st8.spill [r17]=r3,16; \ + adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ + ;; \ + EXTRA; \ + movl r1=__gp; /* establish kernel global pointer */ \ + ;; \ + MINSTATE_END_SAVE_MIN + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). + * + * Assumed state upon entry: + * psr.ic: on + * r2: points to &pt_regs.r16 + * r3: points to &pt_regs.r17 + * r8: contents of ar.ccv + * r9: contents of ar.csd + * r10: contents of ar.ssd + * r11: FPSR_DEFAULT + * + * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. + */ +#define SAVE_REST \ +.mem.offset 0,0; st8.spill [r2]=r16,16; \ +.mem.offset 8,0; st8.spill [r3]=r17,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r18,16; \ +.mem.offset 8,0; st8.spill [r3]=r19,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r20,16; \ +.mem.offset 8,0; st8.spill [r3]=r21,16; \ + mov r18=b6; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r22,16; \ +.mem.offset 8,0; st8.spill [r3]=r23,16; \ + mov r19=b7; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r24,16; \ +.mem.offset 8,0; st8.spill [r3]=r25,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r26,16; \ +.mem.offset 8,0; st8.spill [r3]=r27,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r28,16; \ +.mem.offset 8,0; st8.spill [r3]=r29,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r30,16; \ +.mem.offset 8,0; st8.spill [r3]=r31,32; \ + ;; \ + mov ar.fpsr=r11; /* M-unit */ \ + st8 [r2]=r8,8; /* ar.ccv */ \ + adds r24=PT(B6)-PT(F7),r3; \ + ;; \ + stf.spill [r2]=f6,32; \ + stf.spill [r3]=f7,32; \ + ;; \ + stf.spill [r2]=f8,32; \ + stf.spill [r3]=f9,32; \ + ;; \ + stf.spill [r2]=f10; \ + stf.spill [r3]=f11; \ + adds r25=PT(B7)-PT(F11),r3; \ + ;; \ + st8 [r24]=r18,16; /* b6 */ \ + st8 [r25]=r19,16; /* b7 */ \ + ;; \ + st8 [r24]=r9; /* ar.csd */ \ + st8 [r25]=r10; /* ar.ssd */ \ + ;; + +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,) +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19) +#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, ) diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c new file mode 100644 index 000000000000..febc091c2f02 --- /dev/null +++ b/arch/ia64/kernel/module.c @@ -0,0 +1,952 @@ +/* + * IA-64-specific support for kernel module loader. + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * Loosely based on patch by Rusty Russell. + */ + +/* relocs tested so far: + + DIR64LSB + FPTR64LSB + GPREL22 + LDXMOV + LDXMOV + LTOFF22 + LTOFF22X + LTOFF22X + LTOFF_FPTR22 + PCREL21B (for br.call only; br.cond is not supported out of modules!) + PCREL60B (for brl.cond only; brl.call is not supported for modules!) + PCREL64LSB + SECREL32LSB + SEGREL64LSB + */ + +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/elf.h> +#include <linux/moduleloader.h> +#include <linux/string.h> +#include <linux/vmalloc.h> + +#include <asm/patch.h> +#include <asm/unaligned.h> + +#define ARCH_MODULE_DEBUG 0 + +#if ARCH_MODULE_DEBUG +# define DEBUGP printk +# define inline +#else +# define DEBUGP(fmt , a...) +#endif + +#ifdef CONFIG_ITANIUM +# define USE_BRL 0 +#else +# define USE_BRL 1 +#endif + +#define MAX_LTOFF ((uint64_t) (1 << 22)) /* max. allowable linkage-table offset */ + +/* Define some relocation helper macros/types: */ + +#define FORMAT_SHIFT 0 +#define FORMAT_BITS 3 +#define FORMAT_MASK ((1 << FORMAT_BITS) - 1) +#define VALUE_SHIFT 3 +#define VALUE_BITS 5 +#define VALUE_MASK ((1 << VALUE_BITS) - 1) + +enum reloc_target_format { + /* direct encoded formats: */ + RF_NONE = 0, + RF_INSN14 = 1, + RF_INSN22 = 2, + RF_INSN64 = 3, + RF_32MSB = 4, + RF_32LSB = 5, + RF_64MSB = 6, + RF_64LSB = 7, + + /* formats that cannot be directly decoded: */ + RF_INSN60, + RF_INSN21B, /* imm21 form 1 */ + RF_INSN21M, /* imm21 form 2 */ + RF_INSN21F /* imm21 form 3 */ +}; + +enum reloc_value_formula { + RV_DIRECT = 4, /* S + A */ + RV_GPREL = 5, /* @gprel(S + A) */ + RV_LTREL = 6, /* @ltoff(S + A) */ + RV_PLTREL = 7, /* @pltoff(S + A) */ + RV_FPTR = 8, /* @fptr(S + A) */ + RV_PCREL = 9, /* S + A - P */ + RV_LTREL_FPTR = 10, /* @ltoff(@fptr(S + A)) */ + RV_SEGREL = 11, /* @segrel(S + A) */ + RV_SECREL = 12, /* @secrel(S + A) */ + RV_BDREL = 13, /* BD + A */ + RV_LTV = 14, /* S + A (like RV_DIRECT, except frozen at static link-time) */ + RV_PCREL2 = 15, /* S + A - P */ + RV_SPECIAL = 16, /* various (see below) */ + RV_RSVD17 = 17, + RV_TPREL = 18, /* @tprel(S + A) */ + RV_LTREL_TPREL = 19, /* @ltoff(@tprel(S + A)) */ + RV_DTPMOD = 20, /* @dtpmod(S + A) */ + RV_LTREL_DTPMOD = 21, /* @ltoff(@dtpmod(S + A)) */ + RV_DTPREL = 22, /* @dtprel(S + A) */ + RV_LTREL_DTPREL = 23, /* @ltoff(@dtprel(S + A)) */ + RV_RSVD24 = 24, + RV_RSVD25 = 25, + RV_RSVD26 = 26, + RV_RSVD27 = 27 + /* 28-31 reserved for implementation-specific purposes. */ +}; + +#define N(reloc) [R_IA64_##reloc] = #reloc + +static const char *reloc_name[256] = { + N(NONE), N(IMM14), N(IMM22), N(IMM64), + N(DIR32MSB), N(DIR32LSB), N(DIR64MSB), N(DIR64LSB), + N(GPREL22), N(GPREL64I), N(GPREL32MSB), N(GPREL32LSB), + N(GPREL64MSB), N(GPREL64LSB), N(LTOFF22), N(LTOFF64I), + N(PLTOFF22), N(PLTOFF64I), N(PLTOFF64MSB), N(PLTOFF64LSB), + N(FPTR64I), N(FPTR32MSB), N(FPTR32LSB), N(FPTR64MSB), + N(FPTR64LSB), N(PCREL60B), N(PCREL21B), N(PCREL21M), + N(PCREL21F), N(PCREL32MSB), N(PCREL32LSB), N(PCREL64MSB), + N(PCREL64LSB), N(LTOFF_FPTR22), N(LTOFF_FPTR64I), N(LTOFF_FPTR32MSB), + N(LTOFF_FPTR32LSB), N(LTOFF_FPTR64MSB), N(LTOFF_FPTR64LSB), N(SEGREL32MSB), + N(SEGREL32LSB), N(SEGREL64MSB), N(SEGREL64LSB), N(SECREL32MSB), + N(SECREL32LSB), N(SECREL64MSB), N(SECREL64LSB), N(REL32MSB), + N(REL32LSB), N(REL64MSB), N(REL64LSB), N(LTV32MSB), + N(LTV32LSB), N(LTV64MSB), N(LTV64LSB), N(PCREL21BI), + N(PCREL22), N(PCREL64I), N(IPLTMSB), N(IPLTLSB), + N(COPY), N(LTOFF22X), N(LDXMOV), N(TPREL14), + N(TPREL22), N(TPREL64I), N(TPREL64MSB), N(TPREL64LSB), + N(LTOFF_TPREL22), N(DTPMOD64MSB), N(DTPMOD64LSB), N(LTOFF_DTPMOD22), + N(DTPREL14), N(DTPREL22), N(DTPREL64I), N(DTPREL32MSB), + N(DTPREL32LSB), N(DTPREL64MSB), N(DTPREL64LSB), N(LTOFF_DTPREL22) +}; + +#undef N + +struct got_entry { + uint64_t val; +}; + +struct fdesc { + uint64_t ip; + uint64_t gp; +}; + +/* Opaque struct for insns, to protect against derefs. */ +struct insn; + +static inline uint64_t +bundle (const struct insn *insn) +{ + return (uint64_t) insn & ~0xfUL; +} + +static inline int +slot (const struct insn *insn) +{ + return (uint64_t) insn & 0x3; +} + +static int +apply_imm64 (struct module *mod, struct insn *insn, uint64_t val) +{ + if (slot(insn) != 2) { + printk(KERN_ERR "%s: invalid slot number %d for IMM64\n", + mod->name, slot(insn)); + return 0; + } + ia64_patch_imm64((u64) insn, val); + return 1; +} + +static int +apply_imm60 (struct module *mod, struct insn *insn, uint64_t val) +{ + if (slot(insn) != 2) { + printk(KERN_ERR "%s: invalid slot number %d for IMM60\n", + mod->name, slot(insn)); + return 0; + } + if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) { + printk(KERN_ERR "%s: value %ld out of IMM60 range\n", mod->name, (int64_t) val); + return 0; + } + ia64_patch_imm60((u64) insn, val); + return 1; +} + +static int +apply_imm22 (struct module *mod, struct insn *insn, uint64_t val) +{ + if (val + (1 << 21) >= (1 << 22)) { + printk(KERN_ERR "%s: value %li out of IMM22 range\n", mod->name, (int64_t)val); + return 0; + } + ia64_patch((u64) insn, 0x01fffcfe000UL, ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */ + | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */)); + return 1; +} + +static int +apply_imm21b (struct module *mod, struct insn *insn, uint64_t val) +{ + if (val + (1 << 20) >= (1 << 21)) { + printk(KERN_ERR "%s: value %li out of IMM21b range\n", mod->name, (int64_t)val); + return 0; + } + ia64_patch((u64) insn, 0x11ffffe000UL, ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */ + | ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */)); + return 1; +} + +#if USE_BRL + +struct plt_entry { + /* Three instruction bundles in PLT. */ + unsigned char bundle[2][16]; +}; + +static const struct plt_entry ia64_plt_template = { + { + { + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */ + 0x00, 0x00, 0x00, 0x60 + }, + { + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* brl.many gp=TARGET_GP */ + 0x08, 0x00, 0x00, 0xc0 + } + } +}; + +static int +patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp) +{ + if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp) + && apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2), + (target_ip - (int64_t) plt->bundle[1]) / 16)) + return 1; + return 0; +} + +unsigned long +plt_target (struct plt_entry *plt) +{ + uint64_t b0, b1, *b = (uint64_t *) plt->bundle[1]; + long off; + + b0 = b[0]; b1 = b[1]; + off = ( ((b1 & 0x00fffff000000000UL) >> 36) /* imm20b -> bit 0 */ + | ((b0 >> 48) << 20) | ((b1 & 0x7fffffUL) << 36) /* imm39 -> bit 20 */ + | ((b1 & 0x0800000000000000UL) << 0)); /* i -> bit 59 */ + return (long) plt->bundle[1] + 16*off; +} + +#else /* !USE_BRL */ + +struct plt_entry { + /* Three instruction bundles in PLT. */ + unsigned char bundle[3][16]; +}; + +static const struct plt_entry ia64_plt_template = { + { + { + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* movl r16=TARGET_IP */ + 0x02, 0x00, 0x00, 0x60 + }, + { + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */ + 0x00, 0x00, 0x00, 0x60 + }, + { + 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */ + 0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /* mov b6=r16 */ + 0x60, 0x00, 0x80, 0x00 /* br.few b6 */ + } + } +}; + +static int +patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp) +{ + if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip) + && apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp)) + return 1; + return 0; +} + +unsigned long +plt_target (struct plt_entry *plt) +{ + uint64_t b0, b1, *b = (uint64_t *) plt->bundle[0]; + + b0 = b[0]; b1 = b[1]; + return ( ((b1 & 0x000007f000000000) >> 36) /* imm7b -> bit 0 */ + | ((b1 & 0x07fc000000000000) >> 43) /* imm9d -> bit 7 */ + | ((b1 & 0x0003e00000000000) >> 29) /* imm5c -> bit 16 */ + | ((b1 & 0x0000100000000000) >> 23) /* ic -> bit 21 */ + | ((b0 >> 46) << 22) | ((b1 & 0x7fffff) << 40) /* imm41 -> bit 22 */ + | ((b1 & 0x0800000000000000) << 4)); /* i -> bit 63 */ +} + +#endif /* !USE_BRL */ + +void * +module_alloc (unsigned long size) +{ + if (!size) + return NULL; + return vmalloc(size); +} + +void +module_free (struct module *mod, void *module_region) +{ + if (mod->arch.init_unw_table && module_region == mod->module_init) { + unw_remove_unwind_table(mod->arch.init_unw_table); + mod->arch.init_unw_table = NULL; + } + vfree(module_region); +} + +/* Have we already seen one of these relocations? */ +/* FIXME: we could look in other sections, too --RR */ +static int +duplicate_reloc (const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + if (rela[i].r_info == rela[num].r_info && rela[i].r_addend == rela[num].r_addend) + return 1; + } + return 0; +} + +/* Count how many GOT entries we may need */ +static unsigned int +count_gots (const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i, ret = 0; + + /* Sure, this is order(n^2), but it's usually short, and not + time critical */ + for (i = 0; i < num; i++) { + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_IA64_LTOFF22: + case R_IA64_LTOFF22X: + case R_IA64_LTOFF64I: + case R_IA64_LTOFF_FPTR22: + case R_IA64_LTOFF_FPTR64I: + case R_IA64_LTOFF_FPTR32MSB: + case R_IA64_LTOFF_FPTR32LSB: + case R_IA64_LTOFF_FPTR64MSB: + case R_IA64_LTOFF_FPTR64LSB: + if (!duplicate_reloc(rela, i)) + ret++; + break; + } + } + return ret; +} + +/* Count how many PLT entries we may need */ +static unsigned int +count_plts (const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i, ret = 0; + + /* Sure, this is order(n^2), but it's usually short, and not + time critical */ + for (i = 0; i < num; i++) { + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_IA64_PCREL21B: + case R_IA64_PLTOFF22: + case R_IA64_PLTOFF64I: + case R_IA64_PLTOFF64MSB: + case R_IA64_PLTOFF64LSB: + case R_IA64_IPLTMSB: + case R_IA64_IPLTLSB: + if (!duplicate_reloc(rela, i)) + ret++; + break; + } + } + return ret; +} + +/* We need to create an function-descriptors for any internal function + which is referenced. */ +static unsigned int +count_fdescs (const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i, ret = 0; + + /* Sure, this is order(n^2), but it's usually short, and not time critical. */ + for (i = 0; i < num; i++) { + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_IA64_FPTR64I: + case R_IA64_FPTR32LSB: + case R_IA64_FPTR32MSB: + case R_IA64_FPTR64LSB: + case R_IA64_FPTR64MSB: + case R_IA64_LTOFF_FPTR22: + case R_IA64_LTOFF_FPTR32LSB: + case R_IA64_LTOFF_FPTR32MSB: + case R_IA64_LTOFF_FPTR64I: + case R_IA64_LTOFF_FPTR64LSB: + case R_IA64_LTOFF_FPTR64MSB: + case R_IA64_IPLTMSB: + case R_IA64_IPLTLSB: + /* + * Jumps to static functions sometimes go straight to their + * offset. Of course, that may not be possible if the jump is + * from init -> core or vice. versa, so we need to generate an + * FDESC (and PLT etc) for that. + */ + case R_IA64_PCREL21B: + if (!duplicate_reloc(rela, i)) + ret++; + break; + } + } + return ret; +} + +int +module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, + struct module *mod) +{ + unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0; + Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; + + /* + * To store the PLTs and function-descriptors, we expand the .text section for + * core module-code and the .init.text section for initialization code. + */ + for (s = sechdrs; s < sechdrs_end; ++s) + if (strcmp(".core.plt", secstrings + s->sh_name) == 0) + mod->arch.core_plt = s; + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) + mod->arch.init_plt = s; + else if (strcmp(".got", secstrings + s->sh_name) == 0) + mod->arch.got = s; + else if (strcmp(".opd", secstrings + s->sh_name) == 0) + mod->arch.opd = s; + else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0) + mod->arch.unwind = s; + + if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) { + printk(KERN_ERR "%s: sections missing\n", mod->name); + return -ENOEXEC; + } + + /* GOT and PLTs can occur in any relocated section... */ + for (s = sechdrs + 1; s < sechdrs_end; ++s) { + const Elf64_Rela *rels = (void *)ehdr + s->sh_offset; + unsigned long numrels = s->sh_size/sizeof(Elf64_Rela); + + if (s->sh_type != SHT_RELA) + continue; + + gots += count_gots(rels, numrels); + fdescs += count_fdescs(rels, numrels); + if (strstr(secstrings + s->sh_name, ".init")) + init_plts += count_plts(rels, numrels); + else + core_plts += count_plts(rels, numrels); + } + + mod->arch.core_plt->sh_type = SHT_NOBITS; + mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core_plt->sh_addralign = 16; + mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry); + mod->arch.init_plt->sh_type = SHT_NOBITS; + mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init_plt->sh_addralign = 16; + mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry); + mod->arch.got->sh_type = SHT_NOBITS; + mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC; + mod->arch.got->sh_addralign = 8; + mod->arch.got->sh_size = gots * sizeof(struct got_entry); + mod->arch.opd->sh_type = SHT_NOBITS; + mod->arch.opd->sh_flags = SHF_ALLOC; + mod->arch.opd->sh_addralign = 8; + mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc); + DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n", + __FUNCTION__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size, + mod->arch.got->sh_size, mod->arch.opd->sh_size); + return 0; +} + +static inline int +in_init (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_init < mod->init_size; +} + +static inline int +in_core (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_core < mod->core_size; +} + +static inline int +is_internal (const struct module *mod, uint64_t value) +{ + return in_init(mod, value) || in_core(mod, value); +} + +/* + * Get gp-relative offset for the linkage-table entry of VALUE. + */ +static uint64_t +get_ltoff (struct module *mod, uint64_t value, int *okp) +{ + struct got_entry *got, *e; + + if (!*okp) + return 0; + + got = (void *) mod->arch.got->sh_addr; + for (e = got; e < got + mod->arch.next_got_entry; ++e) + if (e->val == value) + goto found; + + /* Not enough GOT entries? */ + if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size)) + BUG(); + + e->val = value; + ++mod->arch.next_got_entry; + found: + return (uint64_t) e - mod->arch.gp; +} + +static inline int +gp_addressable (struct module *mod, uint64_t value) +{ + return value - mod->arch.gp + MAX_LTOFF/2 < MAX_LTOFF; +} + +/* Get PC-relative PLT entry for this value. Returns 0 on failure. */ +static uint64_t +get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp) +{ + struct plt_entry *plt, *plt_end; + uint64_t target_ip, target_gp; + + if (!*okp) + return 0; + + if (in_init(mod, (uint64_t) insn)) { + plt = (void *) mod->arch.init_plt->sh_addr; + plt_end = (void *) plt + mod->arch.init_plt->sh_size; + } else { + plt = (void *) mod->arch.core_plt->sh_addr; + plt_end = (void *) plt + mod->arch.core_plt->sh_size; + } + + /* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */ + target_ip = ((uint64_t *) value)[0]; + target_gp = ((uint64_t *) value)[1]; + + /* Look for existing PLT entry. */ + while (plt->bundle[0][0]) { + if (plt_target(plt) == target_ip) + goto found; + if (++plt >= plt_end) + BUG(); + } + *plt = ia64_plt_template; + if (!patch_plt(mod, plt, target_ip, target_gp)) { + *okp = 0; + return 0; + } +#if ARCH_MODULE_DEBUG + if (plt_target(plt) != target_ip) { + printk("%s: mistargeted PLT: wanted %lx, got %lx\n", + __FUNCTION__, target_ip, plt_target(plt)); + *okp = 0; + return 0; + } +#endif + found: + return (uint64_t) plt; +} + +/* Get function descriptor for VALUE. */ +static uint64_t +get_fdesc (struct module *mod, uint64_t value, int *okp) +{ + struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr; + + if (!*okp) + return 0; + + if (!value) { + printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name); + return 0; + } + + if (!is_internal(mod, value)) + /* + * If it's not a module-local entry-point, "value" already points to a + * function-descriptor. + */ + return value; + + /* Look for existing function descriptor. */ + while (fdesc->ip) { + if (fdesc->ip == value) + return (uint64_t)fdesc; + if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size) + BUG(); + } + + /* Create new one */ + fdesc->ip = value; + fdesc->gp = mod->arch.gp; + return (uint64_t) fdesc; +} + +static inline int +do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, + Elf64_Shdr *sec, void *location) +{ + enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK; + enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK; + uint64_t val; + int ok = 1; + + val = sym->st_value + addend; + + switch (formula) { + case RV_SEGREL: /* segment base is arbitrarily chosen to be 0 for kernel modules */ + case RV_DIRECT: + break; + + case RV_GPREL: val -= mod->arch.gp; break; + case RV_LTREL: val = get_ltoff(mod, val, &ok); break; + case RV_PLTREL: val = get_plt(mod, location, val, &ok); break; + case RV_FPTR: val = get_fdesc(mod, val, &ok); break; + case RV_SECREL: val -= sec->sh_addr; break; + case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break; + + case RV_PCREL: + switch (r_type) { + case R_IA64_PCREL21B: + if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) || + (in_core(mod, val) && in_init(mod, (uint64_t)location))) { + /* + * Init section may have been allocated far away from core, + * if the branch won't reach, then allocate a plt for it. + */ + uint64_t delta = ((int64_t)val - (int64_t)location) / 16; + if (delta + (1 << 20) >= (1 << 21)) { + val = get_fdesc(mod, val, &ok); + val = get_plt(mod, location, val, &ok); + } + } else if (!is_internal(mod, val)) + val = get_plt(mod, location, val, &ok); + /* FALL THROUGH */ + default: + val -= bundle(location); + break; + + case R_IA64_PCREL32MSB: + case R_IA64_PCREL32LSB: + case R_IA64_PCREL64MSB: + case R_IA64_PCREL64LSB: + val -= (uint64_t) location; + break; + + } + switch (r_type) { + case R_IA64_PCREL60B: format = RF_INSN60; break; + case R_IA64_PCREL21B: format = RF_INSN21B; break; + case R_IA64_PCREL21M: format = RF_INSN21M; break; + case R_IA64_PCREL21F: format = RF_INSN21F; break; + default: break; + } + break; + + case RV_BDREL: + val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); + break; + + case RV_LTV: + /* can link-time value relocs happen here? */ + BUG(); + break; + + case RV_PCREL2: + if (r_type == R_IA64_PCREL21BI) { + if (!is_internal(mod, val)) { + printk(KERN_ERR "%s: %s reloc against non-local symbol (%lx)\n", + __FUNCTION__, reloc_name[r_type], val); + return -ENOEXEC; + } + format = RF_INSN21B; + } + val -= bundle(location); + break; + + case RV_SPECIAL: + switch (r_type) { + case R_IA64_IPLTMSB: + case R_IA64_IPLTLSB: + val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok); + format = RF_64LSB; + if (r_type == R_IA64_IPLTMSB) + format = RF_64MSB; + break; + + case R_IA64_SUB: + val = addend - sym->st_value; + format = RF_INSN64; + break; + + case R_IA64_LTOFF22X: + if (gp_addressable(mod, val)) + val -= mod->arch.gp; + else + val = get_ltoff(mod, val, &ok); + format = RF_INSN22; + break; + + case R_IA64_LDXMOV: + if (gp_addressable(mod, val)) { + /* turn "ld8" into "mov": */ + DEBUGP("%s: patching ld8 at %p to mov\n", __FUNCTION__, location); + ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL); + } + return 0; + + default: + if (reloc_name[r_type]) + printk(KERN_ERR "%s: special reloc %s not supported", + mod->name, reloc_name[r_type]); + else + printk(KERN_ERR "%s: unknown special reloc %x\n", + mod->name, r_type); + return -ENOEXEC; + } + break; + + case RV_TPREL: + case RV_LTREL_TPREL: + case RV_DTPMOD: + case RV_LTREL_DTPMOD: + case RV_DTPREL: + case RV_LTREL_DTPREL: + printk(KERN_ERR "%s: %s reloc not supported\n", + mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?"); + return -ENOEXEC; + + default: + printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type); + return -ENOEXEC; + } + + if (!ok) + return -ENOEXEC; + + DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __FUNCTION__, location, val, + reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend); + + switch (format) { + case RF_INSN21B: ok = apply_imm21b(mod, location, (int64_t) val / 16); break; + case RF_INSN22: ok = apply_imm22(mod, location, val); break; + case RF_INSN64: ok = apply_imm64(mod, location, val); break; + case RF_INSN60: ok = apply_imm60(mod, location, (int64_t) val / 16); break; + case RF_32LSB: put_unaligned(val, (uint32_t *) location); break; + case RF_64LSB: put_unaligned(val, (uint64_t *) location); break; + case RF_32MSB: /* ia64 Linux is little-endian... */ + case RF_64MSB: /* ia64 Linux is little-endian... */ + case RF_INSN14: /* must be within-module, i.e., resolved by "ld -r" */ + case RF_INSN21M: /* must be within-module, i.e., resolved by "ld -r" */ + case RF_INSN21F: /* must be within-module, i.e., resolved by "ld -r" */ + printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n", + mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?"); + return -ENOEXEC; + + default: + printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n", + mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format); + return -ENOEXEC; + } + return ok ? 0 : -ENOEXEC; +} + +int +apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, + unsigned int relsec, struct module *mod) +{ + unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela); + Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr; + Elf64_Shdr *target_sec; + int ret; + + DEBUGP("%s: applying section %u (%u relocs) to %u\n", __FUNCTION__, + relsec, n, sechdrs[relsec].sh_info); + + target_sec = sechdrs + sechdrs[relsec].sh_info; + + if (target_sec->sh_entsize == ~0UL) + /* + * If target section wasn't allocated, we don't need to relocate it. + * Happens, e.g., for debug sections. + */ + return 0; + + if (!mod->arch.gp) { + /* + * XXX Should have an arch-hook for running this after final section + * addresses have been selected... + */ + /* See if gp can cover the entire core module: */ + uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2; + if (mod->core_size >= MAX_LTOFF) + /* + * This takes advantage of fact that SHF_ARCH_SMALL gets allocated + * at the end of the module. + */ + gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2; + mod->arch.gp = gp; + DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp); + } + + for (i = 0; i < n; i++) { + ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info), + ((Elf64_Sym *) sechdrs[symindex].sh_addr + + ELF64_R_SYM(rela[i].r_info)), + rela[i].r_addend, target_sec, + (void *) target_sec->sh_addr + rela[i].r_offset); + if (ret < 0) + return ret; + } + return 0; +} + +int +apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, + unsigned int relsec, struct module *mod) +{ + printk(KERN_ERR "module %s: REL relocs in section %u unsupported\n", mod->name, relsec); + return -ENOEXEC; +} + +/* + * Modules contain a single unwind table which covers both the core and the init text + * sections but since the two are not contiguous, we need to split this table up such that + * we can register (and unregister) each "segment" seperately. Fortunately, this sounds + * more complicated than it really is. + */ +static void +register_unwind_table (struct module *mod) +{ + struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr; + struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start); + struct unw_table_entry tmp, *e1, *e2, *core, *init; + unsigned long num_init = 0, num_core = 0; + + /* First, count how many init and core unwind-table entries there are. */ + for (e1 = start; e1 < end; ++e1) + if (in_init(mod, e1->start_offset)) + ++num_init; + else + ++num_core; + /* + * Second, sort the table such that all unwind-table entries for the init and core + * text sections are nicely separated. We do this with a stupid bubble sort + * (unwind tables don't get ridiculously huge). + */ + for (e1 = start; e1 < end; ++e1) { + for (e2 = e1 + 1; e2 < end; ++e2) { + if (e2->start_offset < e1->start_offset) { + tmp = *e1; + *e1 = *e2; + *e2 = tmp; + } + } + } + /* + * Third, locate the init and core segments in the unwind table: + */ + if (in_init(mod, start->start_offset)) { + init = start; + core = start + num_init; + } else { + core = start; + init = start + num_core; + } + + DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __FUNCTION__, + mod->name, mod->arch.gp, num_init, num_core); + + /* + * Fourth, register both tables (if not empty). + */ + if (num_core > 0) { + mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, + core, core + num_core); + DEBUGP("%s: core: handle=%p [%p-%p)\n", __FUNCTION__, + mod->arch.core_unw_table, core, core + num_core); + } + if (num_init > 0) { + mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, + init, init + num_init); + DEBUGP("%s: init: handle=%p [%p-%p)\n", __FUNCTION__, + mod->arch.init_unw_table, init, init + num_init); + } +} + +int +module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) +{ + DEBUGP("%s: init: entry=%p\n", __FUNCTION__, mod->init); + if (mod->arch.unwind) + register_unwind_table(mod); + return 0; +} + +void +module_arch_cleanup (struct module *mod) +{ + if (mod->arch.init_unw_table) + unw_remove_unwind_table(mod->arch.init_unw_table); + if (mod->arch.core_unw_table) + unw_remove_unwind_table(mod->arch.core_unw_table); +} + +#ifdef CONFIG_SMP +void +percpu_modcopy (void *pcpudst, const void *src, unsigned long size) +{ + unsigned int i; + for (i = 0; i < NR_CPUS; i++) + if (cpu_possible(i)) + memcpy(pcpudst + __per_cpu_offset[i], src, size); +} +#endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S new file mode 100644 index 000000000000..5018c7f2e7a8 --- /dev/null +++ b/arch/ia64/kernel/pal.S @@ -0,0 +1,302 @@ +/* + * PAL Firmware support + * IA-64 Processor Programmers Reference Vol 2 + * + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co + * David Mosberger <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * + * 05/22/2000 eranian Added support for stacked register calls + * 05/24/2000 eranian Added support for physical mode static calls + */ + +#include <asm/asmmacro.h> +#include <asm/processor.h> + + .data +pal_entry_point: + data8 ia64_pal_default_handler + .text + +/* + * Set the PAL entry point address. This could be written in C code, but we do it here + * to keep it all in one module (besides, it's so trivial that it's + * not a big deal). + * + * in0 Address of the PAL entry point (text address, NOT a function descriptor). + */ +GLOBAL_ENTRY(ia64_pal_handler_init) + alloc r3=ar.pfs,1,0,0,0 + movl r2=pal_entry_point + ;; + st8 [r2]=in0 + br.ret.sptk.many rp +END(ia64_pal_handler_init) + +/* + * Default PAL call handler. This needs to be coded in assembly because it uses + * the static calling convention, i.e., the RSE may not be used and calls are + * done via "br.cond" (not "br.call"). + */ +GLOBAL_ENTRY(ia64_pal_default_handler) + mov r8=-1 + br.cond.sptk.many rp +END(ia64_pal_default_handler) + +/* + * Make a PAL call using the static calling convention. + * + * in0 Index of PAL service + * in1 - in3 Remaining PAL arguments + * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic + * + */ +GLOBAL_ENTRY(ia64_pal_call_static) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) + alloc loc1 = ar.pfs,5,5,0,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 + mov r29 = in1 + mov r8 = ip + } + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + tbit.nz p6,p7 = in4, 0 + adds r8 = 1f-1b,r8 + mov loc4=ar.rsc // save RSE configuration + ;; + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov loc3 = psr + mov loc0 = rp + .body + mov r30 = in2 + +(p6) rsm psr.i | psr.ic + mov r31 = in3 + mov b7 = loc2 + +(p7) rsm psr.i + ;; +(p6) srlz.i + mov rp = r8 + br.cond.sptk.many b7 +1: mov psr.l = loc3 + mov ar.rsc = loc4 // restore RSE configuration + mov ar.pfs = loc1 + mov rp = loc0 + ;; + srlz.d // seralize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_static) + +/* + * Make a PAL call using the stacked registers calling convention. + * + * Inputs: + * in0 Index of PAL service + * in2 - in3 Remaning PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_stacked) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) + alloc loc1 = ar.pfs,4,4,4,0 + movl loc2 = pal_entry_point + + mov r28 = in0 // Index MUST be copied to r28 + mov out0 = in0 // AND in0 of PAL function + mov loc0 = rp + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov out1 = in1 + mov out2 = in2 + mov out3 = in3 + mov loc3 = psr + ;; + rsm psr.i + mov b7 = loc2 + ;; + br.call.sptk.many rp=b7 // now make the call +.ret0: mov psr.l = loc3 + mov ar.pfs = loc1 + mov rp = loc0 + ;; + srlz.d // serialize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_stacked) + +/* + * Make a physical mode PAL call using the static registers calling convention. + * + * Inputs: + * in0 Index of PAL service + * in2 - in3 Remaning PAL arguments + * + * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel. + * So we don't need to clear them. + */ +#define PAL_PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PAL_PSR_BITS_TO_SET \ + (IA64_PSR_BN) + + +GLOBAL_ENTRY(ia64_pal_call_phys_static) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) + alloc loc1 = ar.pfs,4,7,0,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 // copy procedure index + mov r8 = ip // save ip to compute branch + mov loc0 = rp // save rp + } + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov r29 = in1 // first argument + mov r30 = in2 // copy arg2 + mov r31 = in3 // copy arg3 + ;; + mov loc3 = psr // save psr + adds r8 = 1f-1b,r8 // calculate return address for call + ;; + mov loc4=ar.rsc // save RSE configuration + dep.z loc2=loc2,0,61 // convert pal entry point to physical + tpa r8=r8 // convert rp to physical + ;; + mov b7 = loc2 // install target to branch reg + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + movl r16=PAL_PSR_BITS_TO_CLEAR + movl r17=PAL_PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.many rp=ia64_switch_mode_phys +.ret1: mov rp = r8 // install return address (physical) + mov loc5 = r19 + mov loc6 = r20 + br.cond.sptk.many b7 +1: + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // r16= original psr + mov r19=loc5 + mov r20=loc6 + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode +.ret2: + mov psr.l = loc3 // restore init PSR + + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // seralize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_phys_static) + +/* + * Make a PAL call using the stacked registers in physical mode. + * + * Inputs: + * in0 Index of PAL service + * in2 - in3 Remaning PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_phys_stacked) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) + alloc loc1 = ar.pfs,5,7,4,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 // copy procedure index + mov loc0 = rp // save rp + } + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov out0 = in0 // first argument + mov out1 = in1 // copy arg2 + mov out2 = in2 // copy arg3 + mov out3 = in3 // copy arg3 + ;; + mov loc3 = psr // save psr + ;; + mov loc4=ar.rsc // save RSE configuration + dep.z loc2=loc2,0,61 // convert pal entry point to physical + ;; + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + movl r16=PAL_PSR_BITS_TO_CLEAR + movl r17=PAL_PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + mov b7 = loc2 // install target to branch reg + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.many rp=ia64_switch_mode_phys +.ret6: + mov loc5 = r19 + mov loc6 = r20 + br.call.sptk.many rp=b7 // now make the call +.ret7: + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // r16= original psr + mov r19=loc5 + mov r20=loc6 + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode + +.ret8: mov psr.l = loc3 // restore init PSR + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // seralize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_phys_stacked) + +/* + * Save scratch fp scratch regs which aren't saved in pt_regs already (fp10-fp15). + * + * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch + * regs fp-low partition. + * + * Inputs: + * in0 Address of stack storage for fp regs + */ +GLOBAL_ENTRY(ia64_save_scratch_fpregs) + alloc r3=ar.pfs,1,0,0,0 + add r2=16,in0 + ;; + stf.spill [in0] = f10,32 + stf.spill [r2] = f11,32 + ;; + stf.spill [in0] = f12,32 + stf.spill [r2] = f13,32 + ;; + stf.spill [in0] = f14,32 + stf.spill [r2] = f15,32 + br.ret.sptk.many rp +END(ia64_save_scratch_fpregs) + +/* + * Load scratch fp scratch regs (fp10-fp15) + * + * Inputs: + * in0 Address of stack storage for fp regs + */ +GLOBAL_ENTRY(ia64_load_scratch_fpregs) + alloc r3=ar.pfs,1,0,0,0 + add r2=16,in0 + ;; + ldf.fill f10 = [in0],32 + ldf.fill f11 = [r2],32 + ;; + ldf.fill f12 = [in0],32 + ldf.fill f13 = [r2],32 + ;; + ldf.fill f14 = [in0],32 + ldf.fill f15 = [r2],32 + br.ret.sptk.many rp +END(ia64_load_scratch_fpregs) diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c new file mode 100644 index 000000000000..25e7c8344564 --- /dev/null +++ b/arch/ia64/kernel/palinfo.c @@ -0,0 +1,1023 @@ +/* + * palinfo.c + * + * Prints processor specific information reported by PAL. + * This code is based on specification of PAL as of the + * Intel IA-64 Architecture Software Developer's Manual v1.0. + * + * + * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2004 Intel Corporation + * Ashok Raj <ashok.raj@intel.com> + * + * 05/26/2000 S.Eranian initial release + * 08/21/2000 S.Eranian updated to July 2000 PAL specs + * 02/05/2001 S.Eranian fixed module support + * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes + * 03/24/2004 Ashok Raj updated to work with CPU Hotplug + */ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/efi.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> + +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <linux/smp.h> + +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); +MODULE_DESCRIPTION("/proc interface to IA-64 PAL"); +MODULE_LICENSE("GPL"); + +#define PALINFO_VERSION "0.5" + +typedef int (*palinfo_func_t)(char*); + +typedef struct { + const char *name; /* name of the proc entry */ + palinfo_func_t proc_read; /* function to call for reading */ + struct proc_dir_entry *entry; /* registered entry (removal) */ +} palinfo_entry_t; + + +/* + * A bunch of string array to get pretty printing + */ + +static char *cache_types[] = { + "", /* not used */ + "Instruction", + "Data", + "Data/Instruction" /* unified */ +}; + +static const char *cache_mattrib[]={ + "WriteThrough", + "WriteBack", + "", /* reserved */ + "" /* reserved */ +}; + +static const char *cache_st_hints[]={ + "Temporal, level 1", + "Reserved", + "Reserved", + "Non-temporal, all levels", + "Reserved", + "Reserved", + "Reserved", + "Reserved" +}; + +static const char *cache_ld_hints[]={ + "Temporal, level 1", + "Non-temporal, level 1", + "Reserved", + "Non-temporal, all levels", + "Reserved", + "Reserved", + "Reserved", + "Reserved" +}; + +static const char *rse_hints[]={ + "enforced lazy", + "eager stores", + "eager loads", + "eager loads and stores" +}; + +#define RSE_HINTS_COUNT ARRAY_SIZE(rse_hints) + +static const char *mem_attrib[]={ + "WB", /* 000 */ + "SW", /* 001 */ + "010", /* 010 */ + "011", /* 011 */ + "UC", /* 100 */ + "UCE", /* 101 */ + "WC", /* 110 */ + "NaTPage" /* 111 */ +}; + +/* + * Take a 64bit vector and produces a string such that + * if bit n is set then 2^n in clear text is generated. The adjustment + * to the right unit is also done. + * + * Input: + * - a pointer to a buffer to hold the string + * - a 64-bit vector + * Ouput: + * - a pointer to the end of the buffer + * + */ +static char * +bitvector_process(char *p, u64 vector) +{ + int i,j; + const char *units[]={ "", "K", "M", "G", "T" }; + + for (i=0, j=0; i < 64; i++ , j=i/10) { + if (vector & 0x1) { + p += sprintf(p, "%d%s ", 1 << (i-j*10), units[j]); + } + vector >>= 1; + } + return p; +} + +/* + * Take a 64bit vector and produces a string such that + * if bit n is set then register n is present. The function + * takes into account consecutive registers and prints out ranges. + * + * Input: + * - a pointer to a buffer to hold the string + * - a 64-bit vector + * Ouput: + * - a pointer to the end of the buffer + * + */ +static char * +bitregister_process(char *p, u64 *reg_info, int max) +{ + int i, begin, skip = 0; + u64 value = reg_info[0]; + + value >>= i = begin = ffs(value) - 1; + + for(; i < max; i++ ) { + + if (i != 0 && (i%64) == 0) value = *++reg_info; + + if ((value & 0x1) == 0 && skip == 0) { + if (begin <= i - 2) + p += sprintf(p, "%d-%d ", begin, i-1); + else + p += sprintf(p, "%d ", i-1); + skip = 1; + begin = -1; + } else if ((value & 0x1) && skip == 1) { + skip = 0; + begin = i; + } + value >>=1; + } + if (begin > -1) { + if (begin < 127) + p += sprintf(p, "%d-127", begin); + else + p += sprintf(p, "127"); + } + + return p; +} + +static int +power_info(char *page) +{ + s64 status; + char *p = page; + u64 halt_info_buffer[8]; + pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer; + int i; + + status = ia64_pal_halt_info(halt_info); + if (status != 0) return 0; + + for (i=0; i < 8 ; i++ ) { + if (halt_info[i].pal_power_mgmt_info_s.im == 1) { + p += sprintf(p, "Power level %d:\n" + "\tentry_latency : %d cycles\n" + "\texit_latency : %d cycles\n" + "\tpower consumption : %d mW\n" + "\tCache+TLB coherency : %s\n", i, + halt_info[i].pal_power_mgmt_info_s.entry_latency, + halt_info[i].pal_power_mgmt_info_s.exit_latency, + halt_info[i].pal_power_mgmt_info_s.power_consumption, + halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No"); + } else { + p += sprintf(p,"Power level %d: not implemented\n",i); + } + } + return p - page; +} + +static int +cache_info(char *page) +{ + char *p = page; + u64 i, levels, unique_caches; + pal_cache_config_info_t cci; + int j, k; + s64 status; + + if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { + printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); + return 0; + } + + p += sprintf(p, "Cache levels : %ld\nUnique caches : %ld\n\n", levels, unique_caches); + + for (i=0; i < levels; i++) { + + for (j=2; j >0 ; j--) { + + /* even without unification some level may not be present */ + if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) { + continue; + } + p += sprintf(p, + "%s Cache level %lu:\n" + "\tSize : %lu bytes\n" + "\tAttributes : ", + cache_types[j+cci.pcci_unified], i+1, + cci.pcci_cache_size); + + if (cci.pcci_unified) p += sprintf(p, "Unified "); + + p += sprintf(p, "%s\n", cache_mattrib[cci.pcci_cache_attr]); + + p += sprintf(p, + "\tAssociativity : %d\n" + "\tLine size : %d bytes\n" + "\tStride : %d bytes\n", + cci.pcci_assoc, 1<<cci.pcci_line_size, 1<<cci.pcci_stride); + if (j == 1) + p += sprintf(p, "\tStore latency : N/A\n"); + else + p += sprintf(p, "\tStore latency : %d cycle(s)\n", + cci.pcci_st_latency); + + p += sprintf(p, + "\tLoad latency : %d cycle(s)\n" + "\tStore hints : ", cci.pcci_ld_latency); + + for(k=0; k < 8; k++ ) { + if ( cci.pcci_st_hints & 0x1) + p += sprintf(p, "[%s]", cache_st_hints[k]); + cci.pcci_st_hints >>=1; + } + p += sprintf(p, "\n\tLoad hints : "); + + for(k=0; k < 8; k++ ) { + if (cci.pcci_ld_hints & 0x1) + p += sprintf(p, "[%s]", cache_ld_hints[k]); + cci.pcci_ld_hints >>=1; + } + p += sprintf(p, + "\n\tAlias boundary : %d byte(s)\n" + "\tTag LSB : %d\n" + "\tTag MSB : %d\n", + 1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb, + cci.pcci_tag_msb); + + /* when unified, data(j=2) is enough */ + if (cci.pcci_unified) break; + } + } + return p - page; +} + + +static int +vm_info(char *page) +{ + char *p = page; + u64 tr_pages =0, vw_pages=0, tc_pages; + u64 attrib; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + pal_tc_info_u_t tc_info; + ia64_ptce_info_t ptce; + const char *sep; + int i, j; + s64 status; + + if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { + printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); + return 0; + } + + + p += sprintf(p, + "Physical Address Space : %d bits\n" + "Virtual Address Space : %d bits\n" + "Protection Key Registers(PKR) : %d\n" + "Implemented bits in PKR.key : %d\n" + "Hash Tag ID : 0x%x\n" + "Size of RR.rid : %d\n", + vm_info_1.pal_vm_info_1_s.phys_add_size, + vm_info_2.pal_vm_info_2_s.impl_va_msb+1, vm_info_1.pal_vm_info_1_s.max_pkr+1, + vm_info_1.pal_vm_info_1_s.key_size, vm_info_1.pal_vm_info_1_s.hash_tag_id, + vm_info_2.pal_vm_info_2_s.rid_size); + + if (ia64_pal_mem_attrib(&attrib) != 0) + return 0; + + p += sprintf(p, "Supported memory attributes : "); + sep = ""; + for (i = 0; i < 8; i++) { + if (attrib & (1 << i)) { + p += sprintf(p, "%s%s", sep, mem_attrib[i]); + sep = ", "; + } + } + p += sprintf(p, "\n"); + + if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) { + printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status); + return 0; + } + + p += sprintf(p, + "\nTLB walker : %simplemented\n" + "Number of DTR : %d\n" + "Number of ITR : %d\n" + "TLB insertable page sizes : ", + vm_info_1.pal_vm_info_1_s.vw ? "" : "not ", + vm_info_1.pal_vm_info_1_s.max_dtr_entry+1, + vm_info_1.pal_vm_info_1_s.max_itr_entry+1); + + + p = bitvector_process(p, tr_pages); + + p += sprintf(p, "\nTLB purgeable page sizes : "); + + p = bitvector_process(p, vw_pages); + + if ((status=ia64_get_ptce(&ptce)) != 0) { + printk(KERN_ERR "ia64_get_ptce=%ld\n", status); + return 0; + } + + p += sprintf(p, + "\nPurge base address : 0x%016lx\n" + "Purge outer loop count : %d\n" + "Purge inner loop count : %d\n" + "Purge outer loop stride : %d\n" + "Purge inner loop stride : %d\n", + ptce.base, ptce.count[0], ptce.count[1], ptce.stride[0], ptce.stride[1]); + + p += sprintf(p, + "TC Levels : %d\n" + "Unique TC(s) : %d\n", + vm_info_1.pal_vm_info_1_s.num_tc_levels, + vm_info_1.pal_vm_info_1_s.max_unique_tcs); + + for(i=0; i < vm_info_1.pal_vm_info_1_s.num_tc_levels; i++) { + for (j=2; j>0 ; j--) { + tc_pages = 0; /* just in case */ + + + /* even without unification, some levels may not be present */ + if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) { + continue; + } + + p += sprintf(p, + "\n%s Translation Cache Level %d:\n" + "\tHash sets : %d\n" + "\tAssociativity : %d\n" + "\tNumber of entries : %d\n" + "\tFlags : ", + cache_types[j+tc_info.tc_unified], i+1, tc_info.tc_num_sets, + tc_info.tc_associativity, tc_info.tc_num_entries); + + if (tc_info.tc_pf) p += sprintf(p, "PreferredPageSizeOptimized "); + if (tc_info.tc_unified) p += sprintf(p, "Unified "); + if (tc_info.tc_reduce_tr) p += sprintf(p, "TCReduction"); + + p += sprintf(p, "\n\tSupported page sizes: "); + + p = bitvector_process(p, tc_pages); + + /* when unified date (j=2) is enough */ + if (tc_info.tc_unified) break; + } + } + p += sprintf(p, "\n"); + + return p - page; +} + + +static int +register_info(char *page) +{ + char *p = page; + u64 reg_info[2]; + u64 info; + u64 phys_stacked; + pal_hints_u_t hints; + u64 iregs, dregs; + char *info_type[]={ + "Implemented AR(s)", + "AR(s) with read side-effects", + "Implemented CR(s)", + "CR(s) with read side-effects", + }; + + for(info=0; info < 4; info++) { + + if (ia64_pal_register_info(info, ®_info[0], ®_info[1]) != 0) return 0; + + p += sprintf(p, "%-32s : ", info_type[info]); + + p = bitregister_process(p, reg_info, 128); + + p += sprintf(p, "\n"); + } + + if (ia64_pal_rse_info(&phys_stacked, &hints) != 0) return 0; + + p += sprintf(p, + "RSE stacked physical registers : %ld\n" + "RSE load/store hints : %ld (%s)\n", + phys_stacked, hints.ph_data, + hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)"); + + if (ia64_pal_debug_info(&iregs, &dregs)) + return 0; + + p += sprintf(p, + "Instruction debug register pairs : %ld\n" + "Data debug register pairs : %ld\n", iregs, dregs); + + return p - page; +} + +static const char *proc_features[]={ + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL, + "XIP,XPSR,XFS implemented", + "XR1-XR3 implemented", + "Disable dynamic predicate prediction", + "Disable processor physical number", + "Disable dynamic data cache prefetch", + "Disable dynamic inst cache prefetch", + "Disable dynamic branch prediction", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "Disable BINIT on processor time-out", + "Disable dynamic power management (DPM)", + "Disable coherency", + "Disable cache", + "Enable CMCI promotion", + "Enable MCA to BINIT promotion", + "Enable MCA promotion", + "Enable BERR promotion" +}; + + +static int +processor_info(char *page) +{ + char *p = page; + const char **v = proc_features; + u64 avail=1, status=1, control=1; + int i; + s64 ret; + + if ((ret=ia64_pal_proc_get_features(&avail, &status, &control)) != 0) return 0; + + for(i=0; i < 64; i++, v++,avail >>=1, status >>=1, control >>=1) { + if ( ! *v ) continue; + p += sprintf(p, "%-40s : %s%s %s\n", *v, + avail & 0x1 ? "" : "NotImpl", + avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", + avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): ""); + } + return p - page; +} + +static const char *bus_features[]={ + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL, + "Request Bus Parking", + "Bus Lock Mask", + "Enable Half Transfer", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "Enable Cache Line Repl. Shared", + "Enable Cache Line Repl. Exclusive", + "Disable Transaction Queuing", + "Disable Response Error Checking", + "Disable Bus Error Checking", + "Disable Bus Requester Internal Error Signalling", + "Disable Bus Requester Error Signalling", + "Disable Bus Initialization Event Checking", + "Disable Bus Initialization Event Signalling", + "Disable Bus Address Error Checking", + "Disable Bus Address Error Signalling", + "Disable Bus Data Error Checking" +}; + + +static int +bus_info(char *page) +{ + char *p = page; + const char **v = bus_features; + pal_bus_features_u_t av, st, ct; + u64 avail, status, control; + int i; + s64 ret; + + if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0; + + avail = av.pal_bus_features_val; + status = st.pal_bus_features_val; + control = ct.pal_bus_features_val; + + for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) { + if ( ! *v ) continue; + p += sprintf(p, "%-48s : %s%s %s\n", *v, + avail & 0x1 ? "" : "NotImpl", + avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", + avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): ""); + } + return p - page; +} + +static int +version_info(char *page) +{ + pal_version_u_t min_ver, cur_ver; + char *p = page; + + /* The PAL_VERSION call is advertised as being able to support + * both physical and virtual mode calls. This seems to be a documentation + * bug rather than firmware bug. In fact, it does only support physical mode. + * So now the code reflects this fact and the pal_version() has been updated + * accordingly. + */ + if (ia64_pal_version(&min_ver, &cur_ver) != 0) return 0; + + p += sprintf(p, + "PAL_vendor : 0x%02x (min=0x%02x)\n" + "PAL_A : %x.%x.%x (min=%x.%x.%x)\n" + "PAL_B : %x.%x.%x (min=%x.%x.%x)\n", + cur_ver.pal_version_s.pv_pal_vendor, min_ver.pal_version_s.pv_pal_vendor, + + cur_ver.pal_version_s.pv_pal_a_model>>4, + cur_ver.pal_version_s.pv_pal_a_model&0xf, cur_ver.pal_version_s.pv_pal_a_rev, + min_ver.pal_version_s.pv_pal_a_model>>4, + min_ver.pal_version_s.pv_pal_a_model&0xf, min_ver.pal_version_s.pv_pal_a_rev, + + cur_ver.pal_version_s.pv_pal_b_model>>4, + cur_ver.pal_version_s.pv_pal_b_model&0xf, cur_ver.pal_version_s.pv_pal_b_rev, + min_ver.pal_version_s.pv_pal_b_model>>4, + min_ver.pal_version_s.pv_pal_b_model&0xf, min_ver.pal_version_s.pv_pal_b_rev); + return p - page; +} + +static int +perfmon_info(char *page) +{ + char *p = page; + u64 pm_buffer[16]; + pal_perf_mon_info_u_t pm_info; + + if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0) return 0; + + p += sprintf(p, + "PMC/PMD pairs : %d\n" + "Counter width : %d bits\n" + "Cycle event number : %d\n" + "Retired event number : %d\n" + "Implemented PMC : ", + pm_info.pal_perf_mon_info_s.generic, pm_info.pal_perf_mon_info_s.width, + pm_info.pal_perf_mon_info_s.cycles, pm_info.pal_perf_mon_info_s.retired); + + p = bitregister_process(p, pm_buffer, 256); + p += sprintf(p, "\nImplemented PMD : "); + p = bitregister_process(p, pm_buffer+4, 256); + p += sprintf(p, "\nCycles count capable : "); + p = bitregister_process(p, pm_buffer+8, 256); + p += sprintf(p, "\nRetired bundles count capable : "); + +#ifdef CONFIG_ITANIUM + /* + * PAL_PERF_MON_INFO reports that only PMC4 can be used to count CPU_CYCLES + * which is wrong, both PMC4 and PMD5 support it. + */ + if (pm_buffer[12] == 0x10) pm_buffer[12]=0x30; +#endif + + p = bitregister_process(p, pm_buffer+12, 256); + + p += sprintf(p, "\n"); + + return p - page; +} + +static int +frequency_info(char *page) +{ + char *p = page; + struct pal_freq_ratio proc, itc, bus; + u64 base; + + if (ia64_pal_freq_base(&base) == -1) + p += sprintf(p, "Output clock : not implemented\n"); + else + p += sprintf(p, "Output clock : %ld ticks/s\n", base); + + if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0; + + p += sprintf(p, + "Processor/Clock ratio : %ld/%ld\n" + "Bus/Clock ratio : %ld/%ld\n" + "ITC/Clock ratio : %ld/%ld\n", + proc.num, proc.den, bus.num, bus.den, itc.num, itc.den); + + return p - page; +} + +static int +tr_info(char *page) +{ + char *p = page; + s64 status; + pal_tr_valid_u_t tr_valid; + u64 tr_buffer[4]; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + u64 i, j; + u64 max[3], pgm; + struct ifa_reg { + u64 valid:1; + u64 ig:11; + u64 vpn:52; + } *ifa_reg; + struct itir_reg { + u64 rv1:2; + u64 ps:6; + u64 key:24; + u64 rv2:32; + } *itir_reg; + struct gr_reg { + u64 p:1; + u64 rv1:1; + u64 ma:3; + u64 a:1; + u64 d:1; + u64 pl:2; + u64 ar:3; + u64 ppn:38; + u64 rv2:2; + u64 ed:1; + u64 ig:11; + } *gr_reg; + struct rid_reg { + u64 ig1:1; + u64 rv1:1; + u64 ig2:6; + u64 rid:24; + u64 rv2:32; + } *rid_reg; + + if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { + printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); + return 0; + } + max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; + max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; + + for (i=0; i < 2; i++ ) { + for (j=0; j < max[i]; j++) { + + status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid); + if (status != 0) { + printk(KERN_ERR "palinfo: pal call failed on tr[%lu:%lu]=%ld\n", + i, j, status); + continue; + } + + ifa_reg = (struct ifa_reg *)&tr_buffer[2]; + + if (ifa_reg->valid == 0) continue; + + gr_reg = (struct gr_reg *)tr_buffer; + itir_reg = (struct itir_reg *)&tr_buffer[1]; + rid_reg = (struct rid_reg *)&tr_buffer[3]; + + pgm = -1 << (itir_reg->ps - 12); + p += sprintf(p, + "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n" + "\tppn : 0x%lx\n" + "\tvpn : 0x%lx\n" + "\tps : ", + "ID"[i], j, + tr_valid.pal_tr_valid_s.access_rights_valid, + tr_valid.pal_tr_valid_s.priv_level_valid, + tr_valid.pal_tr_valid_s.dirty_bit_valid, + tr_valid.pal_tr_valid_s.mem_attr_valid, + (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12); + + p = bitvector_process(p, 1<< itir_reg->ps); + + p += sprintf(p, + "\n\tpl : %d\n" + "\tar : %d\n" + "\trid : %x\n" + "\tp : %d\n" + "\tma : %d\n" + "\td : %d\n", + gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma, + gr_reg->d); + } + } + return p - page; +} + + + +/* + * List {name,function} pairs for every entry in /proc/palinfo/cpu* + */ +static palinfo_entry_t palinfo_entries[]={ + { "version_info", version_info, }, + { "vm_info", vm_info, }, + { "cache_info", cache_info, }, + { "power_info", power_info, }, + { "register_info", register_info, }, + { "processor_info", processor_info, }, + { "perfmon_info", perfmon_info, }, + { "frequency_info", frequency_info, }, + { "bus_info", bus_info }, + { "tr_info", tr_info, } +}; + +#define NR_PALINFO_ENTRIES (int) ARRAY_SIZE(palinfo_entries) + +/* + * this array is used to keep track of the proc entries we create. This is + * required in the module mode when we need to remove all entries. The procfs code + * does not do recursion of deletion + * + * Notes: + * - +1 accounts for the cpuN directory entry in /proc/pal + */ +#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)) + +static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES]; +static struct proc_dir_entry *palinfo_dir; + +/* + * This data structure is used to pass which cpu,function is being requested + * It must fit in a 64bit quantity to be passed to the proc callback routine + * + * In SMP mode, when we get a request for another CPU, we must call that + * other CPU using IPI and wait for the result before returning. + */ +typedef union { + u64 value; + struct { + unsigned req_cpu: 32; /* for which CPU this info is */ + unsigned func_id: 32; /* which function is requested */ + } pal_func_cpu; +} pal_func_cpu_u_t; + +#define req_cpu pal_func_cpu.req_cpu +#define func_id pal_func_cpu.func_id + +#ifdef CONFIG_SMP + +/* + * used to hold information about final function to call + */ +typedef struct { + palinfo_func_t func; /* pointer to function to call */ + char *page; /* buffer to store results */ + int ret; /* return value from call */ +} palinfo_smp_data_t; + + +/* + * this function does the actual final call and he called + * from the smp code, i.e., this is the palinfo callback routine + */ +static void +palinfo_smp_call(void *info) +{ + palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; + if (data == NULL) { + printk(KERN_ERR "palinfo: data pointer is NULL\n"); + data->ret = 0; /* no output */ + return; + } + /* does this actual call */ + data->ret = (*data->func)(data->page); +} + +/* + * function called to trigger the IPI, we need to access a remote CPU + * Return: + * 0 : error or nothing to output + * otherwise how many bytes in the "page" buffer were written + */ +static +int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +{ + palinfo_smp_data_t ptr; + int ret; + + ptr.func = palinfo_entries[f->func_id].proc_read; + ptr.page = page; + ptr.ret = 0; /* just in case */ + + + /* will send IPI to other CPU and wait for completion of remote call */ + if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) { + printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: " + "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret); + return 0; + } + return ptr.ret; +} +#else /* ! CONFIG_SMP */ +static +int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +{ + printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n"); + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * Entry point routine: all calls go through this function + */ +static int +palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len=0; + pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data; + + /* + * in SMP mode, we may need to call another CPU to get correct + * information. PAL, by definition, is processor specific + */ + if (f->req_cpu == get_cpu()) + len = (*palinfo_entries[f->func_id].proc_read)(page); + else + len = palinfo_handle_smp(f, page); + + put_cpu(); + + if (len <= off+count) *eof = 1; + + *start = page + off; + len -= off; + + if (len>count) len = count; + if (len<0) len = 0; + + return len; +} + +static void +create_palinfo_proc_entries(unsigned int cpu) +{ +# define CPUSTR "cpu%d" + + pal_func_cpu_u_t f; + struct proc_dir_entry **pdir; + struct proc_dir_entry *cpu_dir; + int j; + char cpustr[sizeof(CPUSTR)]; + + + /* + * we keep track of created entries in a depth-first order for + * cleanup purposes. Each entry is stored into palinfo_proc_entries + */ + sprintf(cpustr,CPUSTR, cpu); + + cpu_dir = proc_mkdir(cpustr, palinfo_dir); + + f.req_cpu = cpu; + + /* + * Compute the location to store per cpu entries + * We dont store the top level entry in this list, but + * remove it finally after removing all cpu entries. + */ + pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)]; + *pdir++ = cpu_dir; + for (j=0; j < NR_PALINFO_ENTRIES; j++) { + f.func_id = j; + *pdir = create_proc_read_entry( + palinfo_entries[j].name, 0, cpu_dir, + palinfo_read_entry, (void *)f.value); + if (*pdir) + (*pdir)->owner = THIS_MODULE; + pdir++; + } +} + +static void +remove_palinfo_proc_entries(unsigned int hcpu) +{ + int j; + struct proc_dir_entry *cpu_dir, **pdir; + + pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)]; + cpu_dir = *pdir; + *pdir++=NULL; + for (j=0; j < (NR_PALINFO_ENTRIES); j++) { + if ((*pdir)) { + remove_proc_entry ((*pdir)->name, cpu_dir); + *pdir ++= NULL; + } + } + + if (cpu_dir) { + remove_proc_entry(cpu_dir->name, palinfo_dir); + } +} + +static int __devinit palinfo_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int hotcpu = (unsigned long)hcpu; + + switch (action) { + case CPU_ONLINE: + create_palinfo_proc_entries(hotcpu); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + remove_palinfo_proc_entries(hotcpu); + break; +#endif + } + return NOTIFY_OK; +} + +static struct notifier_block palinfo_cpu_notifier = +{ + .notifier_call = palinfo_cpu_callback, + .priority = 0, +}; + +static int __init +palinfo_init(void) +{ + int i = 0; + + printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); + palinfo_dir = proc_mkdir("pal", NULL); + + /* Create palinfo dirs in /proc for all online cpus */ + for_each_online_cpu(i) { + create_palinfo_proc_entries(i); + } + + /* Register for future delivery via notify registration */ + register_cpu_notifier(&palinfo_cpu_notifier); + + return 0; +} + +static void __exit +palinfo_exit(void) +{ + int i = 0; + + /* remove all nodes: depth first pass. Could optimize this */ + for_each_online_cpu(i) { + remove_palinfo_proc_entries(i); + } + + /* + * Remove the top level entry finally + */ + remove_proc_entry(palinfo_dir->name, NULL); + + /* + * Unregister from cpu notifier callbacks + */ + unregister_cpu_notifier(&palinfo_cpu_notifier); +} + +module_init(palinfo_init); +module_exit(palinfo_exit); diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c new file mode 100644 index 000000000000..367804a605fa --- /dev/null +++ b/arch/ia64/kernel/patch.c @@ -0,0 +1,189 @@ +/* + * Instruction-patching support. + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/init.h> +#include <linux/string.h> + +#include <asm/patch.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/system.h> +#include <asm/unistd.h> + +/* + * This was adapted from code written by Tony Luck: + * + * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle + * like this: + * + * 6 6 5 4 3 2 1 + * 3210987654321098765432109876543210987654321098765432109876543210 + * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG + * + * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB + */ +static u64 +get_imm64 (u64 insn_addr) +{ + u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */ + + return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/ + ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/ + ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/ + ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/ + ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/ + ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/ + ((p[1] & 0x000007f000000000UL) >> 36); /*G*/ +} + +/* Patch instruction with "val" where "mask" has 1 bits. */ +void +ia64_patch (u64 insn_addr, u64 mask, u64 val) +{ + u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16); +# define insn_mask ((1UL << 41) - 1) + unsigned long shift; + + b0 = b[0]; b1 = b[1]; + shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */ + if (shift >= 64) { + m1 = mask << (shift - 64); + v1 = val << (shift - 64); + } else { + m0 = mask << shift; m1 = mask >> (64 - shift); + v0 = val << shift; v1 = val >> (64 - shift); + b[0] = (b0 & ~m0) | (v0 & m0); + } + b[1] = (b1 & ~m1) | (v1 & m1); +} + +void +ia64_patch_imm64 (u64 insn_addr, u64 val) +{ + ia64_patch(insn_addr, + 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */ + | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */ + | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr - 1, 0x1ffffffffffUL, val >> 22); +} + +void +ia64_patch_imm60 (u64 insn_addr, u64 val) +{ + ia64_patch(insn_addr, + 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */ + | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18); +} + +/* + * We need sometimes to load the physical address of a kernel + * object. Often we can convert the virtual address to physical + * at execution time, but sometimes (either for performance reasons + * or during error recovery) we cannot to this. Patch the marked + * bundles to load the physical address. + */ +void __init +ia64_patch_vtop (unsigned long start, unsigned long end) +{ + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + + /* replace virtual address with corresponding physical address: */ + ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip))); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +void +ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) +{ + static int first_time = 1; + int need_workaround; + s32 *offp = (s32 *) start; + u64 *wp; + + need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0); + + if (first_time) { + first_time = 0; + if (need_workaround) + printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n"); + else + printk(KERN_INFO "McKinley Errata 9 workaround not needed; " + "disabling it\n"); + } + if (need_workaround) + return; + + while (offp < (s32 *) end) { + wp = (u64 *) ia64_imva((char *) offp + *offp); + wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ + wp[1] = 0x0004000000000200UL; + wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ + wp[3] = 0x0084006880000200UL; + ia64_fc(wp); ia64_fc(wp + 2); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +static void +patch_fsyscall_table (unsigned long start, unsigned long end) +{ + extern unsigned long fsyscall_table[NR_syscalls]; + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) ia64_imva((char *) offp + *offp); + ia64_patch_imm64(ip, (u64) fsyscall_table); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +static void +patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) +{ + extern char fsys_bubble_down[]; + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + ia64_patch_imm60((u64) ia64_imva((void *) ip), + (u64) (fsys_bubble_down - (ip & -16)) / 16); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +void +ia64_patch_gate (void) +{ +# define START(name) ((unsigned long) __start_gate_##name##_patchlist) +# define END(name) ((unsigned long)__end_gate_##name##_patchlist) + + patch_fsyscall_table(START(fsyscall), END(fsyscall)); + patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); + ia64_patch_vtop(START(vtop), END(vtop)); + ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); +} diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c new file mode 100644 index 000000000000..71147be3279c --- /dev/null +++ b/arch/ia64/kernel/perfmon.c @@ -0,0 +1,6676 @@ +/* + * This file implements the perfmon-2 subsystem which is used + * to program the IA-64 Performance Monitoring Unit (PMU). + * + * The initial version of perfmon.c was written by + * Ganesh Venkitachalam, IBM Corp. + * + * Then it was modified for perfmon-1.x by Stephane Eranian and + * David Mosberger, Hewlett Packard Co. + * + * Version Perfmon-2.x is a rewrite of perfmon-1.x + * by Stephane Eranian, Hewlett Packard Co. + * + * Copyright (C) 1999-2003, 2005 Hewlett Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * More information about perfmon available at: + * http://www.hpl.hp.com/research/linux/perfmon + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/smp_lock.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <linux/list.h> +#include <linux/file.h> +#include <linux/poll.h> +#include <linux/vfs.h> +#include <linux/pagemap.h> +#include <linux/mount.h> +#include <linux/version.h> +#include <linux/bitops.h> + +#include <asm/errno.h> +#include <asm/intrinsics.h> +#include <asm/page.h> +#include <asm/perfmon.h> +#include <asm/processor.h> +#include <asm/signal.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/delay.h> + +#ifdef CONFIG_PERFMON +/* + * perfmon context state + */ +#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ +#define PFM_CTX_LOADED 2 /* context is loaded onto a task */ +#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ +#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ + +#define PFM_INVALID_ACTIVATION (~0UL) + +/* + * depth of message queue + */ +#define PFM_MAX_MSGS 32 +#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) + +/* + * type of a PMU register (bitmask). + * bitmask structure: + * bit0 : register implemented + * bit1 : end marker + * bit2-3 : reserved + * bit4 : pmc has pmc.pm + * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter + * bit6-7 : register type + * bit8-31: reserved + */ +#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ +#define PFM_REG_IMPL 0x1 /* register implemented */ +#define PFM_REG_END 0x2 /* end marker */ +#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ +#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ +#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ +#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ +#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ + +#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) +#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) + +#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) + +/* i assumed unsigned */ +#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) +#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) + +/* XXX: these assume that register i is implemented */ +#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) +#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) + +#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value +#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask +#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] +#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] + +#define PFM_NUM_IBRS IA64_NUM_DBG_REGS +#define PFM_NUM_DBRS IA64_NUM_DBG_REGS + +#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) +#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) +#define PFM_CTX_TASK(h) (h)->ctx_task + +#define PMU_PMC_OI 5 /* position of pmc.oi bit */ + +/* XXX: does not support more than 64 PMDs */ +#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) +#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) + +#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) + +#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) +#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) +#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) +#define PFM_CODE_RR 0 /* requesting code range restriction */ +#define PFM_DATA_RR 1 /* requestion data range restriction */ + +#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) +#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) +#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) + +#define RDEP(x) (1UL<<(x)) + +/* + * context protection macros + * in SMP: + * - we need to protect against CPU concurrency (spin_lock) + * - we need to protect against PMU overflow interrupts (local_irq_disable) + * in UP: + * - we need to protect against PMU overflow interrupts (local_irq_disable) + * + * spin_lock_irqsave()/spin_lock_irqrestore(): + * in SMP: local_irq_disable + spin_lock + * in UP : local_irq_disable + * + * spin_lock()/spin_lock(): + * in UP : removed automatically + * in SMP: protect against context accesses from other CPU. interrupts + * are not masked. This is useful for the PMU interrupt handler + * because we know we will not get PMU concurrency in that code. + */ +#define PROTECT_CTX(c, f) \ + do { \ + DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \ + spin_lock_irqsave(&(c)->ctx_lock, f); \ + DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \ + } while(0) + +#define UNPROTECT_CTX(c, f) \ + do { \ + DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \ + spin_unlock_irqrestore(&(c)->ctx_lock, f); \ + } while(0) + +#define PROTECT_CTX_NOPRINT(c, f) \ + do { \ + spin_lock_irqsave(&(c)->ctx_lock, f); \ + } while(0) + + +#define UNPROTECT_CTX_NOPRINT(c, f) \ + do { \ + spin_unlock_irqrestore(&(c)->ctx_lock, f); \ + } while(0) + + +#define PROTECT_CTX_NOIRQ(c) \ + do { \ + spin_lock(&(c)->ctx_lock); \ + } while(0) + +#define UNPROTECT_CTX_NOIRQ(c) \ + do { \ + spin_unlock(&(c)->ctx_lock); \ + } while(0) + + +#ifdef CONFIG_SMP + +#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) +#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ +#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() + +#else /* !CONFIG_SMP */ +#define SET_ACTIVATION(t) do {} while(0) +#define GET_ACTIVATION(t) do {} while(0) +#define INC_ACTIVATION(t) do {} while(0) +#endif /* CONFIG_SMP */ + +#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) +#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) +#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) + +#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) +#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) + +#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) + +/* + * cmp0 must be the value of pmc0 + */ +#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) + +#define PFMFS_MAGIC 0xa0b4d889 + +/* + * debugging + */ +#define PFM_DEBUGGING 1 +#ifdef PFM_DEBUGGING +#define DPRINT(a) \ + do { \ + if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ + } while (0) + +#define DPRINT_ovfl(a) \ + do { \ + if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ + } while (0) +#endif + +/* + * 64-bit software counter structure + * + * the next_reset_type is applied to the next call to pfm_reset_regs() + */ +typedef struct { + unsigned long val; /* virtual 64bit counter value */ + unsigned long lval; /* last reset value */ + unsigned long long_reset; /* reset value on sampling overflow */ + unsigned long short_reset; /* reset value on overflow */ + unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ + unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */ + unsigned long seed; /* seed for random-number generator */ + unsigned long mask; /* mask for random-number generator */ + unsigned int flags; /* notify/do not notify */ + unsigned long eventid; /* overflow event identifier */ +} pfm_counter_t; + +/* + * context flags + */ +typedef struct { + unsigned int block:1; /* when 1, task will blocked on user notifications */ + unsigned int system:1; /* do system wide monitoring */ + unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ + unsigned int is_sampling:1; /* true if using a custom format */ + unsigned int excl_idle:1; /* exclude idle task in system wide session */ + unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ + unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ + unsigned int no_msg:1; /* no message sent on overflow */ + unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ + unsigned int reserved:22; +} pfm_context_flags_t; + +#define PFM_TRAP_REASON_NONE 0x0 /* default value */ +#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ +#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */ + + +/* + * perfmon context: encapsulates all the state of a monitoring session + */ + +typedef struct pfm_context { + spinlock_t ctx_lock; /* context protection */ + + pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ + unsigned int ctx_state; /* state: active/inactive (no bitfield) */ + + struct task_struct *ctx_task; /* task to which context is attached */ + + unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ + + struct semaphore ctx_restart_sem; /* use for blocking notification mode */ + + unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ + unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ + unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ + + unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ + unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ + unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ + + unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */ + + unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ + unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ + unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ + unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ + + pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */ + + u64 ctx_saved_psr_up; /* only contains psr.up value */ + + unsigned long ctx_last_activation; /* context last activation number for last_cpu */ + unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ + unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ + + int ctx_fd; /* file descriptor used my this context */ + pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ + + pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ + void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ + unsigned long ctx_smpl_size; /* size of sampling buffer */ + void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ + + wait_queue_head_t ctx_msgq_wait; + pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; + int ctx_msgq_head; + int ctx_msgq_tail; + struct fasync_struct *ctx_async_queue; + + wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ +} pfm_context_t; + +/* + * magic number used to verify that structure is really + * a perfmon context + */ +#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) + +#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) + +#ifdef CONFIG_SMP +#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) +#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu +#else +#define SET_LAST_CPU(ctx, v) do {} while(0) +#define GET_LAST_CPU(ctx) do {} while(0) +#endif + + +#define ctx_fl_block ctx_flags.block +#define ctx_fl_system ctx_flags.system +#define ctx_fl_using_dbreg ctx_flags.using_dbreg +#define ctx_fl_is_sampling ctx_flags.is_sampling +#define ctx_fl_excl_idle ctx_flags.excl_idle +#define ctx_fl_going_zombie ctx_flags.going_zombie +#define ctx_fl_trap_reason ctx_flags.trap_reason +#define ctx_fl_no_msg ctx_flags.no_msg +#define ctx_fl_can_restart ctx_flags.can_restart + +#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0); +#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking + +/* + * global information about all sessions + * mostly used to synchronize between system wide and per-process + */ +typedef struct { + spinlock_t pfs_lock; /* lock the structure */ + + unsigned int pfs_task_sessions; /* number of per task sessions */ + unsigned int pfs_sys_sessions; /* number of per system wide sessions */ + unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */ + unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ + struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ +} pfm_session_t; + +/* + * information about a PMC or PMD. + * dep_pmd[]: a bitmask of dependent PMD registers + * dep_pmc[]: a bitmask of dependent PMC registers + */ +typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); +typedef struct { + unsigned int type; + int pm_pos; + unsigned long default_value; /* power-on default value */ + unsigned long reserved_mask; /* bitmask of reserved bits */ + pfm_reg_check_t read_check; + pfm_reg_check_t write_check; + unsigned long dep_pmd[4]; + unsigned long dep_pmc[4]; +} pfm_reg_desc_t; + +/* assume cnum is a valid monitor */ +#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) + +/* + * This structure is initialized at boot time and contains + * a description of the PMU main characteristics. + * + * If the probe function is defined, detection is based + * on its return value: + * - 0 means recognized PMU + * - anything else means not supported + * When the probe function is not defined, then the pmu_family field + * is used and it must match the host CPU family such that: + * - cpu->family & config->pmu_family != 0 + */ +typedef struct { + unsigned long ovfl_val; /* overflow value for counters */ + + pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ + pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ + + unsigned int num_pmcs; /* number of PMCS: computed at init time */ + unsigned int num_pmds; /* number of PMDS: computed at init time */ + unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ + unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ + + char *pmu_name; /* PMU family name */ + unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ + unsigned int flags; /* pmu specific flags */ + unsigned int num_ibrs; /* number of IBRS: computed at init time */ + unsigned int num_dbrs; /* number of DBRS: computed at init time */ + unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ + int (*probe)(void); /* customized probe routine */ + unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ +} pmu_config_t; +/* + * PMU specific flags + */ +#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ + +/* + * debug register related type definitions + */ +typedef struct { + unsigned long ibr_mask:56; + unsigned long ibr_plm:4; + unsigned long ibr_ig:3; + unsigned long ibr_x:1; +} ibr_mask_reg_t; + +typedef struct { + unsigned long dbr_mask:56; + unsigned long dbr_plm:4; + unsigned long dbr_ig:2; + unsigned long dbr_w:1; + unsigned long dbr_r:1; +} dbr_mask_reg_t; + +typedef union { + unsigned long val; + ibr_mask_reg_t ibr; + dbr_mask_reg_t dbr; +} dbreg_t; + + +/* + * perfmon command descriptions + */ +typedef struct { + int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + char *cmd_name; + int cmd_flags; + unsigned int cmd_narg; + size_t cmd_argsize; + int (*cmd_getsize)(void *arg, size_t *sz); +} pfm_cmd_desc_t; + +#define PFM_CMD_FD 0x01 /* command requires a file descriptor */ +#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ +#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ +#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ + + +#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name +#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) +#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) +#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) +#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) + +#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ + +typedef struct { + int debug; /* turn on/off debugging via syslog */ + int debug_ovfl; /* turn on/off debug printk in overflow handler */ + int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ + int expert_mode; /* turn on/off value checking */ + int debug_pfm_read; +} pfm_sysctl_t; + +typedef struct { + unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ + unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ + unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ + unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ + unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */ + unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */ + unsigned long pfm_smpl_handler_calls; + unsigned long pfm_smpl_handler_cycles; + char pad[SMP_CACHE_BYTES] ____cacheline_aligned; +} pfm_stats_t; + +/* + * perfmon internal variables + */ +static pfm_stats_t pfm_stats[NR_CPUS]; +static pfm_session_t pfm_sessions; /* global sessions information */ + +static struct proc_dir_entry *perfmon_dir; +static pfm_uuid_t pfm_null_uuid = {0,}; + +static spinlock_t pfm_buffer_fmt_lock; +static LIST_HEAD(pfm_buffer_fmt_list); + +static pmu_config_t *pmu_conf; + +/* sysctl() controls */ +static pfm_sysctl_t pfm_sysctl; +int pfm_debug_var; + +static ctl_table pfm_ctl_table[]={ + {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, + {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, + {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,}, + {4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,}, + { 0, }, +}; +static ctl_table pfm_sysctl_dir[] = { + {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, }, + {0,}, +}; +static ctl_table pfm_sysctl_root[] = { + {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, }, + {0,}, +}; +static struct ctl_table_header *pfm_sysctl_header; + +static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); +static int pfm_flush(struct file *filp); + +#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) +#define pfm_get_cpu_data(a,b) per_cpu(a, b) + +static inline void +pfm_put_task(struct task_struct *task) +{ + if (task != current) put_task_struct(task); +} + +static inline void +pfm_set_task_notify(struct task_struct *task) +{ + struct thread_info *info; + + info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE); + set_bit(TIF_NOTIFY_RESUME, &info->flags); +} + +static inline void +pfm_clear_task_notify(void) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); +} + +static inline void +pfm_reserve_page(unsigned long a) +{ + SetPageReserved(vmalloc_to_page((void *)a)); +} +static inline void +pfm_unreserve_page(unsigned long a) +{ + ClearPageReserved(vmalloc_to_page((void*)a)); +} + +static inline unsigned long +pfm_protect_ctx_ctxsw(pfm_context_t *x) +{ + spin_lock(&(x)->ctx_lock); + return 0UL; +} + +static inline unsigned long +pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) +{ + spin_unlock(&(x)->ctx_lock); +} + +static inline unsigned int +pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct) +{ + return do_munmap(mm, addr, len); +} + +static inline unsigned long +pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec) +{ + return get_unmapped_area(file, addr, len, pgoff, flags); +} + + +static struct super_block * +pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) +{ + return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC); +} + +static struct file_system_type pfm_fs_type = { + .name = "pfmfs", + .get_sb = pfmfs_get_sb, + .kill_sb = kill_anon_super, +}; + +DEFINE_PER_CPU(unsigned long, pfm_syst_info); +DEFINE_PER_CPU(struct task_struct *, pmu_owner); +DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); +DEFINE_PER_CPU(unsigned long, pmu_activation_number); + + +/* forward declaration */ +static struct file_operations pfm_file_ops; + +/* + * forward declarations + */ +#ifndef CONFIG_SMP +static void pfm_lazy_save_regs (struct task_struct *ta); +#endif + +void dump_pmu_state(const char *); +static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + +#include "perfmon_itanium.h" +#include "perfmon_mckinley.h" +#include "perfmon_generic.h" + +static pmu_config_t *pmu_confs[]={ + &pmu_conf_mck, + &pmu_conf_ita, + &pmu_conf_gen, /* must be last */ + NULL +}; + + +static int pfm_end_notify_user(pfm_context_t *ctx); + +static inline void +pfm_clear_psr_pp(void) +{ + ia64_rsm(IA64_PSR_PP); + ia64_srlz_i(); +} + +static inline void +pfm_set_psr_pp(void) +{ + ia64_ssm(IA64_PSR_PP); + ia64_srlz_i(); +} + +static inline void +pfm_clear_psr_up(void) +{ + ia64_rsm(IA64_PSR_UP); + ia64_srlz_i(); +} + +static inline void +pfm_set_psr_up(void) +{ + ia64_ssm(IA64_PSR_UP); + ia64_srlz_i(); +} + +static inline unsigned long +pfm_get_psr(void) +{ + unsigned long tmp; + tmp = ia64_getreg(_IA64_REG_PSR); + ia64_srlz_i(); + return tmp; +} + +static inline void +pfm_set_psr_l(unsigned long val) +{ + ia64_setreg(_IA64_REG_PSR_L, val); + ia64_srlz_i(); +} + +static inline void +pfm_freeze_pmu(void) +{ + ia64_set_pmc(0,1UL); + ia64_srlz_d(); +} + +static inline void +pfm_unfreeze_pmu(void) +{ + ia64_set_pmc(0,0UL); + ia64_srlz_d(); +} + +static inline void +pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) +{ + int i; + + for (i=0; i < nibrs; i++) { + ia64_set_ibr(i, ibrs[i]); + ia64_dv_serialize_instruction(); + } + ia64_srlz_i(); +} + +static inline void +pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) +{ + int i; + + for (i=0; i < ndbrs; i++) { + ia64_set_dbr(i, dbrs[i]); + ia64_dv_serialize_data(); + } + ia64_srlz_d(); +} + +/* + * PMD[i] must be a counter. no check is made + */ +static inline unsigned long +pfm_read_soft_counter(pfm_context_t *ctx, int i) +{ + return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); +} + +/* + * PMD[i] must be a counter. no check is made + */ +static inline void +pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) +{ + unsigned long ovfl_val = pmu_conf->ovfl_val; + + ctx->ctx_pmds[i].val = val & ~ovfl_val; + /* + * writing to unimplemented part is ignore, so we do not need to + * mask off top part + */ + ia64_set_pmd(i, val & ovfl_val); +} + +static pfm_msg_t * +pfm_get_new_msg(pfm_context_t *ctx) +{ + int idx, next; + + next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; + + DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); + if (next == ctx->ctx_msgq_head) return NULL; + + idx = ctx->ctx_msgq_tail; + ctx->ctx_msgq_tail = next; + + DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); + + return ctx->ctx_msgq+idx; +} + +static pfm_msg_t * +pfm_get_next_msg(pfm_context_t *ctx) +{ + pfm_msg_t *msg; + + DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); + + if (PFM_CTXQ_EMPTY(ctx)) return NULL; + + /* + * get oldest message + */ + msg = ctx->ctx_msgq+ctx->ctx_msgq_head; + + /* + * and move forward + */ + ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; + + DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); + + return msg; +} + +static void +pfm_reset_msgq(pfm_context_t *ctx) +{ + ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; + DPRINT(("ctx=%p msgq reset\n", ctx)); +} + +static void * +pfm_rvmalloc(unsigned long size) +{ + void *mem; + unsigned long addr; + + size = PAGE_ALIGN(size); + mem = vmalloc(size); + if (mem) { + //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); + memset(mem, 0, size); + addr = (unsigned long)mem; + while (size > 0) { + pfm_reserve_page(addr); + addr+=PAGE_SIZE; + size-=PAGE_SIZE; + } + } + return mem; +} + +static void +pfm_rvfree(void *mem, unsigned long size) +{ + unsigned long addr; + + if (mem) { + DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size)); + addr = (unsigned long) mem; + while ((long) size > 0) { + pfm_unreserve_page(addr); + addr+=PAGE_SIZE; + size-=PAGE_SIZE; + } + vfree(mem); + } + return; +} + +static pfm_context_t * +pfm_context_alloc(void) +{ + pfm_context_t *ctx; + + /* + * allocate context descriptor + * must be able to free with interrupts disabled + */ + ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL); + if (ctx) { + memset(ctx, 0, sizeof(pfm_context_t)); + DPRINT(("alloc ctx @%p\n", ctx)); + } + return ctx; +} + +static void +pfm_context_free(pfm_context_t *ctx) +{ + if (ctx) { + DPRINT(("free ctx @%p\n", ctx)); + kfree(ctx); + } +} + +static void +pfm_mask_monitoring(struct task_struct *task) +{ + pfm_context_t *ctx = PFM_GET_CTX(task); + struct thread_struct *th = &task->thread; + unsigned long mask, val, ovfl_mask; + int i; + + DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid)); + + ovfl_mask = pmu_conf->ovfl_val; + /* + * monitoring can only be masked as a result of a valid + * counter overflow. In UP, it means that the PMU still + * has an owner. Note that the owner can be different + * from the current task. However the PMU state belongs + * to the owner. + * In SMP, a valid overflow only happens when task is + * current. Therefore if we come here, we know that + * the PMU state belongs to the current task, therefore + * we can access the live registers. + * + * So in both cases, the live register contains the owner's + * state. We can ONLY touch the PMU registers and NOT the PSR. + * + * As a consequence to this call, the thread->pmds[] array + * contains stale information which must be ignored + * when context is reloaded AND monitoring is active (see + * pfm_restart). + */ + mask = ctx->ctx_used_pmds[0]; + for (i = 0; mask; i++, mask>>=1) { + /* skip non used pmds */ + if ((mask & 0x1) == 0) continue; + val = ia64_get_pmd(i); + + if (PMD_IS_COUNTING(i)) { + /* + * we rebuild the full 64 bit value of the counter + */ + ctx->ctx_pmds[i].val += (val & ovfl_mask); + } else { + ctx->ctx_pmds[i].val = val; + } + DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", + i, + ctx->ctx_pmds[i].val, + val & ovfl_mask)); + } + /* + * mask monitoring by setting the privilege level to 0 + * we cannot use psr.pp/psr.up for this, it is controlled by + * the user + * + * if task is current, modify actual registers, otherwise modify + * thread save state, i.e., what will be restored in pfm_load_regs() + */ + mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; + for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0UL) continue; + ia64_set_pmc(i, th->pmcs[i] & ~0xfUL); + th->pmcs[i] &= ~0xfUL; + DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i])); + } + /* + * make all of this visible + */ + ia64_srlz_d(); +} + +/* + * must always be done with task == current + * + * context must be in MASKED state when calling + */ +static void +pfm_restore_monitoring(struct task_struct *task) +{ + pfm_context_t *ctx = PFM_GET_CTX(task); + struct thread_struct *th = &task->thread; + unsigned long mask, ovfl_mask; + unsigned long psr, val; + int i, is_system; + + is_system = ctx->ctx_fl_system; + ovfl_mask = pmu_conf->ovfl_val; + + if (task != current) { + printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid); + return; + } + if (ctx->ctx_state != PFM_CTX_MASKED) { + printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__, + task->pid, current->pid, ctx->ctx_state); + return; + } + psr = pfm_get_psr(); + /* + * monitoring is masked via the PMC. + * As we restore their value, we do not want each counter to + * restart right away. We stop monitoring using the PSR, + * restore the PMC (and PMD) and then re-establish the psr + * as it was. Note that there can be no pending overflow at + * this point, because monitoring was MASKED. + * + * system-wide session are pinned and self-monitoring + */ + if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { + /* disable dcr pp */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); + pfm_clear_psr_pp(); + } else { + pfm_clear_psr_up(); + } + /* + * first, we restore the PMD + */ + mask = ctx->ctx_used_pmds[0]; + for (i = 0; mask; i++, mask>>=1) { + /* skip non used pmds */ + if ((mask & 0x1) == 0) continue; + + if (PMD_IS_COUNTING(i)) { + /* + * we split the 64bit value according to + * counter width + */ + val = ctx->ctx_pmds[i].val & ovfl_mask; + ctx->ctx_pmds[i].val &= ~ovfl_mask; + } else { + val = ctx->ctx_pmds[i].val; + } + ia64_set_pmd(i, val); + + DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", + i, + ctx->ctx_pmds[i].val, + val)); + } + /* + * restore the PMCs + */ + mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; + for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0UL) continue; + th->pmcs[i] = ctx->ctx_pmcs[i]; + ia64_set_pmc(i, th->pmcs[i]); + DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i])); + } + ia64_srlz_d(); + + /* + * must restore DBR/IBR because could be modified while masked + * XXX: need to optimize + */ + if (ctx->ctx_fl_using_dbreg) { + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); + } + + /* + * now restore PSR + */ + if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { + /* enable dcr pp */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); + ia64_srlz_i(); + } + pfm_set_psr_l(psr); +} + +static inline void +pfm_save_pmds(unsigned long *pmds, unsigned long mask) +{ + int i; + + ia64_srlz_d(); + + for (i=0; mask; i++, mask>>=1) { + if (mask & 0x1) pmds[i] = ia64_get_pmd(i); + } +} + +/* + * reload from thread state (used for ctxw only) + */ +static inline void +pfm_restore_pmds(unsigned long *pmds, unsigned long mask) +{ + int i; + unsigned long val, ovfl_val = pmu_conf->ovfl_val; + + for (i=0; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0) continue; + val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i]; + ia64_set_pmd(i, val); + } + ia64_srlz_d(); +} + +/* + * propagate PMD from context to thread-state + */ +static inline void +pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) +{ + struct thread_struct *thread = &task->thread; + unsigned long ovfl_val = pmu_conf->ovfl_val; + unsigned long mask = ctx->ctx_all_pmds[0]; + unsigned long val; + int i; + + DPRINT(("mask=0x%lx\n", mask)); + + for (i=0; mask; i++, mask>>=1) { + + val = ctx->ctx_pmds[i].val; + + /* + * We break up the 64 bit value into 2 pieces + * the lower bits go to the machine state in the + * thread (will be reloaded on ctxsw in). + * The upper part stays in the soft-counter. + */ + if (PMD_IS_COUNTING(i)) { + ctx->ctx_pmds[i].val = val & ~ovfl_val; + val &= ovfl_val; + } + thread->pmds[i] = val; + + DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", + i, + thread->pmds[i], + ctx->ctx_pmds[i].val)); + } +} + +/* + * propagate PMC from context to thread-state + */ +static inline void +pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) +{ + struct thread_struct *thread = &task->thread; + unsigned long mask = ctx->ctx_all_pmcs[0]; + int i; + + DPRINT(("mask=0x%lx\n", mask)); + + for (i=0; mask; i++, mask>>=1) { + /* masking 0 with ovfl_val yields 0 */ + thread->pmcs[i] = ctx->ctx_pmcs[i]; + DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i])); + } +} + + + +static inline void +pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) +{ + int i; + + for (i=0; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0) continue; + ia64_set_pmc(i, pmcs[i]); + } + ia64_srlz_d(); +} + +static inline int +pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) +{ + return memcmp(a, b, sizeof(pfm_uuid_t)); +} + +static inline int +pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs) +{ + int ret = 0; + if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); + return ret; +} + +static inline int +pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) +{ + int ret = 0; + if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); + return ret; +} + + +static inline int +pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, + int cpu, void *arg) +{ + int ret = 0; + if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg); + return ret; +} + +static inline int +pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, + int cpu, void *arg) +{ + int ret = 0; + if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); + return ret; +} + +static inline int +pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) +{ + int ret = 0; + if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); + return ret; +} + +static inline int +pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) +{ + int ret = 0; + if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); + return ret; +} + +static pfm_buffer_fmt_t * +__pfm_find_buffer_fmt(pfm_uuid_t uuid) +{ + struct list_head * pos; + pfm_buffer_fmt_t * entry; + + list_for_each(pos, &pfm_buffer_fmt_list) { + entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); + if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) + return entry; + } + return NULL; +} + +/* + * find a buffer format based on its uuid + */ +static pfm_buffer_fmt_t * +pfm_find_buffer_fmt(pfm_uuid_t uuid) +{ + pfm_buffer_fmt_t * fmt; + spin_lock(&pfm_buffer_fmt_lock); + fmt = __pfm_find_buffer_fmt(uuid); + spin_unlock(&pfm_buffer_fmt_lock); + return fmt; +} + +int +pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) +{ + int ret = 0; + + /* some sanity checks */ + if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; + + /* we need at least a handler */ + if (fmt->fmt_handler == NULL) return -EINVAL; + + /* + * XXX: need check validity of fmt_arg_size + */ + + spin_lock(&pfm_buffer_fmt_lock); + + if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { + printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); + ret = -EBUSY; + goto out; + } + list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); + printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); + +out: + spin_unlock(&pfm_buffer_fmt_lock); + return ret; +} +EXPORT_SYMBOL(pfm_register_buffer_fmt); + +int +pfm_unregister_buffer_fmt(pfm_uuid_t uuid) +{ + pfm_buffer_fmt_t *fmt; + int ret = 0; + + spin_lock(&pfm_buffer_fmt_lock); + + fmt = __pfm_find_buffer_fmt(uuid); + if (!fmt) { + printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); + ret = -EINVAL; + goto out; + } + list_del_init(&fmt->fmt_list); + printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); + +out: + spin_unlock(&pfm_buffer_fmt_lock); + return ret; + +} +EXPORT_SYMBOL(pfm_unregister_buffer_fmt); + +static int +pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) +{ + unsigned long flags; + /* + * validy checks on cpu_mask have been done upstream + */ + LOCK_PFS(flags); + + DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + if (is_syswide) { + /* + * cannot mix system wide and per-task sessions + */ + if (pfm_sessions.pfs_task_sessions > 0UL) { + DPRINT(("system wide not possible, %u conflicting task_sessions\n", + pfm_sessions.pfs_task_sessions)); + goto abort; + } + + if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict; + + DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); + + pfm_sessions.pfs_sys_session[cpu] = task; + + pfm_sessions.pfs_sys_sessions++ ; + + } else { + if (pfm_sessions.pfs_sys_sessions) goto abort; + pfm_sessions.pfs_task_sessions++; + } + + DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + UNLOCK_PFS(flags); + + return 0; + +error_conflict: + DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", + pfm_sessions.pfs_sys_session[cpu]->pid, + smp_processor_id())); +abort: + UNLOCK_PFS(flags); + + return -EBUSY; + +} + +static int +pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) +{ + unsigned long flags; + /* + * validy checks on cpu_mask have been done upstream + */ + LOCK_PFS(flags); + + DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + + if (is_syswide) { + pfm_sessions.pfs_sys_session[cpu] = NULL; + /* + * would not work with perfmon+more than one bit in cpu_mask + */ + if (ctx && ctx->ctx_fl_using_dbreg) { + if (pfm_sessions.pfs_sys_use_dbregs == 0) { + printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); + } else { + pfm_sessions.pfs_sys_use_dbregs--; + } + } + pfm_sessions.pfs_sys_sessions--; + } else { + pfm_sessions.pfs_task_sessions--; + } + DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + UNLOCK_PFS(flags); + + return 0; +} + +/* + * removes virtual mapping of the sampling buffer. + * IMPORTANT: cannot be called with interrupts disable, e.g. inside + * a PROTECT_CTX() section. + */ +static int +pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size) +{ + int r; + + /* sanity checks */ + if (task->mm == NULL || size == 0UL || vaddr == NULL) { + printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm); + return -EINVAL; + } + + DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size)); + + /* + * does the actual unmapping + */ + down_write(&task->mm->mmap_sem); + + DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size)); + + r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0); + + up_write(&task->mm->mmap_sem); + if (r !=0) { + printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size); + } + + DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); + + return 0; +} + +/* + * free actual physical storage used by sampling buffer + */ +#if 0 +static int +pfm_free_smpl_buffer(pfm_context_t *ctx) +{ + pfm_buffer_fmt_t *fmt; + + if (ctx->ctx_smpl_hdr == NULL) goto invalid_free; + + /* + * we won't use the buffer format anymore + */ + fmt = ctx->ctx_buf_fmt; + + DPRINT(("sampling buffer @%p size %lu vaddr=%p\n", + ctx->ctx_smpl_hdr, + ctx->ctx_smpl_size, + ctx->ctx_smpl_vaddr)); + + pfm_buf_fmt_exit(fmt, current, NULL, NULL); + + /* + * free the buffer + */ + pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size); + + ctx->ctx_smpl_hdr = NULL; + ctx->ctx_smpl_size = 0UL; + + return 0; + +invalid_free: + printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid); + return -EINVAL; +} +#endif + +static inline void +pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) +{ + if (fmt == NULL) return; + + pfm_buf_fmt_exit(fmt, current, NULL, NULL); + +} + +/* + * pfmfs should _never_ be mounted by userland - too much of security hassle, + * no real gain from having the whole whorehouse mounted. So we don't need + * any operations on the root directory. However, we need a non-trivial + * d_name - pfm: will go nicely and kill the special-casing in procfs. + */ +static struct vfsmount *pfmfs_mnt; + +static int __init +init_pfm_fs(void) +{ + int err = register_filesystem(&pfm_fs_type); + if (!err) { + pfmfs_mnt = kern_mount(&pfm_fs_type); + err = PTR_ERR(pfmfs_mnt); + if (IS_ERR(pfmfs_mnt)) + unregister_filesystem(&pfm_fs_type); + else + err = 0; + } + return err; +} + +static void __exit +exit_pfm_fs(void) +{ + unregister_filesystem(&pfm_fs_type); + mntput(pfmfs_mnt); +} + +static ssize_t +pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) +{ + pfm_context_t *ctx; + pfm_msg_t *msg; + ssize_t ret; + unsigned long flags; + DECLARE_WAITQUEUE(wait, current); + if (PFM_IS_FILE(filp) == 0) { + printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); + return -EINVAL; + } + + ctx = (pfm_context_t *)filp->private_data; + if (ctx == NULL) { + printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid); + return -EINVAL; + } + + /* + * check even when there is no message + */ + if (size < sizeof(pfm_msg_t)) { + DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t))); + return -EINVAL; + } + + PROTECT_CTX(ctx, flags); + + /* + * put ourselves on the wait queue + */ + add_wait_queue(&ctx->ctx_msgq_wait, &wait); + + + for(;;) { + /* + * check wait queue + */ + + set_current_state(TASK_INTERRUPTIBLE); + + DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); + + ret = 0; + if(PFM_CTXQ_EMPTY(ctx) == 0) break; + + UNPROTECT_CTX(ctx, flags); + + /* + * check non-blocking read + */ + ret = -EAGAIN; + if(filp->f_flags & O_NONBLOCK) break; + + /* + * check pending signals + */ + if(signal_pending(current)) { + ret = -EINTR; + break; + } + /* + * no message, so wait + */ + schedule(); + + PROTECT_CTX(ctx, flags); + } + DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret)); + set_current_state(TASK_RUNNING); + remove_wait_queue(&ctx->ctx_msgq_wait, &wait); + + if (ret < 0) goto abort; + + ret = -EINVAL; + msg = pfm_get_next_msg(ctx); + if (msg == NULL) { + printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid); + goto abort_locked; + } + + DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); + + ret = -EFAULT; + if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); + +abort_locked: + UNPROTECT_CTX(ctx, flags); +abort: + return ret; +} + +static ssize_t +pfm_write(struct file *file, const char __user *ubuf, + size_t size, loff_t *ppos) +{ + DPRINT(("pfm_write called\n")); + return -EINVAL; +} + +static unsigned int +pfm_poll(struct file *filp, poll_table * wait) +{ + pfm_context_t *ctx; + unsigned long flags; + unsigned int mask = 0; + + if (PFM_IS_FILE(filp) == 0) { + printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); + return 0; + } + + ctx = (pfm_context_t *)filp->private_data; + if (ctx == NULL) { + printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid); + return 0; + } + + + DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd)); + + poll_wait(filp, &ctx->ctx_msgq_wait, wait); + + PROTECT_CTX(ctx, flags); + + if (PFM_CTXQ_EMPTY(ctx) == 0) + mask = POLLIN | POLLRDNORM; + + UNPROTECT_CTX(ctx, flags); + + DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask)); + + return mask; +} + +static int +pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + DPRINT(("pfm_ioctl called\n")); + return -EINVAL; +} + +/* + * interrupt cannot be masked when coming here + */ +static inline int +pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) +{ + int ret; + + ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue); + + DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", + current->pid, + fd, + on, + ctx->ctx_async_queue, ret)); + + return ret; +} + +static int +pfm_fasync(int fd, struct file *filp, int on) +{ + pfm_context_t *ctx; + int ret; + + if (PFM_IS_FILE(filp) == 0) { + printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid); + return -EBADF; + } + + ctx = (pfm_context_t *)filp->private_data; + if (ctx == NULL) { + printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid); + return -EBADF; + } + /* + * we cannot mask interrupts during this call because this may + * may go to sleep if memory is not readily avalaible. + * + * We are protected from the conetxt disappearing by the get_fd()/put_fd() + * done in caller. Serialization of this function is ensured by caller. + */ + ret = pfm_do_fasync(fd, filp, ctx, on); + + + DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", + fd, + on, + ctx->ctx_async_queue, ret)); + + return ret; +} + +#ifdef CONFIG_SMP +/* + * this function is exclusively called from pfm_close(). + * The context is not protected at that time, nor are interrupts + * on the remote CPU. That's necessary to avoid deadlocks. + */ +static void +pfm_syswide_force_stop(void *info) +{ + pfm_context_t *ctx = (pfm_context_t *)info; + struct pt_regs *regs = ia64_task_regs(current); + struct task_struct *owner; + unsigned long flags; + int ret; + + if (ctx->ctx_cpu != smp_processor_id()) { + printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n", + ctx->ctx_cpu, + smp_processor_id()); + return; + } + owner = GET_PMU_OWNER(); + if (owner != ctx->ctx_task) { + printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", + smp_processor_id(), + owner->pid, ctx->ctx_task->pid); + return; + } + if (GET_PMU_CTX() != ctx) { + printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n", + smp_processor_id(), + GET_PMU_CTX(), ctx); + return; + } + + DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid)); + /* + * the context is already protected in pfm_close(), we simply + * need to mask interrupts to avoid a PMU interrupt race on + * this CPU + */ + local_irq_save(flags); + + ret = pfm_context_unload(ctx, NULL, 0, regs); + if (ret) { + DPRINT(("context_unload returned %d\n", ret)); + } + + /* + * unmask interrupts, PMU interrupts are now spurious here + */ + local_irq_restore(flags); +} + +static void +pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) +{ + int ret; + + DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu)); + ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1); + DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret)); +} +#endif /* CONFIG_SMP */ + +/* + * called for each close(). Partially free resources. + * When caller is self-monitoring, the context is unloaded. + */ +static int +pfm_flush(struct file *filp) +{ + pfm_context_t *ctx; + struct task_struct *task; + struct pt_regs *regs; + unsigned long flags; + unsigned long smpl_buf_size = 0UL; + void *smpl_buf_vaddr = NULL; + int state, is_system; + + if (PFM_IS_FILE(filp) == 0) { + DPRINT(("bad magic for\n")); + return -EBADF; + } + + ctx = (pfm_context_t *)filp->private_data; + if (ctx == NULL) { + printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid); + return -EBADF; + } + + /* + * remove our file from the async queue, if we use this mode. + * This can be done without the context being protected. We come + * here when the context has become unreacheable by other tasks. + * + * We may still have active monitoring at this point and we may + * end up in pfm_overflow_handler(). However, fasync_helper() + * operates with interrupts disabled and it cleans up the + * queue. If the PMU handler is called prior to entering + * fasync_helper() then it will send a signal. If it is + * invoked after, it will find an empty queue and no + * signal will be sent. In both case, we are safe + */ + if (filp->f_flags & FASYNC) { + DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue)); + pfm_do_fasync (-1, filp, ctx, 0); + } + + PROTECT_CTX(ctx, flags); + + state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + + task = PFM_CTX_TASK(ctx); + regs = ia64_task_regs(task); + + DPRINT(("ctx_state=%d is_current=%d\n", + state, + task == current ? 1 : 0)); + + /* + * if state == UNLOADED, then task is NULL + */ + + /* + * we must stop and unload because we are losing access to the context. + */ + if (task == current) { +#ifdef CONFIG_SMP + /* + * the task IS the owner but it migrated to another CPU: that's bad + * but we must handle this cleanly. Unfortunately, the kernel does + * not provide a mechanism to block migration (while the context is loaded). + * + * We need to release the resource on the ORIGINAL cpu. + */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + /* + * keep context protected but unmask interrupt for IPI + */ + local_irq_restore(flags); + + pfm_syswide_cleanup_other_cpu(ctx); + + /* + * restore interrupt masking + */ + local_irq_save(flags); + + /* + * context is unloaded at this point + */ + } else +#endif /* CONFIG_SMP */ + { + + DPRINT(("forcing unload\n")); + /* + * stop and unload, returning with state UNLOADED + * and session unreserved. + */ + pfm_context_unload(ctx, NULL, 0, regs); + + DPRINT(("ctx_state=%d\n", ctx->ctx_state)); + } + } + + /* + * remove virtual mapping, if any, for the calling task. + * cannot reset ctx field until last user is calling close(). + * + * ctx_smpl_vaddr must never be cleared because it is needed + * by every task with access to the context + * + * When called from do_exit(), the mm context is gone already, therefore + * mm is NULL, i.e., the VMA is already gone and we do not have to + * do anything here + */ + if (ctx->ctx_smpl_vaddr && current->mm) { + smpl_buf_vaddr = ctx->ctx_smpl_vaddr; + smpl_buf_size = ctx->ctx_smpl_size; + } + + UNPROTECT_CTX(ctx, flags); + + /* + * if there was a mapping, then we systematically remove it + * at this point. Cannot be done inside critical section + * because some VM function reenables interrupts. + * + */ + if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size); + + return 0; +} +/* + * called either on explicit close() or from exit_files(). + * Only the LAST user of the file gets to this point, i.e., it is + * called only ONCE. + * + * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero + * (fput()),i.e, last task to access the file. Nobody else can access the + * file at this point. + * + * When called from exit_files(), the VMA has been freed because exit_mm() + * is executed before exit_files(). + * + * When called from exit_files(), the current task is not yet ZOMBIE but we + * flush the PMU state to the context. + */ +static int +pfm_close(struct inode *inode, struct file *filp) +{ + pfm_context_t *ctx; + struct task_struct *task; + struct pt_regs *regs; + DECLARE_WAITQUEUE(wait, current); + unsigned long flags; + unsigned long smpl_buf_size = 0UL; + void *smpl_buf_addr = NULL; + int free_possible = 1; + int state, is_system; + + DPRINT(("pfm_close called private=%p\n", filp->private_data)); + + if (PFM_IS_FILE(filp) == 0) { + DPRINT(("bad magic\n")); + return -EBADF; + } + + ctx = (pfm_context_t *)filp->private_data; + if (ctx == NULL) { + printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid); + return -EBADF; + } + + PROTECT_CTX(ctx, flags); + + state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + + task = PFM_CTX_TASK(ctx); + regs = ia64_task_regs(task); + + DPRINT(("ctx_state=%d is_current=%d\n", + state, + task == current ? 1 : 0)); + + /* + * if task == current, then pfm_flush() unloaded the context + */ + if (state == PFM_CTX_UNLOADED) goto doit; + + /* + * context is loaded/masked and task != current, we need to + * either force an unload or go zombie + */ + + /* + * The task is currently blocked or will block after an overflow. + * we must force it to wakeup to get out of the + * MASKED state and transition to the unloaded state by itself. + * + * This situation is only possible for per-task mode + */ + if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) { + + /* + * set a "partial" zombie state to be checked + * upon return from down() in pfm_handle_work(). + * + * We cannot use the ZOMBIE state, because it is checked + * by pfm_load_regs() which is called upon wakeup from down(). + * In such case, it would free the context and then we would + * return to pfm_handle_work() which would access the + * stale context. Instead, we set a flag invisible to pfm_load_regs() + * but visible to pfm_handle_work(). + * + * For some window of time, we have a zombie context with + * ctx_state = MASKED and not ZOMBIE + */ + ctx->ctx_fl_going_zombie = 1; + + /* + * force task to wake up from MASKED state + */ + up(&ctx->ctx_restart_sem); + + DPRINT(("waking up ctx_state=%d\n", state)); + + /* + * put ourself to sleep waiting for the other + * task to report completion + * + * the context is protected by mutex, therefore there + * is no risk of being notified of completion before + * begin actually on the waitq. + */ + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&ctx->ctx_zombieq, &wait); + + UNPROTECT_CTX(ctx, flags); + + /* + * XXX: check for signals : + * - ok for explicit close + * - not ok when coming from exit_files() + */ + schedule(); + + + PROTECT_CTX(ctx, flags); + + + remove_wait_queue(&ctx->ctx_zombieq, &wait); + set_current_state(TASK_RUNNING); + + /* + * context is unloaded at this point + */ + DPRINT(("after zombie wakeup ctx_state=%d for\n", state)); + } + else if (task != current) { +#ifdef CONFIG_SMP + /* + * switch context to zombie state + */ + ctx->ctx_state = PFM_CTX_ZOMBIE; + + DPRINT(("zombie ctx for [%d]\n", task->pid)); + /* + * cannot free the context on the spot. deferred until + * the task notices the ZOMBIE state + */ + free_possible = 0; +#else + pfm_context_unload(ctx, NULL, 0, regs); +#endif + } + +doit: + /* reload state, may have changed during opening of critical section */ + state = ctx->ctx_state; + + /* + * the context is still attached to a task (possibly current) + * we cannot destroy it right now + */ + + /* + * we must free the sampling buffer right here because + * we cannot rely on it being cleaned up later by the + * monitored task. It is not possible to free vmalloc'ed + * memory in pfm_load_regs(). Instead, we remove the buffer + * now. should there be subsequent PMU overflow originally + * meant for sampling, the will be converted to spurious + * and that's fine because the monitoring tools is gone anyway. + */ + if (ctx->ctx_smpl_hdr) { + smpl_buf_addr = ctx->ctx_smpl_hdr; + smpl_buf_size = ctx->ctx_smpl_size; + /* no more sampling */ + ctx->ctx_smpl_hdr = NULL; + ctx->ctx_fl_is_sampling = 0; + } + + DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n", + state, + free_possible, + smpl_buf_addr, + smpl_buf_size)); + + if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt); + + /* + * UNLOADED that the session has already been unreserved. + */ + if (state == PFM_CTX_ZOMBIE) { + pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu); + } + + /* + * disconnect file descriptor from context must be done + * before we unlock. + */ + filp->private_data = NULL; + + /* + * if we free on the spot, the context is now completely unreacheable + * from the callers side. The monitored task side is also cut, so we + * can freely cut. + * + * If we have a deferred free, only the caller side is disconnected. + */ + UNPROTECT_CTX(ctx, flags); + + /* + * All memory free operations (especially for vmalloc'ed memory) + * MUST be done with interrupts ENABLED. + */ + if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size); + + /* + * return the memory used by the context + */ + if (free_possible) pfm_context_free(ctx); + + return 0; +} + +static int +pfm_no_open(struct inode *irrelevant, struct file *dontcare) +{ + DPRINT(("pfm_no_open called\n")); + return -ENXIO; +} + + + +static struct file_operations pfm_file_ops = { + .llseek = no_llseek, + .read = pfm_read, + .write = pfm_write, + .poll = pfm_poll, + .ioctl = pfm_ioctl, + .open = pfm_no_open, /* special open code to disallow open via /proc */ + .fasync = pfm_fasync, + .release = pfm_close, + .flush = pfm_flush +}; + +static int +pfmfs_delete_dentry(struct dentry *dentry) +{ + return 1; +} + +static struct dentry_operations pfmfs_dentry_operations = { + .d_delete = pfmfs_delete_dentry, +}; + + +static int +pfm_alloc_fd(struct file **cfile) +{ + int fd, ret = 0; + struct file *file = NULL; + struct inode * inode; + char name[32]; + struct qstr this; + + fd = get_unused_fd(); + if (fd < 0) return -ENFILE; + + ret = -ENFILE; + + file = get_empty_filp(); + if (!file) goto out; + + /* + * allocate a new inode + */ + inode = new_inode(pfmfs_mnt->mnt_sb); + if (!inode) goto out; + + DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); + + inode->i_mode = S_IFCHR|S_IRUGO; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + + sprintf(name, "[%lu]", inode->i_ino); + this.name = name; + this.len = strlen(name); + this.hash = inode->i_ino; + + ret = -ENOMEM; + + /* + * allocate a new dcache entry + */ + file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); + if (!file->f_dentry) goto out; + + file->f_dentry->d_op = &pfmfs_dentry_operations; + + d_add(file->f_dentry, inode); + file->f_vfsmnt = mntget(pfmfs_mnt); + file->f_mapping = inode->i_mapping; + + file->f_op = &pfm_file_ops; + file->f_mode = FMODE_READ; + file->f_flags = O_RDONLY; + file->f_pos = 0; + + /* + * may have to delay until context is attached? + */ + fd_install(fd, file); + + /* + * the file structure we will use + */ + *cfile = file; + + return fd; +out: + if (file) put_filp(file); + put_unused_fd(fd); + return ret; +} + +static void +pfm_free_fd(int fd, struct file *file) +{ + struct files_struct *files = current->files; + + /* + * there ie no fd_uninstall(), so we do it here + */ + spin_lock(&files->file_lock); + files->fd[fd] = NULL; + spin_unlock(&files->file_lock); + + if (file) put_filp(file); + put_unused_fd(fd); +} + +static int +pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) +{ + DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); + + while (size > 0) { + unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; + + + if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) + return -ENOMEM; + + addr += PAGE_SIZE; + buf += PAGE_SIZE; + size -= PAGE_SIZE; + } + return 0; +} + +/* + * allocate a sampling buffer and remaps it into the user address space of the task + */ +static int +pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma = NULL; + unsigned long size; + void *smpl_buf; + + + /* + * the fixed header + requested size and align to page boundary + */ + size = PAGE_ALIGN(rsize); + + DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); + + /* + * check requested size to avoid Denial-of-service attacks + * XXX: may have to refine this test + * Check against address space limit. + * + * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) + * return -ENOMEM; + */ + if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) + return -ENOMEM; + + /* + * We do the easy to undo allocations first. + * + * pfm_rvmalloc(), clears the buffer, so there is no leak + */ + smpl_buf = pfm_rvmalloc(size); + if (smpl_buf == NULL) { + DPRINT(("Can't allocate sampling buffer\n")); + return -ENOMEM; + } + + DPRINT(("smpl_buf @%p\n", smpl_buf)); + + /* allocate vma */ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!vma) { + DPRINT(("Cannot allocate vma\n")); + goto error_kmem; + } + memset(vma, 0, sizeof(*vma)); + + /* + * partially initialize the vma for the sampling buffer + */ + vma->vm_mm = mm; + vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; + vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ + + /* + * Now we have everything we need and we can initialize + * and connect all the data structures + */ + + ctx->ctx_smpl_hdr = smpl_buf; + ctx->ctx_smpl_size = size; /* aligned size */ + + /* + * Let's do the difficult operations next. + * + * now we atomically find some area in the address space and + * remap the buffer in it. + */ + down_write(&task->mm->mmap_sem); + + /* find some free area in address space, must have mmap sem held */ + vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); + if (vma->vm_start == 0UL) { + DPRINT(("Cannot find unmapped area for size %ld\n", size)); + up_write(&task->mm->mmap_sem); + goto error; + } + vma->vm_end = vma->vm_start + size; + vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; + + DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); + + /* can only be applied to current task, need to have the mm semaphore held when called */ + if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { + DPRINT(("Can't remap buffer\n")); + up_write(&task->mm->mmap_sem); + goto error; + } + + /* + * now insert the vma in the vm list for the process, must be + * done with mmap lock held + */ + insert_vm_struct(mm, vma); + + mm->total_vm += size >> PAGE_SHIFT; + vm_stat_account(vma); + up_write(&task->mm->mmap_sem); + + /* + * keep track of user level virtual address + */ + ctx->ctx_smpl_vaddr = (void *)vma->vm_start; + *(unsigned long *)user_vaddr = vma->vm_start; + + return 0; + +error: + kmem_cache_free(vm_area_cachep, vma); +error_kmem: + pfm_rvfree(smpl_buf, size); + + return -ENOMEM; +} + +/* + * XXX: do something better here + */ +static int +pfm_bad_permissions(struct task_struct *task) +{ + /* inspired by ptrace_attach() */ + DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", + current->uid, + current->gid, + task->euid, + task->suid, + task->uid, + task->egid, + task->sgid)); + + return ((current->uid != task->euid) + || (current->uid != task->suid) + || (current->uid != task->uid) + || (current->gid != task->egid) + || (current->gid != task->sgid) + || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE); +} + +static int +pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) +{ + int ctx_flags; + + /* valid signal */ + + ctx_flags = pfx->ctx_flags; + + if (ctx_flags & PFM_FL_SYSTEM_WIDE) { + + /* + * cannot block in this mode + */ + if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { + DPRINT(("cannot use blocking mode when in system wide monitoring\n")); + return -EINVAL; + } + } else { + } + /* probably more to add here */ + + return 0; +} + +static int +pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags, + unsigned int cpu, pfarg_context_t *arg) +{ + pfm_buffer_fmt_t *fmt = NULL; + unsigned long size = 0UL; + void *uaddr = NULL; + void *fmt_arg = NULL; + int ret = 0; +#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1) + + /* invoke and lock buffer format, if found */ + fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); + if (fmt == NULL) { + DPRINT(("[%d] cannot find buffer format\n", task->pid)); + return -EINVAL; + } + + /* + * buffer argument MUST be contiguous to pfarg_context_t + */ + if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg); + + ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); + + DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret)); + + if (ret) goto error; + + /* link buffer format and context */ + ctx->ctx_buf_fmt = fmt; + + /* + * check if buffer format wants to use perfmon buffer allocation/mapping service + */ + ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); + if (ret) goto error; + + if (size) { + /* + * buffer is always remapped into the caller's address space + */ + ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr); + if (ret) goto error; + + /* keep track of user address of buffer */ + arg->ctx_smpl_vaddr = uaddr; + } + ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg); + +error: + return ret; +} + +static void +pfm_reset_pmu_state(pfm_context_t *ctx) +{ + int i; + + /* + * install reset values for PMC. + */ + for (i=1; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); + DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); + } + /* + * PMD registers are set to 0UL when the context in memset() + */ + + /* + * On context switched restore, we must restore ALL pmc and ALL pmd even + * when they are not actively used by the task. In UP, the incoming process + * may otherwise pick up left over PMC, PMD state from the previous process. + * As opposed to PMD, stale PMC can cause harm to the incoming + * process because they may change what is being measured. + * Therefore, we must systematically reinstall the entire + * PMC state. In SMP, the same thing is possible on the + * same CPU but also on between 2 CPUs. + * + * The problem with PMD is information leaking especially + * to user level when psr.sp=0 + * + * There is unfortunately no easy way to avoid this problem + * on either UP or SMP. This definitively slows down the + * pfm_load_regs() function. + */ + + /* + * bitmask of all PMCs accessible to this context + * + * PMC0 is treated differently. + */ + ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; + + /* + * bitmask of all PMDs that are accesible to this context + */ + ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; + + DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0])); + + /* + * useful in case of re-enable after disable + */ + ctx->ctx_used_ibrs[0] = 0UL; + ctx->ctx_used_dbrs[0] = 0UL; +} + +static int +pfm_ctx_getsize(void *arg, size_t *sz) +{ + pfarg_context_t *req = (pfarg_context_t *)arg; + pfm_buffer_fmt_t *fmt; + + *sz = 0; + + if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0; + + fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id); + if (fmt == NULL) { + DPRINT(("cannot find buffer format\n")); + return -EINVAL; + } + /* get just enough to copy in user parameters */ + *sz = fmt->fmt_arg_size; + DPRINT(("arg_size=%lu\n", *sz)); + + return 0; +} + + + +/* + * cannot attach if : + * - kernel task + * - task not owned by caller + * - task incompatible with context mode + */ +static int +pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) +{ + /* + * no kernel task or task not owner by caller + */ + if (task->mm == NULL) { + DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid)); + return -EPERM; + } + if (pfm_bad_permissions(task)) { + DPRINT(("no permission to attach to [%d]\n", task->pid)); + return -EPERM; + } + /* + * cannot block in self-monitoring mode + */ + if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { + DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid)); + return -EINVAL; + } + + if (task->exit_state == EXIT_ZOMBIE) { + DPRINT(("cannot attach to zombie task [%d]\n", task->pid)); + return -EBUSY; + } + + /* + * always ok for self + */ + if (task == current) return 0; + + if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { + DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state)); + return -EBUSY; + } + /* + * make sure the task is off any CPU + */ + wait_task_inactive(task); + + /* more to come... */ + + return 0; +} + +static int +pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) +{ + struct task_struct *p = current; + int ret; + + /* XXX: need to add more checks here */ + if (pid < 2) return -EPERM; + + if (pid != current->pid) { + + read_lock(&tasklist_lock); + + p = find_task_by_pid(pid); + + /* make sure task cannot go away while we operate on it */ + if (p) get_task_struct(p); + + read_unlock(&tasklist_lock); + + if (p == NULL) return -ESRCH; + } + + ret = pfm_task_incompatible(ctx, p); + if (ret == 0) { + *task = p; + } else if (p != current) { + pfm_put_task(p); + } + return ret; +} + + + +static int +pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + pfarg_context_t *req = (pfarg_context_t *)arg; + struct file *filp; + int ctx_flags; + int ret; + + /* let's check the arguments first */ + ret = pfarg_is_sane(current, req); + if (ret < 0) return ret; + + ctx_flags = req->ctx_flags; + + ret = -ENOMEM; + + ctx = pfm_context_alloc(); + if (!ctx) goto error; + + ret = pfm_alloc_fd(&filp); + if (ret < 0) goto error_file; + + req->ctx_fd = ctx->ctx_fd = ret; + + /* + * attach context to file + */ + filp->private_data = ctx; + + /* + * does the user want to sample? + */ + if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { + ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req); + if (ret) goto buffer_error; + } + + /* + * init context protection lock + */ + spin_lock_init(&ctx->ctx_lock); + + /* + * context is unloaded + */ + ctx->ctx_state = PFM_CTX_UNLOADED; + + /* + * initialization of context's flags + */ + ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; + ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; + ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */ + ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; + /* + * will move to set properties + * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0; + */ + + /* + * init restart semaphore to locked + */ + sema_init(&ctx->ctx_restart_sem, 0); + + /* + * activation is used in SMP only + */ + ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; + SET_LAST_CPU(ctx, -1); + + /* + * initialize notification message queue + */ + ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; + init_waitqueue_head(&ctx->ctx_msgq_wait); + init_waitqueue_head(&ctx->ctx_zombieq); + + DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n", + ctx, + ctx_flags, + ctx->ctx_fl_system, + ctx->ctx_fl_block, + ctx->ctx_fl_excl_idle, + ctx->ctx_fl_no_msg, + ctx->ctx_fd)); + + /* + * initialize soft PMU state + */ + pfm_reset_pmu_state(ctx); + + return 0; + +buffer_error: + pfm_free_fd(ctx->ctx_fd, filp); + + if (ctx->ctx_buf_fmt) { + pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); + } +error_file: + pfm_context_free(ctx); + +error: + return ret; +} + +static inline unsigned long +pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) +{ + unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; + unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; + extern unsigned long carta_random32 (unsigned long seed); + + if (reg->flags & PFM_REGFL_RANDOM) { + new_seed = carta_random32(old_seed); + val -= (old_seed & mask); /* counter values are negative numbers! */ + if ((mask >> 32) != 0) + /* construct a full 64-bit random value: */ + new_seed |= carta_random32(old_seed >> 32) << 32; + reg->seed = new_seed; + } + reg->lval = val; + return val; +} + +static void +pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) +{ + unsigned long mask = ovfl_regs[0]; + unsigned long reset_others = 0UL; + unsigned long val; + int i; + + /* + * now restore reset value on sampling overflowed counters + */ + mask >>= PMU_FIRST_COUNTER; + for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { + + if ((mask & 0x1UL) == 0UL) continue; + + ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); + reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; + + DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); + } + + /* + * Now take care of resetting the other registers + */ + for(i = 0; reset_others; i++, reset_others >>= 1) { + + if ((reset_others & 0x1) == 0) continue; + + ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); + + DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", + is_long_reset ? "long" : "short", i, val)); + } +} + +static void +pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) +{ + unsigned long mask = ovfl_regs[0]; + unsigned long reset_others = 0UL; + unsigned long val; + int i; + + DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); + + if (ctx->ctx_state == PFM_CTX_MASKED) { + pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); + return; + } + + /* + * now restore reset value on sampling overflowed counters + */ + mask >>= PMU_FIRST_COUNTER; + for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { + + if ((mask & 0x1UL) == 0UL) continue; + + val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); + reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; + + DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); + + pfm_write_soft_counter(ctx, i, val); + } + + /* + * Now take care of resetting the other registers + */ + for(i = 0; reset_others; i++, reset_others >>= 1) { + + if ((reset_others & 0x1) == 0) continue; + + val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); + + if (PMD_IS_COUNTING(i)) { + pfm_write_soft_counter(ctx, i, val); + } else { + ia64_set_pmd(i, val); + } + DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", + is_long_reset ? "long" : "short", i, val)); + } + ia64_srlz_d(); +} + +static int +pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct thread_struct *thread = NULL; + struct task_struct *task; + pfarg_reg_t *req = (pfarg_reg_t *)arg; + unsigned long value, pmc_pm; + unsigned long smpl_pmds, reset_pmds, impl_pmds; + unsigned int cnum, reg_flags, flags, pmc_type; + int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; + int is_monitor, is_counting, state; + int ret = -EINVAL; + pfm_reg_check_t wr_func; +#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 1 : 0; + is_system = ctx->ctx_fl_system; + task = ctx->ctx_task; + impl_pmds = pmu_conf->impl_pmds[0]; + + if (state == PFM_CTX_ZOMBIE) return -EINVAL; + + if (is_loaded) { + thread = &task->thread; + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; + } + expert_mode = pfm_sysctl.expert_mode; + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + reg_flags = req->reg_flags; + value = req->reg_value; + smpl_pmds = req->reg_smpl_pmds[0]; + reset_pmds = req->reg_reset_pmds[0]; + flags = 0; + + + if (cnum >= PMU_MAX_PMCS) { + DPRINT(("pmc%u is invalid\n", cnum)); + goto error; + } + + pmc_type = pmu_conf->pmc_desc[cnum].type; + pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; + is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; + is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; + + /* + * we reject all non implemented PMC as well + * as attempts to modify PMC[0-3] which are used + * as status registers by the PMU + */ + if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { + DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); + goto error; + } + wr_func = pmu_conf->pmc_desc[cnum].write_check; + /* + * If the PMC is a monitor, then if the value is not the default: + * - system-wide session: PMCx.pm=1 (privileged monitor) + * - per-task : PMCx.pm=0 (user monitor) + */ + if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { + DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", + cnum, + pmc_pm, + is_system)); + goto error; + } + + if (is_counting) { + /* + * enforce generation of overflow interrupt. Necessary on all + * CPUs. + */ + value |= 1 << PMU_PMC_OI; + + if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { + flags |= PFM_REGFL_OVFL_NOTIFY; + } + + if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; + + /* verify validity of smpl_pmds */ + if ((smpl_pmds & impl_pmds) != smpl_pmds) { + DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); + goto error; + } + + /* verify validity of reset_pmds */ + if ((reset_pmds & impl_pmds) != reset_pmds) { + DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); + goto error; + } + } else { + if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { + DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); + goto error; + } + /* eventid on non-counting monitors are ignored */ + } + + /* + * execute write checker, if any + */ + if (likely(expert_mode == 0 && wr_func)) { + ret = (*wr_func)(task, ctx, cnum, &value, regs); + if (ret) goto error; + ret = -EINVAL; + } + + /* + * no error on this register + */ + PFM_REG_RETFLAG_SET(req->reg_flags, 0); + + /* + * Now we commit the changes to the software state + */ + + /* + * update overflow information + */ + if (is_counting) { + /* + * full flag update each time a register is programmed + */ + ctx->ctx_pmds[cnum].flags = flags; + + ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds; + ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; + ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid; + + /* + * Mark all PMDS to be accessed as used. + * + * We do not keep track of PMC because we have to + * systematically restore ALL of them. + * + * We do not update the used_monitors mask, because + * if we have not programmed them, then will be in + * a quiescent state, therefore we will not need to + * mask/restore then when context is MASKED. + */ + CTX_USED_PMD(ctx, reset_pmds); + CTX_USED_PMD(ctx, smpl_pmds); + /* + * make sure we do not try to reset on + * restart because we have established new values + */ + if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; + } + /* + * Needed in case the user does not initialize the equivalent + * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no + * possible leak here. + */ + CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); + + /* + * keep track of the monitor PMC that we are using. + * we save the value of the pmc in ctx_pmcs[] and if + * the monitoring is not stopped for the context we also + * place it in the saved state area so that it will be + * picked up later by the context switch code. + * + * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). + * + * The value in thread->pmcs[] may be modified on overflow, i.e., when + * monitoring needs to be stopped. + */ + if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); + + /* + * update context state + */ + ctx->ctx_pmcs[cnum] = value; + + if (is_loaded) { + /* + * write thread state + */ + if (is_system == 0) thread->pmcs[cnum] = value; + + /* + * write hardware register if we can + */ + if (can_access_pmu) { + ia64_set_pmc(cnum, value); + } +#ifdef CONFIG_SMP + else { + /* + * per-task SMP only here + * + * we are guaranteed that the task is not running on the other CPU, + * we indicate that this PMD will need to be reloaded if the task + * is rescheduled on the CPU it ran last on. + */ + ctx->ctx_reload_pmcs[0] |= 1UL << cnum; + } +#endif + } + + DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", + cnum, + value, + is_loaded, + can_access_pmu, + flags, + ctx->ctx_all_pmcs[0], + ctx->ctx_used_pmds[0], + ctx->ctx_pmds[cnum].eventid, + smpl_pmds, + reset_pmds, + ctx->ctx_reload_pmcs[0], + ctx->ctx_used_monitors[0], + ctx->ctx_ovfl_regs[0])); + } + + /* + * make sure the changes are visible + */ + if (can_access_pmu) ia64_srlz_d(); + + return 0; +error: + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +static int +pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct thread_struct *thread = NULL; + struct task_struct *task; + pfarg_reg_t *req = (pfarg_reg_t *)arg; + unsigned long value, hw_value, ovfl_mask; + unsigned int cnum; + int i, can_access_pmu = 0, state; + int is_counting, is_loaded, is_system, expert_mode; + int ret = -EINVAL; + pfm_reg_check_t wr_func; + + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 1 : 0; + is_system = ctx->ctx_fl_system; + ovfl_mask = pmu_conf->ovfl_val; + task = ctx->ctx_task; + + if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; + + /* + * on both UP and SMP, we can only write to the PMC when the task is + * the owner of the local PMU. + */ + if (likely(is_loaded)) { + thread = &task->thread; + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; + } + expert_mode = pfm_sysctl.expert_mode; + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + value = req->reg_value; + + if (!PMD_IS_IMPL(cnum)) { + DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); + goto abort_mission; + } + is_counting = PMD_IS_COUNTING(cnum); + wr_func = pmu_conf->pmd_desc[cnum].write_check; + + /* + * execute write checker, if any + */ + if (unlikely(expert_mode == 0 && wr_func)) { + unsigned long v = value; + + ret = (*wr_func)(task, ctx, cnum, &v, regs); + if (ret) goto abort_mission; + + value = v; + ret = -EINVAL; + } + + /* + * no error on this register + */ + PFM_REG_RETFLAG_SET(req->reg_flags, 0); + + /* + * now commit changes to software state + */ + hw_value = value; + + /* + * update virtualized (64bits) counter + */ + if (is_counting) { + /* + * write context state + */ + ctx->ctx_pmds[cnum].lval = value; + + /* + * when context is load we use the split value + */ + if (is_loaded) { + hw_value = value & ovfl_mask; + value = value & ~ovfl_mask; + } + } + /* + * update reset values (not just for counters) + */ + ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; + ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; + + /* + * update randomization parameters (not just for counters) + */ + ctx->ctx_pmds[cnum].seed = req->reg_random_seed; + ctx->ctx_pmds[cnum].mask = req->reg_random_mask; + + /* + * update context value + */ + ctx->ctx_pmds[cnum].val = value; + + /* + * Keep track of what we use + * + * We do not keep track of PMC because we have to + * systematically restore ALL of them. + */ + CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); + + /* + * mark this PMD register used as well + */ + CTX_USED_PMD(ctx, RDEP(cnum)); + + /* + * make sure we do not try to reset on + * restart because we have established new values + */ + if (is_counting && state == PFM_CTX_MASKED) { + ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; + } + + if (is_loaded) { + /* + * write thread state + */ + if (is_system == 0) thread->pmds[cnum] = hw_value; + + /* + * write hardware register if we can + */ + if (can_access_pmu) { + ia64_set_pmd(cnum, hw_value); + } else { +#ifdef CONFIG_SMP + /* + * we are guaranteed that the task is not running on the other CPU, + * we indicate that this PMD will need to be reloaded if the task + * is rescheduled on the CPU it ran last on. + */ + ctx->ctx_reload_pmds[0] |= 1UL << cnum; +#endif + } + } + + DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " + "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", + cnum, + value, + is_loaded, + can_access_pmu, + hw_value, + ctx->ctx_pmds[cnum].val, + ctx->ctx_pmds[cnum].short_reset, + ctx->ctx_pmds[cnum].long_reset, + PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N', + ctx->ctx_pmds[cnum].seed, + ctx->ctx_pmds[cnum].mask, + ctx->ctx_used_pmds[0], + ctx->ctx_pmds[cnum].reset_pmds[0], + ctx->ctx_reload_pmds[0], + ctx->ctx_all_pmds[0], + ctx->ctx_ovfl_regs[0])); + } + + /* + * make changes visible + */ + if (can_access_pmu) ia64_srlz_d(); + + return 0; + +abort_mission: + /* + * for now, we have only one possibility for error + */ + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +/* + * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. + * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an + * interrupt is delivered during the call, it will be kept pending until we leave, making + * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are + * guaranteed to return consistent data to the user, it may simply be old. It is not + * trivial to treat the overflow while inside the call because you may end up in + * some module sampling buffer code causing deadlocks. + */ +static int +pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct thread_struct *thread = NULL; + struct task_struct *task; + unsigned long val = 0UL, lval, ovfl_mask, sval; + pfarg_reg_t *req = (pfarg_reg_t *)arg; + unsigned int cnum, reg_flags = 0; + int i, can_access_pmu = 0, state; + int is_loaded, is_system, is_counting, expert_mode; + int ret = -EINVAL; + pfm_reg_check_t rd_func; + + /* + * access is possible when loaded only for + * self-monitoring tasks or in UP mode + */ + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 1 : 0; + is_system = ctx->ctx_fl_system; + ovfl_mask = pmu_conf->ovfl_val; + task = ctx->ctx_task; + + if (state == PFM_CTX_ZOMBIE) return -EINVAL; + + if (likely(is_loaded)) { + thread = &task->thread; + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + /* + * this can be true when not self-monitoring only in UP + */ + can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; + + if (can_access_pmu) ia64_srlz_d(); + } + expert_mode = pfm_sysctl.expert_mode; + + DPRINT(("ld=%d apmu=%d ctx_state=%d\n", + is_loaded, + can_access_pmu, + state)); + + /* + * on both UP and SMP, we can only read the PMD from the hardware register when + * the task is the owner of the local PMU. + */ + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + reg_flags = req->reg_flags; + + if (unlikely(!PMD_IS_IMPL(cnum))) goto error; + /* + * we can only read the register that we use. That includes + * the one we explicitely initialize AND the one we want included + * in the sampling buffer (smpl_regs). + * + * Having this restriction allows optimization in the ctxsw routine + * without compromising security (leaks) + */ + if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; + + sval = ctx->ctx_pmds[cnum].val; + lval = ctx->ctx_pmds[cnum].lval; + is_counting = PMD_IS_COUNTING(cnum); + + /* + * If the task is not the current one, then we check if the + * PMU state is still in the local live register due to lazy ctxsw. + * If true, then we read directly from the registers. + */ + if (can_access_pmu){ + val = ia64_get_pmd(cnum); + } else { + /* + * context has been saved + * if context is zombie, then task does not exist anymore. + * In this case, we use the full value saved in the context (pfm_flush_regs()). + */ + val = is_loaded ? thread->pmds[cnum] : 0UL; + } + rd_func = pmu_conf->pmd_desc[cnum].read_check; + + if (is_counting) { + /* + * XXX: need to check for overflow when loaded + */ + val &= ovfl_mask; + val += sval; + } + + /* + * execute read checker, if any + */ + if (unlikely(expert_mode == 0 && rd_func)) { + unsigned long v = val; + ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); + if (ret) goto error; + val = v; + ret = -EINVAL; + } + + PFM_REG_RETFLAG_SET(reg_flags, 0); + + DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); + + /* + * update register return value, abort all if problem during copy. + * we only modify the reg_flags field. no check mode is fine because + * access has been verified upfront in sys_perfmonctl(). + */ + req->reg_value = val; + req->reg_flags = reg_flags; + req->reg_last_reset_val = lval; + } + + return 0; + +error: + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +int +pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_write_pmcs(ctx, req, nreq, regs); +} +EXPORT_SYMBOL(pfm_mod_write_pmcs); + +int +pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_read_pmds(ctx, req, nreq, regs); +} +EXPORT_SYMBOL(pfm_mod_read_pmds); + +/* + * Only call this function when a process it trying to + * write the debug registers (reading is always allowed) + */ +int +pfm_use_debug_registers(struct task_struct *task) +{ + pfm_context_t *ctx = task->thread.pfm_context; + unsigned long flags; + int ret = 0; + + if (pmu_conf->use_rr_dbregs == 0) return 0; + + DPRINT(("called for [%d]\n", task->pid)); + + /* + * do it only once + */ + if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; + + /* + * Even on SMP, we do not need to use an atomic here because + * the only way in is via ptrace() and this is possible only when the + * process is stopped. Even in the case where the ctxsw out is not totally + * completed by the time we come here, there is no way the 'stopped' process + * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. + * So this is always safe. + */ + if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; + + LOCK_PFS(flags); + + /* + * We cannot allow setting breakpoints when system wide monitoring + * sessions are using the debug registers. + */ + if (pfm_sessions.pfs_sys_use_dbregs> 0) + ret = -1; + else + pfm_sessions.pfs_ptrace_use_dbregs++; + + DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", + pfm_sessions.pfs_ptrace_use_dbregs, + pfm_sessions.pfs_sys_use_dbregs, + task->pid, ret)); + + UNLOCK_PFS(flags); + + return ret; +} + +/* + * This function is called for every task that exits with the + * IA64_THREAD_DBG_VALID set. This indicates a task which was + * able to use the debug registers for debugging purposes via + * ptrace(). Therefore we know it was not using them for + * perfmormance monitoring, so we only decrement the number + * of "ptraced" debug register users to keep the count up to date + */ +int +pfm_release_debug_registers(struct task_struct *task) +{ + unsigned long flags; + int ret; + + if (pmu_conf->use_rr_dbregs == 0) return 0; + + LOCK_PFS(flags); + if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { + printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid); + ret = -1; + } else { + pfm_sessions.pfs_ptrace_use_dbregs--; + ret = 0; + } + UNLOCK_PFS(flags); + + return ret; +} + +static int +pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct task_struct *task; + pfm_buffer_fmt_t *fmt; + pfm_ovfl_ctrl_t rst_ctrl; + int state, is_system; + int ret = 0; + + state = ctx->ctx_state; + fmt = ctx->ctx_buf_fmt; + is_system = ctx->ctx_fl_system; + task = PFM_CTX_TASK(ctx); + + switch(state) { + case PFM_CTX_MASKED: + break; + case PFM_CTX_LOADED: + if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; + /* fall through */ + case PFM_CTX_UNLOADED: + case PFM_CTX_ZOMBIE: + DPRINT(("invalid state=%d\n", state)); + return -EBUSY; + default: + DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); + return -EINVAL; + } + + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + + /* sanity check */ + if (unlikely(task == NULL)) { + printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid); + return -EINVAL; + } + + if (task == current || is_system) { + + fmt = ctx->ctx_buf_fmt; + + DPRINT(("restarting self %d ovfl=0x%lx\n", + task->pid, + ctx->ctx_ovfl_regs[0])); + + if (CTX_HAS_SMPL(ctx)) { + + prefetch(ctx->ctx_smpl_hdr); + + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 0; + + if (state == PFM_CTX_LOADED) + ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + else + ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + } else { + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 1; + } + + if (ret == 0) { + if (rst_ctrl.bits.reset_ovfl_pmds) + pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); + + if (rst_ctrl.bits.mask_monitoring == 0) { + DPRINT(("resuming monitoring for [%d]\n", task->pid)); + + if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); + } else { + DPRINT(("keeping monitoring stopped for [%d]\n", task->pid)); + + // cannot use pfm_stop_monitoring(task, regs); + } + } + /* + * clear overflowed PMD mask to remove any stale information + */ + ctx->ctx_ovfl_regs[0] = 0UL; + + /* + * back to LOADED state + */ + ctx->ctx_state = PFM_CTX_LOADED; + + /* + * XXX: not really useful for self monitoring + */ + ctx->ctx_fl_can_restart = 0; + + return 0; + } + + /* + * restart another task + */ + + /* + * When PFM_CTX_MASKED, we cannot issue a restart before the previous + * one is seen by the task. + */ + if (state == PFM_CTX_MASKED) { + if (ctx->ctx_fl_can_restart == 0) return -EINVAL; + /* + * will prevent subsequent restart before this one is + * seen by other task + */ + ctx->ctx_fl_can_restart = 0; + } + + /* + * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. + * the task is blocked or on its way to block. That's the normal + * restart path. If the monitoring is not masked, then the task + * can be actively monitoring and we cannot directly intervene. + * Therefore we use the trap mechanism to catch the task and + * force it to reset the buffer/reset PMDs. + * + * if non-blocking, then we ensure that the task will go into + * pfm_handle_work() before returning to user mode. + * + * We cannot explicitely reset another task, it MUST always + * be done by the task itself. This works for system wide because + * the tool that is controlling the session is logically doing + * "self-monitoring". + */ + if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { + DPRINT(("unblocking [%d] \n", task->pid)); + up(&ctx->ctx_restart_sem); + } else { + DPRINT(("[%d] armed exit trap\n", task->pid)); + + ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; + + PFM_SET_WORK_PENDING(task, 1); + + pfm_set_task_notify(task); + + /* + * XXX: send reschedule if task runs on another CPU + */ + } + return 0; +} + +static int +pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + unsigned int m = *(unsigned int *)arg; + + pfm_sysctl.debug = m == 0 ? 0 : 1; + + pfm_debug_var = pfm_sysctl.debug; + + printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); + + if (m == 0) { + memset(pfm_stats, 0, sizeof(pfm_stats)); + for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; + } + return 0; +} + +/* + * arg can be NULL and count can be zero for this function + */ +static int +pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct thread_struct *thread = NULL; + struct task_struct *task; + pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; + unsigned long flags; + dbreg_t dbreg; + unsigned int rnum; + int first_time; + int ret = 0, state; + int i, can_access_pmu = 0; + int is_system, is_loaded; + + if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 1 : 0; + is_system = ctx->ctx_fl_system; + task = ctx->ctx_task; + + if (state == PFM_CTX_ZOMBIE) return -EINVAL; + + /* + * on both UP and SMP, we can only write to the PMC when the task is + * the owner of the local PMU. + */ + if (is_loaded) { + thread = &task->thread; + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; + } + + /* + * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w + * ensuring that no real breakpoint can be installed via this call. + * + * IMPORTANT: regs can be NULL in this function + */ + + first_time = ctx->ctx_fl_using_dbreg == 0; + + /* + * don't bother if we are loaded and task is being debugged + */ + if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { + DPRINT(("debug registers already in use for [%d]\n", task->pid)); + return -EBUSY; + } + + /* + * check for debug registers in system wide mode + * + * If though a check is done in pfm_context_load(), + * we must repeat it here, in case the registers are + * written after the context is loaded + */ + if (is_loaded) { + LOCK_PFS(flags); + + if (first_time && is_system) { + if (pfm_sessions.pfs_ptrace_use_dbregs) + ret = -EBUSY; + else + pfm_sessions.pfs_sys_use_dbregs++; + } + UNLOCK_PFS(flags); + } + + if (ret != 0) return ret; + + /* + * mark ourself as user of the debug registers for + * perfmon purposes. + */ + ctx->ctx_fl_using_dbreg = 1; + + /* + * clear hardware registers to make sure we don't + * pick up stale state. + * + * for a system wide session, we do not use + * thread.dbr, thread.ibr because this process + * never leaves the current CPU and the state + * is shared by all processes running on it + */ + if (first_time && can_access_pmu) { + DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid)); + for (i=0; i < pmu_conf->num_ibrs; i++) { + ia64_set_ibr(i, 0UL); + ia64_dv_serialize_instruction(); + } + ia64_srlz_i(); + for (i=0; i < pmu_conf->num_dbrs; i++) { + ia64_set_dbr(i, 0UL); + ia64_dv_serialize_data(); + } + ia64_srlz_d(); + } + + /* + * Now install the values into the registers + */ + for (i = 0; i < count; i++, req++) { + + rnum = req->dbreg_num; + dbreg.val = req->dbreg_value; + + ret = -EINVAL; + + if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { + DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", + rnum, dbreg.val, mode, i, count)); + + goto abort_mission; + } + + /* + * make sure we do not install enabled breakpoint + */ + if (rnum & 0x1) { + if (mode == PFM_CODE_RR) + dbreg.ibr.ibr_x = 0; + else + dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; + } + + PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); + + /* + * Debug registers, just like PMC, can only be modified + * by a kernel call. Moreover, perfmon() access to those + * registers are centralized in this routine. The hardware + * does not modify the value of these registers, therefore, + * if we save them as they are written, we can avoid having + * to save them on context switch out. This is made possible + * by the fact that when perfmon uses debug registers, ptrace() + * won't be able to modify them concurrently. + */ + if (mode == PFM_CODE_RR) { + CTX_USED_IBR(ctx, rnum); + + if (can_access_pmu) { + ia64_set_ibr(rnum, dbreg.val); + ia64_dv_serialize_instruction(); + } + + ctx->ctx_ibrs[rnum] = dbreg.val; + + DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", + rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); + } else { + CTX_USED_DBR(ctx, rnum); + + if (can_access_pmu) { + ia64_set_dbr(rnum, dbreg.val); + ia64_dv_serialize_data(); + } + ctx->ctx_dbrs[rnum] = dbreg.val; + + DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", + rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); + } + } + + return 0; + +abort_mission: + /* + * in case it was our first attempt, we undo the global modifications + */ + if (first_time) { + LOCK_PFS(flags); + if (ctx->ctx_fl_system) { + pfm_sessions.pfs_sys_use_dbregs--; + } + UNLOCK_PFS(flags); + ctx->ctx_fl_using_dbreg = 0; + } + /* + * install error return flag + */ + PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); + + return ret; +} + +static int +pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); +} + +static int +pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); +} + +int +pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_write_ibrs(ctx, req, nreq, regs); +} +EXPORT_SYMBOL(pfm_mod_write_ibrs); + +int +pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_write_dbrs(ctx, req, nreq, regs); +} +EXPORT_SYMBOL(pfm_mod_write_dbrs); + + +static int +pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + pfarg_features_t *req = (pfarg_features_t *)arg; + + req->ft_version = PFM_VERSION; + return 0; +} + +static int +pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct pt_regs *tregs; + struct task_struct *task = PFM_CTX_TASK(ctx); + int state, is_system; + + state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + + /* + * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) + */ + if (state == PFM_CTX_UNLOADED) return -EINVAL; + + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + DPRINT(("task [%d] ctx_state=%d is_system=%d\n", + PFM_CTX_TASK(ctx)->pid, + state, + is_system)); + /* + * in system mode, we need to update the PMU directly + * and the user level state of the caller, which may not + * necessarily be the creator of the context. + */ + if (is_system) { + /* + * Update local PMU first + * + * disable dcr pp + */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); + ia64_srlz_i(); + + /* + * update local cpuinfo + */ + PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); + + /* + * stop monitoring, does srlz.i + */ + pfm_clear_psr_pp(); + + /* + * stop monitoring in the caller + */ + ia64_psr(regs)->pp = 0; + + return 0; + } + /* + * per-task mode + */ + + if (task == current) { + /* stop monitoring at kernel level */ + pfm_clear_psr_up(); + + /* + * stop monitoring at the user level + */ + ia64_psr(regs)->up = 0; + } else { + tregs = ia64_task_regs(task); + + /* + * stop monitoring at the user level + */ + ia64_psr(tregs)->up = 0; + + /* + * monitoring disabled in kernel at next reschedule + */ + ctx->ctx_saved_psr_up = 0; + DPRINT(("task=[%d]\n", task->pid)); + } + return 0; +} + + +static int +pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct pt_regs *tregs; + int state, is_system; + + state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + + if (state != PFM_CTX_LOADED) return -EINVAL; + + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + + /* + * in system mode, we need to update the PMU directly + * and the user level state of the caller, which may not + * necessarily be the creator of the context. + */ + if (is_system) { + + /* + * set user level psr.pp for the caller + */ + ia64_psr(regs)->pp = 1; + + /* + * now update the local PMU and cpuinfo + */ + PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); + + /* + * start monitoring at kernel level + */ + pfm_set_psr_pp(); + + /* enable dcr pp */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); + ia64_srlz_i(); + + return 0; + } + + /* + * per-process mode + */ + + if (ctx->ctx_task == current) { + + /* start monitoring at kernel level */ + pfm_set_psr_up(); + + /* + * activate monitoring at user level + */ + ia64_psr(regs)->up = 1; + + } else { + tregs = ia64_task_regs(ctx->ctx_task); + + /* + * start monitoring at the kernel level the next + * time the task is scheduled + */ + ctx->ctx_saved_psr_up = IA64_PSR_UP; + + /* + * activate monitoring at user level + */ + ia64_psr(tregs)->up = 1; + } + return 0; +} + +static int +pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + pfarg_reg_t *req = (pfarg_reg_t *)arg; + unsigned int cnum; + int i; + int ret = -EINVAL; + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + + if (!PMC_IS_IMPL(cnum)) goto abort_mission; + + req->reg_value = PMC_DFL_VAL(cnum); + + PFM_REG_RETFLAG_SET(req->reg_flags, 0); + + DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); + } + return 0; + +abort_mission: + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +static int +pfm_check_task_exist(pfm_context_t *ctx) +{ + struct task_struct *g, *t; + int ret = -ESRCH; + + read_lock(&tasklist_lock); + + do_each_thread (g, t) { + if (t->thread.pfm_context == ctx) { + ret = 0; + break; + } + } while_each_thread (g, t); + + read_unlock(&tasklist_lock); + + DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); + + return ret; +} + +static int +pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct task_struct *task; + struct thread_struct *thread; + struct pfm_context_t *old; + unsigned long flags; +#ifndef CONFIG_SMP + struct task_struct *owner_task = NULL; +#endif + pfarg_load_t *req = (pfarg_load_t *)arg; + unsigned long *pmcs_source, *pmds_source; + int the_cpu; + int ret = 0; + int state, is_system, set_dbregs = 0; + + state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + /* + * can only load from unloaded or terminated state + */ + if (state != PFM_CTX_UNLOADED) { + DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", + req->load_pid, + ctx->ctx_state)); + return -EINVAL; + } + + DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); + + if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { + DPRINT(("cannot use blocking mode on self\n")); + return -EINVAL; + } + + ret = pfm_get_task(ctx, req->load_pid, &task); + if (ret) { + DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); + return ret; + } + + ret = -EINVAL; + + /* + * system wide is self monitoring only + */ + if (is_system && task != current) { + DPRINT(("system wide is self monitoring only load_pid=%d\n", + req->load_pid)); + goto error; + } + + thread = &task->thread; + + ret = 0; + /* + * cannot load a context which is using range restrictions, + * into a task that is being debugged. + */ + if (ctx->ctx_fl_using_dbreg) { + if (thread->flags & IA64_THREAD_DBG_VALID) { + ret = -EBUSY; + DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid)); + goto error; + } + LOCK_PFS(flags); + + if (is_system) { + if (pfm_sessions.pfs_ptrace_use_dbregs) { + DPRINT(("cannot load [%d] dbregs in use\n", task->pid)); + ret = -EBUSY; + } else { + pfm_sessions.pfs_sys_use_dbregs++; + DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs)); + set_dbregs = 1; + } + } + + UNLOCK_PFS(flags); + + if (ret) goto error; + } + + /* + * SMP system-wide monitoring implies self-monitoring. + * + * The programming model expects the task to + * be pinned on a CPU throughout the session. + * Here we take note of the current CPU at the + * time the context is loaded. No call from + * another CPU will be allowed. + * + * The pinning via shed_setaffinity() + * must be done by the calling task prior + * to this call. + * + * systemwide: keep track of CPU this session is supposed to run on + */ + the_cpu = ctx->ctx_cpu = smp_processor_id(); + + ret = -EBUSY; + /* + * now reserve the session + */ + ret = pfm_reserve_session(current, is_system, the_cpu); + if (ret) goto error; + + /* + * task is necessarily stopped at this point. + * + * If the previous context was zombie, then it got removed in + * pfm_save_regs(). Therefore we should not see it here. + * If we see a context, then this is an active context + * + * XXX: needs to be atomic + */ + DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", + thread->pfm_context, ctx)); + + old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); + if (old != NULL) { + DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); + goto error_unres; + } + + pfm_reset_msgq(ctx); + + ctx->ctx_state = PFM_CTX_LOADED; + + /* + * link context to task + */ + ctx->ctx_task = task; + + if (is_system) { + /* + * we load as stopped + */ + PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); + PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); + + if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); + } else { + thread->flags |= IA64_THREAD_PM_VALID; + } + + /* + * propagate into thread-state + */ + pfm_copy_pmds(task, ctx); + pfm_copy_pmcs(task, ctx); + + pmcs_source = thread->pmcs; + pmds_source = thread->pmds; + + /* + * always the case for system-wide + */ + if (task == current) { + + if (is_system == 0) { + + /* allow user level control */ + ia64_psr(regs)->sp = 0; + DPRINT(("clearing psr.sp for [%d]\n", task->pid)); + + SET_LAST_CPU(ctx, smp_processor_id()); + INC_ACTIVATION(); + SET_ACTIVATION(ctx); +#ifndef CONFIG_SMP + /* + * push the other task out, if any + */ + owner_task = GET_PMU_OWNER(); + if (owner_task) pfm_lazy_save_regs(owner_task); +#endif + } + /* + * load all PMD from ctx to PMU (as opposed to thread state) + * restore all PMC from ctx to PMU + */ + pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); + pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); + + ctx->ctx_reload_pmcs[0] = 0UL; + ctx->ctx_reload_pmds[0] = 0UL; + + /* + * guaranteed safe by earlier check against DBG_VALID + */ + if (ctx->ctx_fl_using_dbreg) { + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); + } + /* + * set new ownership + */ + SET_PMU_OWNER(task, ctx); + + DPRINT(("context loaded on PMU for [%d]\n", task->pid)); + } else { + /* + * when not current, task MUST be stopped, so this is safe + */ + regs = ia64_task_regs(task); + + /* force a full reload */ + ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; + SET_LAST_CPU(ctx, -1); + + /* initial saved psr (stopped) */ + ctx->ctx_saved_psr_up = 0UL; + ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; + } + + ret = 0; + +error_unres: + if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); +error: + /* + * we must undo the dbregs setting (for system-wide) + */ + if (ret && set_dbregs) { + LOCK_PFS(flags); + pfm_sessions.pfs_sys_use_dbregs--; + UNLOCK_PFS(flags); + } + /* + * release task, there is now a link with the context + */ + if (is_system == 0 && task != current) { + pfm_put_task(task); + + if (ret == 0) { + ret = pfm_check_task_exist(ctx); + if (ret) { + ctx->ctx_state = PFM_CTX_UNLOADED; + ctx->ctx_task = NULL; + } + } + } + return ret; +} + +/* + * in this function, we do not need to increase the use count + * for the task via get_task_struct(), because we hold the + * context lock. If the task were to disappear while having + * a context attached, it would go through pfm_exit_thread() + * which also grabs the context lock and would therefore be blocked + * until we are here. + */ +static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); + +static int +pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct task_struct *task = PFM_CTX_TASK(ctx); + struct pt_regs *tregs; + int prev_state, is_system; + int ret; + + DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1)); + + prev_state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + + /* + * unload only when necessary + */ + if (prev_state == PFM_CTX_UNLOADED) { + DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); + return 0; + } + + /* + * clear psr and dcr bits + */ + ret = pfm_stop(ctx, NULL, 0, regs); + if (ret) return ret; + + ctx->ctx_state = PFM_CTX_UNLOADED; + + /* + * in system mode, we need to update the PMU directly + * and the user level state of the caller, which may not + * necessarily be the creator of the context. + */ + if (is_system) { + + /* + * Update cpuinfo + * + * local PMU is taken care of in pfm_stop() + */ + PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); + PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); + + /* + * save PMDs in context + * release ownership + */ + pfm_flush_pmds(current, ctx); + + /* + * at this point we are done with the PMU + * so we can unreserve the resource. + */ + if (prev_state != PFM_CTX_ZOMBIE) + pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); + + /* + * disconnect context from task + */ + task->thread.pfm_context = NULL; + /* + * disconnect task from context + */ + ctx->ctx_task = NULL; + + /* + * There is nothing more to cleanup here. + */ + return 0; + } + + /* + * per-task mode + */ + tregs = task == current ? regs : ia64_task_regs(task); + + if (task == current) { + /* + * cancel user level control + */ + ia64_psr(regs)->sp = 1; + + DPRINT(("setting psr.sp for [%d]\n", task->pid)); + } + /* + * save PMDs to context + * release ownership + */ + pfm_flush_pmds(task, ctx); + + /* + * at this point we are done with the PMU + * so we can unreserve the resource. + * + * when state was ZOMBIE, we have already unreserved. + */ + if (prev_state != PFM_CTX_ZOMBIE) + pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); + + /* + * reset activation counter and psr + */ + ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; + SET_LAST_CPU(ctx, -1); + + /* + * PMU state will not be restored + */ + task->thread.flags &= ~IA64_THREAD_PM_VALID; + + /* + * break links between context and task + */ + task->thread.pfm_context = NULL; + ctx->ctx_task = NULL; + + PFM_SET_WORK_PENDING(task, 0); + + ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; + ctx->ctx_fl_can_restart = 0; + ctx->ctx_fl_going_zombie = 0; + + DPRINT(("disconnected [%d] from context\n", task->pid)); + + return 0; +} + + +/* + * called only from exit_thread(): task == current + * we come here only if current has a context attached (loaded or masked) + */ +void +pfm_exit_thread(struct task_struct *task) +{ + pfm_context_t *ctx; + unsigned long flags; + struct pt_regs *regs = ia64_task_regs(task); + int ret, state; + int free_ok = 0; + + ctx = PFM_GET_CTX(task); + + PROTECT_CTX(ctx, flags); + + DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid)); + + state = ctx->ctx_state; + switch(state) { + case PFM_CTX_UNLOADED: + /* + * only comes to thios function if pfm_context is not NULL, i.e., cannot + * be in unloaded state + */ + printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid); + break; + case PFM_CTX_LOADED: + case PFM_CTX_MASKED: + ret = pfm_context_unload(ctx, NULL, 0, regs); + if (ret) { + printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); + } + DPRINT(("ctx unloaded for current state was %d\n", state)); + + pfm_end_notify_user(ctx); + break; + case PFM_CTX_ZOMBIE: + ret = pfm_context_unload(ctx, NULL, 0, regs); + if (ret) { + printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); + } + free_ok = 1; + break; + default: + printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state); + break; + } + UNPROTECT_CTX(ctx, flags); + + { u64 psr = pfm_get_psr(); + BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); + BUG_ON(GET_PMU_OWNER()); + BUG_ON(ia64_psr(regs)->up); + BUG_ON(ia64_psr(regs)->pp); + } + + /* + * All memory free operations (especially for vmalloc'ed memory) + * MUST be done with interrupts ENABLED. + */ + if (free_ok) pfm_context_free(ctx); +} + +/* + * functions MUST be listed in the increasing order of their index (see permfon.h) + */ +#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz } +#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL } +#define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP) +#define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW) +#define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL} + +static pfm_cmd_desc_t pfm_cmd_tab[]={ +/* 0 */PFM_CMD_NONE, +/* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), +/* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), +/* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), +/* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS), +/* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS), +/* 6 */PFM_CMD_NONE, +/* 7 */PFM_CMD_NONE, +/* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize), +/* 9 */PFM_CMD_NONE, +/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW), +/* 11 */PFM_CMD_NONE, +/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL), +/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL), +/* 14 */PFM_CMD_NONE, +/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), +/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL), +/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS), +/* 18 */PFM_CMD_NONE, +/* 19 */PFM_CMD_NONE, +/* 20 */PFM_CMD_NONE, +/* 21 */PFM_CMD_NONE, +/* 22 */PFM_CMD_NONE, +/* 23 */PFM_CMD_NONE, +/* 24 */PFM_CMD_NONE, +/* 25 */PFM_CMD_NONE, +/* 26 */PFM_CMD_NONE, +/* 27 */PFM_CMD_NONE, +/* 28 */PFM_CMD_NONE, +/* 29 */PFM_CMD_NONE, +/* 30 */PFM_CMD_NONE, +/* 31 */PFM_CMD_NONE, +/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL), +/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL) +}; +#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t)) + +static int +pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) +{ + struct task_struct *task; + int state, old_state; + +recheck: + state = ctx->ctx_state; + task = ctx->ctx_task; + + if (task == NULL) { + DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state)); + return 0; + } + + DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", + ctx->ctx_fd, + state, + task->pid, + task->state, PFM_CMD_STOPPED(cmd))); + + /* + * self-monitoring always ok. + * + * for system-wide the caller can either be the creator of the + * context (to one to which the context is attached to) OR + * a task running on the same CPU as the session. + */ + if (task == current || ctx->ctx_fl_system) return 0; + + /* + * if context is UNLOADED we are safe to go + */ + if (state == PFM_CTX_UNLOADED) return 0; + + /* + * no command can operate on a zombie context + */ + if (state == PFM_CTX_ZOMBIE) { + DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); + return -EINVAL; + } + + /* + * context is LOADED or MASKED. Some commands may need to have + * the task stopped. + * + * We could lift this restriction for UP but it would mean that + * the user has no guarantee the task would not run between + * two successive calls to perfmonctl(). That's probably OK. + * If this user wants to ensure the task does not run, then + * the task must be stopped. + */ + if (PFM_CMD_STOPPED(cmd)) { + if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { + DPRINT(("[%d] task not in stopped state\n", task->pid)); + return -EBUSY; + } + /* + * task is now stopped, wait for ctxsw out + * + * This is an interesting point in the code. + * We need to unprotect the context because + * the pfm_save_regs() routines needs to grab + * the same lock. There are danger in doing + * this because it leaves a window open for + * another task to get access to the context + * and possibly change its state. The one thing + * that is not possible is for the context to disappear + * because we are protected by the VFS layer, i.e., + * get_fd()/put_fd(). + */ + old_state = state; + + UNPROTECT_CTX(ctx, flags); + + wait_task_inactive(task); + + PROTECT_CTX(ctx, flags); + + /* + * we must recheck to verify if state has changed + */ + if (ctx->ctx_state != old_state) { + DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); + goto recheck; + } + } + return 0; +} + +/* + * system-call entry point (must return long) + */ +asmlinkage long +sys_perfmonctl (int fd, int cmd, void __user *arg, int count) +{ + struct file *file = NULL; + pfm_context_t *ctx = NULL; + unsigned long flags = 0UL; + void *args_k = NULL; + long ret; /* will expand int return types */ + size_t base_sz, sz, xtra_sz = 0; + int narg, completed_args = 0, call_made = 0, cmd_flags; + int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + int (*getsize)(void *arg, size_t *sz); +#define PFM_MAX_ARGSIZE 4096 + + /* + * reject any call if perfmon was disabled at initialization + */ + if (unlikely(pmu_conf == NULL)) return -ENOSYS; + + if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { + DPRINT(("invalid cmd=%d\n", cmd)); + return -EINVAL; + } + + func = pfm_cmd_tab[cmd].cmd_func; + narg = pfm_cmd_tab[cmd].cmd_narg; + base_sz = pfm_cmd_tab[cmd].cmd_argsize; + getsize = pfm_cmd_tab[cmd].cmd_getsize; + cmd_flags = pfm_cmd_tab[cmd].cmd_flags; + + if (unlikely(func == NULL)) { + DPRINT(("invalid cmd=%d\n", cmd)); + return -EINVAL; + } + + DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", + PFM_CMD_NAME(cmd), + cmd, + narg, + base_sz, + count)); + + /* + * check if number of arguments matches what the command expects + */ + if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) + return -EINVAL; + +restart_args: + sz = xtra_sz + base_sz*count; + /* + * limit abuse to min page size + */ + if (unlikely(sz > PFM_MAX_ARGSIZE)) { + printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz); + return -E2BIG; + } + + /* + * allocate default-sized argument buffer + */ + if (likely(count && args_k == NULL)) { + args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); + if (args_k == NULL) return -ENOMEM; + } + + ret = -EFAULT; + + /* + * copy arguments + * + * assume sz = 0 for command without parameters + */ + if (sz && copy_from_user(args_k, arg, sz)) { + DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); + goto error_args; + } + + /* + * check if command supports extra parameters + */ + if (completed_args == 0 && getsize) { + /* + * get extra parameters size (based on main argument) + */ + ret = (*getsize)(args_k, &xtra_sz); + if (ret) goto error_args; + + completed_args = 1; + + DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); + + /* retry if necessary */ + if (likely(xtra_sz)) goto restart_args; + } + + if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; + + ret = -EBADF; + + file = fget(fd); + if (unlikely(file == NULL)) { + DPRINT(("invalid fd %d\n", fd)); + goto error_args; + } + if (unlikely(PFM_IS_FILE(file) == 0)) { + DPRINT(("fd %d not related to perfmon\n", fd)); + goto error_args; + } + + ctx = (pfm_context_t *)file->private_data; + if (unlikely(ctx == NULL)) { + DPRINT(("no context for fd %d\n", fd)); + goto error_args; + } + prefetch(&ctx->ctx_state); + + PROTECT_CTX(ctx, flags); + + /* + * check task is stopped + */ + ret = pfm_check_task_state(ctx, cmd, flags); + if (unlikely(ret)) goto abort_locked; + +skip_fd: + ret = (*func)(ctx, args_k, count, ia64_task_regs(current)); + + call_made = 1; + +abort_locked: + if (likely(ctx)) { + DPRINT(("context unlocked\n")); + UNPROTECT_CTX(ctx, flags); + fput(file); + } + + /* copy argument back to user, if needed */ + if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; + +error_args: + if (args_k) kfree(args_k); + + DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); + + return ret; +} + +static void +pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) +{ + pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; + pfm_ovfl_ctrl_t rst_ctrl; + int state; + int ret = 0; + + state = ctx->ctx_state; + /* + * Unlock sampling buffer and reset index atomically + * XXX: not really needed when blocking + */ + if (CTX_HAS_SMPL(ctx)) { + + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 0; + + if (state == PFM_CTX_LOADED) + ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + else + ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + } else { + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 1; + } + + if (ret == 0) { + if (rst_ctrl.bits.reset_ovfl_pmds) { + pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); + } + if (rst_ctrl.bits.mask_monitoring == 0) { + DPRINT(("resuming monitoring\n")); + if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); + } else { + DPRINT(("stopping monitoring\n")); + //pfm_stop_monitoring(current, regs); + } + ctx->ctx_state = PFM_CTX_LOADED; + } +} + +/* + * context MUST BE LOCKED when calling + * can only be called for current + */ +static void +pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) +{ + int ret; + + DPRINT(("entering for [%d]\n", current->pid)); + + ret = pfm_context_unload(ctx, NULL, 0, regs); + if (ret) { + printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret); + } + + /* + * and wakeup controlling task, indicating we are now disconnected + */ + wake_up_interruptible(&ctx->ctx_zombieq); + + /* + * given that context is still locked, the controlling + * task will only get access when we return from + * pfm_handle_work(). + */ +} + +static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); + +void +pfm_handle_work(void) +{ + pfm_context_t *ctx; + struct pt_regs *regs; + unsigned long flags; + unsigned long ovfl_regs; + unsigned int reason; + int ret; + + ctx = PFM_GET_CTX(current); + if (ctx == NULL) { + printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid); + return; + } + + PROTECT_CTX(ctx, flags); + + PFM_SET_WORK_PENDING(current, 0); + + pfm_clear_task_notify(); + + regs = ia64_task_regs(current); + + /* + * extract reason for being here and clear + */ + reason = ctx->ctx_fl_trap_reason; + ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; + ovfl_regs = ctx->ctx_ovfl_regs[0]; + + DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); + + /* + * must be done before we check for simple-reset mode + */ + if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie; + + + //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; + if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking; + + UNPROTECT_CTX(ctx, flags); + + /* + * pfm_handle_work() is currently called with interrupts disabled. + * The down_interruptible call may sleep, therefore we + * must re-enable interrupts to avoid deadlocks. It is + * safe to do so because this function is called ONLY + * when returning to user level (PUStk=1), in which case + * there is no risk of kernel stack overflow due to deep + * interrupt nesting. + */ + BUG_ON(flags & IA64_PSR_I); + local_irq_enable(); + + DPRINT(("before block sleeping\n")); + + /* + * may go through without blocking on SMP systems + * if restart has been received already by the time we call down() + */ + ret = down_interruptible(&ctx->ctx_restart_sem); + + DPRINT(("after block sleeping ret=%d\n", ret)); + + /* + * disable interrupts to restore state we had upon entering + * this function + */ + local_irq_disable(); + + PROTECT_CTX(ctx, flags); + + /* + * we need to read the ovfl_regs only after wake-up + * because we may have had pfm_write_pmds() in between + * and that can changed PMD values and therefore + * ovfl_regs is reset for these new PMD values. + */ + ovfl_regs = ctx->ctx_ovfl_regs[0]; + + if (ctx->ctx_fl_going_zombie) { +do_zombie: + DPRINT(("context is zombie, bailing out\n")); + pfm_context_force_terminate(ctx, regs); + goto nothing_to_do; + } + /* + * in case of interruption of down() we don't restart anything + */ + if (ret < 0) goto nothing_to_do; + +skip_blocking: + pfm_resume_after_ovfl(ctx, ovfl_regs, regs); + ctx->ctx_ovfl_regs[0] = 0UL; + +nothing_to_do: + + UNPROTECT_CTX(ctx, flags); +} + +static int +pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) +{ + if (ctx->ctx_state == PFM_CTX_ZOMBIE) { + DPRINT(("ignoring overflow notification, owner is zombie\n")); + return 0; + } + + DPRINT(("waking up somebody\n")); + + if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); + + /* + * safe, we are not in intr handler, nor in ctxsw when + * we come here + */ + kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); + + return 0; +} + +static int +pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) +{ + pfm_msg_t *msg = NULL; + + if (ctx->ctx_fl_no_msg == 0) { + msg = pfm_get_new_msg(ctx); + if (msg == NULL) { + printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); + return -1; + } + + msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; + msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; + msg->pfm_ovfl_msg.msg_active_set = 0; + msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; + msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; + msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; + msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; + msg->pfm_ovfl_msg.msg_tstamp = 0UL; + } + + DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", + msg, + ctx->ctx_fl_no_msg, + ctx->ctx_fd, + ovfl_pmds)); + + return pfm_notify_user(ctx, msg); +} + +static int +pfm_end_notify_user(pfm_context_t *ctx) +{ + pfm_msg_t *msg; + + msg = pfm_get_new_msg(ctx); + if (msg == NULL) { + printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); + return -1; + } + /* no leak */ + memset(msg, 0, sizeof(*msg)); + + msg->pfm_end_msg.msg_type = PFM_MSG_END; + msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; + msg->pfm_ovfl_msg.msg_tstamp = 0UL; + + DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", + msg, + ctx->ctx_fl_no_msg, + ctx->ctx_fd)); + + return pfm_notify_user(ctx, msg); +} + +/* + * main overflow processing routine. + * it can be called from the interrupt path or explicitely during the context switch code + */ +static void +pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs) +{ + pfm_ovfl_arg_t *ovfl_arg; + unsigned long mask; + unsigned long old_val, ovfl_val, new_val; + unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; + unsigned long tstamp; + pfm_ovfl_ctrl_t ovfl_ctrl; + unsigned int i, has_smpl; + int must_notify = 0; + + if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; + + /* + * sanity test. Should never happen + */ + if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; + + tstamp = ia64_get_itc(); + mask = pmc0 >> PMU_FIRST_COUNTER; + ovfl_val = pmu_conf->ovfl_val; + has_smpl = CTX_HAS_SMPL(ctx); + + DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " + "used_pmds=0x%lx\n", + pmc0, + task ? task->pid: -1, + (regs ? regs->cr_iip : 0), + CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", + ctx->ctx_used_pmds[0])); + + + /* + * first we update the virtual counters + * assume there was a prior ia64_srlz_d() issued + */ + for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { + + /* skip pmd which did not overflow */ + if ((mask & 0x1) == 0) continue; + + /* + * Note that the pmd is not necessarily 0 at this point as qualified events + * may have happened before the PMU was frozen. The residual count is not + * taken into consideration here but will be with any read of the pmd via + * pfm_read_pmds(). + */ + old_val = new_val = ctx->ctx_pmds[i].val; + new_val += 1 + ovfl_val; + ctx->ctx_pmds[i].val = new_val; + + /* + * check for overflow condition + */ + if (likely(old_val > new_val)) { + ovfl_pmds |= 1UL << i; + if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; + } + + DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", + i, + new_val, + old_val, + ia64_get_pmd(i) & ovfl_val, + ovfl_pmds, + ovfl_notify)); + } + + /* + * there was no 64-bit overflow, nothing else to do + */ + if (ovfl_pmds == 0UL) return; + + /* + * reset all control bits + */ + ovfl_ctrl.val = 0; + reset_pmds = 0UL; + + /* + * if a sampling format module exists, then we "cache" the overflow by + * calling the module's handler() routine. + */ + if (has_smpl) { + unsigned long start_cycles, end_cycles; + unsigned long pmd_mask; + int j, k, ret = 0; + int this_cpu = smp_processor_id(); + + pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; + ovfl_arg = &ctx->ctx_ovfl_arg; + + prefetch(ctx->ctx_smpl_hdr); + + for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { + + mask = 1UL << i; + + if ((pmd_mask & 0x1) == 0) continue; + + ovfl_arg->ovfl_pmd = (unsigned char )i; + ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; + ovfl_arg->active_set = 0; + ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ + ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; + + ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; + ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; + ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; + + /* + * copy values of pmds of interest. Sampling format may copy them + * into sampling buffer. + */ + if (smpl_pmds) { + for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { + if ((smpl_pmds & 0x1) == 0) continue; + ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); + DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); + } + } + + pfm_stats[this_cpu].pfm_smpl_handler_calls++; + + start_cycles = ia64_get_itc(); + + /* + * call custom buffer format record (handler) routine + */ + ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); + + end_cycles = ia64_get_itc(); + + /* + * For those controls, we take the union because they have + * an all or nothing behavior. + */ + ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; + ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; + ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; + /* + * build the bitmask of pmds to reset now + */ + if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; + + pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; + } + /* + * when the module cannot handle the rest of the overflows, we abort right here + */ + if (ret && pmd_mask) { + DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", + pmd_mask<<PMU_FIRST_COUNTER)); + } + /* + * remove the pmds we reset now from the set of pmds to reset in pfm_restart() + */ + ovfl_pmds &= ~reset_pmds; + } else { + /* + * when no sampling module is used, then the default + * is to notify on overflow if requested by user + */ + ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0; + ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0; + ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */ + ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1; + /* + * if needed, we reset all overflowed pmds + */ + if (ovfl_notify == 0) reset_pmds = ovfl_pmds; + } + + DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); + + /* + * reset the requested PMD registers using the short reset values + */ + if (reset_pmds) { + unsigned long bm = reset_pmds; + pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET); + } + + if (ovfl_notify && ovfl_ctrl.bits.notify_user) { + /* + * keep track of what to reset when unblocking + */ + ctx->ctx_ovfl_regs[0] = ovfl_pmds; + + /* + * check for blocking context + */ + if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { + + ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; + + /* + * set the perfmon specific checking pending work for the task + */ + PFM_SET_WORK_PENDING(task, 1); + + /* + * when coming from ctxsw, current still points to the + * previous task, therefore we must work with task and not current. + */ + pfm_set_task_notify(task); + } + /* + * defer until state is changed (shorten spin window). the context is locked + * anyway, so the signal receiver would come spin for nothing. + */ + must_notify = 1; + } + + DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", + GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1, + PFM_GET_WORK_PENDING(task), + ctx->ctx_fl_trap_reason, + ovfl_pmds, + ovfl_notify, + ovfl_ctrl.bits.mask_monitoring ? 1 : 0)); + /* + * in case monitoring must be stopped, we toggle the psr bits + */ + if (ovfl_ctrl.bits.mask_monitoring) { + pfm_mask_monitoring(task); + ctx->ctx_state = PFM_CTX_MASKED; + ctx->ctx_fl_can_restart = 1; + } + + /* + * send notification now + */ + if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify); + + return; + +sanity_check: + printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", + smp_processor_id(), + task ? task->pid : -1, + pmc0); + return; + +stop_monitoring: + /* + * in SMP, zombie context is never restored but reclaimed in pfm_load_regs(). + * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can + * come here as zombie only if the task is the current task. In which case, we + * can access the PMU hardware directly. + * + * Note that zombies do have PM_VALID set. So here we do the minimal. + * + * In case the context was zombified it could not be reclaimed at the time + * the monitoring program exited. At this point, the PMU reservation has been + * returned, the sampiing buffer has been freed. We must convert this call + * into a spurious interrupt. However, we must also avoid infinite overflows + * by stopping monitoring for this task. We can only come here for a per-task + * context. All we need to do is to stop monitoring using the psr bits which + * are always task private. By re-enabling secure montioring, we ensure that + * the monitored task will not be able to re-activate monitoring. + * The task will eventually be context switched out, at which point the context + * will be reclaimed (that includes releasing ownership of the PMU). + * + * So there might be a window of time where the number of per-task session is zero + * yet one PMU might have a owner and get at most one overflow interrupt for a zombie + * context. This is safe because if a per-task session comes in, it will push this one + * out and by the virtue on pfm_save_regs(), this one will disappear. If a system wide + * session is force on that CPU, given that we use task pinning, pfm_save_regs() will + * also push our zombie context out. + * + * Overall pretty hairy stuff.... + */ + DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1)); + pfm_clear_psr_up(); + ia64_psr(regs)->up = 0; + ia64_psr(regs)->sp = 1; + return; +} + +static int +pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs) +{ + struct task_struct *task; + pfm_context_t *ctx; + unsigned long flags; + u64 pmc0; + int this_cpu = smp_processor_id(); + int retval = 0; + + pfm_stats[this_cpu].pfm_ovfl_intr_count++; + + /* + * srlz.d done before arriving here + */ + pmc0 = ia64_get_pmc(0); + + task = GET_PMU_OWNER(); + ctx = GET_PMU_CTX(); + + /* + * if we have some pending bits set + * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 + */ + if (PMC0_HAS_OVFL(pmc0) && task) { + /* + * we assume that pmc0.fr is always set here + */ + + /* sanity check */ + if (!ctx) goto report_spurious1; + + if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) + goto report_spurious2; + + PROTECT_CTX_NOPRINT(ctx, flags); + + pfm_overflow_handler(task, ctx, pmc0, regs); + + UNPROTECT_CTX_NOPRINT(ctx, flags); + + } else { + pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; + retval = -1; + } + /* + * keep it unfrozen at all times + */ + pfm_unfreeze_pmu(); + + return retval; + +report_spurious1: + printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", + this_cpu, task->pid); + pfm_unfreeze_pmu(); + return -1; +report_spurious2: + printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", + this_cpu, + task->pid); + pfm_unfreeze_pmu(); + return -1; +} + +static irqreturn_t +pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) +{ + unsigned long start_cycles, total_cycles; + unsigned long min, max; + int this_cpu; + int ret; + + this_cpu = get_cpu(); + min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; + max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; + + start_cycles = ia64_get_itc(); + + ret = pfm_do_interrupt_handler(irq, arg, regs); + + total_cycles = ia64_get_itc(); + + /* + * don't measure spurious interrupts + */ + if (likely(ret == 0)) { + total_cycles -= start_cycles; + + if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; + if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; + + pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + } + put_cpu_no_resched(); + return IRQ_HANDLED; +} + +/* + * /proc/perfmon interface, for debug only + */ + +#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1) + +static void * +pfm_proc_start(struct seq_file *m, loff_t *pos) +{ + if (*pos == 0) { + return PFM_PROC_SHOW_HEADER; + } + + while (*pos <= NR_CPUS) { + if (cpu_online(*pos - 1)) { + return (void *)*pos; + } + ++*pos; + } + return NULL; +} + +static void * +pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return pfm_proc_start(m, pos); +} + +static void +pfm_proc_stop(struct seq_file *m, void *v) +{ +} + +static void +pfm_proc_show_header(struct seq_file *m) +{ + struct list_head * pos; + pfm_buffer_fmt_t * entry; + unsigned long flags; + + seq_printf(m, + "perfmon version : %u.%u\n" + "model : %s\n" + "fastctxsw : %s\n" + "expert mode : %s\n" + "ovfl_mask : 0x%lx\n" + "PMU flags : 0x%x\n", + PFM_VERSION_MAJ, PFM_VERSION_MIN, + pmu_conf->pmu_name, + pfm_sysctl.fastctxsw > 0 ? "Yes": "No", + pfm_sysctl.expert_mode > 0 ? "Yes": "No", + pmu_conf->ovfl_val, + pmu_conf->flags); + + LOCK_PFS(flags); + + seq_printf(m, + "proc_sessions : %u\n" + "sys_sessions : %u\n" + "sys_use_dbregs : %u\n" + "ptrace_use_dbregs : %u\n", + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_sys_use_dbregs, + pfm_sessions.pfs_ptrace_use_dbregs); + + UNLOCK_PFS(flags); + + spin_lock(&pfm_buffer_fmt_lock); + + list_for_each(pos, &pfm_buffer_fmt_list) { + entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); + seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n", + entry->fmt_uuid[0], + entry->fmt_uuid[1], + entry->fmt_uuid[2], + entry->fmt_uuid[3], + entry->fmt_uuid[4], + entry->fmt_uuid[5], + entry->fmt_uuid[6], + entry->fmt_uuid[7], + entry->fmt_uuid[8], + entry->fmt_uuid[9], + entry->fmt_uuid[10], + entry->fmt_uuid[11], + entry->fmt_uuid[12], + entry->fmt_uuid[13], + entry->fmt_uuid[14], + entry->fmt_uuid[15], + entry->fmt_name); + } + spin_unlock(&pfm_buffer_fmt_lock); + +} + +static int +pfm_proc_show(struct seq_file *m, void *v) +{ + unsigned long psr; + unsigned int i; + int cpu; + + if (v == PFM_PROC_SHOW_HEADER) { + pfm_proc_show_header(m); + return 0; + } + + /* show info for CPU (v - 1) */ + + cpu = (long)v - 1; + seq_printf(m, + "CPU%-2d overflow intrs : %lu\n" + "CPU%-2d overflow cycles : %lu\n" + "CPU%-2d overflow min : %lu\n" + "CPU%-2d overflow max : %lu\n" + "CPU%-2d smpl handler calls : %lu\n" + "CPU%-2d smpl handler cycles : %lu\n" + "CPU%-2d spurious intrs : %lu\n" + "CPU%-2d replay intrs : %lu\n" + "CPU%-2d syst_wide : %d\n" + "CPU%-2d dcr_pp : %d\n" + "CPU%-2d exclude idle : %d\n" + "CPU%-2d owner : %d\n" + "CPU%-2d context : %p\n" + "CPU%-2d activations : %lu\n", + cpu, pfm_stats[cpu].pfm_ovfl_intr_count, + cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, + cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, + cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, + cpu, pfm_stats[cpu].pfm_smpl_handler_calls, + cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, + cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, + cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, + cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, + cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, + cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, + cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, + cpu, pfm_get_cpu_data(pmu_ctx, cpu), + cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); + + if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { + + psr = pfm_get_psr(); + + ia64_srlz_d(); + + seq_printf(m, + "CPU%-2d psr : 0x%lx\n" + "CPU%-2d pmc0 : 0x%lx\n", + cpu, psr, + cpu, ia64_get_pmc(0)); + + for (i=0; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_COUNTING(i) == 0) continue; + seq_printf(m, + "CPU%-2d pmc%u : 0x%lx\n" + "CPU%-2d pmd%u : 0x%lx\n", + cpu, i, ia64_get_pmc(i), + cpu, i, ia64_get_pmd(i)); + } + } + return 0; +} + +struct seq_operations pfm_seq_ops = { + .start = pfm_proc_start, + .next = pfm_proc_next, + .stop = pfm_proc_stop, + .show = pfm_proc_show +}; + +static int +pfm_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &pfm_seq_ops); +} + + +/* + * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens + * during pfm_enable() hence before pfm_start(). We cannot assume monitoring + * is active or inactive based on mode. We must rely on the value in + * local_cpu_data->pfm_syst_info + */ +void +pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) +{ + struct pt_regs *regs; + unsigned long dcr; + unsigned long dcr_pp; + + dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; + + /* + * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 + * on every CPU, so we can rely on the pid to identify the idle task. + */ + if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { + regs = ia64_task_regs(task); + ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; + return; + } + /* + * if monitoring has started + */ + if (dcr_pp) { + dcr = ia64_getreg(_IA64_REG_CR_DCR); + /* + * context switching in? + */ + if (is_ctxswin) { + /* mask monitoring for the idle task */ + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); + pfm_clear_psr_pp(); + ia64_srlz_i(); + return; + } + /* + * context switching out + * restore monitoring for next task + * + * Due to inlining this odd if-then-else construction generates + * better code. + */ + ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); + pfm_set_psr_pp(); + ia64_srlz_i(); + } +} + +#ifdef CONFIG_SMP + +static void +pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) +{ + struct task_struct *task = ctx->ctx_task; + + ia64_psr(regs)->up = 0; + ia64_psr(regs)->sp = 1; + + if (GET_PMU_OWNER() == task) { + DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid)); + SET_PMU_OWNER(NULL, NULL); + } + + /* + * disconnect the task from the context and vice-versa + */ + PFM_SET_WORK_PENDING(task, 0); + + task->thread.pfm_context = NULL; + task->thread.flags &= ~IA64_THREAD_PM_VALID; + + DPRINT(("force cleanup for [%d]\n", task->pid)); +} + + +/* + * in 2.6, interrupts are masked when we come here and the runqueue lock is held + */ +void +pfm_save_regs(struct task_struct *task) +{ + pfm_context_t *ctx; + struct thread_struct *t; + unsigned long flags; + u64 psr; + + + ctx = PFM_GET_CTX(task); + if (ctx == NULL) return; + t = &task->thread; + + /* + * we always come here with interrupts ALREADY disabled by + * the scheduler. So we simply need to protect against concurrent + * access, not CPU concurrency. + */ + flags = pfm_protect_ctx_ctxsw(ctx); + + if (ctx->ctx_state == PFM_CTX_ZOMBIE) { + struct pt_regs *regs = ia64_task_regs(task); + + pfm_clear_psr_up(); + + pfm_force_cleanup(ctx, regs); + + BUG_ON(ctx->ctx_smpl_hdr); + + pfm_unprotect_ctx_ctxsw(ctx, flags); + + pfm_context_free(ctx); + return; + } + + /* + * save current PSR: needed because we modify it + */ + ia64_srlz_d(); + psr = pfm_get_psr(); + + BUG_ON(psr & (IA64_PSR_I)); + + /* + * stop monitoring: + * This is the last instruction which may generate an overflow + * + * We do not need to set psr.sp because, it is irrelevant in kernel. + * It will be restored from ipsr when going back to user level + */ + pfm_clear_psr_up(); + + /* + * keep a copy of psr.up (for reload) + */ + ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; + + /* + * release ownership of this PMU. + * PM interrupts are masked, so nothing + * can happen. + */ + SET_PMU_OWNER(NULL, NULL); + + /* + * we systematically save the PMD as we have no + * guarantee we will be schedule at that same + * CPU again. + */ + pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + + /* + * save pmc0 ia64_srlz_d() done in pfm_save_pmds() + * we will need it on the restore path to check + * for pending overflow. + */ + t->pmcs[0] = ia64_get_pmc(0); + + /* + * unfreeze PMU if had pending overflows + */ + if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + + /* + * finally, allow context access. + * interrupts will still be masked after this call. + */ + pfm_unprotect_ctx_ctxsw(ctx, flags); +} + +#else /* !CONFIG_SMP */ +void +pfm_save_regs(struct task_struct *task) +{ + pfm_context_t *ctx; + u64 psr; + + ctx = PFM_GET_CTX(task); + if (ctx == NULL) return; + + /* + * save current PSR: needed because we modify it + */ + psr = pfm_get_psr(); + + BUG_ON(psr & (IA64_PSR_I)); + + /* + * stop monitoring: + * This is the last instruction which may generate an overflow + * + * We do not need to set psr.sp because, it is irrelevant in kernel. + * It will be restored from ipsr when going back to user level + */ + pfm_clear_psr_up(); + + /* + * keep a copy of psr.up (for reload) + */ + ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; +} + +static void +pfm_lazy_save_regs (struct task_struct *task) +{ + pfm_context_t *ctx; + struct thread_struct *t; + unsigned long flags; + + { u64 psr = pfm_get_psr(); + BUG_ON(psr & IA64_PSR_UP); + } + + ctx = PFM_GET_CTX(task); + t = &task->thread; + + /* + * we need to mask PMU overflow here to + * make sure that we maintain pmc0 until + * we save it. overflow interrupts are + * treated as spurious if there is no + * owner. + * + * XXX: I don't think this is necessary + */ + PROTECT_CTX(ctx,flags); + + /* + * release ownership of this PMU. + * must be done before we save the registers. + * + * after this call any PMU interrupt is treated + * as spurious. + */ + SET_PMU_OWNER(NULL, NULL); + + /* + * save all the pmds we use + */ + pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + + /* + * save pmc0 ia64_srlz_d() done in pfm_save_pmds() + * it is needed to check for pended overflow + * on the restore path + */ + t->pmcs[0] = ia64_get_pmc(0); + + /* + * unfreeze PMU if had pending overflows + */ + if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + + /* + * now get can unmask PMU interrupts, they will + * be treated as purely spurious and we will not + * lose any information + */ + UNPROTECT_CTX(ctx,flags); +} +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_SMP +/* + * in 2.6, interrupts are masked when we come here and the runqueue lock is held + */ +void +pfm_load_regs (struct task_struct *task) +{ + pfm_context_t *ctx; + struct thread_struct *t; + unsigned long pmc_mask = 0UL, pmd_mask = 0UL; + unsigned long flags; + u64 psr, psr_up; + int need_irq_resend; + + ctx = PFM_GET_CTX(task); + if (unlikely(ctx == NULL)) return; + + BUG_ON(GET_PMU_OWNER()); + + t = &task->thread; + /* + * possible on unload + */ + if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return; + + /* + * we always come here with interrupts ALREADY disabled by + * the scheduler. So we simply need to protect against concurrent + * access, not CPU concurrency. + */ + flags = pfm_protect_ctx_ctxsw(ctx); + psr = pfm_get_psr(); + + need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; + + BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); + BUG_ON(psr & IA64_PSR_I); + + if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { + struct pt_regs *regs = ia64_task_regs(task); + + BUG_ON(ctx->ctx_smpl_hdr); + + pfm_force_cleanup(ctx, regs); + + pfm_unprotect_ctx_ctxsw(ctx, flags); + + /* + * this one (kmalloc'ed) is fine with interrupts disabled + */ + pfm_context_free(ctx); + + return; + } + + /* + * we restore ALL the debug registers to avoid picking up + * stale state. + */ + if (ctx->ctx_fl_using_dbreg) { + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); + } + /* + * retrieve saved psr.up + */ + psr_up = ctx->ctx_saved_psr_up; + + /* + * if we were the last user of the PMU on that CPU, + * then nothing to do except restore psr + */ + if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { + + /* + * retrieve partial reload masks (due to user modifications) + */ + pmc_mask = ctx->ctx_reload_pmcs[0]; + pmd_mask = ctx->ctx_reload_pmds[0]; + + } else { + /* + * To avoid leaking information to the user level when psr.sp=0, + * we must reload ALL implemented pmds (even the ones we don't use). + * In the kernel we only allow PFM_READ_PMDS on registers which + * we initialized or requested (sampling) so there is no risk there. + */ + pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; + + /* + * ALL accessible PMCs are systematically reloaded, unused registers + * get their default (from pfm_reset_pmu_state()) values to avoid picking + * up stale configuration. + * + * PMC0 is never in the mask. It is always restored separately. + */ + pmc_mask = ctx->ctx_all_pmcs[0]; + } + /* + * when context is MASKED, we will restore PMC with plm=0 + * and PMD with stale information, but that's ok, nothing + * will be captured. + * + * XXX: optimize here + */ + if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask); + if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask); + + /* + * check for pending overflow at the time the state + * was saved. + */ + if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + /* + * reload pmc0 with the overflow information + * On McKinley PMU, this will trigger a PMU interrupt + */ + ia64_set_pmc(0, t->pmcs[0]); + ia64_srlz_d(); + t->pmcs[0] = 0UL; + + /* + * will replay the PMU interrupt + */ + if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR); + + pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; + } + + /* + * we just did a reload, so we reset the partial reload fields + */ + ctx->ctx_reload_pmcs[0] = 0UL; + ctx->ctx_reload_pmds[0] = 0UL; + + SET_LAST_CPU(ctx, smp_processor_id()); + + /* + * dump activation value for this PMU + */ + INC_ACTIVATION(); + /* + * record current activation for this context + */ + SET_ACTIVATION(ctx); + + /* + * establish new ownership. + */ + SET_PMU_OWNER(task, ctx); + + /* + * restore the psr.up bit. measurement + * is active again. + * no PMU interrupt can happen at this point + * because we still have interrupts disabled. + */ + if (likely(psr_up)) pfm_set_psr_up(); + + /* + * allow concurrent access to context + */ + pfm_unprotect_ctx_ctxsw(ctx, flags); +} +#else /* !CONFIG_SMP */ +/* + * reload PMU state for UP kernels + * in 2.5 we come here with interrupts disabled + */ +void +pfm_load_regs (struct task_struct *task) +{ + struct thread_struct *t; + pfm_context_t *ctx; + struct task_struct *owner; + unsigned long pmd_mask, pmc_mask; + u64 psr, psr_up; + int need_irq_resend; + + owner = GET_PMU_OWNER(); + ctx = PFM_GET_CTX(task); + t = &task->thread; + psr = pfm_get_psr(); + + BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); + BUG_ON(psr & IA64_PSR_I); + + /* + * we restore ALL the debug registers to avoid picking up + * stale state. + * + * This must be done even when the task is still the owner + * as the registers may have been modified via ptrace() + * (not perfmon) by the previous task. + */ + if (ctx->ctx_fl_using_dbreg) { + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); + } + + /* + * retrieved saved psr.up + */ + psr_up = ctx->ctx_saved_psr_up; + need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; + + /* + * short path, our state is still there, just + * need to restore psr and we go + * + * we do not touch either PMC nor PMD. the psr is not touched + * by the overflow_handler. So we are safe w.r.t. to interrupt + * concurrency even without interrupt masking. + */ + if (likely(owner == task)) { + if (likely(psr_up)) pfm_set_psr_up(); + return; + } + + /* + * someone else is still using the PMU, first push it out and + * then we'll be able to install our stuff ! + * + * Upon return, there will be no owner for the current PMU + */ + if (owner) pfm_lazy_save_regs(owner); + + /* + * To avoid leaking information to the user level when psr.sp=0, + * we must reload ALL implemented pmds (even the ones we don't use). + * In the kernel we only allow PFM_READ_PMDS on registers which + * we initialized or requested (sampling) so there is no risk there. + */ + pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; + + /* + * ALL accessible PMCs are systematically reloaded, unused registers + * get their default (from pfm_reset_pmu_state()) values to avoid picking + * up stale configuration. + * + * PMC0 is never in the mask. It is always restored separately + */ + pmc_mask = ctx->ctx_all_pmcs[0]; + + pfm_restore_pmds(t->pmds, pmd_mask); + pfm_restore_pmcs(t->pmcs, pmc_mask); + + /* + * check for pending overflow at the time the state + * was saved. + */ + if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + /* + * reload pmc0 with the overflow information + * On McKinley PMU, this will trigger a PMU interrupt + */ + ia64_set_pmc(0, t->pmcs[0]); + ia64_srlz_d(); + + t->pmcs[0] = 0UL; + + /* + * will replay the PMU interrupt + */ + if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR); + + pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; + } + + /* + * establish new ownership. + */ + SET_PMU_OWNER(task, ctx); + + /* + * restore the psr.up bit. measurement + * is active again. + * no PMU interrupt can happen at this point + * because we still have interrupts disabled. + */ + if (likely(psr_up)) pfm_set_psr_up(); +} +#endif /* CONFIG_SMP */ + +/* + * this function assumes monitoring is stopped + */ +static void +pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) +{ + u64 pmc0; + unsigned long mask2, val, pmd_val, ovfl_val; + int i, can_access_pmu = 0; + int is_self; + + /* + * is the caller the task being monitored (or which initiated the + * session for system wide measurements) + */ + is_self = ctx->ctx_task == task ? 1 : 0; + + /* + * can access PMU is task is the owner of the PMU state on the current CPU + * or if we are running on the CPU bound to the context in system-wide mode + * (that is not necessarily the task the context is attached to in this mode). + * In system-wide we always have can_access_pmu true because a task running on an + * invalid processor is flagged earlier in the call stack (see pfm_stop). + */ + can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id()); + if (can_access_pmu) { + /* + * Mark the PMU as not owned + * This will cause the interrupt handler to do nothing in case an overflow + * interrupt was in-flight + * This also guarantees that pmc0 will contain the final state + * It virtually gives us full control on overflow processing from that point + * on. + */ + SET_PMU_OWNER(NULL, NULL); + DPRINT(("releasing ownership\n")); + + /* + * read current overflow status: + * + * we are guaranteed to read the final stable state + */ + ia64_srlz_d(); + pmc0 = ia64_get_pmc(0); /* slow */ + + /* + * reset freeze bit, overflow status information destroyed + */ + pfm_unfreeze_pmu(); + } else { + pmc0 = task->thread.pmcs[0]; + /* + * clear whatever overflow status bits there were + */ + task->thread.pmcs[0] = 0; + } + ovfl_val = pmu_conf->ovfl_val; + /* + * we save all the used pmds + * we take care of overflows for counting PMDs + * + * XXX: sampling situation is not taken into account here + */ + mask2 = ctx->ctx_used_pmds[0]; + + DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); + + for (i = 0; mask2; i++, mask2>>=1) { + + /* skip non used pmds */ + if ((mask2 & 0x1) == 0) continue; + + /* + * can access PMU always true in system wide mode + */ + val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i]; + + if (PMD_IS_COUNTING(i)) { + DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", + task->pid, + i, + ctx->ctx_pmds[i].val, + val & ovfl_val)); + + /* + * we rebuild the full 64 bit value of the counter + */ + val = ctx->ctx_pmds[i].val + (val & ovfl_val); + + /* + * now everything is in ctx_pmds[] and we need + * to clear the saved context from save_regs() such that + * pfm_read_pmds() gets the correct value + */ + pmd_val = 0UL; + + /* + * take care of overflow inline + */ + if (pmc0 & (1UL << i)) { + val += 1 + ovfl_val; + DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i)); + } + } + + DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); + + if (is_self) task->thread.pmds[i] = pmd_val; + + ctx->ctx_pmds[i].val = val; + } +} + +static struct irqaction perfmon_irqaction = { + .handler = pfm_interrupt_handler, + .flags = SA_INTERRUPT, + .name = "perfmon" +}; + +/* + * perfmon initialization routine, called from the initcall() table + */ +static int init_pfm_fs(void); + +static int __init +pfm_probe_pmu(void) +{ + pmu_config_t **p; + int family; + + family = local_cpu_data->family; + p = pmu_confs; + + while(*p) { + if ((*p)->probe) { + if ((*p)->probe() == 0) goto found; + } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) { + goto found; + } + p++; + } + return -1; +found: + pmu_conf = *p; + return 0; +} + +static struct file_operations pfm_proc_fops = { + .open = pfm_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +int __init +pfm_init(void) +{ + unsigned int n, n_counters, i; + + printk("perfmon: version %u.%u IRQ %u\n", + PFM_VERSION_MAJ, + PFM_VERSION_MIN, + IA64_PERFMON_VECTOR); + + if (pfm_probe_pmu()) { + printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", + local_cpu_data->family); + return -ENODEV; + } + + /* + * compute the number of implemented PMD/PMC from the + * description tables + */ + n = 0; + for (i=0; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63); + n++; + } + pmu_conf->num_pmcs = n; + + n = 0; n_counters = 0; + for (i=0; PMD_IS_LAST(i) == 0; i++) { + if (PMD_IS_IMPL(i) == 0) continue; + pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63); + n++; + if (PMD_IS_COUNTING(i)) n_counters++; + } + pmu_conf->num_pmds = n; + pmu_conf->num_counters = n_counters; + + /* + * sanity checks on the number of debug registers + */ + if (pmu_conf->use_rr_dbregs) { + if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) { + printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs); + pmu_conf = NULL; + return -1; + } + if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) { + printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_ibrs); + pmu_conf = NULL; + return -1; + } + } + + printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n", + pmu_conf->pmu_name, + pmu_conf->num_pmcs, + pmu_conf->num_pmds, + pmu_conf->num_counters, + ffz(pmu_conf->ovfl_val)); + + /* sanity check */ + if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) { + printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); + pmu_conf = NULL; + return -1; + } + + /* + * create /proc/perfmon (mostly for debugging purposes) + */ + perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL); + if (perfmon_dir == NULL) { + printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); + pmu_conf = NULL; + return -1; + } + /* + * install customized file operations for /proc/perfmon entry + */ + perfmon_dir->proc_fops = &pfm_proc_fops; + + /* + * create /proc/sys/kernel/perfmon (for debugging purposes) + */ + pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0); + + /* + * initialize all our spinlocks + */ + spin_lock_init(&pfm_sessions.pfs_lock); + spin_lock_init(&pfm_buffer_fmt_lock); + + init_pfm_fs(); + + for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL; + + return 0; +} + +__initcall(pfm_init); + +/* + * this function is called before pfm_init() + */ +void +pfm_init_percpu (void) +{ + /* + * make sure no measurement is active + * (may inherit programmed PMCs from EFI). + */ + pfm_clear_psr_pp(); + pfm_clear_psr_up(); + + /* + * we run with the PMU not frozen at all times + */ + pfm_unfreeze_pmu(); + + if (smp_processor_id() == 0) + register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); + + ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); + ia64_srlz_d(); +} + +/* + * used for debug purposes only + */ +void +dump_pmu_state(const char *from) +{ + struct task_struct *task; + struct thread_struct *t; + struct pt_regs *regs; + pfm_context_t *ctx; + unsigned long psr, dcr, info, flags; + int i, this_cpu; + + local_irq_save(flags); + + this_cpu = smp_processor_id(); + regs = ia64_task_regs(current); + info = PFM_CPUINFO_GET(); + dcr = ia64_getreg(_IA64_REG_CR_DCR); + + if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) { + local_irq_restore(flags); + return; + } + + printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", + this_cpu, + from, + current->pid, + regs->cr_iip, + current->comm); + + task = GET_PMU_OWNER(); + ctx = GET_PMU_CTX(); + + printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx); + + psr = pfm_get_psr(); + + printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", + this_cpu, + ia64_get_pmc(0), + psr & IA64_PSR_PP ? 1 : 0, + psr & IA64_PSR_UP ? 1 : 0, + dcr & IA64_DCR_PP ? 1 : 0, + info, + ia64_psr(regs)->up, + ia64_psr(regs)->pp); + + ia64_psr(regs)->up = 0; + ia64_psr(regs)->pp = 0; + + t = ¤t->thread; + + for (i=1; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]); + } + + for (i=1; PMD_IS_LAST(i) == 0; i++) { + if (PMD_IS_IMPL(i) == 0) continue; + printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]); + } + + if (ctx) { + printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n", + this_cpu, + ctx->ctx_state, + ctx->ctx_smpl_vaddr, + ctx->ctx_smpl_hdr, + ctx->ctx_msgq_head, + ctx->ctx_msgq_tail, + ctx->ctx_saved_psr_up); + } + local_irq_restore(flags); +} + +/* + * called from process.c:copy_thread(). task is new child. + */ +void +pfm_inherit(struct task_struct *task, struct pt_regs *regs) +{ + struct thread_struct *thread; + + DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid)); + + thread = &task->thread; + + /* + * cut links inherited from parent (current) + */ + thread->pfm_context = NULL; + + PFM_SET_WORK_PENDING(task, 0); + + /* + * the psr bits are already set properly in copy_threads() + */ +} +#else /* !CONFIG_PERFMON */ +asmlinkage long +sys_perfmonctl (int fd, int cmd, void *arg, int count) +{ + return -ENOSYS; +} +#endif /* CONFIG_PERFMON */ diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c new file mode 100644 index 000000000000..965d29004555 --- /dev/null +++ b/arch/ia64/kernel/perfmon_default_smpl.c @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2002-2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * + * This file implements the default sampling buffer format + * for the Linux/ia64 perfmon-2 subsystem. + */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/config.h> +#include <linux/init.h> +#include <asm/delay.h> +#include <linux/smp.h> + +#include <asm/perfmon.h> +#include <asm/perfmon_default_smpl.h> + +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); +MODULE_DESCRIPTION("perfmon default sampling format"); +MODULE_LICENSE("GPL"); + +MODULE_PARM(debug, "i"); +MODULE_PARM_DESC(debug, "debug"); + +MODULE_PARM(debug_ovfl, "i"); +MODULE_PARM_DESC(debug_ovfl, "debug ovfl"); + + +#define DEFAULT_DEBUG 1 + +#ifdef DEFAULT_DEBUG +#define DPRINT(a) \ + do { \ + if (unlikely(debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ + } while (0) + +#define DPRINT_ovfl(a) \ + do { \ + if (unlikely(debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ + } while (0) + +#else +#define DPRINT(a) +#define DPRINT_ovfl(a) +#endif + +static int debug, debug_ovfl; + +static int +default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data) +{ + pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data; + int ret = 0; + + if (data == NULL) { + DPRINT(("[%d] no argument passed\n", task->pid)); + return -EINVAL; + } + + DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu)); + + /* + * must hold at least the buffer header + one minimally sized entry + */ + if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL; + + DPRINT(("buf_size=%lu\n", arg->buf_size)); + + return ret; +} + +static int +default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size) +{ + pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; + + /* + * size has been validated in default_validate + */ + *size = arg->buf_size; + + return 0; +} + +static int +default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data) +{ + pfm_default_smpl_hdr_t *hdr; + pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; + + hdr = (pfm_default_smpl_hdr_t *)buf; + + hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION; + hdr->hdr_buf_size = arg->buf_size; + hdr->hdr_cur_offs = sizeof(*hdr); + hdr->hdr_overflows = 0UL; + hdr->hdr_count = 0UL; + + DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n", + task->pid, + buf, + hdr->hdr_buf_size, + sizeof(*hdr), + hdr->hdr_version, + hdr->hdr_cur_offs)); + + return 0; +} + +static int +default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp) +{ + pfm_default_smpl_hdr_t *hdr; + pfm_default_smpl_entry_t *ent; + void *cur, *last; + unsigned long *e, entry_size; + unsigned int npmds, i; + unsigned char ovfl_pmd; + unsigned char ovfl_notify; + + if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) { + DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg)); + return -EINVAL; + } + + hdr = (pfm_default_smpl_hdr_t *)buf; + cur = buf+hdr->hdr_cur_offs; + last = buf+hdr->hdr_buf_size; + ovfl_pmd = arg->ovfl_pmd; + ovfl_notify = arg->ovfl_notify; + + /* + * precheck for sanity + */ + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; + + npmds = hweight64(arg->smpl_pmds[0]); + + ent = (pfm_default_smpl_entry_t *)cur; + + prefetch(arg->smpl_pmds_values); + + entry_size = sizeof(*ent) + (npmds << 3); + + /* position for first pmd */ + e = (unsigned long *)(ent+1); + + hdr->hdr_count++; + + DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n", + task->pid, + hdr->hdr_count, + cur, last, + last-cur, + ovfl_pmd, + ovfl_notify, npmds)); + + /* + * current = task running at the time of the overflow. + * + * per-task mode: + * - this is ususally the task being monitored. + * Under certain conditions, it might be a different task + * + * system-wide: + * - this is not necessarily the task controlling the session + */ + ent->pid = current->pid; + ent->ovfl_pmd = ovfl_pmd; + ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val; + + /* + * where did the fault happen (includes slot number) + */ + ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3); + + ent->tstamp = stamp; + ent->cpu = smp_processor_id(); + ent->set = arg->active_set; + ent->tgid = current->tgid; + + /* + * selectively store PMDs in increasing index number + */ + if (npmds) { + unsigned long *val = arg->smpl_pmds_values; + for(i=0; i < npmds; i++) { + *e++ = *val++; + } + } + + /* + * update position for next entry + */ + hdr->hdr_cur_offs += entry_size; + cur += entry_size; + + /* + * post check to avoid losing the last sample + */ + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; + + /* + * keep same ovfl_pmds, ovfl_notify + */ + arg->ovfl_ctrl.bits.notify_user = 0; + arg->ovfl_ctrl.bits.block_task = 0; + arg->ovfl_ctrl.bits.mask_monitoring = 0; + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */ + + return 0; +full: + DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify)); + + /* + * increment number of buffer overflow. + * important to detect duplicate set of samples. + */ + hdr->hdr_overflows++; + + /* + * if no notification requested, then we saturate the buffer + */ + if (ovfl_notify == 0) { + arg->ovfl_ctrl.bits.notify_user = 0; + arg->ovfl_ctrl.bits.block_task = 0; + arg->ovfl_ctrl.bits.mask_monitoring = 1; + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; + } else { + arg->ovfl_ctrl.bits.notify_user = 1; + arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */ + arg->ovfl_ctrl.bits.mask_monitoring = 1; + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */ + } + return -1; /* we are full, sorry */ +} + +static int +default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) +{ + pfm_default_smpl_hdr_t *hdr; + + hdr = (pfm_default_smpl_hdr_t *)buf; + + hdr->hdr_count = 0UL; + hdr->hdr_cur_offs = sizeof(*hdr); + + ctrl->bits.mask_monitoring = 0; + ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */ + + return 0; +} + +static int +default_exit(struct task_struct *task, void *buf, struct pt_regs *regs) +{ + DPRINT(("[%d] exit(%p)\n", task->pid, buf)); + return 0; +} + +static pfm_buffer_fmt_t default_fmt={ + .fmt_name = "default_format", + .fmt_uuid = PFM_DEFAULT_SMPL_UUID, + .fmt_arg_size = sizeof(pfm_default_smpl_arg_t), + .fmt_validate = default_validate, + .fmt_getsize = default_get_size, + .fmt_init = default_init, + .fmt_handler = default_handler, + .fmt_restart = default_restart, + .fmt_restart_active = default_restart, + .fmt_exit = default_exit, +}; + +static int __init +pfm_default_smpl_init_module(void) +{ + int ret; + + ret = pfm_register_buffer_fmt(&default_fmt); + if (ret == 0) { + printk("perfmon_default_smpl: %s v%u.%u registered\n", + default_fmt.fmt_name, + PFM_DEFAULT_SMPL_VERSION_MAJ, + PFM_DEFAULT_SMPL_VERSION_MIN); + } else { + printk("perfmon_default_smpl: %s cannot register ret=%d\n", + default_fmt.fmt_name, + ret); + } + + return ret; +} + +static void __exit +pfm_default_smpl_cleanup_module(void) +{ + int ret; + ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid); + + printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret); +} + +module_init(pfm_default_smpl_init_module); +module_exit(pfm_default_smpl_cleanup_module); + diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h new file mode 100644 index 000000000000..67489478041e --- /dev/null +++ b/arch/ia64/kernel/perfmon_generic.h @@ -0,0 +1,45 @@ +/* + * This file contains the generic PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + */ + +static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_gen={ + .pmu_name = "Generic", + .pmu_family = 0xff, /* any */ + .ovfl_val = (1UL << 32) - 1, + .num_ibrs = 0, /* does not use */ + .num_dbrs = 0, /* does not use */ + .pmd_desc = pfm_gen_pmd_desc, + .pmc_desc = pfm_gen_pmc_desc +}; + diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h new file mode 100644 index 000000000000..d1d508a0fbd3 --- /dev/null +++ b/arch/ia64/kernel/perfmon_itanium.h @@ -0,0 +1,115 @@ +/* + * This file contains the Itanium PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + */ +static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + +static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, +/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static int +pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + int ret; + int is_loaded; + + /* sanitfy check */ + if (ctx == NULL) return -EINVAL; + + is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; + + /* + * we must clear the (instruction) debug registers if pmc13.ta bit is cleared + * before they are written (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); + if (ret) return ret; + } + + /* + * we must clear the (data) debug registers if pmc11.pt bit is cleared + * before they are written (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); + if (ret) return ret; + } + return 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_ita={ + .pmu_name = "Itanium", + .pmu_family = 0x7, + .ovfl_val = (1UL << 32) - 1, + .pmd_desc = pfm_ita_pmd_desc, + .pmc_desc = pfm_ita_pmc_desc, + .num_ibrs = 8, + .num_dbrs = 8, + .use_rr_dbregs = 1, /* debug register are use for range retrictions */ +}; + + diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h new file mode 100644 index 000000000000..9becccda2897 --- /dev/null +++ b/arch/ia64/kernel/perfmon_mckinley.h @@ -0,0 +1,187 @@ +/* + * This file contains the McKinley PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + */ +static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + +static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, +/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * PMC reserved fields must have their power-up values preserved + */ +static int +pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + unsigned long tmp1, tmp2, ival = *val; + + /* remove reserved areas from user value */ + tmp1 = ival & PMC_RSVD_MASK(cnum); + + /* get reserved fields values */ + tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); + + *val = tmp1 | tmp2; + + DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", + cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); + return 0; +} + +/* + * task can be NULL if the context is unloaded + */ +static int +pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + int ret = 0, check_case1 = 0; + unsigned long val8 = 0, val14 = 0, val13 = 0; + int is_loaded; + + /* first preserve the reserved fields */ + pfm_mck_reserved(cnum, val, regs); + + /* sanitfy check */ + if (ctx == NULL) return -EINVAL; + + is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; + + /* + * we must clear the debug registers if pmc13 has a value which enable + * memory pipeline event constraints. In this case we need to clear the + * the debug registers if they have not yet been accessed. This is required + * to avoid picking stale state. + * PMC13 is "active" if: + * one of the pmc13.cfg_dbrpXX field is different from 0x3 + * AND + * at the corresponding pmc13.ena_dbrpXX is set. + */ + DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded)); + + if (cnum == 13 && is_loaded + && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); + if (ret) return ret; + } + /* + * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled + * before they are (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); + if (ret) return ret; + + } + + switch(cnum) { + case 4: *val |= 1UL << 23; /* force power enable bit */ + break; + case 8: val8 = *val; + val13 = ctx->ctx_pmcs[13]; + val14 = ctx->ctx_pmcs[14]; + check_case1 = 1; + break; + case 13: val8 = ctx->ctx_pmcs[8]; + val13 = *val; + val14 = ctx->ctx_pmcs[14]; + check_case1 = 1; + break; + case 14: val8 = ctx->ctx_pmcs[8]; + val13 = ctx->ctx_pmcs[13]; + val14 = *val; + check_case1 = 1; + break; + } + /* check illegal configuration which can produce inconsistencies in tagging + * i-side events in L1D and L2 caches + */ + if (check_case1) { + ret = ((val13 >> 45) & 0xf) == 0 + && ((val8 & 0x1) == 0) + && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) + ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); + + if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n")); + } + + return ret ? -EINVAL : 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_mck={ + .pmu_name = "Itanium 2", + .pmu_family = 0x1f, + .flags = PFM_PMU_IRQ_RESEND, + .ovfl_val = (1UL << 47) - 1, + .pmd_desc = pfm_mck_pmd_desc, + .pmc_desc = pfm_mck_pmc_desc, + .num_ibrs = 8, + .num_dbrs = 8, + .use_rr_dbregs = 1 /* debug register are use for range retrictions */ +}; + + diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c new file mode 100644 index 000000000000..91293388dd29 --- /dev/null +++ b/arch/ia64/kernel/process.c @@ -0,0 +1,800 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ +#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */ +#include <linux/config.h> + +#include <linux/cpu.h> +#include <linux/pm.h> +#include <linux/elf.h> +#include <linux/errno.h> +#include <linux/kallsyms.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <linux/personality.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/thread_info.h> +#include <linux/unistd.h> +#include <linux/efi.h> +#include <linux/interrupt.h> +#include <linux/delay.h> + +#include <asm/cpu.h> +#include <asm/delay.h> +#include <asm/elf.h> +#include <asm/ia32.h> +#include <asm/irq.h> +#include <asm/pgalloc.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/tlbflush.h> +#include <asm/uaccess.h> +#include <asm/unwind.h> +#include <asm/user.h> + +#include "entry.h" + +#ifdef CONFIG_PERFMON +# include <asm/perfmon.h> +#endif + +#include "sigframe.h" + +void (*ia64_mark_idle)(int); +static cpumask_t cpu_idle_map; + +unsigned long boot_option_idle_override = 0; +EXPORT_SYMBOL(boot_option_idle_override); + +void +ia64_do_show_stack (struct unw_frame_info *info, void *arg) +{ + unsigned long ip, sp, bsp; + char buf[128]; /* don't make it so big that it overflows the stack! */ + + printk("\nCall Trace:\n"); + do { + unw_get_ip(info, &ip); + if (ip == 0) + break; + + unw_get_sp(info, &sp); + unw_get_bsp(info, &bsp); + snprintf(buf, sizeof(buf), + " [<%016lx>] %%s\n" + " sp=%016lx bsp=%016lx\n", + ip, sp, bsp); + print_symbol(buf, ip); + } while (unw_unwind(info) >= 0); +} + +void +show_stack (struct task_struct *task, unsigned long *sp) +{ + if (!task) + unw_init_running(ia64_do_show_stack, NULL); + else { + struct unw_frame_info info; + + unw_init_from_blocked_task(&info, task); + ia64_do_show_stack(&info, NULL); + } +} + +void +dump_stack (void) +{ + show_stack(NULL, NULL); +} + +EXPORT_SYMBOL(dump_stack); + +void +show_regs (struct pt_regs *regs) +{ + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + print_modules(); + printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm); + printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n", + regs->cr_ipsr, regs->cr_ifs, ip, print_tainted()); + print_symbol("ip is at %s\n", ip); + printk("unat: %016lx pfs : %016lx rsc : %016lx\n", + regs->ar_unat, regs->ar_pfs, regs->ar_rsc); + printk("rnat: %016lx bsps: %016lx pr : %016lx\n", + regs->ar_rnat, regs->ar_bspstore, regs->pr); + printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", + regs->loadrs, regs->ar_ccv, regs->ar_fpsr); + printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); + printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7); + printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", + regs->f6.u.bits[1], regs->f6.u.bits[0], + regs->f7.u.bits[1], regs->f7.u.bits[0]); + printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", + regs->f8.u.bits[1], regs->f8.u.bits[0], + regs->f9.u.bits[1], regs->f9.u.bits[0]); + printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", + regs->f10.u.bits[1], regs->f10.u.bits[0], + regs->f11.u.bits[1], regs->f11.u.bits[0]); + + printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3); + printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10); + printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13); + printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16); + printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19); + printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22); + printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25); + printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28); + printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31); + + if (user_mode(regs)) { + /* print the stacked registers */ + unsigned long val, *bsp, ndirty; + int i, sof, is_nat = 0; + + sof = regs->cr_ifs & 0x7f; /* size of frame */ + ndirty = (regs->loadrs >> 19); + bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty); + for (i = 0; i < sof; ++i) { + get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i)); + printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val, + ((i == sof - 1) || (i % 3) == 2) ? "\n" : " "); + } + } else + show_stack(NULL, NULL); +} + +void +do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall) +{ + if (fsys_mode(current, &scr->pt)) { + /* defer signal-handling etc. until we return to privilege-level 0. */ + if (!ia64_psr(&scr->pt)->lp) + ia64_psr(&scr->pt)->lp = 1; + return; + } + +#ifdef CONFIG_PERFMON + if (current->thread.pfm_needs_checking) + pfm_handle_work(); +#endif + + /* deal with pending signal delivery */ + if (test_thread_flag(TIF_SIGPENDING)) + ia64_do_signal(oldset, scr, in_syscall); +} + +static int pal_halt = 1; +static int __init nohalt_setup(char * str) +{ + pal_halt = 0; + return 1; +} +__setup("nohalt", nohalt_setup); + +/* + * We use this if we don't have any better idle routine.. + */ +void +default_idle (void) +{ + unsigned long pmu_active = ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_PP | IA64_PSR_UP); + + while (!need_resched()) + if (pal_halt && !pmu_active) + safe_halt(); + else + cpu_relax(); +} + +#ifdef CONFIG_HOTPLUG_CPU +/* We don't actually take CPU down, just spin without interrupts. */ +static inline void play_dead(void) +{ + extern void ia64_cpu_local_tick (void); + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + /* We shouldn't have to disable interrupts while dead, but + * some interrupts just don't seem to go away, and this makes + * it "work" for testing purposes. */ + max_xtp(); + local_irq_disable(); + /* Death loop */ + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + cpu_relax(); + + /* + * Enable timer interrupts from now on + * Not required if we put processor in SAL_BOOT_RENDEZ mode. + */ + local_flush_tlb_all(); + cpu_set(smp_processor_id(), cpu_online_map); + wmb(); + ia64_cpu_local_tick (); + local_irq_enable(); +} +#else +static inline void play_dead(void) +{ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + + +void cpu_idle_wait(void) +{ + int cpu; + cpumask_t map; + + for_each_online_cpu(cpu) + cpu_set(cpu, cpu_idle_map); + + wmb(); + do { + ssleep(1); + cpus_and(map, cpu_idle_map, cpu_online_map); + } while (!cpus_empty(map)); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + +void __attribute__((noreturn)) +cpu_idle (void) +{ + void (*mark_idle)(int) = ia64_mark_idle; + int cpu = smp_processor_id(); + + /* endless idle loop with no priority at all */ + while (1) { +#ifdef CONFIG_SMP + if (!need_resched()) + min_xtp(); +#endif + while (!need_resched()) { + void (*idle)(void); + + if (mark_idle) + (*mark_idle)(1); + + if (cpu_isset(cpu, cpu_idle_map)) + cpu_clear(cpu, cpu_idle_map); + rmb(); + idle = pm_idle; + if (!idle) + idle = default_idle; + (*idle)(); + } + + if (mark_idle) + (*mark_idle)(0); + +#ifdef CONFIG_SMP + normal_xtp(); +#endif + schedule(); + check_pgt_cache(); + if (cpu_is_offline(smp_processor_id())) + play_dead(); + } +} + +void +ia64_save_extra (struct task_struct *task) +{ +#ifdef CONFIG_PERFMON + unsigned long info; +#endif + + if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) + ia64_save_debug_regs(&task->thread.dbr[0]); + +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + pfm_save_regs(task); + + info = __get_cpu_var(pfm_syst_info); + if (info & PFM_CPUINFO_SYST_WIDE) + pfm_syst_wide_update_task(task, info, 0); +#endif + +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(ia64_task_regs(task))) + ia32_save_state(task); +#endif +} + +void +ia64_load_extra (struct task_struct *task) +{ +#ifdef CONFIG_PERFMON + unsigned long info; +#endif + + if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) + ia64_load_debug_regs(&task->thread.dbr[0]); + +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + pfm_load_regs(task); + + info = __get_cpu_var(pfm_syst_info); + if (info & PFM_CPUINFO_SYST_WIDE) + pfm_syst_wide_update_task(task, info, 1); +#endif + +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(ia64_task_regs(task))) + ia32_load_state(task); +#endif +} + +/* + * Copy the state of an ia-64 thread. + * + * We get here through the following call chain: + * + * from user-level: from kernel: + * + * <clone syscall> <some kernel call frames> + * sys_clone : + * do_fork do_fork + * copy_thread copy_thread + * + * This means that the stack layout is as follows: + * + * +---------------------+ (highest addr) + * | struct pt_regs | + * +---------------------+ + * | struct switch_stack | + * +---------------------+ + * | | + * | memory stack | + * | | <-- sp (lowest addr) + * +---------------------+ + * + * Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an + * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register, + * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the + * pt_regs structure in the parent is congruent to that of the child, modulo 512. Since + * the stack is page aligned and the page size is at least 4KB, this is always the case, + * so there is nothing to worry about. + */ +int +copy_thread (int nr, unsigned long clone_flags, + unsigned long user_stack_base, unsigned long user_stack_size, + struct task_struct *p, struct pt_regs *regs) +{ + extern char ia64_ret_from_clone, ia32_ret_from_clone; + struct switch_stack *child_stack, *stack; + unsigned long rbs, child_rbs, rbs_size; + struct pt_regs *child_ptregs; + int retval = 0; + +#ifdef CONFIG_SMP + /* + * For SMP idle threads, fork_by_hand() calls do_fork with + * NULL regs. + */ + if (!regs) + return 0; +#endif + + stack = ((struct switch_stack *) regs) - 1; + + child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1; + child_stack = (struct switch_stack *) child_ptregs - 1; + + /* copy parent's switch_stack & pt_regs to child: */ + memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack)); + + rbs = (unsigned long) current + IA64_RBS_OFFSET; + child_rbs = (unsigned long) p + IA64_RBS_OFFSET; + rbs_size = stack->ar_bspstore - rbs; + + /* copy the parent's register backing store to the child: */ + memcpy((void *) child_rbs, (void *) rbs, rbs_size); + + if (likely(user_mode(child_ptregs))) { + if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs)) + child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */ + if (user_stack_base) { + child_ptregs->r12 = user_stack_base + user_stack_size - 16; + child_ptregs->ar_bspstore = user_stack_base; + child_ptregs->ar_rnat = 0; + child_ptregs->loadrs = 0; + } + } else { + /* + * Note: we simply preserve the relative position of + * the stack pointer here. There is no need to + * allocate a scratch area here, since that will have + * been taken care of by the caller of sys_clone() + * already. + */ + child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */ + child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */ + } + child_stack->ar_bspstore = child_rbs + rbs_size; + if (IS_IA32_PROCESS(regs)) + child_stack->b0 = (unsigned long) &ia32_ret_from_clone; + else + child_stack->b0 = (unsigned long) &ia64_ret_from_clone; + + /* copy parts of thread_struct: */ + p->thread.ksp = (unsigned long) child_stack - 16; + + /* stop some PSR bits from being inherited. + * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() + * therefore we must specify them explicitly here and not include them in + * IA64_PSR_BITS_TO_CLEAR. + */ + child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); + + /* + * NOTE: The calling convention considers all floating point + * registers in the high partition (fph) to be scratch. Since + * the only way to get to this point is through a system call, + * we know that the values in fph are all dead. Hence, there + * is no need to inherit the fph state from the parent to the + * child and all we have to do is to make sure that + * IA64_THREAD_FPH_VALID is cleared in the child. + * + * XXX We could push this optimization a bit further by + * clearing IA64_THREAD_FPH_VALID on ANY system call. + * However, it's not clear this is worth doing. Also, it + * would be a slight deviation from the normal Linux system + * call behavior where scratch registers are preserved across + * system calls (unless used by the system call itself). + */ +# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \ + | IA64_THREAD_PM_VALID) +# define THREAD_FLAGS_TO_SET 0 + p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) + | THREAD_FLAGS_TO_SET); + ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */ +#ifdef CONFIG_IA32_SUPPORT + /* + * If we're cloning an IA32 task then save the IA32 extra + * state from the current task to the new task + */ + if (IS_IA32_PROCESS(ia64_task_regs(current))) { + ia32_save_state(p); + if (clone_flags & CLONE_SETTLS) + retval = ia32_clone_tls(p, child_ptregs); + + /* Copy partially mapped page list */ + if (!retval) + retval = ia32_copy_partial_page_list(p, clone_flags); + } +#endif + +#ifdef CONFIG_PERFMON + if (current->thread.pfm_context) + pfm_inherit(p, child_ptregs); +#endif + return retval; +} + +static void +do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg) +{ + unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm; + elf_greg_t *dst = arg; + struct pt_regs *pt; + char nat; + int i; + + memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */ + + if (unw_unwind_to_user(info) < 0) + return; + + unw_get_sp(info, &sp); + pt = (struct pt_regs *) (sp + 16); + + urbs_end = ia64_get_user_rbs_end(task, pt, &cfm); + + if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0) + return; + + ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end), + &ar_rnat); + + /* + * coredump format: + * r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + */ + + /* r0 is zero */ + for (i = 1, mask = (1UL << i); i < 32; ++i) { + unw_get_gr(info, i, &dst[i], &nat); + if (nat) + nat_bits |= mask; + mask <<= 1; + } + dst[32] = nat_bits; + unw_get_pr(info, &dst[33]); + + for (i = 0; i < 8; ++i) + unw_get_br(info, i, &dst[34 + i]); + + unw_get_rp(info, &ip); + dst[42] = ip + ia64_psr(pt)->ri; + dst[43] = cfm; + dst[44] = pt->cr_ipsr & IA64_PSR_UM; + + unw_get_ar(info, UNW_AR_RSC, &dst[45]); + /* + * For bsp and bspstore, unw_get_ar() would return the kernel + * addresses, but we need the user-level addresses instead: + */ + dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */ + dst[47] = pt->ar_bspstore; + dst[48] = ar_rnat; + unw_get_ar(info, UNW_AR_CCV, &dst[49]); + unw_get_ar(info, UNW_AR_UNAT, &dst[50]); + unw_get_ar(info, UNW_AR_FPSR, &dst[51]); + dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */ + unw_get_ar(info, UNW_AR_LC, &dst[53]); + unw_get_ar(info, UNW_AR_EC, &dst[54]); + unw_get_ar(info, UNW_AR_CSD, &dst[55]); + unw_get_ar(info, UNW_AR_SSD, &dst[56]); +} + +void +do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg) +{ + elf_fpreg_t *dst = arg; + int i; + + memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits */ + + if (unw_unwind_to_user(info) < 0) + return; + + /* f0 is 0.0, f1 is 1.0 */ + + for (i = 2; i < 32; ++i) + unw_get_fr(info, i, dst + i); + + ia64_flush_fph(task); + if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0) + memcpy(dst + 32, task->thread.fph, 96*16); +} + +void +do_copy_regs (struct unw_frame_info *info, void *arg) +{ + do_copy_task_regs(current, info, arg); +} + +void +do_dump_fpu (struct unw_frame_info *info, void *arg) +{ + do_dump_task_fpu(current, info, arg); +} + +int +dump_task_regs(struct task_struct *task, elf_gregset_t *regs) +{ + struct unw_frame_info tcore_info; + + if (current == task) { + unw_init_running(do_copy_regs, regs); + } else { + memset(&tcore_info, 0, sizeof(tcore_info)); + unw_init_from_blocked_task(&tcore_info, task); + do_copy_task_regs(task, &tcore_info, regs); + } + return 1; +} + +void +ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) +{ + unw_init_running(do_copy_regs, dst); +} + +int +dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst) +{ + struct unw_frame_info tcore_info; + + if (current == task) { + unw_init_running(do_dump_fpu, dst); + } else { + memset(&tcore_info, 0, sizeof(tcore_info)); + unw_init_from_blocked_task(&tcore_info, task); + do_dump_task_fpu(task, &tcore_info, dst); + } + return 1; +} + +int +dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) +{ + unw_init_running(do_dump_fpu, dst); + return 1; /* f0-f31 are always valid so we always return 1 */ +} + +long +sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp, + struct pt_regs *regs) +{ + char *fname; + int error; + + fname = getname(filename); + error = PTR_ERR(fname); + if (IS_ERR(fname)) + goto out; + error = do_execve(fname, argv, envp, regs); + putname(fname); +out: + return error; +} + +pid_t +kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) +{ + extern void start_kernel_thread (void); + unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread; + struct { + struct switch_stack sw; + struct pt_regs pt; + } regs; + + memset(®s, 0, sizeof(regs)); + regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */ + regs.pt.r1 = helper_fptr[1]; /* set GP */ + regs.pt.r9 = (unsigned long) fn; /* 1st argument */ + regs.pt.r11 = (unsigned long) arg; /* 2nd argument */ + /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read. */ + regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN; + regs.pt.cr_ifs = 1UL << 63; /* mark as valid, empty frame */ + regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR); + regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET; + regs.sw.pr = (1 << PRED_KERNEL_STACK); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s.pt, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + +/* This gets called from kernel_thread() via ia64_invoke_thread_helper(). */ +int +kernel_thread_helper (int (*fn)(void *), void *arg) +{ +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(ia64_task_regs(current))) { + /* A kernel thread is always a 64-bit process. */ + current->thread.map_base = DEFAULT_MAP_BASE; + current->thread.task_size = DEFAULT_TASK_SIZE; + ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob); + ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1); + } +#endif + return (*fn)(arg); +} + +/* + * Flush thread state. This is called when a thread does an execve(). + */ +void +flush_thread (void) +{ + /* drop floating-point and debug-register state if it exists: */ + current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); + ia64_drop_fpu(current); + if (IS_IA32_PROCESS(ia64_task_regs(current))) + ia32_drop_partial_page_list(current); +} + +/* + * Clean up state associated with current thread. This is called when + * the thread calls exit(). + */ +void +exit_thread (void) +{ + ia64_drop_fpu(current); +#ifdef CONFIG_PERFMON + /* if needed, stop monitoring and flush state to perfmon context */ + if (current->thread.pfm_context) + pfm_exit_thread(current); + + /* free debug register resources */ + if (current->thread.flags & IA64_THREAD_DBG_VALID) + pfm_release_debug_registers(current); +#endif + if (IS_IA32_PROCESS(ia64_task_regs(current))) + ia32_drop_partial_page_list(current); +} + +unsigned long +get_wchan (struct task_struct *p) +{ + struct unw_frame_info info; + unsigned long ip; + int count = 0; + + /* + * Note: p may not be a blocked task (it could be current or + * another process running on some other CPU. Rather than + * trying to determine if p is really blocked, we just assume + * it's blocked and rely on the unwind routines to fail + * gracefully if the process wasn't really blocked after all. + * --davidm 99/12/15 + */ + unw_init_from_blocked_task(&info, p); + do { + if (unw_unwind(&info) < 0) + return 0; + unw_get_ip(&info, &ip); + if (!in_sched_functions(ip)) + return ip; + } while (count++ < 16); + return 0; +} + +void +cpu_halt (void) +{ + pal_power_mgmt_info_u_t power_info[8]; + unsigned long min_power; + int i, min_power_state; + + if (ia64_pal_halt_info(power_info) != 0) + return; + + min_power_state = 0; + min_power = power_info[0].pal_power_mgmt_info_s.power_consumption; + for (i = 1; i < 8; ++i) + if (power_info[i].pal_power_mgmt_info_s.im + && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) { + min_power = power_info[i].pal_power_mgmt_info_s.power_consumption; + min_power_state = i; + } + + while (1) + ia64_pal_halt(min_power_state); +} + +void +machine_restart (char *restart_cmd) +{ + (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL); +} + +EXPORT_SYMBOL(machine_restart); + +void +machine_halt (void) +{ + cpu_halt(); +} + +EXPORT_SYMBOL(machine_halt); + +void +machine_power_off (void) +{ + if (pm_power_off) + pm_power_off(); + machine_halt(); +} + +EXPORT_SYMBOL(machine_power_off); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c new file mode 100644 index 000000000000..55789fcd7210 --- /dev/null +++ b/arch/ia64/kernel/ptrace.c @@ -0,0 +1,1627 @@ +/* + * Kernel support for the ptrace() and syscall tracing interfaces. + * + * Copyright (C) 1999-2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * Derived from the x86 and Alpha versions. + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/smp_lock.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/audit.h> + +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace_offsets.h> +#include <asm/rse.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/unwind.h> +#ifdef CONFIG_PERFMON +#include <asm/perfmon.h> +#endif + +#include "entry.h" + +/* + * Bits in the PSR that we allow ptrace() to change: + * be, up, ac, mfl, mfh (the user mask; five bits total) + * db (debug breakpoint fault; one bit) + * id (instruction debug fault disable; one bit) + * dd (data debug fault disable; one bit) + * ri (restart instruction; two bits) + * is (instruction set; one bit) + */ +#define IPSR_MASK (IA64_PSR_UM | IA64_PSR_DB | IA64_PSR_IS \ + | IA64_PSR_ID | IA64_PSR_DD | IA64_PSR_RI) + +#define MASK(nbits) ((1UL << (nbits)) - 1) /* mask with NBITS bits set */ +#define PFM_MASK MASK(38) + +#define PTRACE_DEBUG 0 + +#if PTRACE_DEBUG +# define dprintk(format...) printk(format) +# define inline +#else +# define dprintk(format...) +#endif + +/* Return TRUE if PT was created due to kernel-entry via a system-call. */ + +static inline int +in_syscall (struct pt_regs *pt) +{ + return (long) pt->cr_ifs >= 0; +} + +/* + * Collect the NaT bits for r1-r31 from scratch_unat and return a NaT + * bitset where bit i is set iff the NaT bit of register i is set. + */ +unsigned long +ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat) +{ +# define GET_BITS(first, last, unat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&pt->r##first); \ + unsigned long nbits = (last - first + 1); \ + unsigned long mask = MASK(nbits) << first; \ + unsigned long dist; \ + if (bit < first) \ + dist = 64 + bit - first; \ + else \ + dist = bit - first; \ + ia64_rotr(unat, dist) & mask; \ + }) + unsigned long val; + + /* + * Registers that are stored consecutively in struct pt_regs + * can be handled in parallel. If the register order in + * struct_pt_regs changes, this code MUST be updated. + */ + val = GET_BITS( 1, 1, scratch_unat); + val |= GET_BITS( 2, 3, scratch_unat); + val |= GET_BITS(12, 13, scratch_unat); + val |= GET_BITS(14, 14, scratch_unat); + val |= GET_BITS(15, 15, scratch_unat); + val |= GET_BITS( 8, 11, scratch_unat); + val |= GET_BITS(16, 31, scratch_unat); + return val; + +# undef GET_BITS +} + +/* + * Set the NaT bits for the scratch registers according to NAT and + * return the resulting unat (assuming the scratch registers are + * stored in PT). + */ +unsigned long +ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat) +{ +# define PUT_BITS(first, last, nat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&pt->r##first); \ + unsigned long nbits = (last - first + 1); \ + unsigned long mask = MASK(nbits) << first; \ + long dist; \ + if (bit < first) \ + dist = 64 + bit - first; \ + else \ + dist = bit - first; \ + ia64_rotl(nat & mask, dist); \ + }) + unsigned long scratch_unat; + + /* + * Registers that are stored consecutively in struct pt_regs + * can be handled in parallel. If the register order in + * struct_pt_regs changes, this code MUST be updated. + */ + scratch_unat = PUT_BITS( 1, 1, nat); + scratch_unat |= PUT_BITS( 2, 3, nat); + scratch_unat |= PUT_BITS(12, 13, nat); + scratch_unat |= PUT_BITS(14, 14, nat); + scratch_unat |= PUT_BITS(15, 15, nat); + scratch_unat |= PUT_BITS( 8, 11, nat); + scratch_unat |= PUT_BITS(16, 31, nat); + + return scratch_unat; + +# undef PUT_BITS +} + +#define IA64_MLX_TEMPLATE 0x2 +#define IA64_MOVL_OPCODE 6 + +void +ia64_increment_ip (struct pt_regs *regs) +{ + unsigned long w0, ri = ia64_psr(regs)->ri + 1; + + if (ri > 2) { + ri = 0; + regs->cr_iip += 16; + } else if (ri == 2) { + get_user(w0, (char __user *) regs->cr_iip + 0); + if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { + /* + * rfi'ing to slot 2 of an MLX bundle causes + * an illegal operation fault. We don't want + * that to happen... + */ + ri = 0; + regs->cr_iip += 16; + } + } + ia64_psr(regs)->ri = ri; +} + +void +ia64_decrement_ip (struct pt_regs *regs) +{ + unsigned long w0, ri = ia64_psr(regs)->ri - 1; + + if (ia64_psr(regs)->ri == 0) { + regs->cr_iip -= 16; + ri = 2; + get_user(w0, (char __user *) regs->cr_iip + 0); + if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { + /* + * rfi'ing to slot 2 of an MLX bundle causes + * an illegal operation fault. We don't want + * that to happen... + */ + ri = 1; + } + } + ia64_psr(regs)->ri = ri; +} + +/* + * This routine is used to read an rnat bits that are stored on the + * kernel backing store. Since, in general, the alignment of the user + * and kernel are different, this is not completely trivial. In + * essence, we need to construct the user RNAT based on up to two + * kernel RNAT values and/or the RNAT value saved in the child's + * pt_regs. + * + * user rbs + * + * +--------+ <-- lowest address + * | slot62 | + * +--------+ + * | rnat | 0x....1f8 + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_regs->ar_rnat + * +--------+ | + * | slot02 | / kernel rbs + * +--------+ +--------+ + * <- child_regs->ar_bspstore | slot61 | <-- krbs + * +- - - - + +--------+ + * | slot62 | + * +- - - - + +--------+ + * | rnat | + * +- - - - + +--------+ + * vrnat | slot00 | + * +- - - - + +--------+ + * = = + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_stack->ar_rnat + * +--------+ | + * | slot02 | / + * +--------+ + * <--- child_stack->ar_bspstore + * + * The way to think of this code is as follows: bit 0 in the user rnat + * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat + * value. The kernel rnat value holding this bit is stored in + * variable rnat0. rnat1 is loaded with the kernel rnat value that + * form the upper bits of the user rnat value. + * + * Boundary cases: + * + * o when reading the rnat "below" the first rnat slot on the kernel + * backing store, rnat0/rnat1 are set to 0 and the low order bits are + * merged in from pt->ar_rnat. + * + * o when reading the rnat "above" the last rnat slot on the kernel + * backing store, rnat0/rnat1 gets its value from sw->ar_rnat. + */ +static unsigned long +get_rnat (struct task_struct *task, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr, + unsigned long *urbs_end) +{ + unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr; + unsigned long umask = 0, mask, m; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs, nbits; + struct pt_regs *pt; + + pt = ia64_task_regs(task); + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + + if (urbs_end < urnat_addr) + nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_end); + else + nbits = 63; + mask = MASK(nbits); + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. + */ + num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be merged in from pt->ar_rnat */ + umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; + urnat = (pt->ar_rnat & umask); + mask &= ~umask; + if (!mask) + return urnat; + } + + m = mask << shift; + if (rnat0_kaddr >= kbsp) + rnat0 = sw->ar_rnat; + else if (rnat0_kaddr > krbs) + rnat0 = *rnat0_kaddr; + urnat |= (rnat0 & m) >> shift; + + m = mask >> (63 - shift); + if (rnat1_kaddr >= kbsp) + rnat1 = sw->ar_rnat; + else if (rnat1_kaddr > krbs) + rnat1 = *rnat1_kaddr; + urnat |= (rnat1 & m) << (63 - shift); + return urnat; +} + +/* + * The reverse of get_rnat. + */ +static void +put_rnat (struct task_struct *task, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat, + unsigned long *urbs_end) +{ + unsigned long rnat0 = 0, rnat1 = 0, *slot0_kaddr, umask = 0, mask, m; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs, nbits; + struct pt_regs *pt; + unsigned long cfm, *urbs_kargs; + + pt = ia64_task_regs(task); + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + + urbs_kargs = urbs_end; + if (in_syscall(pt)) { + /* + * If entered via syscall, don't allow user to set rnat bits + * for syscall args. + */ + cfm = pt->cr_ifs; + urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f)); + } + + if (urbs_kargs >= urnat_addr) + nbits = 63; + else { + if ((urnat_addr - 63) >= urbs_kargs) + return; + nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_kargs); + } + mask = MASK(nbits); + + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. + */ + num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be place in pt->ar_rnat: */ + umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; + pt->ar_rnat = (pt->ar_rnat & ~umask) | (urnat & umask); + mask &= ~umask; + if (!mask) + return; + } + /* + * Note: Section 11.1 of the EAS guarantees that bit 63 of an + * rnat slot is ignored. so we don't have to clear it here. + */ + rnat0 = (urnat << shift); + m = mask << shift; + if (rnat0_kaddr >= kbsp) + sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat0 & m); + else if (rnat0_kaddr > krbs) + *rnat0_kaddr = ((*rnat0_kaddr & ~m) | (rnat0 & m)); + + rnat1 = (urnat >> (63 - shift)); + m = mask >> (63 - shift); + if (rnat1_kaddr >= kbsp) + sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat1 & m); + else if (rnat1_kaddr > krbs) + *rnat1_kaddr = ((*rnat1_kaddr & ~m) | (rnat1 & m)); +} + +static inline int +on_kernel_rbs (unsigned long addr, unsigned long bspstore, + unsigned long urbs_end) +{ + unsigned long *rnat_addr = ia64_rse_rnat_addr((unsigned long *) + urbs_end); + return (addr >= bspstore && addr <= (unsigned long) rnat_addr); +} + +/* + * Read a word from the user-level backing store of task CHILD. ADDR + * is the user-level address to read the word from, VAL a pointer to + * the return value, and USER_BSP gives the end of the user-level + * backing store (i.e., it's the address that would be in ar.bsp after + * the user executed a "cover" instruction). + * + * This routine takes care of accessing the kernel register backing + * store for those registers that got spilled there. It also takes + * care of calculating the appropriate RNaT collection words. + */ +long +ia64_peek (struct task_struct *child, struct switch_stack *child_stack, + unsigned long user_rbs_end, unsigned long addr, long *val) +{ + unsigned long *bspstore, *krbs, regnum, *laddr, *urbs_end, *rnat_addr; + struct pt_regs *child_regs; + size_t copied; + long ret; + + urbs_end = (long *) user_rbs_end; + laddr = (unsigned long *) addr; + child_regs = ia64_task_regs(child); + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + if (on_kernel_rbs(addr, (unsigned long) bspstore, + (unsigned long) urbs_end)) + { + /* + * Attempt to read the RBS in an area that's actually + * on the kernel RBS => read the corresponding bits in + * the kernel RBS. + */ + rnat_addr = ia64_rse_rnat_addr(laddr); + ret = get_rnat(child, child_stack, krbs, rnat_addr, urbs_end); + + if (laddr == rnat_addr) { + /* return NaT collection word itself */ + *val = ret; + return 0; + } + + if (((1UL << ia64_rse_slot_num(laddr)) & ret) != 0) { + /* + * It is implementation dependent whether the + * data portion of a NaT value gets saved on a + * st8.spill or RSE spill (e.g., see EAS 2.6, + * 4.4.4.6 Register Spill and Fill). To get + * consistent behavior across all possible + * IA-64 implementations, we return zero in + * this case. + */ + *val = 0; + return 0; + } + + if (laddr < urbs_end) { + /* + * The desired word is on the kernel RBS and + * is not a NaT. + */ + regnum = ia64_rse_num_regs(bspstore, laddr); + *val = *ia64_rse_skip_regs(krbs, regnum); + return 0; + } + } + copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); + if (copied != sizeof(ret)) + return -EIO; + *val = ret; + return 0; +} + +long +ia64_poke (struct task_struct *child, struct switch_stack *child_stack, + unsigned long user_rbs_end, unsigned long addr, long val) +{ + unsigned long *bspstore, *krbs, regnum, *laddr; + unsigned long *urbs_end = (long *) user_rbs_end; + struct pt_regs *child_regs; + + laddr = (unsigned long *) addr; + child_regs = ia64_task_regs(child); + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + if (on_kernel_rbs(addr, (unsigned long) bspstore, + (unsigned long) urbs_end)) + { + /* + * Attempt to write the RBS in an area that's actually + * on the kernel RBS => write the corresponding bits + * in the kernel RBS. + */ + if (ia64_rse_is_rnat_slot(laddr)) + put_rnat(child, child_stack, krbs, laddr, val, + urbs_end); + else { + if (laddr < urbs_end) { + regnum = ia64_rse_num_regs(bspstore, laddr); + *ia64_rse_skip_regs(krbs, regnum) = val; + } + } + } else if (access_process_vm(child, addr, &val, sizeof(val), 1) + != sizeof(val)) + return -EIO; + return 0; +} + +/* + * Calculate the address of the end of the user-level register backing + * store. This is the address that would have been stored in ar.bsp + * if the user had executed a "cover" instruction right before + * entering the kernel. If CFMP is not NULL, it is used to return the + * "current frame mask" that was active at the time the kernel was + * entered. + */ +unsigned long +ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, + unsigned long *cfmp) +{ + unsigned long *krbs, *bspstore, cfm = pt->cr_ifs; + long ndirty; + + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + bspstore = (unsigned long *) pt->ar_bspstore; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); + + if (in_syscall(pt)) + ndirty += (cfm & 0x7f); + else + cfm &= ~(1UL << 63); /* clear valid bit */ + + if (cfmp) + *cfmp = cfm; + return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); +} + +/* + * Synchronize (i.e, write) the RSE backing store living in kernel + * space to the VM of the CHILD task. SW and PT are the pointers to + * the switch_stack and pt_regs structures, respectively. + * USER_RBS_END is the user-level address at which the backing store + * ends. + */ +long +ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw, + unsigned long user_rbs_start, unsigned long user_rbs_end) +{ + unsigned long addr, val; + long ret; + + /* now copy word for word from kernel rbs to user rbs: */ + for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { + ret = ia64_peek(child, sw, user_rbs_end, addr, &val); + if (ret < 0) + return ret; + if (access_process_vm(child, addr, &val, sizeof(val), 1) + != sizeof(val)) + return -EIO; + } + return 0; +} + +static inline int +thread_matches (struct task_struct *thread, unsigned long addr) +{ + unsigned long thread_rbs_end; + struct pt_regs *thread_regs; + + if (ptrace_check_attach(thread, 0) < 0) + /* + * If the thread is not in an attachable state, we'll + * ignore it. The net effect is that if ADDR happens + * to overlap with the portion of the thread's + * register backing store that is currently residing + * on the thread's kernel stack, then ptrace() may end + * up accessing a stale value. But if the thread + * isn't stopped, that's a problem anyhow, so we're + * doing as well as we can... + */ + return 0; + + thread_regs = ia64_task_regs(thread); + thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL); + if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end)) + return 0; + + return 1; /* looks like we've got a winner */ +} + +/* + * GDB apparently wants to be able to read the register-backing store + * of any thread when attached to a given process. If we are peeking + * or poking an address that happens to reside in the kernel-backing + * store of another thread, we need to attach to that thread, because + * otherwise we end up accessing stale data. + * + * task_list_lock must be read-locked before calling this routine! + */ +static struct task_struct * +find_thread_for_addr (struct task_struct *child, unsigned long addr) +{ + struct task_struct *g, *p; + struct mm_struct *mm; + int mm_users; + + if (!(mm = get_task_mm(child))) + return child; + + /* -1 because of our get_task_mm(): */ + mm_users = atomic_read(&mm->mm_users) - 1; + if (mm_users <= 1) + goto out; /* not multi-threaded */ + + /* + * First, traverse the child's thread-list. Good for scalability with + * NPTL-threads. + */ + p = child; + do { + if (thread_matches(p, addr)) { + child = p; + goto out; + } + if (mm_users-- <= 1) + goto out; + } while ((p = next_thread(p)) != child); + + do_each_thread(g, p) { + if (child->mm != mm) + continue; + + if (thread_matches(p, addr)) { + child = p; + goto out; + } + } while_each_thread(g, p); + out: + mmput(mm); + return child; +} + +/* + * Write f32-f127 back to task->thread.fph if it has been modified. + */ +inline void +ia64_flush_fph (struct task_struct *task) +{ + struct ia64_psr *psr = ia64_psr(ia64_task_regs(task)); + + if (ia64_is_local_fpu_owner(task) && psr->mfh) { + psr->mfh = 0; + task->thread.flags |= IA64_THREAD_FPH_VALID; + ia64_save_fpu(&task->thread.fph[0]); + } +} + +/* + * Sync the fph state of the task so that it can be manipulated + * through thread.fph. If necessary, f32-f127 are written back to + * thread.fph or, if the fph state hasn't been used before, thread.fph + * is cleared to zeroes. Also, access to f32-f127 is disabled to + * ensure that the task picks up the state from thread.fph when it + * executes again. + */ +void +ia64_sync_fph (struct task_struct *task) +{ + struct ia64_psr *psr = ia64_psr(ia64_task_regs(task)); + + ia64_flush_fph(task); + if (!(task->thread.flags & IA64_THREAD_FPH_VALID)) { + task->thread.flags |= IA64_THREAD_FPH_VALID; + memset(&task->thread.fph, 0, sizeof(task->thread.fph)); + } + ia64_drop_fpu(task); + psr->dfh = 1; +} + +static int +access_fr (struct unw_frame_info *info, int regnum, int hi, + unsigned long *data, int write_access) +{ + struct ia64_fpreg fpval; + int ret; + + ret = unw_get_fr(info, regnum, &fpval); + if (ret < 0) + return ret; + + if (write_access) { + fpval.u.bits[hi] = *data; + ret = unw_set_fr(info, regnum, fpval); + } else + *data = fpval.u.bits[hi]; + return ret; +} + +/* + * Change the machine-state of CHILD such that it will return via the normal + * kernel exit-path, rather than the syscall-exit path. + */ +static void +convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt, + unsigned long cfm) +{ + struct unw_frame_info info, prev_info; + unsigned long ip, pr; + + unw_init_from_blocked_task(&info, child); + while (1) { + prev_info = info; + if (unw_unwind(&info) < 0) + return; + if (unw_get_rp(&info, &ip) < 0) + return; + if (ip < FIXADDR_USER_END) + break; + } + + unw_get_pr(&prev_info, &pr); + pr &= ~(1UL << PRED_SYSCALL); + pr |= (1UL << PRED_NON_SYSCALL); + unw_set_pr(&prev_info, pr); + + pt->cr_ifs = (1UL << 63) | cfm; +} + +static int +access_nat_bits (struct task_struct *child, struct pt_regs *pt, + struct unw_frame_info *info, + unsigned long *data, int write_access) +{ + unsigned long regnum, nat_bits, scratch_unat, dummy = 0; + char nat = 0; + + if (write_access) { + nat_bits = *data; + scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits); + if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) { + dprintk("ptrace: failed to set ar.unat\n"); + return -1; + } + for (regnum = 4; regnum <= 7; ++regnum) { + unw_get_gr(info, regnum, &dummy, &nat); + unw_set_gr(info, regnum, dummy, + (nat_bits >> regnum) & 1); + } + } else { + if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) { + dprintk("ptrace: failed to read ar.unat\n"); + return -1; + } + nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat); + for (regnum = 4; regnum <= 7; ++regnum) { + unw_get_gr(info, regnum, &dummy, &nat); + nat_bits |= (nat != 0) << regnum; + } + *data = nat_bits; + } + return 0; +} + +static int +access_uarea (struct task_struct *child, unsigned long addr, + unsigned long *data, int write_access) +{ + unsigned long *ptr, regnum, urbs_end, rnat_addr, cfm; + struct switch_stack *sw; + struct pt_regs *pt; +# define pt_reg_addr(pt, reg) ((void *) \ + ((unsigned long) (pt) \ + + offsetof(struct pt_regs, reg))) + + + pt = ia64_task_regs(child); + sw = (struct switch_stack *) (child->thread.ksp + 16); + + if ((addr & 0x7) != 0) { + dprintk("ptrace: unaligned register address 0x%lx\n", addr); + return -1; + } + + if (addr < PT_F127 + 16) { + /* accessing fph */ + if (write_access) + ia64_sync_fph(child); + else + ia64_flush_fph(child); + ptr = (unsigned long *) + ((unsigned long) &child->thread.fph + addr); + } else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) { + /* scratch registers untouched by kernel (saved in pt_regs) */ + ptr = pt_reg_addr(pt, f10) + (addr - PT_F10); + } else if (addr >= PT_F12 && addr < PT_F15 + 16) { + /* + * Scratch registers untouched by kernel (saved in + * switch_stack). + */ + ptr = (unsigned long *) ((long) sw + + (addr - PT_NAT_BITS - 32)); + } else if (addr < PT_AR_LC + 8) { + /* preserved state: */ + struct unw_frame_info info; + char nat = 0; + int ret; + + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) + return -1; + + switch (addr) { + case PT_NAT_BITS: + return access_nat_bits(child, pt, &info, + data, write_access); + + case PT_R4: case PT_R5: case PT_R6: case PT_R7: + if (write_access) { + /* read NaT bit first: */ + unsigned long dummy; + + ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4, + &dummy, &nat); + if (ret < 0) + return ret; + } + return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data, + &nat, write_access); + + case PT_B1: case PT_B2: case PT_B3: + case PT_B4: case PT_B5: + return unw_access_br(&info, (addr - PT_B1)/8 + 1, data, + write_access); + + case PT_AR_EC: + return unw_access_ar(&info, UNW_AR_EC, data, + write_access); + + case PT_AR_LC: + return unw_access_ar(&info, UNW_AR_LC, data, + write_access); + + default: + if (addr >= PT_F2 && addr < PT_F5 + 16) + return access_fr(&info, (addr - PT_F2)/16 + 2, + (addr & 8) != 0, data, + write_access); + else if (addr >= PT_F16 && addr < PT_F31 + 16) + return access_fr(&info, + (addr - PT_F16)/16 + 16, + (addr & 8) != 0, + data, write_access); + else { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } + } + } else if (addr < PT_F9+16) { + /* scratch state */ + switch (addr) { + case PT_AR_BSP: + /* + * By convention, we use PT_AR_BSP to refer to + * the end of the user-level backing store. + * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof) + * to get the real value of ar.bsp at the time + * the kernel was entered. + * + * Furthermore, when changing the contents of + * PT_AR_BSP (or PT_CFM) we MUST copy any + * users-level stacked registers that are + * stored on the kernel stack back to + * user-space because otherwise, we might end + * up clobbering kernel stacked registers. + * Also, if this happens while the task is + * blocked in a system call, which convert the + * state such that the non-system-call exit + * path is used. This ensures that the proper + * state will be picked up when resuming + * execution. However, it *also* means that + * once we write PT_AR_BSP/PT_CFM, it won't be + * possible to modify the syscall arguments of + * the pending system call any longer. This + * shouldn't be an issue because modifying + * PT_AR_BSP/PT_CFM generally implies that + * we're either abandoning the pending system + * call or that we defer it's re-execution + * (e.g., due to GDB doing an inferior + * function call). + */ + urbs_end = ia64_get_user_rbs_end(child, pt, &cfm); + if (write_access) { + if (*data != urbs_end) { + if (ia64_sync_user_rbs(child, sw, + pt->ar_bspstore, + urbs_end) < 0) + return -1; + if (in_syscall(pt)) + convert_to_non_syscall(child, + pt, + cfm); + /* + * Simulate user-level write + * of ar.bsp: + */ + pt->loadrs = 0; + pt->ar_bspstore = *data; + } + } else + *data = urbs_end; + return 0; + + case PT_CFM: + urbs_end = ia64_get_user_rbs_end(child, pt, &cfm); + if (write_access) { + if (((cfm ^ *data) & PFM_MASK) != 0) { + if (ia64_sync_user_rbs(child, sw, + pt->ar_bspstore, + urbs_end) < 0) + return -1; + if (in_syscall(pt)) + convert_to_non_syscall(child, + pt, + cfm); + pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK) + | (*data & PFM_MASK)); + } + } else + *data = cfm; + return 0; + + case PT_CR_IPSR: + if (write_access) + pt->cr_ipsr = ((*data & IPSR_MASK) + | (pt->cr_ipsr & ~IPSR_MASK)); + else + *data = (pt->cr_ipsr & IPSR_MASK); + return 0; + + case PT_AR_RNAT: + urbs_end = ia64_get_user_rbs_end(child, pt, NULL); + rnat_addr = (long) ia64_rse_rnat_addr((long *) + urbs_end); + if (write_access) + return ia64_poke(child, sw, urbs_end, + rnat_addr, *data); + else + return ia64_peek(child, sw, urbs_end, + rnat_addr, data); + + case PT_R1: + ptr = pt_reg_addr(pt, r1); + break; + case PT_R2: case PT_R3: + ptr = pt_reg_addr(pt, r2) + (addr - PT_R2); + break; + case PT_R8: case PT_R9: case PT_R10: case PT_R11: + ptr = pt_reg_addr(pt, r8) + (addr - PT_R8); + break; + case PT_R12: case PT_R13: + ptr = pt_reg_addr(pt, r12) + (addr - PT_R12); + break; + case PT_R14: + ptr = pt_reg_addr(pt, r14); + break; + case PT_R15: + ptr = pt_reg_addr(pt, r15); + break; + case PT_R16: case PT_R17: case PT_R18: case PT_R19: + case PT_R20: case PT_R21: case PT_R22: case PT_R23: + case PT_R24: case PT_R25: case PT_R26: case PT_R27: + case PT_R28: case PT_R29: case PT_R30: case PT_R31: + ptr = pt_reg_addr(pt, r16) + (addr - PT_R16); + break; + case PT_B0: + ptr = pt_reg_addr(pt, b0); + break; + case PT_B6: + ptr = pt_reg_addr(pt, b6); + break; + case PT_B7: + ptr = pt_reg_addr(pt, b7); + break; + case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8: + case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8: + ptr = pt_reg_addr(pt, f6) + (addr - PT_F6); + break; + case PT_AR_BSPSTORE: + ptr = pt_reg_addr(pt, ar_bspstore); + break; + case PT_AR_RSC: + ptr = pt_reg_addr(pt, ar_rsc); + break; + case PT_AR_UNAT: + ptr = pt_reg_addr(pt, ar_unat); + break; + case PT_AR_PFS: + ptr = pt_reg_addr(pt, ar_pfs); + break; + case PT_AR_CCV: + ptr = pt_reg_addr(pt, ar_ccv); + break; + case PT_AR_FPSR: + ptr = pt_reg_addr(pt, ar_fpsr); + break; + case PT_CR_IIP: + ptr = pt_reg_addr(pt, cr_iip); + break; + case PT_PR: + ptr = pt_reg_addr(pt, pr); + break; + /* scratch register */ + + default: + /* disallow accessing anything else... */ + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } + } else if (addr <= PT_AR_SSD) { + ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD); + } else { + /* access debug registers */ + + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + ptr = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + ptr = &child->thread.dbr[0]; + } + + if (regnum >= 8) { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } +#ifdef CONFIG_PERFMON + /* + * Check if debug registers are used by perfmon. This + * test must be done once we know that we can do the + * operation, i.e. the arguments are all valid, but + * before we start modifying the state. + * + * Perfmon needs to keep a count of how many processes + * are trying to modify the debug registers for system + * wide monitoring sessions. + * + * We also include read access here, because they may + * cause the PMU-installed debug register state + * (dbr[], ibr[]) to be reset. The two arrays are also + * used by perfmon, but we do not use + * IA64_THREAD_DBG_VALID. The registers are restored + * by the PMU context switch code. + */ + if (pfm_use_debug_registers(child)) return -1; +#endif + + if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { + child->thread.flags |= IA64_THREAD_DBG_VALID; + memset(child->thread.dbr, 0, + sizeof(child->thread.dbr)); + memset(child->thread.ibr, 0, + sizeof(child->thread.ibr)); + } + + ptr += regnum; + + if ((regnum & 1) && write_access) { + /* don't let the user set kernel-level breakpoints: */ + *ptr = *data & ~(7UL << 56); + return 0; + } + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static long +ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) +{ + unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val; + struct unw_frame_info info; + struct ia64_fpreg fpval; + struct switch_stack *sw; + struct pt_regs *pt; + long ret, retval = 0; + char nat = 0; + int i; + + if (!access_ok(VERIFY_WRITE, ppr, sizeof(struct pt_all_user_regs))) + return -EIO; + + pt = ia64_task_regs(child); + sw = (struct switch_stack *) (child->thread.ksp + 16); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) { + return -EIO; + } + + if (((unsigned long) ppr & 0x7) != 0) { + dprintk("ptrace:unaligned register address %p\n", ppr); + return -EIO; + } + + if (access_uarea(child, PT_CR_IPSR, &psr, 0) < 0 + || access_uarea(child, PT_AR_EC, &ec, 0) < 0 + || access_uarea(child, PT_AR_LC, &lc, 0) < 0 + || access_uarea(child, PT_AR_RNAT, &rnat, 0) < 0 + || access_uarea(child, PT_AR_BSP, &bsp, 0) < 0 + || access_uarea(child, PT_CFM, &cfm, 0) + || access_uarea(child, PT_NAT_BITS, &nat_bits, 0)) + return -EIO; + + /* control regs */ + + retval |= __put_user(pt->cr_iip, &ppr->cr_iip); + retval |= __put_user(psr, &ppr->cr_ipsr); + + /* app regs */ + + retval |= __put_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); + retval |= __put_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]); + retval |= __put_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); + retval |= __put_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); + retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); + retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); + + retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]); + retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]); + retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]); + retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]); + retval |= __put_user(cfm, &ppr->cfm); + + /* gr1-gr3 */ + + retval |= __copy_to_user(&ppr->gr[1], &pt->r1, sizeof(long)); + retval |= __copy_to_user(&ppr->gr[2], &pt->r2, sizeof(long) *2); + + /* gr4-gr7 */ + + for (i = 4; i < 8; i++) { + if (unw_access_gr(&info, i, &val, &nat, 0) < 0) + return -EIO; + retval |= __put_user(val, &ppr->gr[i]); + } + + /* gr8-gr11 */ + + retval |= __copy_to_user(&ppr->gr[8], &pt->r8, sizeof(long) * 4); + + /* gr12-gr15 */ + + retval |= __copy_to_user(&ppr->gr[12], &pt->r12, sizeof(long) * 2); + retval |= __copy_to_user(&ppr->gr[14], &pt->r14, sizeof(long)); + retval |= __copy_to_user(&ppr->gr[15], &pt->r15, sizeof(long)); + + /* gr16-gr31 */ + + retval |= __copy_to_user(&ppr->gr[16], &pt->r16, sizeof(long) * 16); + + /* b0 */ + + retval |= __put_user(pt->b0, &ppr->br[0]); + + /* b1-b5 */ + + for (i = 1; i < 6; i++) { + if (unw_access_br(&info, i, &val, 0) < 0) + return -EIO; + __put_user(val, &ppr->br[i]); + } + + /* b6-b7 */ + + retval |= __put_user(pt->b6, &ppr->br[6]); + retval |= __put_user(pt->b7, &ppr->br[7]); + + /* fr2-fr5 */ + + for (i = 2; i < 6; i++) { + if (unw_get_fr(&info, i, &fpval) < 0) + return -EIO; + retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); + } + + /* fr6-fr11 */ + + retval |= __copy_to_user(&ppr->fr[6], &pt->f6, + sizeof(struct ia64_fpreg) * 6); + + /* fp scratch regs(12-15) */ + + retval |= __copy_to_user(&ppr->fr[12], &sw->f12, + sizeof(struct ia64_fpreg) * 4); + + /* fr16-fr31 */ + + for (i = 16; i < 32; i++) { + if (unw_get_fr(&info, i, &fpval) < 0) + return -EIO; + retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); + } + + /* fph */ + + ia64_flush_fph(child); + retval |= __copy_to_user(&ppr->fr[32], &child->thread.fph, + sizeof(ppr->fr[32]) * 96); + + /* preds */ + + retval |= __put_user(pt->pr, &ppr->pr); + + /* nat bits */ + + retval |= __put_user(nat_bits, &ppr->nat); + + ret = retval ? -EIO : 0; + return ret; +} + +static long +ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) +{ + unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val = 0; + struct unw_frame_info info; + struct switch_stack *sw; + struct ia64_fpreg fpval; + struct pt_regs *pt; + long ret, retval = 0; + int i; + + memset(&fpval, 0, sizeof(fpval)); + + if (!access_ok(VERIFY_READ, ppr, sizeof(struct pt_all_user_regs))) + return -EIO; + + pt = ia64_task_regs(child); + sw = (struct switch_stack *) (child->thread.ksp + 16); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) { + return -EIO; + } + + if (((unsigned long) ppr & 0x7) != 0) { + dprintk("ptrace:unaligned register address %p\n", ppr); + return -EIO; + } + + /* control regs */ + + retval |= __get_user(pt->cr_iip, &ppr->cr_iip); + retval |= __get_user(psr, &ppr->cr_ipsr); + + /* app regs */ + + retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); + retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]); + retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); + retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); + retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); + retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); + + retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]); + retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]); + retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]); + retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]); + retval |= __get_user(cfm, &ppr->cfm); + + /* gr1-gr3 */ + + retval |= __copy_from_user(&pt->r1, &ppr->gr[1], sizeof(long)); + retval |= __copy_from_user(&pt->r2, &ppr->gr[2], sizeof(long) * 2); + + /* gr4-gr7 */ + + for (i = 4; i < 8; i++) { + retval |= __get_user(val, &ppr->gr[i]); + /* NaT bit will be set via PT_NAT_BITS: */ + if (unw_set_gr(&info, i, val, 0) < 0) + return -EIO; + } + + /* gr8-gr11 */ + + retval |= __copy_from_user(&pt->r8, &ppr->gr[8], sizeof(long) * 4); + + /* gr12-gr15 */ + + retval |= __copy_from_user(&pt->r12, &ppr->gr[12], sizeof(long) * 2); + retval |= __copy_from_user(&pt->r14, &ppr->gr[14], sizeof(long)); + retval |= __copy_from_user(&pt->r15, &ppr->gr[15], sizeof(long)); + + /* gr16-gr31 */ + + retval |= __copy_from_user(&pt->r16, &ppr->gr[16], sizeof(long) * 16); + + /* b0 */ + + retval |= __get_user(pt->b0, &ppr->br[0]); + + /* b1-b5 */ + + for (i = 1; i < 6; i++) { + retval |= __get_user(val, &ppr->br[i]); + unw_set_br(&info, i, val); + } + + /* b6-b7 */ + + retval |= __get_user(pt->b6, &ppr->br[6]); + retval |= __get_user(pt->b7, &ppr->br[7]); + + /* fr2-fr5 */ + + for (i = 2; i < 6; i++) { + retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval)); + if (unw_set_fr(&info, i, fpval) < 0) + return -EIO; + } + + /* fr6-fr11 */ + + retval |= __copy_from_user(&pt->f6, &ppr->fr[6], + sizeof(ppr->fr[6]) * 6); + + /* fp scratch regs(12-15) */ + + retval |= __copy_from_user(&sw->f12, &ppr->fr[12], + sizeof(ppr->fr[12]) * 4); + + /* fr16-fr31 */ + + for (i = 16; i < 32; i++) { + retval |= __copy_from_user(&fpval, &ppr->fr[i], + sizeof(fpval)); + if (unw_set_fr(&info, i, fpval) < 0) + return -EIO; + } + + /* fph */ + + ia64_sync_fph(child); + retval |= __copy_from_user(&child->thread.fph, &ppr->fr[32], + sizeof(ppr->fr[32]) * 96); + + /* preds */ + + retval |= __get_user(pt->pr, &ppr->pr); + + /* nat bits */ + + retval |= __get_user(nat_bits, &ppr->nat); + + retval |= access_uarea(child, PT_CR_IPSR, &psr, 1); + retval |= access_uarea(child, PT_AR_EC, &ec, 1); + retval |= access_uarea(child, PT_AR_LC, &lc, 1); + retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1); + retval |= access_uarea(child, PT_AR_BSP, &bsp, 1); + retval |= access_uarea(child, PT_CFM, &cfm, 1); + retval |= access_uarea(child, PT_NAT_BITS, &nat_bits, 1); + + ret = retval ? -EIO : 0; + return ret; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. + */ +void +ptrace_disable (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(ia64_task_regs(child)); + + /* make sure the single step/taken-branch trap bits are not set: */ + child_psr->ss = 0; + child_psr->tb = 0; +} + +asmlinkage long +sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) +{ + struct pt_regs *pt; + unsigned long urbs_end, peek_or_poke; + struct task_struct *child; + struct switch_stack *sw; + long ret; + + lock_kernel(); + ret = -EPERM; + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->ptrace & PT_PTRACED) + goto out; + ret = security_ptrace(current->parent, current); + if (ret) + goto out; + current->ptrace |= PT_PTRACED; + ret = 0; + goto out; + } + + peek_or_poke = (request == PTRACE_PEEKTEXT + || request == PTRACE_PEEKDATA + || request == PTRACE_POKETEXT + || request == PTRACE_POKEDATA); + ret = -ESRCH; + read_lock(&tasklist_lock); + { + child = find_task_by_pid(pid); + if (child) { + if (peek_or_poke) + child = find_thread_for_addr(child, addr); + get_task_struct(child); + } + } + read_unlock(&tasklist_lock); + if (!child) + goto out; + ret = -EPERM; + if (pid == 1) /* no messing around with init! */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_tsk; + + pt = ia64_task_regs(child); + sw = (struct switch_stack *) (child->thread.ksp + 16); + + switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + /* read word at location addr */ + urbs_end = ia64_get_user_rbs_end(child, pt, NULL); + ret = ia64_peek(child, sw, urbs_end, addr, &data); + if (ret == 0) { + ret = data; + /* ensure "ret" is not mistaken as an error code: */ + force_successful_syscall_return(); + } + goto out_tsk; + + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + /* write the word at location addr */ + urbs_end = ia64_get_user_rbs_end(child, pt, NULL); + ret = ia64_poke(child, sw, urbs_end, addr, data); + goto out_tsk; + + case PTRACE_PEEKUSR: + /* read the word at addr in the USER area */ + if (access_uarea(child, addr, &data, 0) < 0) { + ret = -EIO; + goto out_tsk; + } + ret = data; + /* ensure "ret" is not mistaken as an error code */ + force_successful_syscall_return(); + goto out_tsk; + + case PTRACE_POKEUSR: + /* write the word at addr in the USER area */ + if (access_uarea(child, addr, &data, 1) < 0) { + ret = -EIO; + goto out_tsk; + } + ret = 0; + goto out_tsk; + + case PTRACE_OLD_GETSIGINFO: + /* for backwards-compatibility */ + ret = ptrace_request(child, PTRACE_GETSIGINFO, addr, data); + goto out_tsk; + + case PTRACE_OLD_SETSIGINFO: + /* for backwards-compatibility */ + ret = ptrace_request(child, PTRACE_SETSIGINFO, addr, data); + goto out_tsk; + + case PTRACE_SYSCALL: + /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: + /* restart after signal. */ + ret = -EIO; + if (data > _NSIG) + goto out_tsk; + if (request == PTRACE_SYSCALL) + set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + child->exit_code = data; + + /* + * Make sure the single step/taken-branch trap bits + * are not set: + */ + ia64_psr(pt)->ss = 0; + ia64_psr(pt)->tb = 0; + + wake_up_process(child); + ret = 0; + goto out_tsk; + + case PTRACE_KILL: + /* + * Make the child exit. Best I can do is send it a + * sigkill. Perhaps it should be put in the status + * that it wants to exit. + */ + if (child->exit_state == EXIT_ZOMBIE) + /* already dead */ + goto out_tsk; + child->exit_code = SIGKILL; + + ptrace_disable(child); + wake_up_process(child); + ret = 0; + goto out_tsk; + + case PTRACE_SINGLESTEP: + /* let child execute for one instruction */ + case PTRACE_SINGLEBLOCK: + ret = -EIO; + if (data > _NSIG) + goto out_tsk; + + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + if (request == PTRACE_SINGLESTEP) { + ia64_psr(pt)->ss = 1; + } else { + ia64_psr(pt)->tb = 1; + } + child->exit_code = data; + + /* give it a chance to run. */ + wake_up_process(child); + ret = 0; + goto out_tsk; + + case PTRACE_DETACH: + /* detach a process that was attached. */ + ret = ptrace_detach(child, data); + goto out_tsk; + + case PTRACE_GETREGS: + ret = ptrace_getregs(child, + (struct pt_all_user_regs __user *) data); + goto out_tsk; + + case PTRACE_SETREGS: + ret = ptrace_setregs(child, + (struct pt_all_user_regs __user *) data); + goto out_tsk; + + default: + ret = ptrace_request(child, request, addr, data); + goto out_tsk; + } + out_tsk: + put_task_struct(child); + out: + unlock_kernel(); + return ret; +} + + +void +syscall_trace (void) +{ + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + if (!(current->ptrace & PT_PTRACED)) + return; + /* + * The 0x80 provides a way for the tracing parent to + * distinguish between a syscall stop and SIGTRAP delivery. + */ + ptrace_notify(SIGTRAP + | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + + /* + * This isn't the same as continuing with a signal, but it + * will do for normal use. strace only continues with a + * signal if the stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} + +/* "asmlinkage" so the input arguments are preserved... */ + +asmlinkage void +syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + long syscall; + + if (unlikely(current->audit_context)) { + if (IS_IA32_PROCESS(®s)) + syscall = regs.r1; + else + syscall = regs.r15; + + audit_syscall_entry(current, syscall, arg0, arg1, arg2, arg3); + } + + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + syscall_trace(); +} + +/* "asmlinkage" so the input arguments are preserved... */ + +asmlinkage void +syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + if (unlikely(current->audit_context)) + audit_syscall_exit(current, regs.r8); + + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + syscall_trace(); +} diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c new file mode 100644 index 000000000000..acc0f132f86c --- /dev/null +++ b/arch/ia64/kernel/sal.c @@ -0,0 +1,302 @@ +/* + * System Abstraction Layer (SAL) interface routines. + * + * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/string.h> + +#include <asm/page.h> +#include <asm/sal.h> +#include <asm/pal.h> + + __cacheline_aligned DEFINE_SPINLOCK(sal_lock); +unsigned long sal_platform_features; + +unsigned short sal_revision; +unsigned short sal_version; + +#define SAL_MAJOR(x) ((x) >> 8) +#define SAL_MINOR(x) ((x) & 0xff) + +static struct { + void *addr; /* function entry point */ + void *gpval; /* gp value to use */ +} pdesc; + +static long +default_handler (void) +{ + return -1; +} + +ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler; +ia64_sal_desc_ptc_t *ia64_ptc_domain_info; + +const char * +ia64_sal_strerror (long status) +{ + const char *str; + switch (status) { + case 0: str = "Call completed without error"; break; + case 1: str = "Effect a warm boot of the system to complete " + "the update"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + case -4: str = "Virtual address not registered"; break; + case -5: str = "No information available"; break; + case -6: str = "Insufficient space to add the entry"; break; + case -7: str = "Invalid entry_addr value"; break; + case -8: str = "Invalid interrupt vector"; break; + case -9: str = "Requested memory not available"; break; + case -10: str = "Unable to write to the NVM device"; break; + case -11: str = "Invalid partition type specified"; break; + case -12: str = "Invalid NVM_Object id specified"; break; + case -13: str = "NVM_Object already has the maximum number " + "of partitions"; break; + case -14: str = "Insufficient space in partition for the " + "requested write sub-function"; break; + case -15: str = "Insufficient data buffer space for the " + "requested read record sub-function"; break; + case -16: str = "Scratch buffer required for the write/delete " + "sub-function"; break; + case -17: str = "Insufficient space in the NVM_Object for the " + "requested create sub-function"; break; + case -18: str = "Invalid value specified in the partition_rec " + "argument"; break; + case -19: str = "Record oriented I/O not supported for this " + "partition"; break; + case -20: str = "Bad format of record to be written or " + "required keyword variable not " + "specified"; break; + default: str = "Unknown SAL status code"; break; + } + return str; +} + +void __init +ia64_sal_handler_init (void *entry_point, void *gpval) +{ + /* fill in the SAL procedure descriptor and point ia64_sal to it: */ + pdesc.addr = entry_point; + pdesc.gpval = gpval; + ia64_sal = (ia64_sal_handler) &pdesc; +} + +static void __init +check_versions (struct ia64_sal_systab *systab) +{ + sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor; + sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor; + + /* Check for broken firmware */ + if ((sal_revision == SAL_VERSION_CODE(49, 29)) + && (sal_version == SAL_VERSION_CODE(49, 29))) + { + /* + * Old firmware for zx2000 prototypes have this weird version number, + * reset it to something sane. + */ + sal_revision = SAL_VERSION_CODE(2, 8); + sal_version = SAL_VERSION_CODE(0, 0); + } +} + +static void __init +sal_desc_entry_point (void *p) +{ + struct ia64_sal_desc_entry_point *ep = p; + ia64_pal_handler_init(__va(ep->pal_proc)); + ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp)); +} + +#ifdef CONFIG_SMP +static void __init +set_smp_redirect (int flag) +{ +#ifndef CONFIG_HOTPLUG_CPU + if (no_int_routing) + smp_int_redirect &= ~flag; + else + smp_int_redirect |= flag; +#else + /* + * For CPU Hotplug we dont want to do any chipset supported + * interrupt redirection. The reason is this would require that + * All interrupts be stopped and hard bind the irq to a cpu. + * Later when the interrupt is fired we need to set the redir hint + * on again in the vector. This is combersome for something that the + * user mode irq balancer will solve anyways. + */ + no_int_routing=1; + smp_int_redirect &= ~flag; +#endif +} +#else +#define set_smp_redirect(flag) do { } while (0) +#endif + +static void __init +sal_desc_platform_feature (void *p) +{ + struct ia64_sal_desc_platform_feature *pf = p; + sal_platform_features = pf->feature_mask; + + printk(KERN_INFO "SAL Platform features:"); + if (!sal_platform_features) { + printk(" None\n"); + return; + } + + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK) + printk(" BusLock"); + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) { + printk(" IRQ_Redirection"); + set_smp_redirect(SMP_IRQ_REDIRECTION); + } + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) { + printk(" IPI_Redirection"); + set_smp_redirect(SMP_IPI_REDIRECTION); + } + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT) + printk(" ITC_Drift"); + printk("\n"); +} + +#ifdef CONFIG_SMP +static void __init +sal_desc_ap_wakeup (void *p) +{ + struct ia64_sal_desc_ap_wakeup *ap = p; + + switch (ap->mechanism) { + case IA64_SAL_AP_EXTERNAL_INT: + ap_wakeup_vector = ap->vector; + printk(KERN_INFO "SAL: AP wakeup using external interrupt " + "vector 0x%lx\n", ap_wakeup_vector); + break; + default: + printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n"); + break; + } +} + +static void __init +chk_nointroute_opt(void) +{ + char *cp; + extern char saved_command_line[]; + + for (cp = saved_command_line; *cp; ) { + if (memcmp(cp, "nointroute", 10) == 0) { + no_int_routing = 1; + printk ("no_int_routing on\n"); + break; + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } +} + +#else +static void __init sal_desc_ap_wakeup(void *p) { } +#endif + +void __init +ia64_sal_init (struct ia64_sal_systab *systab) +{ + char *p; + int i; + + if (!systab) { + printk(KERN_WARNING "Hmm, no SAL System Table.\n"); + return; + } + + if (strncmp(systab->signature, "SST_", 4) != 0) + printk(KERN_ERR "bad signature in system table!"); + + check_versions(systab); +#ifdef CONFIG_SMP + chk_nointroute_opt(); +#endif + + /* revisions are coded in BCD, so %x does the job for us */ + printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n", + SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision), + systab->oem_id, systab->product_id, + systab->product_id[0] ? " " : "", + SAL_MAJOR(sal_version), SAL_MINOR(sal_version)); + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type + * descriptor. + */ + switch (*p) { + case SAL_DESC_ENTRY_POINT: + sal_desc_entry_point(p); + break; + case SAL_DESC_PLATFORM_FEATURE: + sal_desc_platform_feature(p); + break; + case SAL_DESC_PTC: + ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p; + break; + case SAL_DESC_AP_WAKEUP: + sal_desc_ap_wakeup(p); + break; + } + p += SAL_DESC_SIZE(*p); + } +} + +int +ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7) +{ + if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) + return -1; + SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + return 0; +} +EXPORT_SYMBOL(ia64_sal_oemcall); + +int +ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, + u64 arg7) +{ + if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) + return -1; + SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, + arg7); + return 0; +} +EXPORT_SYMBOL(ia64_sal_oemcall_nolock); + +int +ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc, + u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, u64 arg7) +{ + if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) + return -1; + SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, + arg7); + return 0; +} +EXPORT_SYMBOL(ia64_sal_oemcall_reentrant); diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c new file mode 100644 index 000000000000..d227fabecd02 --- /dev/null +++ b/arch/ia64/kernel/salinfo.c @@ -0,0 +1,629 @@ +/* + * salinfo.c + * + * Creates entries in /proc/sal for various system features. + * + * Copyright (c) 2003 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2003 Hewlett-Packard Co + * Bjorn Helgaas <bjorn.helgaas@hp.com> + * + * 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo + * code to create this file + * Oct 23 2003 kaos@sgi.com + * Replace IPI with set_cpus_allowed() to read a record from the required cpu. + * Redesign salinfo log processing to separate interrupt and user space + * contexts. + * Cache the record across multi-block reads from user space. + * Support > 64 cpus. + * Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module. + * + * Jan 28 2004 kaos@sgi.com + * Periodically check for outstanding MCA or INIT records. + * + * Dec 5 2004 kaos@sgi.com + * Standardize which records are cleared automatically. + */ + +#include <linux/types.h> +#include <linux/proc_fs.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/timer.h> +#include <linux/vmalloc.h> + +#include <asm/semaphore.h> +#include <asm/sal.h> +#include <asm/uaccess.h> + +MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>"); +MODULE_DESCRIPTION("/proc interface to IA-64 SAL features"); +MODULE_LICENSE("GPL"); + +static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data); + +typedef struct { + const char *name; /* name of the proc entry */ + unsigned long feature; /* feature bit */ + struct proc_dir_entry *entry; /* registered entry (removal) */ +} salinfo_entry_t; + +/* + * List {name,feature} pairs for every entry in /proc/sal/<feature> + * that this module exports + */ +static salinfo_entry_t salinfo_entries[]={ + { "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, }, + { "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, }, + { "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, }, + { "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, }, +}; + +#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries) + +static char *salinfo_log_name[] = { + "mca", + "init", + "cmc", + "cpe", +}; + +static struct proc_dir_entry *salinfo_proc_entries[ + ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */ + ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */ + (2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */ + 1]; /* /proc/sal */ + +/* Some records we get ourselves, some are accessed as saved data in buffers + * that are owned by mca.c. + */ +struct salinfo_data_saved { + u8* buffer; + u64 size; + u64 id; + int cpu; +}; + +/* State transitions. Actions are :- + * Write "read <cpunum>" to the data file. + * Write "clear <cpunum>" to the data file. + * Write "oemdata <cpunum> <offset> to the data file. + * Read from the data file. + * Close the data file. + * + * Start state is NO_DATA. + * + * NO_DATA + * write "read <cpunum>" -> NO_DATA or LOG_RECORD. + * write "clear <cpunum>" -> NO_DATA or LOG_RECORD. + * write "oemdata <cpunum> <offset> -> return -EINVAL. + * read data -> return EOF. + * close -> unchanged. Free record areas. + * + * LOG_RECORD + * write "read <cpunum>" -> NO_DATA or LOG_RECORD. + * write "clear <cpunum>" -> NO_DATA or LOG_RECORD. + * write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA. + * read data -> return the INIT/MCA/CMC/CPE record. + * close -> unchanged. Keep record areas. + * + * OEMDATA + * write "read <cpunum>" -> NO_DATA or LOG_RECORD. + * write "clear <cpunum>" -> NO_DATA or LOG_RECORD. + * write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA. + * read data -> return the formatted oemdata. + * close -> unchanged. Keep record areas. + * + * Closing the data file does not change the state. This allows shell scripts + * to manipulate salinfo data, each shell redirection opens the file, does one + * action then closes it again. The record areas are only freed at close when + * the state is NO_DATA. + */ +enum salinfo_state { + STATE_NO_DATA, + STATE_LOG_RECORD, + STATE_OEMDATA, +}; + +struct salinfo_data { + volatile cpumask_t cpu_event; /* which cpus have outstanding events */ + struct semaphore sem; /* count of cpus with outstanding events (bits set in cpu_event) */ + u8 *log_buffer; + u64 log_size; + u8 *oemdata; /* decoded oem data */ + u64 oemdata_size; + int open; /* single-open to prevent races */ + u8 type; + u8 saved_num; /* using a saved record? */ + enum salinfo_state state :8; /* processing state */ + u8 padding; + int cpu_check; /* next CPU to check */ + struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */ +}; + +static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)]; + +static spinlock_t data_lock, data_saved_lock; + +/** salinfo_platform_oemdata - optional callback to decode oemdata from an error + * record. + * @sect_header: pointer to the start of the section to decode. + * @oemdata: returns vmalloc area containing the decded output. + * @oemdata_size: returns length of decoded output (strlen). + * + * Description: If user space asks for oem data to be decoded by the kernel + * and/or prom and the platform has set salinfo_platform_oemdata to the address + * of a platform specific routine then call that routine. salinfo_platform_oemdata + * vmalloc's and formats its output area, returning the address of the text + * and its strlen. Returns 0 for success, -ve for error. The callback is + * invoked on the cpu that generated the error record. + */ +int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size); + +struct salinfo_platform_oemdata_parms { + const u8 *efi_guid; + u8 **oemdata; + u64 *oemdata_size; + int ret; +}; + +static void +salinfo_platform_oemdata_cpu(void *context) +{ + struct salinfo_platform_oemdata_parms *parms = context; + parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); +} + +static void +shift1_data_saved (struct salinfo_data *data, int shift) +{ + memcpy(data->data_saved+shift, data->data_saved+shift+1, + (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0])); + memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0, + sizeof(data->data_saved[0])); +} + +/* This routine is invoked in interrupt context. Note: mca.c enables + * interrupts before calling this code for CMC/CPE. MCA and INIT events are + * not irq safe, do not call any routines that use spinlocks, they may deadlock. + * MCA and INIT records are recorded, a timer event will look for any + * outstanding events and wake up the user space code. + * + * The buffer passed from mca.c points to the output from ia64_log_get. This is + * a persistent buffer but its contents can change between the interrupt and + * when user space processes the record. Save the record id to identify + * changes. + */ +void +salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) +{ + struct salinfo_data *data = salinfo_data + type; + struct salinfo_data_saved *data_saved; + unsigned long flags = 0; + int i; + int saved_size = ARRAY_SIZE(data->data_saved); + + BUG_ON(type >= ARRAY_SIZE(salinfo_log_name)); + + if (irqsafe) + spin_lock_irqsave(&data_saved_lock, flags); + for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { + if (!data_saved->buffer) + break; + } + if (i == saved_size) { + if (!data->saved_num) { + shift1_data_saved(data, 0); + data_saved = data->data_saved + saved_size - 1; + } else + data_saved = NULL; + } + if (data_saved) { + data_saved->cpu = smp_processor_id(); + data_saved->id = ((sal_log_record_header_t *)buffer)->id; + data_saved->size = size; + data_saved->buffer = buffer; + } + if (irqsafe) + spin_unlock_irqrestore(&data_saved_lock, flags); + + if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) { + if (irqsafe) + up(&data->sem); + } +} + +/* Check for outstanding MCA/INIT records every minute (arbitrary) */ +#define SALINFO_TIMER_DELAY (60*HZ) +static struct timer_list salinfo_timer; + +static void +salinfo_timeout_check(struct salinfo_data *data) +{ + int i; + if (!data->open) + return; + for (i = 0; i < NR_CPUS; ++i) { + if (test_bit(i, &data->cpu_event)) { + /* double up() is not a problem, user space will see no + * records for the additional "events". + */ + up(&data->sem); + } + } +} + +static void +salinfo_timeout (unsigned long arg) +{ + salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); + salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); + salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; + add_timer(&salinfo_timer); +} + +static int +salinfo_event_open(struct inode *inode, struct file *file) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + +static ssize_t +salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode; + struct proc_dir_entry *entry = PDE(inode); + struct salinfo_data *data = entry->data; + char cmd[32]; + size_t size; + int i, n, cpu = -1; + +retry: + if (down_trylock(&data->sem)) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + if (down_interruptible(&data->sem)) + return -ERESTARTSYS; + } + + n = data->cpu_check; + for (i = 0; i < NR_CPUS; i++) { + if (test_bit(n, &data->cpu_event)) { + cpu = n; + break; + } + if (++n == NR_CPUS) + n = 0; + } + + if (cpu == -1) + goto retry; + + /* events are sticky until the user says "clear" */ + up(&data->sem); + + /* for next read, start checking at next CPU */ + data->cpu_check = cpu; + if (++data->cpu_check == NR_CPUS) + data->cpu_check = 0; + + snprintf(cmd, sizeof(cmd), "read %d\n", cpu); + + size = strlen(cmd); + if (size > count) + size = count; + if (copy_to_user(buffer, cmd, size)) + return -EFAULT; + + return size; +} + +static struct file_operations salinfo_event_fops = { + .open = salinfo_event_open, + .read = salinfo_event_read, +}; + +static int +salinfo_log_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *entry = PDE(inode); + struct salinfo_data *data = entry->data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + spin_lock(&data_lock); + if (data->open) { + spin_unlock(&data_lock); + return -EBUSY; + } + data->open = 1; + spin_unlock(&data_lock); + + if (data->state == STATE_NO_DATA && + !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) { + data->open = 0; + return -ENOMEM; + } + + return 0; +} + +static int +salinfo_log_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *entry = PDE(inode); + struct salinfo_data *data = entry->data; + + if (data->state == STATE_NO_DATA) { + vfree(data->log_buffer); + vfree(data->oemdata); + data->log_buffer = NULL; + data->oemdata = NULL; + } + spin_lock(&data_lock); + data->open = 0; + spin_unlock(&data_lock); + return 0; +} + +static void +call_on_cpu(int cpu, void (*fn)(void *), void *arg) +{ + cpumask_t save_cpus_allowed, new_cpus_allowed; + memcpy(&save_cpus_allowed, ¤t->cpus_allowed, sizeof(save_cpus_allowed)); + memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed)); + set_bit(cpu, &new_cpus_allowed); + set_cpus_allowed(current, new_cpus_allowed); + (*fn)(arg); + set_cpus_allowed(current, save_cpus_allowed); +} + +static void +salinfo_log_read_cpu(void *context) +{ + struct salinfo_data *data = context; + sal_log_record_header_t *rh; + data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer); + rh = (sal_log_record_header_t *)(data->log_buffer); + /* Clear corrected errors as they are read from SAL */ + if (rh->severity == sal_log_severity_corrected) + ia64_sal_clear_state_info(data->type); +} + +static void +salinfo_log_new_read(int cpu, struct salinfo_data *data) +{ + struct salinfo_data_saved *data_saved; + unsigned long flags; + int i; + int saved_size = ARRAY_SIZE(data->data_saved); + + data->saved_num = 0; + spin_lock_irqsave(&data_saved_lock, flags); +retry: + for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { + if (data_saved->buffer && data_saved->cpu == cpu) { + sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer); + data->log_size = data_saved->size; + memcpy(data->log_buffer, rh, data->log_size); + barrier(); /* id check must not be moved */ + if (rh->id == data_saved->id) { + data->saved_num = i+1; + break; + } + /* saved record changed by mca.c since interrupt, discard it */ + shift1_data_saved(data, i); + goto retry; + } + } + spin_unlock_irqrestore(&data_saved_lock, flags); + + if (!data->saved_num) + call_on_cpu(cpu, salinfo_log_read_cpu, data); + if (!data->log_size) { + data->state = STATE_NO_DATA; + clear_bit(cpu, &data->cpu_event); + } else { + data->state = STATE_LOG_RECORD; + } +} + +static ssize_t +salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode; + struct proc_dir_entry *entry = PDE(inode); + struct salinfo_data *data = entry->data; + u8 *buf; + u64 bufsize; + + if (data->state == STATE_LOG_RECORD) { + buf = data->log_buffer; + bufsize = data->log_size; + } else if (data->state == STATE_OEMDATA) { + buf = data->oemdata; + bufsize = data->oemdata_size; + } else { + buf = NULL; + bufsize = 0; + } + return simple_read_from_buffer(buffer, count, ppos, buf, bufsize); +} + +static void +salinfo_log_clear_cpu(void *context) +{ + struct salinfo_data *data = context; + ia64_sal_clear_state_info(data->type); +} + +static int +salinfo_log_clear(struct salinfo_data *data, int cpu) +{ + sal_log_record_header_t *rh; + data->state = STATE_NO_DATA; + if (!test_bit(cpu, &data->cpu_event)) + return 0; + down(&data->sem); + clear_bit(cpu, &data->cpu_event); + if (data->saved_num) { + unsigned long flags; + spin_lock_irqsave(&data_saved_lock, flags); + shift1_data_saved(data, data->saved_num - 1 ); + data->saved_num = 0; + spin_unlock_irqrestore(&data_saved_lock, flags); + } + rh = (sal_log_record_header_t *)(data->log_buffer); + /* Corrected errors have already been cleared from SAL */ + if (rh->severity != sal_log_severity_corrected) + call_on_cpu(cpu, salinfo_log_clear_cpu, data); + /* clearing a record may make a new record visible */ + salinfo_log_new_read(cpu, data); + if (data->state == STATE_LOG_RECORD && + !test_and_set_bit(cpu, &data->cpu_event)) + up(&data->sem); + return 0; +} + +static ssize_t +salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode; + struct proc_dir_entry *entry = PDE(inode); + struct salinfo_data *data = entry->data; + char cmd[32]; + size_t size; + u32 offset; + int cpu; + + size = sizeof(cmd); + if (count < size) + size = count; + if (copy_from_user(cmd, buffer, size)) + return -EFAULT; + + if (sscanf(cmd, "read %d", &cpu) == 1) { + salinfo_log_new_read(cpu, data); + } else if (sscanf(cmd, "clear %d", &cpu) == 1) { + int ret; + if ((ret = salinfo_log_clear(data, cpu))) + count = ret; + } else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) { + if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA) + return -EINVAL; + if (offset > data->log_size - sizeof(efi_guid_t)) + return -EINVAL; + data->state = STATE_OEMDATA; + if (salinfo_platform_oemdata) { + struct salinfo_platform_oemdata_parms parms = { + .efi_guid = data->log_buffer + offset, + .oemdata = &data->oemdata, + .oemdata_size = &data->oemdata_size + }; + call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms); + if (parms.ret) + count = parms.ret; + } else + data->oemdata_size = 0; + } else + return -EINVAL; + + return count; +} + +static struct file_operations salinfo_data_fops = { + .open = salinfo_log_open, + .release = salinfo_log_release, + .read = salinfo_log_read, + .write = salinfo_log_write, +}; + +static int __init +salinfo_init(void) +{ + struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */ + struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */ + struct proc_dir_entry *dir, *entry; + struct salinfo_data *data; + int i, j, online; + + salinfo_dir = proc_mkdir("sal", NULL); + if (!salinfo_dir) + return 0; + + for (i=0; i < NR_SALINFO_ENTRIES; i++) { + /* pass the feature bit in question as misc data */ + *sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir, + salinfo_read, (void *)salinfo_entries[i].feature); + } + + for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { + data = salinfo_data + i; + data->type = i; + sema_init(&data->sem, 0); + dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); + if (!dir) + continue; + + entry = create_proc_entry("event", S_IRUSR, dir); + if (!entry) + continue; + entry->data = data; + entry->proc_fops = &salinfo_event_fops; + *sdir++ = entry; + + entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir); + if (!entry) + continue; + entry->data = data; + entry->proc_fops = &salinfo_data_fops; + *sdir++ = entry; + + /* we missed any events before now */ + online = 0; + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) { + set_bit(j, &data->cpu_event); + ++online; + } + sema_init(&data->sem, online); + + *sdir++ = dir; + } + + *sdir++ = salinfo_dir; + + init_timer(&salinfo_timer); + salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; + salinfo_timer.function = &salinfo_timeout; + add_timer(&salinfo_timer); + + return 0; +} + +/* + * 'data' contains an integer that corresponds to the feature we're + * testing + */ +static int +salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len = 0; + + len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n"); + + if (len <= off+count) *eof = 1; + + *start = page + off; + len -= off; + + if (len>count) len = count; + if (len<0) len = 0; + + return len; +} + +module_init(salinfo_init); diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c new file mode 100644 index 000000000000..2724ef3fbae2 --- /dev/null +++ b/arch/ia64/kernel/semaphore.c @@ -0,0 +1,165 @@ +/* + * IA-64 semaphore implementation (derived from x86 version). + * + * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ + +/* + * Semaphores are implemented using a two-way counter: The "count" + * variable is decremented for each process that tries to acquire the + * semaphore, while the "sleepers" variable is a count of such + * acquires. + * + * Notably, the inline "up()" and "down()" functions can efficiently + * test if they need to do any extra work (up needs to do something + * only if count was negative before the increment operation. + * + * "sleeping" and the contention routine ordering is protected + * by the spinlock in the semaphore's waitqueue head. + * + * Note that these functions are only called when there is contention + * on the lock, and as such all this is the "non-critical" part of the + * whole semaphore business. The critical part is the inline stuff in + * <asm/semaphore.h> where we want to avoid any extra jumps and calls. + */ +#include <linux/sched.h> +#include <linux/init.h> + +#include <asm/errno.h> +#include <asm/semaphore.h> + +/* + * Logic: + * - Only on a boundary condition do we need to care. When we go + * from a negative count to a non-negative, we wake people up. + * - When we go from a non-negative count to a negative do we + * (a) synchronize with the "sleepers" count and (b) make sure + * that we're on the wakeup list before we synchronize so that + * we cannot lose wakeup events. + */ + +void +__up (struct semaphore *sem) +{ + wake_up(&sem->wait); +} + +void __sched __down (struct semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + unsigned long flags; + + tsk->state = TASK_UNINTERRUPTIBLE; + spin_lock_irqsave(&sem->wait.lock, flags); + add_wait_queue_exclusive_locked(&sem->wait, &wait); + + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock in + * the wait_queue_head. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irqrestore(&sem->wait.lock, flags); + + schedule(); + + spin_lock_irqsave(&sem->wait.lock, flags); + tsk->state = TASK_UNINTERRUPTIBLE; + } + remove_wait_queue_locked(&sem->wait, &wait); + wake_up_locked(&sem->wait); + spin_unlock_irqrestore(&sem->wait.lock, flags); + tsk->state = TASK_RUNNING; +} + +int __sched __down_interruptible (struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + unsigned long flags; + + tsk->state = TASK_INTERRUPTIBLE; + spin_lock_irqsave(&sem->wait.lock, flags); + add_wait_queue_exclusive_locked(&sem->wait, &wait); + + sem->sleepers ++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * With signals pending, this turns into + * the trylock failure case - we won't be + * sleeping, and we* can't get the lock as + * it has contention. Just correct the count + * and exit. + */ + if (signal_pending(current)) { + retval = -EINTR; + sem->sleepers = 0; + atomic_add(sleepers, &sem->count); + break; + } + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock in + * wait_queue_head. The "-1" is because we're + * still hoping to get the semaphore. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irqrestore(&sem->wait.lock, flags); + + schedule(); + + spin_lock_irqsave(&sem->wait.lock, flags); + tsk->state = TASK_INTERRUPTIBLE; + } + remove_wait_queue_locked(&sem->wait, &wait); + wake_up_locked(&sem->wait); + spin_unlock_irqrestore(&sem->wait.lock, flags); + + tsk->state = TASK_RUNNING; + return retval; +} + +/* + * Trylock failed - make sure we correct for having decremented the + * count. + */ +int +__down_trylock (struct semaphore *sem) +{ + unsigned long flags; + int sleepers; + + spin_lock_irqsave(&sem->wait.lock, flags); + sleepers = sem->sleepers + 1; + sem->sleepers = 0; + + /* + * Add "everybody else" and us into it. They aren't + * playing, because we own the spinlock in the + * wait_queue_head. + */ + if (!atomic_add_negative(sleepers, &sem->count)) { + wake_up_locked(&sem->wait); + } + + spin_unlock_irqrestore(&sem->wait.lock, flags); + return 1; +} diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c new file mode 100644 index 000000000000..f05650c801d2 --- /dev/null +++ b/arch/ia64/kernel/setup.c @@ -0,0 +1,723 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * + * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo(). + * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map + * 03/31/00 R.Seth cpu_initialized and current->processor fixes + * 02/04/00 D.Mosberger some more get_cpuinfo fixes... + * 02/01/00 R.Seth fixed get_cpuinfo for SMP + * 01/07/99 S.Eranian added the support for command line argument + * 06/24/99 W.Drummond added boot_cpu_data. + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/acpi.h> +#include <linux/bootmem.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/string.h> +#include <linux/threads.h> +#include <linux/tty.h> +#include <linux/serial.h> +#include <linux/serial_core.h> +#include <linux/efi.h> +#include <linux/initrd.h> + +#include <asm/ia32.h> +#include <asm/machvec.h> +#include <asm/mca.h> +#include <asm/meminit.h> +#include <asm/page.h> +#include <asm/patch.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/sections.h> +#include <asm/serial.h> +#include <asm/setup.h> +#include <asm/smp.h> +#include <asm/system.h> +#include <asm/unistd.h> + +#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) +# error "struct cpuinfo_ia64 too big!" +#endif + +#ifdef CONFIG_SMP +unsigned long __per_cpu_offset[NR_CPUS]; +EXPORT_SYMBOL(__per_cpu_offset); +#endif + +DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); +DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); +DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8); +unsigned long ia64_cycles_per_usec; +struct ia64_boot_param *ia64_boot_param; +struct screen_info screen_info; + +unsigned long ia64_max_cacheline_size; +unsigned long ia64_iobase; /* virtual address for I/O accesses */ +EXPORT_SYMBOL(ia64_iobase); +struct io_space io_space[MAX_IO_SPACES]; +EXPORT_SYMBOL(io_space); +unsigned int num_io_spaces; + +/* + * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This + * mask specifies a mask of address bits that must be 0 in order for two buffers to be + * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start + * address of the second buffer must be aligned to (merge_mask+1) in order to be + * mergeable). By default, we assume there is no I/O MMU which can merge physically + * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu + * page-size of 2^64. + */ +unsigned long ia64_max_iommu_merge_mask = ~0UL; +EXPORT_SYMBOL(ia64_max_iommu_merge_mask); + +/* + * We use a special marker for the end of memory and it uses the extra (+1) slot + */ +struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; +int num_rsvd_regions; + + +/* + * Filter incoming memory segments based on the primitive map created from the boot + * parameters. Segments contained in the map are removed from the memory ranges. A + * caller-specified function is called with the memory ranges that remain after filtering. + * This routine does not assume the incoming segments are sorted. + */ +int +filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) +{ + unsigned long range_start, range_end, prev_start; + void (*func)(unsigned long, unsigned long, int); + int i; + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) return 0; + } +#endif + /* + * lowest possible address(walker uses virtual) + */ + prev_start = PAGE_OFFSET; + func = arg; + + for (i = 0; i < num_rsvd_regions; ++i) { + range_start = max(start, prev_start); + range_end = min(end, rsvd_region[i].start); + + if (range_start < range_end) + call_pernode_memory(__pa(range_start), range_end - range_start, func); + + /* nothing more available in this segment */ + if (range_end == end) return 0; + + prev_start = rsvd_region[i].end; + } + /* end of memory marker allows full processing inside loop body */ + return 0; +} + +static void +sort_regions (struct rsvd_region *rsvd_region, int max) +{ + int j; + + /* simple bubble sorting */ + while (max--) { + for (j = 0; j < max; ++j) { + if (rsvd_region[j].start > rsvd_region[j+1].start) { + struct rsvd_region tmp; + tmp = rsvd_region[j]; + rsvd_region[j] = rsvd_region[j + 1]; + rsvd_region[j + 1] = tmp; + } + } + } +} + +/** + * reserve_memory - setup reserved memory areas + * + * Setup the reserved memory areas set aside for the boot parameters, + * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, + * see include/asm-ia64/meminit.h if you need to define more. + */ +void +reserve_memory (void) +{ + int n = 0; + + /* + * none of the entries in this table overlap + */ + rsvd_region[n].start = (unsigned long) ia64_boot_param; + rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param); + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size; + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line); + rsvd_region[n].end = (rsvd_region[n].start + + strlen(__va(ia64_boot_param->command_line)) + 1); + n++; + + rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START); + rsvd_region[n].end = (unsigned long) ia64_imva(_end); + n++; + +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size; + n++; + } +#endif + + /* end of memory marker */ + rsvd_region[n].start = ~0UL; + rsvd_region[n].end = ~0UL; + n++; + + num_rsvd_regions = n; + + sort_regions(rsvd_region, num_rsvd_regions); +} + +/** + * find_initrd - get initrd parameters from the boot parameter structure + * + * Grab the initrd start and end from the boot parameter struct given us by + * the boot loader. + */ +void +find_initrd (void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); + initrd_end = initrd_start+ia64_boot_param->initrd_size; + + printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n", + initrd_start, ia64_boot_param->initrd_size); + } +#endif +} + +static void __init +io_port_init (void) +{ + extern unsigned long ia64_iobase; + unsigned long phys_iobase; + + /* + * Set `iobase' to the appropriate address in region 6 (uncached access range). + * + * The EFI memory map is the "preferred" location to get the I/O port space base, + * rather the relying on AR.KR0. This should become more clear in future SAL + * specs. We'll fall back to getting it out of AR.KR0 if no appropriate entry is + * found in the memory map. + */ + phys_iobase = efi_get_iobase(); + if (phys_iobase) + /* set AR.KR0 since this is all we use it for anyway */ + ia64_set_kr(IA64_KR_IO_BASE, phys_iobase); + else { + phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); + printk(KERN_INFO "No I/O port range found in EFI memory map, falling back " + "to AR.KR0\n"); + printk(KERN_INFO "I/O port base = 0x%lx\n", phys_iobase); + } + ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); + + /* setup legacy IO port space */ + io_space[0].mmio_base = ia64_iobase; + io_space[0].sparse = 1; + num_io_spaces = 1; +} + +/** + * early_console_setup - setup debugging console + * + * Consoles started here require little enough setup that we can start using + * them very early in the boot process, either right after the machine + * vector initialization, or even before if the drivers can detect their hw. + * + * Returns non-zero if a console couldn't be setup. + */ +static inline int __init +early_console_setup (char *cmdline) +{ +#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE + { + extern int sn_serial_console_early_setup(void); + if (!sn_serial_console_early_setup()) + return 0; + } +#endif +#ifdef CONFIG_EFI_PCDP + if (!efi_setup_pcdp_console(cmdline)) + return 0; +#endif +#ifdef CONFIG_SERIAL_8250_CONSOLE + if (!early_serial_console_init(cmdline)) + return 0; +#endif + + return -1; +} + +static inline void +mark_bsp_online (void) +{ +#ifdef CONFIG_SMP + /* If we register an early console, allow CPU 0 to printk */ + cpu_set(smp_processor_id(), cpu_online_map); +#endif +} + +void __init +setup_arch (char **cmdline_p) +{ + unw_init(); + + ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); + + *cmdline_p = __va(ia64_boot_param->command_line); + strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); + + efi_init(); + io_port_init(); + +#ifdef CONFIG_IA64_GENERIC + { + const char *mvec_name = strstr (*cmdline_p, "machvec="); + char str[64]; + + if (mvec_name) { + const char *end; + size_t len; + + mvec_name += 8; + end = strchr (mvec_name, ' '); + if (end) + len = end - mvec_name; + else + len = strlen (mvec_name); + len = min(len, sizeof (str) - 1); + strncpy (str, mvec_name, len); + str[len] = '\0'; + mvec_name = str; + } else + mvec_name = acpi_get_sysname(); + machvec_init(mvec_name); + } +#endif + + if (early_console_setup(*cmdline_p) == 0) + mark_bsp_online(); + +#ifdef CONFIG_ACPI_BOOT + /* Initialize the ACPI boot-time table parser */ + acpi_table_init(); +# ifdef CONFIG_ACPI_NUMA + acpi_numa_init(); +# endif +#else +# ifdef CONFIG_SMP + smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */ +# endif +#endif /* CONFIG_APCI_BOOT */ + + find_memory(); + + /* process SAL system table: */ + ia64_sal_init(efi.sal_systab); + +#ifdef CONFIG_SMP + cpu_physical_id(0) = hard_smp_processor_id(); +#endif + + cpu_init(); /* initialize the bootstrap CPU */ + +#ifdef CONFIG_ACPI_BOOT + acpi_boot_init(); +#endif + +#ifdef CONFIG_VT + if (!conswitchp) { +# if defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +# endif +# if defined(CONFIG_VGA_CONSOLE) + /* + * Non-legacy systems may route legacy VGA MMIO range to system + * memory. vga_con probes the MMIO hole, so memory looks like + * a VGA device to it. The EFI memory map can tell us if it's + * memory so we can avoid this problem. + */ + if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY) + conswitchp = &vga_con; +# endif + } +#endif + + /* enable IA-64 Machine Check Abort Handling unless disabled */ + if (!strstr(saved_command_line, "nomca")) + ia64_mca_init(); + + platform_setup(cmdline_p); + paging_init(); +} + +/* + * Display cpu info for all cpu's. + */ +static int +show_cpuinfo (struct seq_file *m, void *v) +{ +#ifdef CONFIG_SMP +# define lpj c->loops_per_jiffy +# define cpunum c->cpu +#else +# define lpj loops_per_jiffy +# define cpunum 0 +#endif + static struct { + unsigned long mask; + const char *feature_name; + } feature_bits[] = { + { 1UL << 0, "branchlong" }, + { 1UL << 1, "spontaneous deferral"}, + { 1UL << 2, "16-byte atomic ops" } + }; + char family[32], features[128], *cp, sep; + struct cpuinfo_ia64 *c = v; + unsigned long mask; + int i; + + mask = c->features; + + switch (c->family) { + case 0x07: memcpy(family, "Itanium", 8); break; + case 0x1f: memcpy(family, "Itanium 2", 10); break; + default: sprintf(family, "%u", c->family); break; + } + + /* build the feature string: */ + memcpy(features, " standard", 10); + cp = features; + sep = 0; + for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) { + if (mask & feature_bits[i].mask) { + if (sep) + *cp++ = sep; + sep = ','; + *cp++ = ' '; + strcpy(cp, feature_bits[i].feature_name); + cp += strlen(feature_bits[i].feature_name); + mask &= ~feature_bits[i].mask; + } + } + if (mask) { + /* print unknown features as a hex value: */ + if (sep) + *cp++ = sep; + sprintf(cp, " 0x%lx", mask); + } + + seq_printf(m, + "processor : %d\n" + "vendor : %s\n" + "arch : IA-64\n" + "family : %s\n" + "model : %u\n" + "revision : %u\n" + "archrev : %u\n" + "features :%s\n" /* don't change this---it _is_ right! */ + "cpu number : %lu\n" + "cpu regs : %u\n" + "cpu MHz : %lu.%06lu\n" + "itc MHz : %lu.%06lu\n" + "BogoMIPS : %lu.%02lu\n\n", + cpunum, c->vendor, family, c->model, c->revision, c->archrev, + features, c->ppn, c->number, + c->proc_freq / 1000000, c->proc_freq % 1000000, + c->itc_freq / 1000000, c->itc_freq % 1000000, + lpj*HZ/500000, (lpj*HZ/5000) % 100); + return 0; +} + +static void * +c_start (struct seq_file *m, loff_t *pos) +{ +#ifdef CONFIG_SMP + while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map)) + ++*pos; +#endif + return *pos < NR_CPUS ? cpu_data(*pos) : NULL; +} + +static void * +c_next (struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void +c_stop (struct seq_file *m, void *v) +{ +} + +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo +}; + +void +identify_cpu (struct cpuinfo_ia64 *c) +{ + union { + unsigned long bits[5]; + struct { + /* id 0 & 1: */ + char vendor[16]; + + /* id 2 */ + u64 ppn; /* processor serial number */ + + /* id 3: */ + unsigned number : 8; + unsigned revision : 8; + unsigned model : 8; + unsigned family : 8; + unsigned archrev : 8; + unsigned reserved : 24; + + /* id 4: */ + u64 features; + } field; + } cpuid; + pal_vm_info_1_u_t vm1; + pal_vm_info_2_u_t vm2; + pal_status_t status; + unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ + int i; + + for (i = 0; i < 5; ++i) + cpuid.bits[i] = ia64_get_cpuid(i); + + memcpy(c->vendor, cpuid.field.vendor, 16); +#ifdef CONFIG_SMP + c->cpu = smp_processor_id(); +#endif + c->ppn = cpuid.field.ppn; + c->number = cpuid.field.number; + c->revision = cpuid.field.revision; + c->model = cpuid.field.model; + c->family = cpuid.field.family; + c->archrev = cpuid.field.archrev; + c->features = cpuid.field.features; + + status = ia64_pal_vm_summary(&vm1, &vm2); + if (status == PAL_STATUS_SUCCESS) { + impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; + phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; + } + c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); + c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); +} + +void +setup_per_cpu_areas (void) +{ + /* start_kernel() requires this... */ +} + +static void +get_max_cacheline_size (void) +{ + unsigned long line_size, max = 1; + u64 l, levels, unique_caches; + pal_cache_config_info_t cci; + s64 status; + + status = ia64_pal_cache_summary(&levels, &unique_caches); + if (status != 0) { + printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", + __FUNCTION__, status); + max = SMP_CACHE_BYTES; + goto out; + } + + for (l = 0; l < levels; ++l) { + status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2, + &cci); + if (status != 0) { + printk(KERN_ERR + "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n", + __FUNCTION__, l, status); + max = SMP_CACHE_BYTES; + } + line_size = 1 << cci.pcci_line_size; + if (line_size > max) + max = line_size; + } + out: + if (max > ia64_max_cacheline_size) + ia64_max_cacheline_size = max; +} + +/* + * cpu_init() initializes state that is per-CPU. This function acts + * as a 'CPU state barrier', nothing should get across. + */ +void +cpu_init (void) +{ + extern void __devinit ia64_mmu_init (void *); + unsigned long num_phys_stacked; + pal_vm_info_2_u_t vmi; + unsigned int max_ctx; + struct cpuinfo_ia64 *cpu_info; + void *cpu_data; + + cpu_data = per_cpu_init(); + + /* + * We set ar.k3 so that assembly code in MCA handler can compute + * physical addresses of per cpu variables with a simple: + * phys = ar.k3 + &per_cpu_var + */ + ia64_set_kr(IA64_KR_PER_CPU_DATA, + ia64_tpa(cpu_data) - (long) __per_cpu_start); + + get_max_cacheline_size(); + + /* + * We can't pass "local_cpu_data" to identify_cpu() because we haven't called + * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it + * depends on the data returned by identify_cpu(). We break the dependency by + * accessing cpu_data() through the canonical per-CPU address. + */ + cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start); + identify_cpu(cpu_info); + +#ifdef CONFIG_MCKINLEY + { +# define FEATURE_SET 16 + struct ia64_pal_retval iprv; + + if (cpu_info->family == 0x1f) { + PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0); + if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80)) + PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, + (iprv.v1 | 0x80), FEATURE_SET, 0); + } + } +#endif + + /* Clear the stack memory reserved for pt_regs: */ + memset(ia64_task_regs(current), 0, sizeof(struct pt_regs)); + + ia64_set_kr(IA64_KR_FPU_OWNER, 0); + + /* + * Initialize the page-table base register to a global + * directory with all zeroes. This ensure that we can handle + * TLB-misses to user address-space even before we created the + * first user address-space. This may happen, e.g., due to + * aggressive use of lfetch.fault. + */ + ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); + + /* + * Initialize default control register to defer all speculative faults. The + * kernel MUST NOT depend on a particular setting of these bits (in other words, + * the kernel must have recovery code for all speculative accesses). Turn on + * dcr.lc as per recommendation by the architecture team. Most IA-32 apps + * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll + * be fine). + */ + ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR + | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if (current->mm) + BUG(); + + ia64_mmu_init(ia64_imva(cpu_data)); + ia64_mca_cpu_init(ia64_imva(cpu_data)); + +#ifdef CONFIG_IA32_SUPPORT + ia32_cpu_init(); +#endif + + /* Clear ITC to eliminiate sched_clock() overflows in human time. */ + ia64_set_itc(0); + + /* disable all local interrupt sources: */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* clear TPR & XTP to enable all interrupt classes: */ + ia64_setreg(_IA64_REG_CR_TPR, 0); +#ifdef CONFIG_SMP + normal_xtp(); +#endif + + /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ + if (ia64_pal_vm_summary(NULL, &vmi) == 0) + max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; + else { + printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); + max_ctx = (1U << 15) - 1; /* use architected minimum */ + } + while (max_ctx < ia64_ctx.max_ctx) { + unsigned int old = ia64_ctx.max_ctx; + if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) + break; + } + + if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) { + printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical " + "stacked regs\n"); + num_phys_stacked = 96; + } + /* size of physical stacked register partition plus 8 bytes: */ + __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; + platform_cpu_init(); +} + +void +check_bugs (void) +{ + ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, + (unsigned long) __end___mckinley_e9_bundles); +} diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h new file mode 100644 index 000000000000..37b986cb86e0 --- /dev/null +++ b/arch/ia64/kernel/sigframe.h @@ -0,0 +1,25 @@ +struct sigscratch { + unsigned long scratch_unat; /* ar.unat for the general registers saved in pt */ + unsigned long ar_pfs; /* for syscalls, the user-level function-state */ + struct pt_regs pt; +}; + +struct sigframe { + /* + * Place signal handler args where user-level unwinder can find them easily. + * DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is + * user-level code that depends on their presence! + */ + unsigned long arg0; /* signum */ + unsigned long arg1; /* siginfo pointer */ + unsigned long arg2; /* sigcontext pointer */ + /* + * End of architected state. + */ + + void __user *handler; /* pointer to the plabel of the signal handler */ + struct siginfo info; + struct sigcontext sc; +}; + +extern long ia64_do_signal (sigset_t *, struct sigscratch *, long); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c new file mode 100644 index 000000000000..6891d86937d9 --- /dev/null +++ b/arch/ia64/kernel/signal.c @@ -0,0 +1,691 @@ +/* + * Architecture-specific signal handling support. + * + * Copyright (C) 1999-2004 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * Derived from i386 and Alpha versions. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/tty.h> +#include <linux/binfmts.h> +#include <linux/unistd.h> +#include <linux/wait.h> + +#include <asm/ia32.h> +#include <asm/intrinsics.h> +#include <asm/uaccess.h> +#include <asm/rse.h> +#include <asm/sigcontext.h> + +#include "sigframe.h" + +#define DEBUG_SIG 0 +#define STACK_ALIGN 16 /* minimal alignment for stack pointer */ +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#if _NSIG_WORDS > 1 +# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t)) +# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t)) +#else +# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0]) +# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) +#endif + +long +ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr) +{ + sigset_t oldset, set; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (!access_ok(VERIFY_READ, uset, sigsetsize)) + return -EFAULT; + + if (GET_SIGSET(&set, uset)) + return -EFAULT; + + sigdelsetmask(&set, ~_BLOCKABLE); + + spin_lock_irq(¤t->sighand->siglock); + { + oldset = current->blocked; + current->blocked = set; + recalc_sigpending(); + } + spin_unlock_irq(¤t->sighand->siglock); + + /* + * The return below usually returns to the signal handler. We need to + * pre-set the correct error code here to ensure that the right values + * get saved in sigcontext by ia64_do_signal. + */ + scr->pt.r8 = EINTR; + scr->pt.r10 = -1; + + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (ia64_do_signal(&oldset, scr, 1)) + return -EINTR; + } +} + +asmlinkage long +sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2, + long arg3, long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + return do_sigaltstack(uss, uoss, regs.r12); +} + +static long +restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) +{ + unsigned long ip, flags, nat, um, cfm; + long err; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* restore scratch that always needs gets updated during signal delivery: */ + err = __get_user(flags, &sc->sc_flags); + err |= __get_user(nat, &sc->sc_nat); + err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ + err |= __get_user(cfm, &sc->sc_cfm); + err |= __get_user(um, &sc->sc_um); /* user mask */ + err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc); + err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat); + err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); + err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); + err |= __get_user(scr->pt.pr, &sc->sc_pr); /* predicates */ + err |= __get_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __get_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ + err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8); /* r1 */ + err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8); /* r8-r11 */ + err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8); /* r12-r13 */ + err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */ + + scr->pt.cr_ifs = cfm | (1UL << 63); + + /* establish new instruction pointer: */ + scr->pt.cr_iip = ip & ~0x3UL; + ia64_psr(&scr->pt)->ri = ip & 0x3; + scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM); + + scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat); + + if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) { + /* Restore most scratch-state only when not in syscall. */ + err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ + err |= __get_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ + err |= __get_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ + err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */ + err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8); /* r2-r3 */ + err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8); /* r16-r31 */ + } + + if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) { + struct ia64_psr *psr = ia64_psr(&scr->pt); + + __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); + psr->mfh = 0; /* drop signal handler's fph contents... */ + if (psr->dfh) + ia64_drop_fpu(current); + else { + /* We already own the local fph, otherwise psr->dfh wouldn't be 0. */ + __ia64_load_fpu(current->thread.fph); + ia64_set_local_fpu_owner(current); + } + } + return err; +} + +int +copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from) +{ + if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t))) + return -EFAULT; + if (from->si_code < 0) { + if (__copy_to_user(to, from, sizeof(siginfo_t))) + return -EFAULT; + return 0; + } else { + int err; + + /* + * If you change siginfo_t structure, please be sure this code is fixed + * accordingly. It should never copy any pad contained in the structure + * to avoid security leaks, but must copy the generic 3 ints plus the + * relevant union member. + */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + err |= __put_user(from->si_flags, &to->si_flags); + err |= __put_user(from->si_isr, &to->si_isr); + case __SI_POLL >> 16: + err |= __put_user(from->si_addr, &to->si_addr); + err |= __put_user(from->si_imm, &to->si_imm); + break; + case __SI_TIMER >> 16: + err |= __put_user(from->si_tid, &to->si_tid); + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(from->si_ptr, &to->si_ptr); + break; + case __SI_RT >> 16: /* Not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_ptr, &to->si_ptr); + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + default: + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_pid, &to->si_pid); + break; + } + return err; + } +} + +long +ia64_rt_sigreturn (struct sigscratch *scr) +{ + extern char ia64_strace_leave_kernel, ia64_leave_kernel; + struct sigcontext __user *sc; + struct siginfo si; + sigset_t set; + long retval; + + sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc; + + /* + * When we return to the previously executing context, r8 and r10 have already + * been setup the way we want them. Indeed, if the signal wasn't delivered while + * in a system call, we must not touch r8 or r10 as otherwise user-level state + * could be corrupted. + */ + retval = (long) &ia64_leave_kernel; + if (test_thread_flag(TIF_SYSCALL_TRACE)) + /* + * strace expects to be notified after sigreturn returns even though the + * context to which we return may not be in the middle of a syscall. + * Thus, the return-value that strace displays for sigreturn is + * meaningless. + */ + retval = (long) &ia64_strace_leave_kernel; + + if (!access_ok(VERIFY_READ, sc, sizeof(*sc))) + goto give_sigsegv; + + if (GET_SIGSET(&set, &sc->sc_mask)) + goto give_sigsegv; + + sigdelsetmask(&set, ~_BLOCKABLE); + + spin_lock_irq(¤t->sighand->siglock); + { + current->blocked = set; + recalc_sigpending(); + } + spin_unlock_irq(¤t->sighand->siglock); + + if (restore_sigcontext(sc, scr)) + goto give_sigsegv; + +#if DEBUG_SIG + printk("SIG return (%s:%d): sp=%lx ip=%lx\n", + current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip); +#endif + /* + * It is more difficult to avoid calling this function than to + * call it and ignore errors. + */ + do_sigaltstack(&sc->sc_stack, NULL, scr->pt.r12); + return retval; + + give_sigsegv: + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = current->pid; + si.si_uid = current->uid; + si.si_addr = sc; + force_sig_info(SIGSEGV, &si, current); + return retval; +} + +/* + * This does just the minimum required setup of sigcontext. + * Specifically, it only installs data that is either not knowable at + * the user-level or that gets modified before execution in the + * trampoline starts. Everything else is done at the user-level. + */ +static long +setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr) +{ + unsigned long flags = 0, ifs, cfm, nat; + long err; + + ifs = scr->pt.cr_ifs; + + if (on_sig_stack((unsigned long) sc)) + flags |= IA64_SC_FLAG_ONSTACK; + if ((ifs & (1UL << 63)) == 0) + /* if cr_ifs doesn't have the valid bit set, we got here through a syscall */ + flags |= IA64_SC_FLAG_IN_SYSCALL; + cfm = ifs & ((1UL << 38) - 1); + ia64_flush_fph(current); + if ((current->thread.flags & IA64_THREAD_FPH_VALID)) { + flags |= IA64_SC_FLAG_FPH_VALID; + __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16); + } + + nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat); + + err = __put_user(flags, &sc->sc_flags); + err |= __put_user(nat, &sc->sc_nat); + err |= PUT_SIGSET(mask, &sc->sc_mask); + err |= __put_user(cfm, &sc->sc_cfm); + err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um); + err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc); + err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat); /* ar.unat */ + err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */ + err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); + err |= __put_user(scr->pt.pr, &sc->sc_pr); /* predicates */ + err |= __put_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __put_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ + err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8); /* r1 */ + err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8); /* r8-r11 */ + err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8); /* r12-r13 */ + err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8); /* r15 */ + err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip); + + if (flags & IA64_SC_FLAG_IN_SYSCALL) { + /* Clear scratch registers if the signal interrupted a system call. */ + err |= __put_user(0, &sc->sc_ar_ccv); /* ar.ccv */ + err |= __put_user(0, &sc->sc_br[7]); /* b7 */ + err |= __put_user(0, &sc->sc_gr[14]); /* r14 */ + err |= __clear_user(&sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */ + err |= __clear_user(&sc->sc_gr[2], 2*8); /* r2-r3 */ + err |= __clear_user(&sc->sc_gr[16], 16*8); /* r16-r31 */ + } else { + /* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */ + err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ + err |= __put_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ + err |= __put_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ + err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */ + err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8); /* r2-r3 */ + err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8); /* r16-r31 */ + } + return err; +} + +/* + * Check whether the register-backing store is already on the signal stack. + */ +static inline int +rbs_on_sig_stack (unsigned long bsp) +{ + return (bsp - current->sas_ss_sp < current->sas_ss_size); +} + +static long +force_sigsegv_info (int sig, void __user *addr) +{ + unsigned long flags; + struct siginfo si; + + if (sig == SIGSEGV) { + /* + * Acquiring siglock around the sa_handler-update is almost + * certainly overkill, but this isn't a + * performance-critical path and I'd rather play it safe + * here than having to debug a nasty race if and when + * something changes in kernel/signal.c that would make it + * no longer safe to modify sa_handler without holding the + * lock. + */ + spin_lock_irqsave(¤t->sighand->siglock, flags); + current->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = current->pid; + si.si_uid = current->uid; + si.si_addr = addr; + force_sig_info(SIGSEGV, &si, current); + return 0; +} + +static long +setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, + struct sigscratch *scr) +{ + extern char __kernel_sigtramp[]; + unsigned long tramp_addr, new_rbs = 0; + struct sigframe __user *frame; + long err; + + frame = (void __user *) scr->pt.r12; + tramp_addr = (unsigned long) __kernel_sigtramp; + if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) { + frame = (void __user *) ((current->sas_ss_sp + current->sas_ss_size) + & ~(STACK_ALIGN - 1)); + /* + * We need to check for the register stack being on the signal stack + * separately, because it's switched separately (memory stack is switched + * in the kernel, register stack is switched in the signal trampoline). + */ + if (!rbs_on_sig_stack(scr->pt.ar_bspstore)) + new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1); + } + frame = (void __user *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return force_sigsegv_info(sig, frame); + + err = __put_user(sig, &frame->arg0); + err |= __put_user(&frame->info, &frame->arg1); + err |= __put_user(&frame->sc, &frame->arg2); + err |= __put_user(new_rbs, &frame->sc.sc_rbs_base); + err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */ + err |= __put_user(ka->sa.sa_handler, &frame->handler); + + err |= copy_siginfo_to_user(&frame->info, info); + + err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp); + err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size); + err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags); + err |= setup_sigcontext(&frame->sc, set, scr); + + if (unlikely(err)) + return force_sigsegv_info(sig, frame); + + scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */ + scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */ + scr->pt.cr_iip = tramp_addr; + ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */ + ia64_psr(&scr->pt)->be = 0; /* force little-endian byte-order */ + /* + * Force the interruption function mask to zero. This has no effect when a + * system-call got interrupted by a signal (since, in that case, scr->pt_cr_ifs is + * ignored), but it has the desirable effect of making it possible to deliver a + * signal with an incomplete register frame (which happens when a mandatory RSE + * load faults). Furthermore, it has no negative effect on the getting the user's + * dirty partition preserved, because that's governed by scr->pt.loadrs. + */ + scr->pt.cr_ifs = (1UL << 63); + + /* + * Note: this affects only the NaT bits of the scratch regs (the ones saved in + * pt_regs), which is exactly what we want. + */ + scr->scratch_unat = 0; /* ensure NaT bits of r12 is clear */ + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n", + current->comm, current->pid, sig, scr->pt.r12, frame->sc.sc_ip, frame->handler); +#endif + return 1; +} + +static long +handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, + struct sigscratch *scr) +{ + if (IS_IA32_PROCESS(&scr->pt)) { + /* send signal to IA-32 process */ + if (!ia32_setup_frame1(sig, ka, info, oldset, &scr->pt)) + return 0; + } else + /* send signal to IA-64 process */ + if (!setup_frame(sig, ka, info, oldset, scr)) + return 0; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sighand->siglock); + { + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); + sigaddset(¤t->blocked, sig); + recalc_sigpending(); + } + spin_unlock_irq(¤t->sighand->siglock); + } + return 1; +} + +/* + * Note that `init' is a special process: it doesn't get signals it doesn't want to + * handle. Thus you cannot kill init even with a SIGKILL even by mistake. + */ +long +ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) +{ + struct k_sigaction ka; + siginfo_t info; + long restart = in_syscall; + long errno = scr->pt.r8; +# define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c)) + + /* + * In the ia64_leave_kernel code path, we want the common case to go fast, which + * is why we may in certain cases get here from kernel mode. Just return without + * doing anything if so. + */ + if (!user_mode(&scr->pt)) + return 0; + + if (!oldset) + oldset = ¤t->blocked; + + /* + * This only loops in the rare cases of handle_signal() failing, in which case we + * need to push through a forced SIGSEGV. + */ + while (1) { + int signr = get_signal_to_deliver(&info, &ka, &scr->pt, NULL); + + /* + * get_signal_to_deliver() may have run a debugger (via notify_parent()) + * and the debugger may have modified the state (e.g., to arrange for an + * inferior call), thus it's important to check for restarting _after_ + * get_signal_to_deliver(). + */ + if (IS_IA32_PROCESS(&scr->pt)) { + if (in_syscall) { + if (errno >= 0) + restart = 0; + else + errno = -errno; + } + } else if ((long) scr->pt.r10 != -1) + /* + * A system calls has to be restarted only if one of the error codes + * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10 + * isn't -1 then r8 doesn't hold an error code and we don't need to + * restart the syscall, so we can clear the "restart" flag here. + */ + restart = 0; + + if (signr <= 0) + break; + + if (unlikely(restart)) { + switch (errno) { + case ERESTART_RESTARTBLOCK: + case ERESTARTNOHAND: + scr->pt.r8 = ERR_CODE(EINTR); + /* note: scr->pt.r10 is already -1 */ + break; + + case ERESTARTSYS: + if ((ka.sa.sa_flags & SA_RESTART) == 0) { + scr->pt.r8 = ERR_CODE(EINTR); + /* note: scr->pt.r10 is already -1 */ + break; + } + case ERESTARTNOINTR: + if (IS_IA32_PROCESS(&scr->pt)) { + scr->pt.r8 = scr->pt.r1; + scr->pt.cr_iip -= 2; + } else + ia64_decrement_ip(&scr->pt); + restart = 0; /* don't restart twice if handle_signal() fails... */ + } + } + + /* + * Whee! Actually deliver the signal. If the delivery failed, we need to + * continue to iterate in this loop so we can deliver the SIGSEGV... + */ + if (handle_signal(signr, &ka, &info, oldset, scr)) + return 1; + } + + /* Did we come from a system call? */ + if (restart) { + /* Restart the system call - no handlers present */ + if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR + || errno == ERESTART_RESTARTBLOCK) + { + if (IS_IA32_PROCESS(&scr->pt)) { + scr->pt.r8 = scr->pt.r1; + scr->pt.cr_iip -= 2; + if (errno == ERESTART_RESTARTBLOCK) + scr->pt.r8 = 0; /* x86 version of __NR_restart_syscall */ + } else { + /* + * Note: the syscall number is in r15 which is saved in + * pt_regs so all we need to do here is adjust ip so that + * the "break" instruction gets re-executed. + */ + ia64_decrement_ip(&scr->pt); + if (errno == ERESTART_RESTARTBLOCK) + scr->pt.r15 = __NR_restart_syscall; + } + } + } + return 0; +} + +/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it + * could not be delivered. It is important that the target process is not + * allowed to do any more work in user space. Possible cases for the target + * process: + * + * - It is sleeping and will wake up soon. Store the data in the current task, + * the signal will be sent when the current task returns from the next + * interrupt. + * + * - It is running in user context. Store the data in the current task, the + * signal will be sent when the current task returns from the next interrupt. + * + * - It is running in kernel context on this or another cpu and will return to + * user context. Store the data in the target task, the signal will be sent + * to itself when the target task returns to user space. + * + * - It is running in kernel context on this cpu and will sleep before + * returning to user context. Because this is also the current task, the + * signal will not get delivered and the task could sleep indefinitely. + * Store the data in the idle task for this cpu, the signal will be sent + * after the idle task processes its next interrupt. + * + * To cover all cases, store the data in the target task, the current task and + * the idle task on this cpu. Whatever happens, the signal will be delivered + * to the target task before it can do any useful user space work. Multiple + * deliveries have no unwanted side effects. + * + * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts + * disabled. It must not take any locks nor use kernel structures or services + * that require locks. + */ + +/* To ensure that we get the right pid, check its start time. To avoid extra + * include files in thread_info.h, convert the task start_time to unsigned long, + * giving us a cycle time of > 580 years. + */ +static inline unsigned long +start_time_ul(const struct task_struct *t) +{ + return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec; +} + +void +set_sigdelayed(pid_t pid, int signo, int code, void __user *addr) +{ + struct task_struct *t; + unsigned long start_time = 0; + int i; + + for (i = 1; i <= 3; ++i) { + switch (i) { + case 1: + t = find_task_by_pid(pid); + if (t) + start_time = start_time_ul(t); + break; + case 2: + t = current; + break; + default: + t = idle_task(smp_processor_id()); + break; + } + + if (!t) + return; + t->thread_info->sigdelayed.signo = signo; + t->thread_info->sigdelayed.code = code; + t->thread_info->sigdelayed.addr = addr; + t->thread_info->sigdelayed.start_time = start_time; + t->thread_info->sigdelayed.pid = pid; + wmb(); + set_tsk_thread_flag(t, TIF_SIGDELAYED); + } +} + +/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that + * was detected in MCA/INIT/NMI/PMI context where it could not be delivered. + */ + +void +do_sigdelayed(void) +{ + struct siginfo siginfo; + pid_t pid; + struct task_struct *t; + + clear_thread_flag(TIF_SIGDELAYED); + memset(&siginfo, 0, sizeof(siginfo)); + siginfo.si_signo = current_thread_info()->sigdelayed.signo; + siginfo.si_code = current_thread_info()->sigdelayed.code; + siginfo.si_addr = current_thread_info()->sigdelayed.addr; + pid = current_thread_info()->sigdelayed.pid; + t = find_task_by_pid(pid); + if (!t) + return; + if (current_thread_info()->sigdelayed.start_time != start_time_ul(t)) + return; + force_sig_info(siginfo.si_signo, &siginfo, t); +} diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c new file mode 100644 index 000000000000..953095e2ce15 --- /dev/null +++ b/arch/ia64/kernel/smp.c @@ -0,0 +1,376 @@ +/* + * SMP Support + * + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Lots of stuff stolen from arch/alpha/kernel/smp.c + * + * 01/05/16 Rohit Seth <rohit.seth@intel.com> IA64-SMP functions. Reorganized + * the existing code (on the lines of x86 port). + * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy + * calibration on each CPU. + * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id + * 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor + * & cpu_online_map now gets done here (instead of setup.c) + * 99/10/05 davidm Update to bring it in sync with new command-line processing + * scheme. + * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and + * smp_call_function_single to resend IPI on timeouts + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/kernel_stat.h> +#include <linux/mm.h> +#include <linux/cache.h> +#include <linux/delay.h> +#include <linux/efi.h> +#include <linux/bitops.h> + +#include <asm/atomic.h> +#include <asm/current.h> +#include <asm/delay.h> +#include <asm/machvec.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/tlbflush.h> +#include <asm/unistd.h> +#include <asm/mca.h> + +/* + * Structure and data for smp_call_function(). This is designed to minimise static memory + * requirements. It also looks cleaner. + */ +static __cacheline_aligned DEFINE_SPINLOCK(call_lock); + +struct call_data_struct { + void (*func) (void *info); + void *info; + long wait; + atomic_t started; + atomic_t finished; +}; + +static volatile struct call_data_struct *call_data; + +#define IPI_CALL_FUNC 0 +#define IPI_CPU_STOP 1 + +/* This needs to be cacheline aligned because it is written to by *other* CPUs. */ +static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned; + +extern void cpu_halt (void); + +void +lock_ipi_calllock(void) +{ + spin_lock_irq(&call_lock); +} + +void +unlock_ipi_calllock(void) +{ + spin_unlock_irq(&call_lock); +} + +static void +stop_this_cpu (void) +{ + /* + * Remove this CPU: + */ + cpu_clear(smp_processor_id(), cpu_online_map); + max_xtp(); + local_irq_disable(); + cpu_halt(); +} + +void +cpu_die(void) +{ + max_xtp(); + local_irq_disable(); + cpu_halt(); + /* Should never be here */ + BUG(); + for (;;); +} + +irqreturn_t +handle_IPI (int irq, void *dev_id, struct pt_regs *regs) +{ + int this_cpu = get_cpu(); + unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation); + unsigned long ops; + + mb(); /* Order interrupt and bit testing. */ + while ((ops = xchg(pending_ipis, 0)) != 0) { + mb(); /* Order bit clearing and data access. */ + do { + unsigned long which; + + which = ffz(~ops); + ops &= ~(1 << which); + + switch (which) { + case IPI_CALL_FUNC: + { + struct call_data_struct *data; + void (*func)(void *info); + void *info; + int wait; + + /* release the 'pointer lock' */ + data = (struct call_data_struct *) call_data; + func = data->func; + info = data->info; + wait = data->wait; + + mb(); + atomic_inc(&data->started); + /* + * At this point the structure may be gone unless + * wait is true. + */ + (*func)(info); + + /* Notify the sending CPU that the task is done. */ + mb(); + if (wait) + atomic_inc(&data->finished); + } + break; + + case IPI_CPU_STOP: + stop_this_cpu(); + break; + + default: + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); + break; + } + } while (ops); + mb(); /* Order data access and bit testing. */ + } + put_cpu(); + return IRQ_HANDLED; +} + +/* + * Called with preeemption disabled. + */ +static inline void +send_IPI_single (int dest_cpu, int op) +{ + set_bit(op, &per_cpu(ipi_operation, dest_cpu)); + platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0); +} + +/* + * Called with preeemption disabled. + */ +static inline void +send_IPI_allbutself (int op) +{ + unsigned int i; + + for (i = 0; i < NR_CPUS; i++) { + if (cpu_online(i) && i != smp_processor_id()) + send_IPI_single(i, op); + } +} + +/* + * Called with preeemption disabled. + */ +static inline void +send_IPI_all (int op) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpu_online(i)) + send_IPI_single(i, op); +} + +/* + * Called with preeemption disabled. + */ +static inline void +send_IPI_self (int op) +{ + send_IPI_single(smp_processor_id(), op); +} + +/* + * Called with preeemption disabled. + */ +void +smp_send_reschedule (int cpu) +{ + platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); +} + +void +smp_flush_tlb_all (void) +{ + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1); +} + +void +smp_flush_tlb_mm (struct mm_struct *mm) +{ + /* this happens for the common case of a single-threaded fork(): */ + if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1)) + { + local_finish_flush_tlb_mm(mm); + return; + } + + /* + * We could optimize this further by using mm->cpu_vm_mask to track which CPUs + * have been running in the address space. It's not clear that this is worth the + * trouble though: to avoid races, we have to raise the IPI on the target CPU + * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is + * rather trivial. + */ + on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1); +} + +/* + * Run a function on another CPU + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <nonatomic> Currently unused. + * <wait> If true, wait until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. + * + * Does not return until the remote CPU is nearly ready to execute <func> + * or is or has executed. + */ + +int +smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic, + int wait) +{ + struct call_data_struct data; + int cpus = 1; + int me = get_cpu(); /* prevent preemption and reschedule on another processor */ + + if (cpuid == me) { + printk("%s: trying to call self\n", __FUNCTION__); + put_cpu(); + return -EBUSY; + } + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock_bh(&call_lock); + + call_data = &data; + mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */ + send_IPI_single(cpuid, IPI_CALL_FUNC); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + cpu_relax(); + call_data = NULL; + + spin_unlock_bh(&call_lock); + put_cpu(); + return 0; +} +EXPORT_SYMBOL(smp_call_function_single); + +/* + * this function sends a 'generic call function' IPI to all other CPUs + * in the system. + */ + +/* + * [SUMMARY] Run a function on all other CPUs. + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <nonatomic> currently unused. + * <wait> If true, wait (atomically) until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. + * + * Does not return until remote CPUs are nearly ready to execute <func> or are or have + * executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int +smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) +{ + struct call_data_struct data; + int cpus = num_online_cpus()-1; + + if (!cpus) + return 0; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock(&call_lock); + + call_data = &data; + mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */ + send_IPI_allbutself(IPI_CALL_FUNC); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + cpu_relax(); + call_data = NULL; + + spin_unlock(&call_lock); + return 0; +} +EXPORT_SYMBOL(smp_call_function); + +/* + * this function calls the 'stop' function on all other CPUs in the system. + */ +void +smp_send_stop (void) +{ + send_IPI_allbutself(IPI_CPU_STOP); +} + +int __init +setup_profiling_timer (unsigned int multiplier) +{ + return -EINVAL; +} diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c new file mode 100644 index 000000000000..5318f0cbfc26 --- /dev/null +++ b/arch/ia64/kernel/smpboot.c @@ -0,0 +1,692 @@ +/* + * SMP boot-related support + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here. + * 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code. + * 02/07/31 David Mosberger <davidm@hpl.hp.com> Switch over to hotplug-CPU boot-sequence. + * smp_boot_cpus()/smp_commence() is replaced by + * smp_prepare_cpus()/__cpu_up()/smp_cpus_done(). + */ +#include <linux/config.h> + +#include <linux/module.h> +#include <linux/acpi.h> +#include <linux/bootmem.h> +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/spinlock.h> +#include <linux/efi.h> +#include <linux/percpu.h> +#include <linux/bitops.h> + +#include <asm/atomic.h> +#include <asm/cache.h> +#include <asm/current.h> +#include <asm/delay.h> +#include <asm/ia32.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/machvec.h> +#include <asm/mca.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/tlbflush.h> +#include <asm/unistd.h> + +#define SMP_DEBUG 0 + +#if SMP_DEBUG +#define Dprintk(x...) printk(x) +#else +#define Dprintk(x...) +#endif + + +/* + * ITC synchronization related stuff: + */ +#define MASTER 0 +#define SLAVE (SMP_CACHE_BYTES/8) + +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ + +static DEFINE_SPINLOCK(itc_sync_lock); +static volatile unsigned long go[SLAVE + 1]; + +#define DEBUG_ITC_SYNC 0 + +extern void __devinit calibrate_delay (void); +extern void start_ap (void); +extern unsigned long ia64_iobase; + +task_t *task_for_booting_cpu; + +/* + * State for each CPU + */ +DEFINE_PER_CPU(int, cpu_state); + +/* Bitmasks of currently online, and possible CPUs */ +cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); +cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + +/* which logical CPU number maps to which CPU (physical APIC ID) */ +volatile int ia64_cpu_to_sapicid[NR_CPUS]; +EXPORT_SYMBOL(ia64_cpu_to_sapicid); + +static volatile cpumask_t cpu_callin_map; + +struct smp_boot_data smp_boot_data __initdata; + +unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */ + +char __initdata no_int_routing; + +unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ + +static int __init +nointroute (char *str) +{ + no_int_routing = 1; + printk ("no_int_routing on\n"); + return 1; +} + +__setup("nointroute", nointroute); + +void +sync_master (void *arg) +{ + unsigned long flags, i; + + go[MASTER] = 0; + + local_irq_save(flags); + { + for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { + while (!go[MASTER]); + go[MASTER] = 0; + go[SLAVE] = ia64_get_itc(); + } + } + local_irq_restore(flags); +} + +/* + * Return the number of cycles by which our itc differs from the itc on the master + * (time-keeper) CPU. A positive number indicates our itc is ahead of the master, + * negative that it is behind. + */ +static inline long +get_delta (long *rt, long *master) +{ + unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; + unsigned long tcenter, t0, t1, tm; + long i; + + for (i = 0; i < NUM_ITERS; ++i) { + t0 = ia64_get_itc(); + go[MASTER] = 1; + while (!(tm = go[SLAVE])); + go[SLAVE] = 0; + t1 = ia64_get_itc(); + + if (t1 - t0 < best_t1 - best_t0) + best_t0 = t0, best_t1 = t1, best_tm = tm; + } + + *rt = best_t1 - best_t0; + *master = best_tm - best_t0; + + /* average best_t0 and best_t1 without overflow: */ + tcenter = (best_t0/2 + best_t1/2); + if (best_t0 % 2 + best_t1 % 2 == 2) + ++tcenter; + return tcenter - best_tm; +} + +/* + * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU + * (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of + * unaccounted-for errors (such as getting a machine check in the middle of a calibration + * step). The basic idea is for the slave to ask the master what itc value it has and to + * read its own itc before and after the master responds. Each iteration gives us three + * timestamps: + * + * slave master + * + * t0 ---\ + * ---\ + * ---> + * tm + * /--- + * /--- + * t1 <--- + * + * + * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0 + * and t1. If we achieve this, the clocks are synchronized provided the interconnect + * between the slave and the master is symmetric. Even if the interconnect were + * asymmetric, we would still know that the synchronization error is smaller than the + * roundtrip latency (t0 - t1). + * + * When the interconnect is quiet and symmetric, this lets us synchronize the itc to + * within one or two cycles. However, we can only *guarantee* that the synchronization is + * accurate to within a round-trip time, which is typically in the range of several + * hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually + * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better + * than half a micro second or so. + */ +void +ia64_sync_itc (unsigned int master) +{ + long i, delta, adj, adjust_latency = 0, done = 0; + unsigned long flags, rt, master_time_stamp, bound; +#if DEBUG_ITC_SYNC + struct { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of itc adjustment latency */ + } t[NUM_ROUNDS]; +#endif + + /* + * Make sure local timer ticks are disabled while we sync. If + * they were enabled, we'd have to worry about nasty issues + * like setting the ITC ahead of (or a long time before) the + * next scheduled tick. + */ + BUG_ON((ia64_get_itv() & (1 << 16)) == 0); + + go[MASTER] = 1; + + if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) { + printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master); + return; + } + + while (go[MASTER]); /* wait for master to be ready */ + + spin_lock_irqsave(&itc_sync_lock, flags); + { + for (i = 0; i < NUM_ROUNDS; ++i) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) { + done = 1; /* let's lock on to this... */ + bound = rt; + } + + if (!done) { + if (i > 0) { + adjust_latency += -delta; + adj = -delta + adjust_latency/4; + } else + adj = -delta; + + ia64_set_itc(ia64_get_itc() + adj); + } +#if DEBUG_ITC_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/4; +#endif + } + } + spin_unlock_irqrestore(&itc_sync_lock, flags); + +#if DEBUG_ITC_SYNC + for (i = 0; i < NUM_ROUNDS; ++i) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); +#endif + + printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, " + "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt); +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... + */ +static inline void __devinit +smp_setup_percpu_timer (void) +{ +} + +static void __devinit +smp_callin (void) +{ + int cpuid, phys_id; + extern void ia64_init_itm(void); + +#ifdef CONFIG_PERFMON + extern void pfm_init_percpu(void); +#endif + + cpuid = smp_processor_id(); + phys_id = hard_smp_processor_id(); + + if (cpu_online(cpuid)) { + printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", + phys_id, cpuid); + BUG(); + } + + lock_ipi_calllock(); + cpu_set(cpuid, cpu_online_map); + unlock_ipi_calllock(); + + smp_setup_percpu_timer(); + + ia64_mca_cmc_vector_setup(); /* Setup vector on AP */ + +#ifdef CONFIG_PERFMON + pfm_init_percpu(); +#endif + + local_irq_enable(); + + if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { + /* + * Synchronize the ITC with the BP. Need to do this after irqs are + * enabled because ia64_sync_itc() calls smp_call_function_single(), which + * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls + * local_bh_enable(), which bugs out if irqs are not enabled... + */ + Dprintk("Going to syncup ITC with BP.\n"); + ia64_sync_itc(0); + } + + /* + * Get our bogomips. + */ + ia64_init_itm(); + calibrate_delay(); + local_cpu_data->loops_per_jiffy = loops_per_jiffy; + +#ifdef CONFIG_IA32_SUPPORT + ia32_gdt_init(); +#endif + + /* + * Allow the master to continue. + */ + cpu_set(cpuid, cpu_callin_map); + Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid); +} + + +/* + * Activate a secondary processor. head.S calls this. + */ +int __devinit +start_secondary (void *unused) +{ + /* Early console may use I/O ports */ + ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); + + Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); + efi_map_pal_code(); + cpu_init(); + smp_callin(); + + cpu_idle(); + return 0; +} + +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) +{ + return NULL; +} + +struct create_idle { + struct task_struct *idle; + struct completion done; + int cpu; +}; + +void +do_fork_idle(void *_c_idle) +{ + struct create_idle *c_idle = _c_idle; + + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); +} + +static int __devinit +do_boot_cpu (int sapicid, int cpu) +{ + int timeout; + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER(c_idle.done), + }; + DECLARE_WORK(work, do_fork_idle, &c_idle); + /* + * We can't use kernel_thread since we must avoid to reschedule the child. + */ + if (!keventd_up() || current_is_keventd()) + work.func(work.data); + else { + schedule_work(&work); + wait_for_completion(&c_idle.done); + } + + if (IS_ERR(c_idle.idle)) + panic("failed fork for CPU %d", cpu); + task_for_booting_cpu = c_idle.idle; + + Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); + + platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); + + /* + * Wait 10s total for the AP to start + */ + Dprintk("Waiting on callin_map ..."); + for (timeout = 0; timeout < 100000; timeout++) { + if (cpu_isset(cpu, cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + Dprintk("\n"); + + if (!cpu_isset(cpu, cpu_callin_map)) { + printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid); + ia64_cpu_to_sapicid[cpu] = -1; + cpu_clear(cpu, cpu_online_map); /* was set in smp_callin() */ + return -EINVAL; + } + return 0; +} + +static int __init +decay (char *str) +{ + int ticks; + get_option (&str, &ticks); + return 1; +} + +__setup("decay=", decay); + +/* + * Initialize the logical CPU number to SAPICID mapping + */ +void __init +smp_build_cpu_map (void) +{ + int sapicid, cpu, i; + int boot_cpu_id = hard_smp_processor_id(); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + ia64_cpu_to_sapicid[cpu] = -1; +#ifdef CONFIG_HOTPLUG_CPU + cpu_set(cpu, cpu_possible_map); +#endif + } + + ia64_cpu_to_sapicid[0] = boot_cpu_id; + cpus_clear(cpu_present_map); + cpu_set(0, cpu_present_map); + cpu_set(0, cpu_possible_map); + for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) { + sapicid = smp_boot_data.cpu_phys_id[i]; + if (sapicid == boot_cpu_id) + continue; + cpu_set(cpu, cpu_present_map); + cpu_set(cpu, cpu_possible_map); + ia64_cpu_to_sapicid[cpu] = sapicid; + cpu++; + } +} + +#ifdef CONFIG_NUMA + +/* on which node is each logical CPU (one cacheline even for 64 CPUs) */ +u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_to_node_map); +/* which logical CPUs are on which nodes */ +cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; + +/* + * Build cpu to node mapping and initialize the per node cpu masks. + */ +void __init +build_cpu_to_node_map (void) +{ + int cpu, i, node; + + for(node=0; node<MAX_NUMNODES; node++) + cpus_clear(node_to_cpu_mask[node]); + for(cpu = 0; cpu < NR_CPUS; ++cpu) { + /* + * All Itanium NUMA platforms I know use ACPI, so maybe we + * can drop this ifdef completely. [EF] + */ +#ifdef CONFIG_ACPI_NUMA + node = -1; + for (i = 0; i < NR_CPUS; ++i) + if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { + node = node_cpuid[i].nid; + break; + } +#else +# error Fixme: Dunno how to build CPU-to-node map. +#endif + cpu_to_node_map[cpu] = (node >= 0) ? node : 0; + if (node >= 0) + cpu_set(cpu, node_to_cpu_mask[node]); + } +} + +#endif /* CONFIG_NUMA */ + +/* + * Cycle through the APs sending Wakeup IPIs to boot each. + */ +void __init +smp_prepare_cpus (unsigned int max_cpus) +{ + int boot_cpu_id = hard_smp_processor_id(); + + /* + * Initialize the per-CPU profiling counter/multiplier + */ + + smp_setup_percpu_timer(); + + /* + * We have the boot CPU online for sure. + */ + cpu_set(0, cpu_online_map); + cpu_set(0, cpu_callin_map); + + local_cpu_data->loops_per_jiffy = loops_per_jiffy; + ia64_cpu_to_sapicid[0] = boot_cpu_id; + + printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id); + + current_thread_info()->cpu = 0; + + /* + * If SMP should be disabled, then really disable it! + */ + if (!max_cpus) { + printk(KERN_INFO "SMP mode deactivated.\n"); + cpus_clear(cpu_online_map); + cpus_clear(cpu_present_map); + cpus_clear(cpu_possible_map); + cpu_set(0, cpu_online_map); + cpu_set(0, cpu_present_map); + cpu_set(0, cpu_possible_map); + return; + } +} + +void __devinit smp_prepare_boot_cpu(void) +{ + cpu_set(smp_processor_id(), cpu_online_map); + cpu_set(smp_processor_id(), cpu_callin_map); +} + +#ifdef CONFIG_HOTPLUG_CPU +extern void fixup_irqs(void); +/* must be called with cpucontrol mutex held */ +static int __devinit cpu_enable(unsigned int cpu) +{ + per_cpu(cpu_state,cpu) = CPU_UP_PREPARE; + wmb(); + + while (!cpu_online(cpu)) + cpu_relax(); + return 0; +} + +int __cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* + * dont permit boot processor for now + */ + if (cpu == 0) + return -EBUSY; + + fixup_irqs(); + local_flush_tlb_all(); + printk ("Disabled cpu %u\n", smp_processor_id()); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + unsigned int i; + + for (i = 0; i < 100; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + { + /* + * TBD: Enable this when physical removal + * or when we put the processor is put in + * SAL_BOOT_RENDEZ mode + * cpu_clear(cpu, cpu_callin_map); + */ + return; + } + msleep(100); + } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} +#else /* !CONFIG_HOTPLUG_CPU */ +static int __devinit cpu_enable(unsigned int cpu) +{ + return 0; +} + +int __cpu_disable(void) +{ + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + /* We said "no" in __cpu_disable */ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +void +smp_cpus_done (unsigned int dummy) +{ + int cpu; + unsigned long bogosum = 0; + + /* + * Allow the user to impress friends. + */ + + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (cpu_online(cpu)) + bogosum += cpu_data(cpu)->loops_per_jiffy; + + printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); +} + +int __devinit +__cpu_up (unsigned int cpu) +{ + int ret; + int sapicid; + + sapicid = ia64_cpu_to_sapicid[cpu]; + if (sapicid == -1) + return -EINVAL; + + /* + * Already booted.. just enable and get outa idle lool + */ + if (cpu_isset(cpu, cpu_callin_map)) + { + cpu_enable(cpu); + local_irq_enable(); + while (!cpu_isset(cpu, cpu_online_map)) + mb(); + return 0; + } + /* Processor goes to start_secondary(), sets online flag */ + ret = do_boot_cpu(sapicid, cpu); + if (ret < 0) + return ret; + + return 0; +} + +/* + * Assume that CPU's have been discovered by some platform-dependent interface. For + * SoftSDV/Lion, that would be ACPI. + * + * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). + */ +void __init +init_smp_config(void) +{ + struct fptr { + unsigned long fp; + unsigned long gp; + } *ap_startup; + long sal_ret; + + /* Tell SAL where to drop the AP's. */ + ap_startup = (struct fptr *) start_ap; + sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, + ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0); + if (sal_ret < 0) + printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n", + ia64_sal_strerror(sal_ret)); +} + diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c new file mode 100644 index 000000000000..3ac216e1c8bb --- /dev/null +++ b/arch/ia64/kernel/sys_ia64.c @@ -0,0 +1,298 @@ +/* + * This file contains various system calls that have different calling + * conventions on different platforms. + * + * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/sched.h> +#include <linux/shm.h> +#include <linux/file.h> /* doh, must come after sched.h... */ +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/syscalls.h> +#include <linux/highuid.h> +#include <linux/hugetlb.h> + +#include <asm/shmparam.h> +#include <asm/uaccess.h> + +unsigned long +arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + long map_shared = (flags & MAP_SHARED); + unsigned long start_addr, align_mask = PAGE_SIZE - 1; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + if (len > RGN_MAP_LIMIT) + return -ENOMEM; + +#ifdef CONFIG_HUGETLB_PAGE + if (REGION_NUMBER(addr) == REGION_HPAGE) + addr = 0; +#endif + if (!addr) + addr = mm->free_area_cache; + + if (map_shared && (TASK_SIZE > 0xfffffffful)) + /* + * For 64-bit tasks, align shared segments to 1MB to avoid potential + * performance penalty due to virtual aliasing (see ASDM). For 32-bit + * tasks, we prefer to avoid exhausting the address space too quickly by + * limiting alignment to a single page. + */ + align_mask = SHMLBA - 1; + + full_search: + start_addr = addr = (addr + align_mask) & ~align_mask; + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { + if (start_addr != TASK_UNMAPPED_BASE) { + /* Start a new search --- just in case we missed some holes. */ + addr = TASK_UNMAPPED_BASE; + goto full_search; + } + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + /* Remember the address where we stopped this search: */ + mm->free_area_cache = addr + len; + return addr; + } + addr = (vma->vm_end + align_mask) & ~align_mask; + } +} + +asmlinkage long +ia64_getpriority (int which, int who) +{ + long prio; + + prio = sys_getpriority(which, who); + if (prio >= 0) { + force_successful_syscall_return(); + prio = 20 - prio; + } + return prio; +} + +/* XXX obsolete, but leave it here until the old libc is gone... */ +asmlinkage unsigned long +sys_getpagesize (void) +{ + return PAGE_SIZE; +} + +asmlinkage unsigned long +ia64_shmat (int shmid, void __user *shmaddr, int shmflg) +{ + unsigned long raddr; + int retval; + + retval = do_shmat(shmid, shmaddr, shmflg, &raddr); + if (retval < 0) + return retval; + + force_successful_syscall_return(); + return raddr; +} + +asmlinkage unsigned long +ia64_brk (unsigned long brk) +{ + unsigned long rlim, retval, newbrk, oldbrk; + struct mm_struct *mm = current->mm; + + /* + * Most of this replicates the code in sys_brk() except for an additional safety + * check and the clearing of r8. However, we can't call sys_brk() because we need + * to acquire the mmap_sem before we can do the test... + */ + down_write(&mm->mmap_sem); + + if (brk < mm->end_code) + goto out; + newbrk = PAGE_ALIGN(brk); + oldbrk = PAGE_ALIGN(mm->brk); + if (oldbrk == newbrk) + goto set_brk; + + /* Always allow shrinking brk. */ + if (brk <= mm->brk) { + if (!do_munmap(mm, newbrk, oldbrk-newbrk)) + goto set_brk; + goto out; + } + + /* Check against unimplemented/unmapped addresses: */ + if ((newbrk - oldbrk) > RGN_MAP_LIMIT || REGION_OFFSET(newbrk) > RGN_MAP_LIMIT) + goto out; + + /* Check against rlimit.. */ + rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) + goto out; + + /* Check against existing mmap mappings. */ + if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) + goto out; + + /* Ok, looks good - let it rip. */ + if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk) + goto out; +set_brk: + mm->brk = brk; +out: + retval = mm->brk; + up_write(&mm->mmap_sem); + force_successful_syscall_return(); + return retval; +} + +/* + * On IA-64, we return the two file descriptors in ret0 and ret1 (r8 + * and r9) as this is faster than doing a copy_to_user(). + */ +asmlinkage long +sys_pipe (void) +{ + struct pt_regs *regs = ia64_task_regs(current); + int fd[2]; + int retval; + + retval = do_pipe(fd); + if (retval) + goto out; + retval = fd[0]; + regs->r9 = fd[1]; + out: + return retval; +} + +static inline unsigned long +do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff) +{ + unsigned long roff; + struct file *file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return -EBADF; + + if (!file->f_op || !file->f_op->mmap) { + addr = -ENODEV; + goto out; + } + } + + /* + * A zero mmap always succeeds in Linux, independent of whether or not the + * remaining arguments are valid. + */ + if (len == 0) + goto out; + + /* Careful about overflows.. */ + len = PAGE_ALIGN(len); + if (!len || len > TASK_SIZE) { + addr = -EINVAL; + goto out; + } + + /* + * Don't permit mappings into unmapped space, the virtual page table of a region, + * or across a region boundary. Note: RGN_MAP_LIMIT is equal to 2^n-PAGE_SIZE + * (for some integer n <= 61) and len > 0. + */ + roff = REGION_OFFSET(addr); + if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) { + addr = -EINVAL; + goto out; + } + + down_write(¤t->mm->mmap_sem); + addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); + +out: if (file) + fput(file); + return addr; +} + +/* + * mmap2() is like mmap() except that the offset is expressed in units + * of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces + * of) files that are larger than the address space of the CPU. + */ +asmlinkage unsigned long +sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) +{ + addr = do_mmap2(addr, len, prot, flags, fd, pgoff); + if (!IS_ERR((void *) addr)) + force_successful_syscall_return(); + return addr; +} + +asmlinkage unsigned long +sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off) +{ + if (offset_in_page(off) != 0) + return -EINVAL; + + addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + if (!IS_ERR((void *) addr)) + force_successful_syscall_return(); + return addr; +} + +asmlinkage unsigned long +ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, + unsigned long new_addr) +{ + extern unsigned long do_mremap (unsigned long addr, + unsigned long old_len, + unsigned long new_len, + unsigned long flags, + unsigned long new_addr); + + down_write(¤t->mm->mmap_sem); + { + addr = do_mremap(addr, old_len, new_len, flags, new_addr); + } + up_write(¤t->mm->mmap_sem); + + if (IS_ERR((void *) addr)) + return addr; + + force_successful_syscall_return(); + return addr; +} + +#ifndef CONFIG_PCI + +asmlinkage long +sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + +asmlinkage long +sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + +#endif /* CONFIG_PCI */ diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c new file mode 100644 index 000000000000..8b8a5a45b621 --- /dev/null +++ b/arch/ia64/kernel/time.c @@ -0,0 +1,255 @@ +/* + * linux/arch/ia64/kernel/time.c + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger <davidm@hpl.hp.com> + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> + * Copyright (C) 1999-2000 VA Linux Systems + * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com> + */ +#include <linux/config.h> + +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/profile.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/interrupt.h> +#include <linux/efi.h> +#include <linux/profile.h> +#include <linux/timex.h> + +#include <asm/machvec.h> +#include <asm/delay.h> +#include <asm/hw_irq.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/sections.h> +#include <asm/system.h> + +extern unsigned long wall_jiffies; + +u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; + +EXPORT_SYMBOL(jiffies_64); + +#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ + +#ifdef CONFIG_IA64_DEBUG_IRQ + +unsigned long last_cli_ip; +EXPORT_SYMBOL(last_cli_ip); + +#endif + +static struct time_interpolator itc_interpolator = { + .shift = 16, + .mask = 0xffffffffffffffffLL, + .source = TIME_SOURCE_CPU +}; + +static irqreturn_t +timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned long new_itm; + + if (unlikely(cpu_is_offline(smp_processor_id()))) { + return IRQ_HANDLED; + } + + platform_timer_interrupt(irq, dev_id, regs); + + new_itm = local_cpu_data->itm_next; + + if (!time_after(ia64_get_itc(), new_itm)) + printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", + ia64_get_itc(), new_itm); + + profile_tick(CPU_PROFILING, regs); + + while (1) { + update_process_times(user_mode(regs)); + + new_itm += local_cpu_data->itm_delta; + + if (smp_processor_id() == TIME_KEEPER_ID) { + /* + * Here we are in the timer irq handler. We have irqs locally + * disabled, but we don't know if the timer_bh is running on + * another CPU. We need to avoid to SMP race by acquiring the + * xtime_lock. + */ + write_seqlock(&xtime_lock); + do_timer(regs); + local_cpu_data->itm_next = new_itm; + write_sequnlock(&xtime_lock); + } else + local_cpu_data->itm_next = new_itm; + + if (time_after(new_itm, ia64_get_itc())) + break; + } + + do { + /* + * If we're too close to the next clock tick for + * comfort, we increase the safety margin by + * intentionally dropping the next tick(s). We do NOT + * update itm.next because that would force us to call + * do_timer() which in turn would let our clock run + * too fast (with the potentially devastating effect + * of losing monotony of time). + */ + while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) + new_itm += local_cpu_data->itm_delta; + ia64_set_itm(new_itm); + /* double check, in case we got hit by a (slow) PMI: */ + } while (time_after_eq(ia64_get_itc(), new_itm)); + return IRQ_HANDLED; +} + +/* + * Encapsulate access to the itm structure for SMP. + */ +void +ia64_cpu_local_tick (void) +{ + int cpu = smp_processor_id(); + unsigned long shift = 0, delta; + + /* arrange for the cycle counter to generate a timer interrupt: */ + ia64_set_itv(IA64_TIMER_VECTOR); + + delta = local_cpu_data->itm_delta; + /* + * Stagger the timer tick for each CPU so they don't occur all at (almost) the + * same time: + */ + if (cpu) { + unsigned long hi = 1UL << ia64_fls(cpu); + shift = (2*(cpu - hi) + 1) * delta/hi/2; + } + local_cpu_data->itm_next = ia64_get_itc() + delta + shift; + ia64_set_itm(local_cpu_data->itm_next); +} + +static int nojitter; + +static int __init nojitter_setup(char *str) +{ + nojitter = 1; + printk("Jitter checking for ITC timers disabled\n"); + return 1; +} + +__setup("nojitter", nojitter_setup); + + +void __devinit +ia64_init_itm (void) +{ + unsigned long platform_base_freq, itc_freq; + struct pal_freq_ratio itc_ratio, proc_ratio; + long status, platform_base_drift, itc_drift; + + /* + * According to SAL v2.6, we need to use a SAL call to determine the platform base + * frequency and then a PAL call to determine the frequency ratio between the ITC + * and the base frequency. + */ + status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, + &platform_base_freq, &platform_base_drift); + if (status != 0) { + printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); + } else { + status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio); + if (status != 0) + printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status); + } + if (status != 0) { + /* invent "random" values */ + printk(KERN_ERR + "SAL/PAL failed to obtain frequency info---inventing reasonable values\n"); + platform_base_freq = 100000000; + platform_base_drift = -1; /* no drift info */ + itc_ratio.num = 3; + itc_ratio.den = 1; + } + if (platform_base_freq < 40000000) { + printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n", + platform_base_freq); + platform_base_freq = 75000000; + platform_base_drift = -1; + } + if (!proc_ratio.den) + proc_ratio.den = 1; /* avoid division by zero */ + if (!itc_ratio.den) + itc_ratio.den = 1; /* avoid division by zero */ + + itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; + + local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; + printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, " + "ITC freq=%lu.%03luMHz", smp_processor_id(), + platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, + itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); + + if (platform_base_drift != -1) { + itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den; + printk("+/-%ldppm\n", itc_drift); + } else { + itc_drift = -1; + printk("\n"); + } + + local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den; + local_cpu_data->itc_freq = itc_freq; + local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC; + local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT) + + itc_freq/2)/itc_freq; + + if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { + itc_interpolator.frequency = local_cpu_data->itc_freq; + itc_interpolator.drift = itc_drift; +#ifdef CONFIG_SMP + /* On IA64 in an SMP configuration ITCs are never accurately synchronized. + * Jitter compensation requires a cmpxchg which may limit + * the scalability of the syscalls for retrieving time. + * The ITC synchronization is usually successful to within a few + * ITC ticks but this is not a sure thing. If you need to improve + * timer performance in SMP situations then boot the kernel with the + * "nojitter" option. However, doing so may result in time fluctuating (maybe + * even going backward) if the ITC offsets between the individual CPUs + * are too large. + */ + if (!nojitter) itc_interpolator.jitter = 1; +#endif + register_time_interpolator(&itc_interpolator); + } + + /* Setup the CPU local timer tick */ + ia64_cpu_local_tick(); +} + +static struct irqaction timer_irqaction = { + .handler = timer_interrupt, + .flags = SA_INTERRUPT, + .name = "timer" +}; + +void __init +time_init (void) +{ + register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction); + efi_gettimeofday(&xtime); + ia64_init_itm(); + + /* + * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the + * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC). + */ + set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); +} diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c new file mode 100644 index 000000000000..f1aafd4c05f9 --- /dev/null +++ b/arch/ia64/kernel/topology.c @@ -0,0 +1,92 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * This file contains NUMA specific variables and functions which can + * be split away from DISCONTIGMEM and are used on NUMA machines with + * contiguous memory. + * 2002/08/07 Erich Focht <efocht@ess.nec.de> + * Populate cpu entries in sysfs for non-numa systems as well + * Intel Corporation - Ashok Raj + */ + +#include <linux/config.h> +#include <linux/cpu.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/node.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/nodemask.h> +#include <asm/mmzone.h> +#include <asm/numa.h> +#include <asm/cpu.h> + +#ifdef CONFIG_NUMA +static struct node *sysfs_nodes; +#endif +static struct ia64_cpu *sysfs_cpus; + +int arch_register_cpu(int num) +{ + struct node *parent = NULL; + +#ifdef CONFIG_NUMA + parent = &sysfs_nodes[cpu_to_node(num)]; +#endif /* CONFIG_NUMA */ + + return register_cpu(&sysfs_cpus[num].cpu, num, parent); +} + +#ifdef CONFIG_HOTPLUG_CPU + +void arch_unregister_cpu(int num) +{ + struct node *parent = NULL; + +#ifdef CONFIG_NUMA + int node = cpu_to_node(num); + parent = &sysfs_nodes[node]; +#endif /* CONFIG_NUMA */ + + return unregister_cpu(&sysfs_cpus[num].cpu, parent); +} +EXPORT_SYMBOL(arch_register_cpu); +EXPORT_SYMBOL(arch_unregister_cpu); +#endif /*CONFIG_HOTPLUG_CPU*/ + + +static int __init topology_init(void) +{ + int i, err = 0; + +#ifdef CONFIG_NUMA + sysfs_nodes = kmalloc(sizeof(struct node) * MAX_NUMNODES, GFP_KERNEL); + if (!sysfs_nodes) { + err = -ENOMEM; + goto out; + } + memset(sysfs_nodes, 0, sizeof(struct node) * MAX_NUMNODES); + + /* MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes? */ + for_each_online_node(i) + if ((err = register_node(&sysfs_nodes[i], i, 0))) + goto out; +#endif + + sysfs_cpus = kmalloc(sizeof(struct ia64_cpu) * NR_CPUS, GFP_KERNEL); + if (!sysfs_cpus) { + err = -ENOMEM; + goto out; + } + memset(sysfs_cpus, 0, sizeof(struct ia64_cpu) * NR_CPUS); + + for_each_present_cpu(i) + if((err = arch_register_cpu(i))) + goto out; +out: + return err; +} + +__initcall(topology_init); diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c new file mode 100644 index 000000000000..e82ad78081b3 --- /dev/null +++ b/arch/ia64/kernel/traps.c @@ -0,0 +1,609 @@ +/* + * Architecture-specific trap handling. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 05/12/00 grao <goutham.rao@intel.com> : added isr in siginfo for SIGFPE + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/tty.h> +#include <linux/vt_kern.h> /* For unblank_screen() */ +#include <linux/module.h> /* for EXPORT_SYMBOL */ +#include <linux/hardirq.h> + +#include <asm/fpswa.h> +#include <asm/ia32.h> +#include <asm/intrinsics.h> +#include <asm/processor.h> +#include <asm/uaccess.h> + +extern spinlock_t timerlist_lock; + +fpswa_interface_t *fpswa_interface; +EXPORT_SYMBOL(fpswa_interface); + +void __init +trap_init (void) +{ + if (ia64_boot_param->fpswa) + /* FPSWA fixup: make the interface pointer a kernel virtual address: */ + fpswa_interface = __va(ia64_boot_param->fpswa); +} + +/* + * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock + * is acquired through the console unblank code) + */ +void +bust_spinlocks (int yes) +{ + int loglevel_save = console_loglevel; + + if (yes) { + oops_in_progress = 1; + return; + } + +#ifdef CONFIG_VT + unblank_screen(); +#endif + oops_in_progress = 0; + /* + * OK, the message is on the console. Now we call printk() without + * oops_in_progress set so that printk will give klogd a poke. Hold onto + * your hats... + */ + console_loglevel = 15; /* NMI oopser may have shut the console up */ + printk(" "); + console_loglevel = loglevel_save; +} + +void +die (const char *str, struct pt_regs *regs, long err) +{ + static struct { + spinlock_t lock; + u32 lock_owner; + int lock_owner_depth; + } die = { + .lock = SPIN_LOCK_UNLOCKED, + .lock_owner = -1, + .lock_owner_depth = 0 + }; + static int die_counter; + + if (die.lock_owner != smp_processor_id()) { + console_verbose(); + spin_lock_irq(&die.lock); + die.lock_owner = smp_processor_id(); + die.lock_owner_depth = 0; + bust_spinlocks(1); + } + + if (++die.lock_owner_depth < 3) { + printk("%s[%d]: %s %ld [%d]\n", + current->comm, current->pid, str, err, ++die_counter); + show_regs(regs); + } else + printk(KERN_ERR "Recursive die() failure, output suppressed\n"); + + bust_spinlocks(0); + die.lock_owner = -1; + spin_unlock_irq(&die.lock); + do_exit(SIGSEGV); +} + +void +die_if_kernel (char *str, struct pt_regs *regs, long err) +{ + if (!user_mode(regs)) + die(str, regs, err); +} + +void +ia64_bad_break (unsigned long break_num, struct pt_regs *regs) +{ + siginfo_t siginfo; + int sig, code; + + /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */ + siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); + siginfo.si_imm = break_num; + siginfo.si_flags = 0; /* clear __ISR_VALID */ + siginfo.si_isr = 0; + + switch (break_num) { + case 0: /* unknown error (used by GCC for __builtin_abort()) */ + die_if_kernel("bugcheck!", regs, break_num); + sig = SIGILL; code = ILL_ILLOPC; + break; + + case 1: /* integer divide by zero */ + sig = SIGFPE; code = FPE_INTDIV; + break; + + case 2: /* integer overflow */ + sig = SIGFPE; code = FPE_INTOVF; + break; + + case 3: /* range check/bounds check */ + sig = SIGFPE; code = FPE_FLTSUB; + break; + + case 4: /* null pointer dereference */ + sig = SIGSEGV; code = SEGV_MAPERR; + break; + + case 5: /* misaligned data */ + sig = SIGSEGV; code = BUS_ADRALN; + break; + + case 6: /* decimal overflow */ + sig = SIGFPE; code = __FPE_DECOVF; + break; + + case 7: /* decimal divide by zero */ + sig = SIGFPE; code = __FPE_DECDIV; + break; + + case 8: /* packed decimal error */ + sig = SIGFPE; code = __FPE_DECERR; + break; + + case 9: /* invalid ASCII digit */ + sig = SIGFPE; code = __FPE_INVASC; + break; + + case 10: /* invalid decimal digit */ + sig = SIGFPE; code = __FPE_INVDEC; + break; + + case 11: /* paragraph stack overflow */ + sig = SIGSEGV; code = __SEGV_PSTKOVF; + break; + + case 0x3f000 ... 0x3ffff: /* bundle-update in progress */ + sig = SIGILL; code = __ILL_BNDMOD; + break; + + default: + if (break_num < 0x40000 || break_num > 0x100000) + die_if_kernel("Bad break", regs, break_num); + + if (break_num < 0x80000) { + sig = SIGILL; code = __ILL_BREAK; + } else { + sig = SIGTRAP; code = TRAP_BRKPT; + } + } + siginfo.si_signo = sig; + siginfo.si_errno = 0; + siginfo.si_code = code; + force_sig_info(sig, &siginfo, current); +} + +/* + * disabled_fph_fault() is called when a user-level process attempts to access f32..f127 + * and it doesn't own the fp-high register partition. When this happens, we save the + * current fph partition in the task_struct of the fpu-owner (if necessary) and then load + * the fp-high partition of the current task (if necessary). Note that the kernel has + * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes + * care of clearing psr.dfh. + */ +static inline void +disabled_fph_fault (struct pt_regs *regs) +{ + struct ia64_psr *psr = ia64_psr(regs); + + /* first, grant user-level access to fph partition: */ + psr->dfh = 0; +#ifndef CONFIG_SMP + { + struct task_struct *fpu_owner + = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER); + + if (ia64_is_local_fpu_owner(current)) + return; + + if (fpu_owner) + ia64_flush_fph(fpu_owner); + } +#endif /* !CONFIG_SMP */ + ia64_set_local_fpu_owner(current); + if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) { + __ia64_load_fpu(current->thread.fph); + psr->mfh = 0; + } else { + __ia64_init_fpu(); + /* + * Set mfh because the state in thread.fph does not match the state in + * the fph partition. + */ + psr->mfh = 1; + } +} + +static inline int +fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs, + struct pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; + + if (!fpswa_interface) + return -1; + + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * compute fp_state. only FP registers f6 - f11 are used by the + * kernel, so set those bits in the mask and set the low volatile + * pointer to point to these registers. + */ + fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ + + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) ®s->f6; + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle, + (unsigned long *) ipsr, (unsigned long *) fpsr, + (unsigned long *) isr, (unsigned long *) pr, + (unsigned long *) ifs, &fp_state); + + return ret.status; +} + +/* + * Handle floating-point assist faults and traps. + */ +static int +handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) +{ + long exception, bundle[2]; + unsigned long fault_ip; + struct siginfo siginfo; + static int fpu_swa_count = 0; + static unsigned long last_time; + + fault_ip = regs->cr_iip; + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle))) + return -1; + + if (jiffies - last_time > 5*HZ) + fpu_swa_count = 0; + if ((fpu_swa_count < 4) && !(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { + last_time = jiffies; + ++fpu_swa_count; + printk(KERN_WARNING + "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", + current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); + } + + exception = fp_emulate(fp_fault, bundle, ®s->cr_ipsr, ®s->ar_fpsr, &isr, ®s->pr, + ®s->cr_ifs, regs); + if (fp_fault) { + if (exception == 0) { + /* emulation was successful */ + ia64_increment_ip(regs); + } else if (exception == -1) { + printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); + return -1; + } else { + /* is next instruction a trap? */ + if (exception & 2) { + ia64_increment_ip(regs); + } + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = __SI_FAULT; /* default code */ + siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); + if (isr & 0x11) { + siginfo.si_code = FPE_FLTINV; + } else if (isr & 0x22) { + /* denormal operand gets the same si_code as underflow + * see arch/i386/kernel/traps.c:math_error() */ + siginfo.si_code = FPE_FLTUND; + } else if (isr & 0x44) { + siginfo.si_code = FPE_FLTDIV; + } + siginfo.si_isr = isr; + siginfo.si_flags = __ISR_VALID; + siginfo.si_imm = 0; + force_sig_info(SIGFPE, &siginfo, current); + } + } else { + if (exception == -1) { + printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); + return -1; + } else if (exception != 0) { + /* raise exception */ + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = __SI_FAULT; /* default code */ + siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); + if (isr & 0x880) { + siginfo.si_code = FPE_FLTOVF; + } else if (isr & 0x1100) { + siginfo.si_code = FPE_FLTUND; + } else if (isr & 0x2200) { + siginfo.si_code = FPE_FLTRES; + } + siginfo.si_isr = isr; + siginfo.si_flags = __ISR_VALID; + siginfo.si_imm = 0; + force_sig_info(SIGFPE, &siginfo, current); + } + } + return 0; +} + +struct illegal_op_return { + unsigned long fkt, arg1, arg2, arg3; +}; + +struct illegal_op_return +ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + struct illegal_op_return rv; + struct siginfo si; + char buf[128]; + +#ifdef CONFIG_IA64_BRL_EMU + { + extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long); + + rv = ia64_emulate_brl(®s, ec); + if (rv.fkt != (unsigned long) -1) + return rv; + } +#endif + + sprintf(buf, "IA-64 Illegal operation fault"); + die_if_kernel(buf, ®s, 0); + + memset(&si, 0, sizeof(si)); + si.si_signo = SIGILL; + si.si_code = ILL_ILLOPC; + si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(®s)->ri); + force_sig_info(SIGILL, &si, current); + rv.fkt = 0; + return rv; +} + +void +ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + unsigned long iim, unsigned long itir, long arg5, long arg6, + long arg7, struct pt_regs regs) +{ + unsigned long code, error = isr, iip; + struct siginfo siginfo; + char buf[128]; + int result, sig; + static const char *reason[] = { + "IA-64 Illegal Operation fault", + "IA-64 Privileged Operation fault", + "IA-64 Privileged Register fault", + "IA-64 Reserved Register/Field fault", + "Disabled Instruction Set Transition fault", + "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", + "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", + "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" + }; + + if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { + /* + * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel + * the lfetch. + */ + ia64_psr(®s)->ed = 1; + return; + } + + iip = regs.cr_iip + ia64_psr(®s)->ri; + + switch (vector) { + case 24: /* General Exception */ + code = (isr >> 4) & 0xf; + sprintf(buf, "General Exception: %s%s", reason[code], + (code == 3) ? ((isr & (1UL << 37)) + ? " (RSE access)" : " (data access)") : ""); + if (code == 8) { +# ifdef CONFIG_IA64_PRINT_HAZARDS + printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", + current->comm, current->pid, + regs.cr_iip + ia64_psr(®s)->ri, regs.pr); +# endif + return; + } + break; + + case 25: /* Disabled FP-Register */ + if (isr & 2) { + disabled_fph_fault(®s); + return; + } + sprintf(buf, "Disabled FPL fault---not supposed to happen!"); + break; + + case 26: /* NaT Consumption */ + if (user_mode(®s)) { + void __user *addr; + + if (((isr >> 4) & 0xf) == 2) { + /* NaT page consumption */ + sig = SIGSEGV; + code = SEGV_ACCERR; + addr = (void __user *) ifa; + } else { + /* register NaT consumption */ + sig = SIGILL; + code = ILL_ILLOPN; + addr = (void __user *) (regs.cr_iip + + ia64_psr(®s)->ri); + } + siginfo.si_signo = sig; + siginfo.si_code = code; + siginfo.si_errno = 0; + siginfo.si_addr = addr; + siginfo.si_imm = vector; + siginfo.si_flags = __ISR_VALID; + siginfo.si_isr = isr; + force_sig_info(sig, &siginfo, current); + return; + } else if (ia64_done_with_exception(®s)) + return; + sprintf(buf, "NaT consumption"); + break; + + case 31: /* Unsupported Data Reference */ + if (user_mode(®s)) { + siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_ILLOPN; + siginfo.si_errno = 0; + siginfo.si_addr = (void __user *) iip; + siginfo.si_imm = vector; + siginfo.si_flags = __ISR_VALID; + siginfo.si_isr = isr; + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, "Unsupported data reference"); + break; + + case 29: /* Debug */ + case 35: /* Taken Branch Trap */ + case 36: /* Single Step Trap */ + if (fsys_mode(current, ®s)) { + extern char __kernel_syscall_via_break[]; + /* + * Got a trap in fsys-mode: Taken Branch Trap and Single Step trap + * need special handling; Debug trap is not supposed to happen. + */ + if (unlikely(vector == 29)) { + die("Got debug trap in fsys-mode---not supposed to happen!", + ®s, 0); + return; + } + /* re-do the system call via break 0x100000: */ + regs.cr_iip = (unsigned long) __kernel_syscall_via_break; + ia64_psr(®s)->ri = 0; + ia64_psr(®s)->cpl = 3; + return; + } + switch (vector) { + case 29: + siginfo.si_code = TRAP_HWBKPT; +#ifdef CONFIG_ITANIUM + /* + * Erratum 10 (IFA may contain incorrect address) now has + * "NoFix" status. There are no plans for fixing this. + */ + if (ia64_psr(®s)->is == 0) + ifa = regs.cr_iip; +#endif + break; + case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; + case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; + } + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = 0; + siginfo.si_addr = (void __user *) ifa; + siginfo.si_imm = 0; + siginfo.si_flags = __ISR_VALID; + siginfo.si_isr = isr; + force_sig_info(SIGTRAP, &siginfo, current); + return; + + case 32: /* fp fault */ + case 33: /* fp trap */ + result = handle_fpu_swa((vector == 32) ? 1 : 0, ®s, isr); + if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = FPE_FLTINV; + siginfo.si_addr = (void __user *) iip; + siginfo.si_flags = __ISR_VALID; + siginfo.si_isr = isr; + siginfo.si_imm = 0; + force_sig_info(SIGFPE, &siginfo, current); + } + return; + + case 34: + if (isr & 0x2) { + /* Lower-Privilege Transfer Trap */ + /* + * Just clear PSR.lp and then return immediately: all the + * interesting work (e.g., signal delivery is done in the kernel + * exit path). + */ + ia64_psr(®s)->lp = 0; + return; + } else { + /* Unimplemented Instr. Address Trap */ + if (user_mode(®s)) { + siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_BADIADDR; + siginfo.si_errno = 0; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_imm = 0; + siginfo.si_addr = (void __user *) iip; + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, "Unimplemented Instruction Address fault"); + } + break; + + case 45: +#ifdef CONFIG_IA32_SUPPORT + if (ia32_exception(®s, isr) == 0) + return; +#endif + printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", + iip, ifa, isr); + force_sig(SIGSEGV, current); + break; + + case 46: +#ifdef CONFIG_IA32_SUPPORT + if (ia32_intercept(®s, isr) == 0) + return; +#endif + printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", + iip, ifa, isr, iim); + force_sig(SIGSEGV, current); + return; + + case 47: + sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); + break; + + default: + sprintf(buf, "Fault %lu", vector); + break; + } + die_if_kernel(buf, ®s, error); + force_sig(SIGILL, current); +} diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c new file mode 100644 index 000000000000..43b45b65ee5a --- /dev/null +++ b/arch/ia64/kernel/unaligned.c @@ -0,0 +1,1521 @@ +/* + * Architecture-specific unaligned trap handling. + * + * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix + * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame + * stacked register returns an undefined value; it does NOT trigger a + * "rsvd register fault"). + * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops. + * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes. + * 2001/01/17 Add support emulation of unaligned kernel accesses. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/tty.h> + +#include <asm/intrinsics.h> +#include <asm/processor.h> +#include <asm/rse.h> +#include <asm/uaccess.h> +#include <asm/unaligned.h> + +extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn)); + +#undef DEBUG_UNALIGNED_TRAP + +#ifdef DEBUG_UNALIGNED_TRAP +# define DPRINT(a...) do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0) +# define DDUMP(str,vp,len) dump(str, vp, len) + +static void +dump (const char *str, void *vp, size_t len) +{ + unsigned char *cp = vp; + int i; + + printk("%s", str); + for (i = 0; i < len; ++i) + printk (" %02x", *cp++); + printk("\n"); +} +#else +# define DPRINT(a...) +# define DDUMP(str,vp,len) +#endif + +#define IA64_FIRST_STACKED_GR 32 +#define IA64_FIRST_ROTATING_FR 32 +#define SIGN_EXT9 0xffffffffffffff00ul + +/* + * For M-unit: + * + * opcode | m | x6 | + * --------|------|---------| + * [40-37] | [36] | [35:30] | + * --------|------|---------| + * 4 | 1 | 6 | = 11 bits + * -------------------------- + * However bits [31:30] are not directly useful to distinguish between + * load/store so we can use [35:32] instead, which gives the following + * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer + * checking the m-bit until later in the load/store emulation. + */ +#define IA64_OPCODE_MASK 0x1ef +#define IA64_OPCODE_SHIFT 32 + +/* + * Table C-28 Integer Load/Store + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill MUST be aligned because the RNATs are based on + * the address (bits [8:3]), so we must failed. + */ +#define LD_OP 0x080 +#define LDS_OP 0x081 +#define LDA_OP 0x082 +#define LDSA_OP 0x083 +#define LDBIAS_OP 0x084 +#define LDACQ_OP 0x085 +/* 0x086, 0x087 are not relevant */ +#define LDCCLR_OP 0x088 +#define LDCNC_OP 0x089 +#define LDCCLRACQ_OP 0x08a +#define ST_OP 0x08c +#define STREL_OP 0x08d +/* 0x08e,0x8f are not relevant */ + +/* + * Table C-29 Integer Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-30 Integer Load/Store +Imm + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill must be aligned because the Nat register are based on + * the address, so we must fail and the program must be fixed. + */ +#define LD_IMM_OP 0x0a0 +#define LDS_IMM_OP 0x0a1 +#define LDA_IMM_OP 0x0a2 +#define LDSA_IMM_OP 0x0a3 +#define LDBIAS_IMM_OP 0x0a4 +#define LDACQ_IMM_OP 0x0a5 +/* 0x0a6, 0xa7 are not relevant */ +#define LDCCLR_IMM_OP 0x0a8 +#define LDCNC_IMM_OP 0x0a9 +#define LDCCLRACQ_IMM_OP 0x0aa +#define ST_IMM_OP 0x0ac +#define STREL_IMM_OP 0x0ad +/* 0x0ae,0xaf are not relevant */ + +/* + * Table C-32 Floating-point Load/Store + */ +#define LDF_OP 0x0c0 +#define LDFS_OP 0x0c1 +#define LDFA_OP 0x0c2 +#define LDFSA_OP 0x0c3 +/* 0x0c6 is irrelevant */ +#define LDFCCLR_OP 0x0c8 +#define LDFCNC_OP 0x0c9 +/* 0x0cb is irrelevant */ +#define STF_OP 0x0cc + +/* + * Table C-33 Floating-point Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-34 Floating-point Load/Store +Imm + */ +#define LDF_IMM_OP 0x0e0 +#define LDFS_IMM_OP 0x0e1 +#define LDFA_IMM_OP 0x0e2 +#define LDFSA_IMM_OP 0x0e3 +/* 0x0e6 is irrelevant */ +#define LDFCCLR_IMM_OP 0x0e8 +#define LDFCNC_IMM_OP 0x0e9 +#define STF_IMM_OP 0x0ec + +typedef struct { + unsigned long qp:6; /* [0:5] */ + unsigned long r1:7; /* [6:12] */ + unsigned long imm:7; /* [13:19] */ + unsigned long r3:7; /* [20:26] */ + unsigned long x:1; /* [27:27] */ + unsigned long hint:2; /* [28:29] */ + unsigned long x6_sz:2; /* [30:31] */ + unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */ + unsigned long m:1; /* [36:36] */ + unsigned long op:4; /* [37:40] */ + unsigned long pad:23; /* [41:63] */ +} load_store_t; + + +typedef enum { + UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */ + UPD_REG /* ldXZ r1=[r3],r2 */ +} update_t; + +/* + * We use tables to keep track of the offsets of registers in the saved state. + * This way we save having big switch/case statements. + * + * We use bit 0 to indicate switch_stack or pt_regs. + * The offset is simply shifted by 1 bit. + * A 2-byte value should be enough to hold any kind of offset + * + * In case the calling convention changes (and thus pt_regs/switch_stack) + * simply use RSW instead of RPT or vice-versa. + */ + +#define RPO(x) ((size_t) &((struct pt_regs *)0)->x) +#define RSO(x) ((size_t) &((struct switch_stack *)0)->x) + +#define RPT(x) (RPO(x) << 1) +#define RSW(x) (1| RSO(x)<<1) + +#define GR_OFFS(x) (gr_info[x]>>1) +#define GR_IN_SW(x) (gr_info[x] & 0x1) + +#define FR_OFFS(x) (fr_info[x]>>1) +#define FR_IN_SW(x) (fr_info[x] & 0x1) + +static u16 gr_info[32]={ + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + + RPT(r1), RPT(r2), RPT(r3), + + RSW(r4), RSW(r5), RSW(r6), RSW(r7), + + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +static u16 fr_info[32]={ + 0, /* constant : WE SHOULD NEVER GET THIS */ + 0, /* constant : WE SHOULD NEVER GET THIS */ + + RSW(f2), RSW(f3), RSW(f4), RSW(f5), + + RPT(f6), RPT(f7), RPT(f8), RPT(f9), + RPT(f10), RPT(f11), + + RSW(f12), RSW(f13), RSW(f14), + RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19), + RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24), + RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29), + RSW(f30), RSW(f31) +}; + +/* Invalidate ALAT entry for integer register REGNO. */ +static void +invala_gr (int regno) +{ +# define F(reg) case reg: ia64_invala_gr(reg); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +/* Invalidate ALAT entry for floating-point register REGNO. */ +static void +invala_fr (int regno) +{ +# define F(reg) case reg: ia64_invala_fr(reg); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +static inline unsigned long +rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg) +{ + reg += rrb; + if (reg >= sor) + reg -= sor; + return reg; +} + +static void +set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end; + unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; + unsigned long rnats, nat_mask; + unsigned long on_kbs; + long sof = (regs->cr_ifs) & 0x7f; + long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx >= sof) { + /* this should never happen, as the "rsvd register fault" has higher priority */ + DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof); + return; + } + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", + r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); + + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); + addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); + if (addr >= kbs) { + /* the register is on the kernel backing store: easy... */ + rnat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) rnat_addr >= sw->ar_bspstore) + rnat_addr = &sw->ar_rnat; + nat_mask = 1UL << ia64_rse_slot_num(addr); + + *addr = val; + if (nat) + *rnat_addr |= nat_mask; + else + *rnat_addr &= ~nat_mask; + return; + } + + if (!user_stack(current, regs)) { + DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1); + return; + } + + bspstore = (unsigned long *)regs->ar_bspstore; + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -sof); + addr = ia64_rse_skip_regs(bsp, ridx); + + DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); + + ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); + + rnat_addr = ia64_rse_rnat_addr(addr); + + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); + DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n", + (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1); + + nat_mask = 1UL << ia64_rse_slot_num(addr); + if (nat) + rnats |= nat_mask; + else + rnats &= ~nat_mask; + ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats); + + DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats); +} + + +static void +get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore; + unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; + unsigned long rnats, nat_mask; + unsigned long on_kbs; + long sof = (regs->cr_ifs) & 0x7f; + long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx >= sof) { + /* read of out-of-frame register returns an undefined value; 0 in our case. */ + DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof); + goto fail; + } + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", + r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); + + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); + addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); + if (addr >= kbs) { + /* the register is on the kernel backing store: easy... */ + *val = *addr; + if (nat) { + rnat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) rnat_addr >= sw->ar_bspstore) + rnat_addr = &sw->ar_rnat; + nat_mask = 1UL << ia64_rse_slot_num(addr); + *nat = (*rnat_addr & nat_mask) != 0; + } + return; + } + + if (!user_stack(current, regs)) { + DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1); + goto fail; + } + + bspstore = (unsigned long *)regs->ar_bspstore; + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -sof); + addr = ia64_rse_skip_regs(bsp, ridx); + + DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); + + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); + + if (nat) { + rnat_addr = ia64_rse_rnat_addr(addr); + nat_mask = 1UL << ia64_rse_slot_num(addr); + + DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats); + + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); + *nat = (rnats & nat_mask) != 0; + } + return; + + fail: + *val = 0; + if (nat) + *nat = 0; + return; +} + + +static void +setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long addr; + unsigned long bitmask; + unsigned long *unat; + + /* + * First takes care of stacked registers + */ + if (regnum >= IA64_FIRST_STACKED_GR) { + set_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Using r0 as a target raises a General Exception fault which has higher priority + * than the Unaligned Reference fault. + */ + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if (GR_IN_SW(regnum)) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n", + addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum)); + /* + * add offset from base of struct + * and do it ! + */ + addr += GR_OFFS(regnum); + + *(unsigned long *)addr = val; + + /* + * We need to clear the corresponding UNAT bit to fully emulate the load + * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 + */ + bitmask = 1UL << (addr >> 3 & 0x3f); + DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat); + if (nat) { + *unat |= bitmask; + } else { + *unat &= ~bitmask; + } + DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat); +} + +/* + * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the + * range from 32-127, result is in the range from 0-95. + */ +static inline unsigned long +fph_index (struct pt_regs *regs, long regnum) +{ + unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; + return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); +} + +static void +setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than Unaligned + * Fault. Thus, when we get here, we know the partition is enabled. + * To update f32-f127, there are three choices: + * + * (1) save f32-f127 to thread.fph and update the values there + * (2) use a gigantic switch statement to directly access the registers + * (3) generate code on the fly to update the desired register + * + * For now, we are using approach (1). + */ + if (regnum >= IA64_FIRST_ROTATING_FR) { + ia64_sync_fph(current); + current->thread.fph[fph_index(regs, regnum)] = *fpval; + } else { + /* + * pt_regs or switch_stack ? + */ + if (FR_IN_SW(regnum)) { + addr = (unsigned long)sw; + } else { + addr = (unsigned long)regs; + } + + DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum)); + + addr += FR_OFFS(regnum); + *(struct ia64_fpreg *)addr = *fpval; + + /* + * mark the low partition as being used now + * + * It is highly unlikely that this bit is not already set, but + * let's do it for safety. + */ + regs->cr_ipsr |= IA64_PSR_MFL; + } +} + +/* + * Those 2 inline functions generate the spilled versions of the constant floating point + * registers which can be used with stfX + */ +static inline void +float_spill_f0 (struct ia64_fpreg *final) +{ + ia64_stf_spill(final, 0); +} + +static inline void +float_spill_f1 (struct ia64_fpreg *final) +{ + ia64_stf_spill(final, 1); +} + +static void +getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than + * Unaligned Fault. Thus, when we get here, we know the partition is + * enabled. + * + * When regnum > 31, the register is still live and we need to force a save + * to current->thread.fph to get access to it. See discussion in setfpreg() + * for reasons and other ways of doing this. + */ + if (regnum >= IA64_FIRST_ROTATING_FR) { + ia64_flush_fph(current); + *fpval = current->thread.fph[fph_index(regs, regnum)]; + } else { + /* + * f0 = 0.0, f1= 1.0. Those registers are constant and are thus + * not saved, we must generate their spilled form on the fly + */ + switch(regnum) { + case 0: + float_spill_f0(fpval); + break; + case 1: + float_spill_f1(fpval); + break; + default: + /* + * pt_regs or switch_stack ? + */ + addr = FR_IN_SW(regnum) ? (unsigned long)sw + : (unsigned long)regs; + + DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n", + FR_IN_SW(regnum), addr, FR_OFFS(regnum)); + + addr += FR_OFFS(regnum); + *fpval = *(struct ia64_fpreg *)addr; + } + } +} + + +static void +getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long addr, *unat; + + if (regnum >= IA64_FIRST_STACKED_GR) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * take care of r0 (read-only always evaluate to 0) + */ + if (regnum == 0) { + *val = 0; + if (nat) + *nat = 0; + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if (GR_IN_SW(regnum)) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + + DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)); + + addr += GR_OFFS(regnum); + + *val = *(unsigned long *)addr; + + /* + * do it only when requested + */ + if (nat) + *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL; +} + +static void +emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa) +{ + /* + * IMPORTANT: + * Given the way we handle unaligned speculative loads, we should + * not get to this point in the code but we keep this sanity check, + * just in case. + */ + if (ld.x6_op == 1 || ld.x6_op == 3) { + printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__); + die_if_kernel("unaligned reference on speculative load with register update\n", + regs, 30); + } + + + /* + * at this point, we know that the base register to update is valid i.e., + * it's not r0 + */ + if (type == UPD_IMMEDIATE) { + unsigned long imm; + + /* + * Load +Imm: ldXZ r1=[r3],imm(9) + * + * + * form imm9: [13:19] contain the first 7 bits + */ + imm = ld.x << 7 | ld.imm; + + /* + * sign extend (1+8bits) if m set + */ + if (ld.m) imm |= SIGN_EXT9; + + /* + * ifa == r3 and we know that the NaT bit on r3 was clear so + * we can directly use ifa. + */ + ifa += imm; + + setreg(ld.r3, ifa, 0, regs); + + DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa); + + } else if (ld.m) { + unsigned long r2; + int nat_r2; + + /* + * Load +Reg Opcode: ldXZ r1=[r3],r2 + * + * Note: that we update r3 even in the case of ldfX.a + * (where the load does not happen) + * + * The way the load algorithm works, we know that r3 does not + * have its NaT bit set (would have gotten NaT consumption + * before getting the unaligned fault). So we can use ifa + * which equals r3 at this point. + * + * IMPORTANT: + * The above statement holds ONLY because we know that we + * never reach this code when trying to do a ldX.s. + * If we ever make it to here on an ldfX.s then + */ + getreg(ld.imm, &r2, &nat_r2, regs); + + ifa += r2; + + /* + * propagate Nat r2 -> r3 + */ + setreg(ld.r3, ifa, nat_r2, regs); + + DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2); + } +} + + +static int +emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + unsigned int len = 1 << ld.x6_sz; + unsigned long val = 0; + + /* + * r0, as target, doesn't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + /* + * ldX.a we will emulate load and also invalidate the ALAT entry. + * See comment below for explanation on how we handle ldX.a + */ + + if (len != 2 && len != 4 && len != 8) { + DPRINT("unknown size: x6=%d\n", ld.x6_sz); + return -1; + } + /* this assumes little-endian byte-order: */ + if (copy_from_user(&val, (void __user *) ifa, len)) + return -1; + setreg(ld.r1, val, 0, regs); + + /* + * check for updates on any kind of loads + */ + if (ld.op == 0x5 || ld.m) + emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); + + /* + * handling of various loads (based on EAS2.4): + * + * ldX.acq (ordered load): + * - acquire semantics would have been used, so force fence instead. + * + * ldX.c.clr (check load and clear): + * - if we get to this handler, it's because the entry was not in the ALAT. + * Therefore the operation reverts to a normal load + * + * ldX.c.nc (check load no clear): + * - same as previous one + * + * ldX.c.clr.acq (ordered check load and clear): + * - same as above for c.clr part. The load needs to have acquire semantics. So + * we use the fence semantics which is stronger and thus ensures correctness. + * + * ldX.a (advanced load): + * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the + * address doesn't match requested size alignment. This means that we would + * possibly need more than one load to get the result. + * + * The load part can be handled just like a normal load, however the difficult + * part is to get the right thing into the ALAT. The critical piece of information + * in the base address of the load & size. To do that, a ld.a must be executed, + * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now + * if we use the same target register, we will be okay for the check.a instruction. + * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry + * which would overlap within [r3,r3+X] (the size of the load was store in the + * ALAT). If such an entry is found the entry is invalidated. But this is not good + * enough, take the following example: + * r3=3 + * ld4.a r1=[r3] + * + * Could be emulated by doing: + * ld1.a r1=[r3],1 + * store to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3] + * store & shift to temporary; + * r1=temporary + * + * So in this case, you would get the right value is r1 but the wrong info in + * the ALAT. Notice that you could do it in reverse to finish with address 3 + * but you would still get the size wrong. To get the size right, one needs to + * execute exactly the same kind of load. You could do it from a aligned + * temporary location, but you would get the address wrong. + * + * So no matter what, it is not possible to emulate an advanced load + * correctly. But is that really critical ? + * + * We will always convert ld.a into a normal load with ALAT invalidated. This + * will enable compiler to do optimization where certain code path after ld.a + * is not required to have ld.c/chk.a, e.g., code path with no intervening stores. + * + * If there is a store after the advanced load, one must either do a ld.c.* or + * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no + * entry found in ALAT), and that's perfectly ok because: + * + * - ld.c.*, if the entry is not present a normal load is executed + * - chk.a.*, if the entry is not present, execution jumps to recovery code + * + * In either case, the load can be potentially retried in another form. + * + * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick + * up a stale entry later). The register base update MUST also be performed. + */ + + /* + * when the load has the .acq completer then + * use ordering fence. + */ + if (ld.x6_op == 0x5 || ld.x6_op == 0xa) + mb(); + + /* + * invalidate ALAT entry in case of advanced load + */ + if (ld.x6_op == 0x2) + invala_gr(ld.r1); + + return 0; +} + +static int +emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + unsigned long r2; + unsigned int len = 1 << ld.x6_sz; + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getreg(ld.imm, &r2, NULL, regs); + + /* + * we rely on the macros in unaligned.h for now i.e., + * we let the compiler figure out how to read memory gracefully. + * + * We need this switch/case because the way the inline function + * works. The code is optimized by the compiler and looks like + * a single switch/case. + */ + DPRINT("st%d [%lx]=%lx\n", len, ifa, r2); + + if (len != 2 && len != 4 && len != 8) { + DPRINT("unknown size: x6=%d\n", ld.x6_sz); + return -1; + } + + /* this assumes little-endian byte-order: */ + if (copy_to_user((void __user *) ifa, &r2, len)) + return -1; + + /* + * stX [r3]=r2,imm(9) + * + * NOTE: + * ld.r3 can never be r0, because r0 would not generate an + * unaligned access. + */ + if (ld.op == 0x5) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld.x << 7 | ld.r1; + /* + * sign extend (8bits) if m set + */ + if (ld.m) imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT("imm=%lx r3=%lx\n", imm, ifa); + + setreg(ld.r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + /* + * stX.rel: use fence instead of release + */ + if (ld.x6_op == 0xd) + mb(); + + return 0; +} + +/* + * floating point operations sizes in bytes + */ +static const unsigned char float_fsz[4]={ + 10, /* extended precision (e) */ + 8, /* integer (8) */ + 4, /* single precision (s) */ + 8 /* double precision (d) */ +}; + +static inline void +mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldfe(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf8(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldfs(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldfd(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stfe(final, 6); +} + +static inline void +float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stf8(final, 6); +} + +static inline void +float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stfs(final, 6); +} + +static inline void +float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stfd(final, 6); +} + +static int +emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init[2]; + struct ia64_fpreg fpr_final[2]; + unsigned long len = float_fsz[ld.x6_sz]; + + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction faults have + * higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an unaligned + * reference. + */ + + /* + * make sure we get clean buffers + */ + memset(&fpr_init, 0, sizeof(fpr_init)); + memset(&fpr_final, 0, sizeof(fpr_final)); + + /* + * ldfpX.a: we don't try to emulate anything but we must + * invalidate the ALAT entry and execute updates, if any. + */ + if (ld.x6_op != 0x2) { + /* + * This assumes little-endian byte-order. Note that there is no "ldfpe" + * instruction: + */ + if (copy_from_user(&fpr_init[0], (void __user *) ifa, len) + || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len)) + return -1; + + DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz); + DDUMP("frp_init =", &fpr_init, 2*len); + /* + * XXX fixme + * Could optimize inlines by using ldfpX & 2 spills + */ + switch( ld.x6_sz ) { + case 0: + mem2float_extended(&fpr_init[0], &fpr_final[0]); + mem2float_extended(&fpr_init[1], &fpr_final[1]); + break; + case 1: + mem2float_integer(&fpr_init[0], &fpr_final[0]); + mem2float_integer(&fpr_init[1], &fpr_final[1]); + break; + case 2: + mem2float_single(&fpr_init[0], &fpr_final[0]); + mem2float_single(&fpr_init[1], &fpr_final[1]); + break; + case 3: + mem2float_double(&fpr_init[0], &fpr_final[0]); + mem2float_double(&fpr_init[1], &fpr_final[1]); + break; + } + DDUMP("fpr_final =", &fpr_final, 2*len); + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final and directly + * use the storage from the saved context i.e., the actual final + * destination (pt_regs, switch_stack or thread structure). + */ + setfpreg(ld.r1, &fpr_final[0], regs); + setfpreg(ld.imm, &fpr_final[1], regs); + } + + /* + * Check for updates: only immediate updates are available for this + * instruction. + */ + if (ld.m) { + /* + * the immediate is implicit given the ldsz of the operation: + * single: 8 (2x4) and for all others it's 16 (2x8) + */ + ifa += len<<1; + + /* + * IMPORTANT: + * the fact that we force the NaT of r3 to zero is ONLY valid + * as long as we don't come here with a ldfpX.s. + * For this reason we keep this sanity check + */ + if (ld.x6_op == 1 || ld.x6_op == 3) + printk(KERN_ERR "%s: register update on speculative load pair, error\n", + __FUNCTION__); + + setreg(ld.r3, ifa, 0, regs); + } + + /* + * Invalidate ALAT entries, if any, for both registers. + */ + if (ld.x6_op == 0x2) { + invala_fr(ld.r1); + invala_fr(ld.imm); + } + return 0; +} + + +static int +emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld.x6_sz]; + + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * ldfX.a we don't try to emulate anything but we must + * invalidate the ALAT entry. + * See comments in ldX for descriptions on how the various loads are handled. + */ + if (ld.x6_op != 0x2) { + if (copy_from_user(&fpr_init, (void __user *) ifa, len)) + return -1; + + DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); + DDUMP("fpr_init =", &fpr_init, len); + /* + * we only do something for x6_op={0,8,9} + */ + switch( ld.x6_sz ) { + case 0: + mem2float_extended(&fpr_init, &fpr_final); + break; + case 1: + mem2float_integer(&fpr_init, &fpr_final); + break; + case 2: + mem2float_single(&fpr_init, &fpr_final); + break; + case 3: + mem2float_double(&fpr_init, &fpr_final); + break; + } + DDUMP("fpr_final =", &fpr_final, len); + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final and directly + * use the storage from the saved context i.e., the actual final + * destination (pt_regs, switch_stack or thread structure). + */ + setfpreg(ld.r1, &fpr_final, regs); + } + + /* + * check for updates on any loads + */ + if (ld.op == 0x7 || ld.m) + emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); + + /* + * invalidate ALAT entry in case of advanced floating point loads + */ + if (ld.x6_op == 0x2) + invala_fr(ld.r1); + + return 0; +} + + +static int +emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld.x6_sz]; + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getfpreg(ld.imm, &fpr_init, regs); + /* + * during this step, we extract the spilled registers from the saved + * context i.e., we refill. Then we store (no spill) to temporary + * aligned location + */ + switch( ld.x6_sz ) { + case 0: + float2mem_extended(&fpr_init, &fpr_final); + break; + case 1: + float2mem_integer(&fpr_init, &fpr_final); + break; + case 2: + float2mem_single(&fpr_init, &fpr_final); + break; + case 3: + float2mem_double(&fpr_init, &fpr_final); + break; + } + DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); + DDUMP("fpr_init =", &fpr_init, len); + DDUMP("fpr_final =", &fpr_final, len); + + if (copy_to_user((void __user *) ifa, &fpr_final, len)) + return -1; + + /* + * stfX [r3]=r2,imm(9) + * + * NOTE: + * ld.r3 can never be r0, because r0 would not generate an + * unaligned access. + */ + if (ld.op == 0x7) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld.x << 7 | ld.r1; + /* + * sign extend (8bits) if m set + */ + if (ld.m) + imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT("imm=%lx r3=%lx\n", imm, ifa); + + setreg(ld.r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + return 0; +} + +/* + * Make sure we log the unaligned access, so that user/sysadmin can notice it and + * eventually fix the program. However, we don't want to do that for every access so we + * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be + * either... + */ +static int +within_logging_rate_limit (void) +{ + static unsigned long count, last_time; + + if (jiffies - last_time > 5*HZ) + count = 0; + if (++count < 5) { + last_time = jiffies; + return 1; + } + return 0; + +} + +void +ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) +{ + struct ia64_psr *ipsr = ia64_psr(regs); + mm_segment_t old_fs = get_fs(); + unsigned long bundle[2]; + unsigned long opcode; + struct siginfo si; + const struct exception_table_entry *eh = NULL; + union { + unsigned long l; + load_store_t insn; + } u; + int ret = -1; + + if (ia64_psr(regs)->be) { + /* we don't support big-endian accesses */ + die_if_kernel("big-endian unaligned accesses are not supported", regs, 0); + goto force_sigbus; + } + + /* + * Treat kernel accesses for which there is an exception handler entry the same as + * user-level unaligned accesses. Otherwise, a clever program could trick this + * handler into reading an arbitrary kernel addresses... + */ + if (!user_mode(regs)) + eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri); + if (user_mode(regs) || eh) { + if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0) + goto force_sigbus; + + if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT) + && within_logging_rate_limit()) + { + char buf[200]; /* comm[] is at most 16 bytes... */ + size_t len; + + len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, " + "ip=0x%016lx\n\r", current->comm, current->pid, + ifa, regs->cr_iip + ipsr->ri); + /* + * Don't call tty_write_message() if we're in the kernel; we might + * be holding locks... + */ + if (user_mode(regs)) + tty_write_message(current->signal->tty, buf); + buf[len-1] = '\0'; /* drop '\r' */ + printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */ + } + } else { + if (within_logging_rate_limit()) + printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n", + ifa, regs->cr_iip + ipsr->ri); + set_fs(KERNEL_DS); + } + + DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n", + regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it); + + if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16)) + goto failure; + + /* + * extract the instruction from the bundle given the slot number + */ + switch (ipsr->ri) { + case 0: u.l = (bundle[0] >> 5); break; + case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break; + case 2: u.l = (bundle[1] >> 23); break; + } + opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK; + + DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d " + "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm, + u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op); + + /* + * IMPORTANT: + * Notice that the switch statement DOES not cover all possible instructions + * that DO generate unaligned references. This is made on purpose because for some + * instructions it DOES NOT make sense to try and emulate the access. Sometimes it + * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e., + * the program will get a signal and die: + * + * load/store: + * - ldX.spill + * - stX.spill + * Reason: RNATs are based on addresses + * - ld16 + * - st16 + * Reason: ld16 and st16 are supposed to occur in a single + * memory op + * + * synchronization: + * - cmpxchg + * - fetchadd + * - xchg + * Reason: ATOMIC operations cannot be emulated properly using multiple + * instructions. + * + * speculative loads: + * - ldX.sZ + * Reason: side effects, code must be ready to deal with failure so simpler + * to let the load fail. + * --------------------------------------------------------------------------------- + * XXX fixme + * + * I would like to get rid of this switch case and do something + * more elegant. + */ + switch (opcode) { + case LDS_OP: + case LDSA_OP: + if (u.insn.x) + /* oops, really a semaphore op (cmpxchg, etc) */ + goto failure; + /* no break */ + case LDS_IMM_OP: + case LDSA_IMM_OP: + case LDFS_OP: + case LDFSA_OP: + case LDFS_IMM_OP: + /* + * The instruction will be retried with deferred exceptions turned on, and + * we should get Nat bit installed + * + * IMPORTANT: When PSR_ED is set, the register & immediate update forms + * are actually executed even though the operation failed. So we don't + * need to take care of this. + */ + DPRINT("forcing PSR_ED\n"); + regs->cr_ipsr |= IA64_PSR_ED; + goto done; + + case LD_OP: + case LDA_OP: + case LDBIAS_OP: + case LDACQ_OP: + case LDCCLR_OP: + case LDCNC_OP: + case LDCCLRACQ_OP: + if (u.insn.x) + /* oops, really a semaphore op (cmpxchg, etc) */ + goto failure; + /* no break */ + case LD_IMM_OP: + case LDA_IMM_OP: + case LDBIAS_IMM_OP: + case LDACQ_IMM_OP: + case LDCCLR_IMM_OP: + case LDCNC_IMM_OP: + case LDCCLRACQ_IMM_OP: + ret = emulate_load_int(ifa, u.insn, regs); + break; + + case ST_OP: + case STREL_OP: + if (u.insn.x) + /* oops, really a semaphore op (cmpxchg, etc) */ + goto failure; + /* no break */ + case ST_IMM_OP: + case STREL_IMM_OP: + ret = emulate_store_int(ifa, u.insn, regs); + break; + + case LDF_OP: + case LDFA_OP: + case LDFCCLR_OP: + case LDFCNC_OP: + case LDF_IMM_OP: + case LDFA_IMM_OP: + case LDFCCLR_IMM_OP: + case LDFCNC_IMM_OP: + if (u.insn.x) + ret = emulate_load_floatpair(ifa, u.insn, regs); + else + ret = emulate_load_float(ifa, u.insn, regs); + break; + + case STF_OP: + case STF_IMM_OP: + ret = emulate_store_float(ifa, u.insn, regs); + break; + + default: + goto failure; + } + DPRINT("ret=%d\n", ret); + if (ret) + goto failure; + + if (ipsr->ri == 2) + /* + * given today's architecture this case is not likely to happen because a + * memory access instruction (M) can never be in the last slot of a + * bundle. But let's keep it for now. + */ + regs->cr_iip += 16; + ipsr->ri = (ipsr->ri + 1) & 0x3; + + DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip); + done: + set_fs(old_fs); /* restore original address limit */ + return; + + failure: + /* something went wrong... */ + if (!user_mode(regs)) { + if (eh) { + ia64_handle_exception(regs, eh); + goto done; + } + die_if_kernel("error during unaligned kernel access\n", regs, ret); + /* NOT_REACHED */ + } + force_sigbus: + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRALN; + si.si_addr = (void __user *) ifa; + si.si_flags = 0; + si.si_isr = 0; + si.si_imm = 0; + force_sig_info(SIGBUS, &si, current); + goto done; +} diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c new file mode 100644 index 000000000000..d494ff647cac --- /dev/null +++ b/arch/ia64/kernel/unwind.c @@ -0,0 +1,2306 @@ +/* + * Copyright (C) 1999-2004 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2003 Fenghua Yu <fenghua.yu@intel.com> + * - Change pt_regs_off() to make it less dependant on pt_regs structure. + */ +/* + * This file implements call frame unwind support for the Linux + * kernel. Parsing and processing the unwind information is + * time-consuming, so this implementation translates the unwind + * descriptors into unwind scripts. These scripts are very simple + * (basically a sequence of assignments) and efficient to execute. + * They are cached for later re-use. Each script is specific for a + * given instruction pointer address and the set of predicate values + * that the script depends on (most unwind descriptors are + * unconditional and scripts often do not depend on predicates at + * all). This code is based on the unwind conventions described in + * the "IA-64 Software Conventions and Runtime Architecture" manual. + * + * SMP conventions: + * o updates to the global unwind data (in structure "unw") are serialized + * by the unw.lock spinlock + * o each unwind script has its own read-write lock; a thread must acquire + * a read lock before executing a script and must acquire a write lock + * before modifying a script + * o if both the unw.lock spinlock and a script's read-write lock must be + * acquired, then the read-write lock must be acquired first. + */ +#include <linux/module.h> +#include <linux/bootmem.h> +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/unwind.h> + +#include <asm/delay.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/ptrace_offsets.h> +#include <asm/rse.h> +#include <asm/sections.h> +#include <asm/system.h> +#include <asm/uaccess.h> + +#include "entry.h" +#include "unwind_i.h" + +#define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */ +#define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE) + +#define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1) +#define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE) + +#define UNW_STATS 0 /* WARNING: this disabled interrupts for long time-spans!! */ + +#ifdef UNW_DEBUG + static unsigned int unw_debug_level = UNW_DEBUG; +# define UNW_DEBUG_ON(n) unw_debug_level >= n + /* Do not code a printk level, not all debug lines end in newline */ +# define UNW_DPRINT(n, ...) if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__) +# define inline +#else /* !UNW_DEBUG */ +# define UNW_DEBUG_ON(n) 0 +# define UNW_DPRINT(n, ...) +#endif /* UNW_DEBUG */ + +#if UNW_STATS +# define STAT(x...) x +#else +# define STAT(x...) +#endif + +#define alloc_reg_state() kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC) +#define free_reg_state(usr) kfree(usr) +#define alloc_labeled_state() kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC) +#define free_labeled_state(usr) kfree(usr) + +typedef unsigned long unw_word; +typedef unsigned char unw_hash_index_t; + +static struct { + spinlock_t lock; /* spinlock for unwind data */ + + /* list of unwind tables (one per load-module) */ + struct unw_table *tables; + + unsigned long r0; /* constant 0 for r0 */ + + /* table of registers that prologues can save (and order in which they're saved): */ + const unsigned char save_order[8]; + + /* maps a preserved register index (preg_index) to corresponding switch_stack offset: */ + unsigned short sw_off[sizeof(struct unw_frame_info) / 8]; + + unsigned short lru_head; /* index of lead-recently used script */ + unsigned short lru_tail; /* index of most-recently used script */ + + /* index into unw_frame_info for preserved register i */ + unsigned short preg_index[UNW_NUM_REGS]; + + short pt_regs_offsets[32]; + + /* unwind table for the kernel: */ + struct unw_table kernel_table; + + /* unwind table describing the gate page (kernel code that is mapped into user space): */ + size_t gate_table_size; + unsigned long *gate_table; + + /* hash table that maps instruction pointer to script index: */ + unsigned short hash[UNW_HASH_SIZE]; + + /* script cache: */ + struct unw_script cache[UNW_CACHE_SIZE]; + +# ifdef UNW_DEBUG + const char *preg_name[UNW_NUM_REGS]; +# endif +# if UNW_STATS + struct { + struct { + int lookups; + int hinted_hits; + int normal_hits; + int collision_chain_traversals; + } cache; + struct { + unsigned long build_time; + unsigned long run_time; + unsigned long parse_time; + int builds; + int news; + int collisions; + int runs; + } script; + struct { + unsigned long init_time; + unsigned long unwind_time; + int inits; + int unwinds; + } api; + } stat; +# endif +} unw = { + .tables = &unw.kernel_table, + .lock = SPIN_LOCK_UNLOCKED, + .save_order = { + UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR, + UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR + }, + .preg_index = { + offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_GR */ + offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_MEM */ + offsetof(struct unw_frame_info, bsp_loc)/8, + offsetof(struct unw_frame_info, bspstore_loc)/8, + offsetof(struct unw_frame_info, pfs_loc)/8, + offsetof(struct unw_frame_info, rnat_loc)/8, + offsetof(struct unw_frame_info, psp)/8, + offsetof(struct unw_frame_info, rp_loc)/8, + offsetof(struct unw_frame_info, r4)/8, + offsetof(struct unw_frame_info, r5)/8, + offsetof(struct unw_frame_info, r6)/8, + offsetof(struct unw_frame_info, r7)/8, + offsetof(struct unw_frame_info, unat_loc)/8, + offsetof(struct unw_frame_info, pr_loc)/8, + offsetof(struct unw_frame_info, lc_loc)/8, + offsetof(struct unw_frame_info, fpsr_loc)/8, + offsetof(struct unw_frame_info, b1_loc)/8, + offsetof(struct unw_frame_info, b2_loc)/8, + offsetof(struct unw_frame_info, b3_loc)/8, + offsetof(struct unw_frame_info, b4_loc)/8, + offsetof(struct unw_frame_info, b5_loc)/8, + offsetof(struct unw_frame_info, f2_loc)/8, + offsetof(struct unw_frame_info, f3_loc)/8, + offsetof(struct unw_frame_info, f4_loc)/8, + offsetof(struct unw_frame_info, f5_loc)/8, + offsetof(struct unw_frame_info, fr_loc[16 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[17 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[18 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[19 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[20 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[21 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[22 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[23 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[24 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[25 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[26 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[27 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[28 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[29 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[30 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[31 - 16])/8, + }, + .pt_regs_offsets = { + [0] = -1, + offsetof(struct pt_regs, r1), + offsetof(struct pt_regs, r2), + offsetof(struct pt_regs, r3), + [4] = -1, [5] = -1, [6] = -1, [7] = -1, + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), + offsetof(struct pt_regs, r10), + offsetof(struct pt_regs, r11), + offsetof(struct pt_regs, r12), + offsetof(struct pt_regs, r13), + offsetof(struct pt_regs, r14), + offsetof(struct pt_regs, r15), + offsetof(struct pt_regs, r16), + offsetof(struct pt_regs, r17), + offsetof(struct pt_regs, r18), + offsetof(struct pt_regs, r19), + offsetof(struct pt_regs, r20), + offsetof(struct pt_regs, r21), + offsetof(struct pt_regs, r22), + offsetof(struct pt_regs, r23), + offsetof(struct pt_regs, r24), + offsetof(struct pt_regs, r25), + offsetof(struct pt_regs, r26), + offsetof(struct pt_regs, r27), + offsetof(struct pt_regs, r28), + offsetof(struct pt_regs, r29), + offsetof(struct pt_regs, r30), + offsetof(struct pt_regs, r31), + }, + .hash = { [0 ... UNW_HASH_SIZE - 1] = -1 }, +#ifdef UNW_DEBUG + .preg_name = { + "pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", "ar.rnat", "psp", "rp", + "r4", "r5", "r6", "r7", + "ar.unat", "pr", "ar.lc", "ar.fpsr", + "b1", "b2", "b3", "b4", "b5", + "f2", "f3", "f4", "f5", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" + } +#endif +}; + +static inline int +read_only (void *addr) +{ + return (unsigned long) ((char *) addr - (char *) &unw.r0) < sizeof(unw.r0); +} + +/* + * Returns offset of rREG in struct pt_regs. + */ +static inline unsigned long +pt_regs_off (unsigned long reg) +{ + short off = -1; + + if (reg < ARRAY_SIZE(unw.pt_regs_offsets)) + off = unw.pt_regs_offsets[reg]; + + if (off < 0) { + UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __FUNCTION__, reg); + off = 0; + } + return (unsigned long) off; +} + +static inline struct pt_regs * +get_scratch_regs (struct unw_frame_info *info) +{ + if (!info->pt) { + /* This should not happen with valid unwind info. */ + UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __FUNCTION__); + if (info->flags & UNW_FLAG_INTERRUPT_FRAME) + info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1); + else + info->pt = info->sp - 16; + } + UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __FUNCTION__, info->sp, info->pt); + return (struct pt_regs *) info->pt; +} + +/* Unwind accessors. */ + +int +unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write) +{ + unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat; + struct unw_ireg *ireg; + struct pt_regs *pt; + + if ((unsigned) regnum - 1 >= 127) { + if (regnum == 0 && !write) { + *val = 0; /* read r0 always returns 0 */ + *nat = 0; + return 0; + } + UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n", + __FUNCTION__, regnum); + return -1; + } + + if (regnum < 32) { + if (regnum >= 4 && regnum <= 7) { + /* access a preserved register */ + ireg = &info->r4 + (regnum - 4); + addr = ireg->loc; + if (addr) { + nat_addr = addr + ireg->nat.off; + switch (ireg->nat.type) { + case UNW_NAT_VAL: + /* simulate getf.sig/setf.sig */ + if (write) { + if (*nat) { + /* write NaTVal and be done with it */ + addr[0] = 0; + addr[1] = 0x1fffe; + return 0; + } + addr[1] = 0x1003e; + } else { + if (addr[0] == 0 && addr[1] == 0x1ffe) { + /* return NaT and be done with it */ + *val = 0; + *nat = 1; + return 0; + } + } + /* fall through */ + case UNW_NAT_NONE: + dummy_nat = 0; + nat_addr = &dummy_nat; + break; + + case UNW_NAT_MEMSTK: + nat_mask = (1UL << ((long) addr & 0x1f8)/8); + break; + + case UNW_NAT_REGSTK: + nat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) addr < info->regstk.limit + || (unsigned long) addr >= info->regstk.top) + { + UNW_DPRINT(0, "unwind.%s: %p outside of regstk " + "[0x%lx-0x%lx)\n", + __FUNCTION__, (void *) addr, + info->regstk.limit, + info->regstk.top); + return -1; + } + if ((unsigned long) nat_addr >= info->regstk.top) + nat_addr = &info->sw->ar_rnat; + nat_mask = (1UL << ia64_rse_slot_num(addr)); + break; + } + } else { + addr = &info->sw->r4 + (regnum - 4); + nat_addr = &info->sw->ar_unat; + nat_mask = (1UL << ((long) addr & 0x1f8)/8); + } + } else { + /* access a scratch register */ + pt = get_scratch_regs(info); + addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum)); + if (info->pri_unat_loc) + nat_addr = info->pri_unat_loc; + else + nat_addr = &info->sw->ar_unat; + nat_mask = (1UL << ((long) addr & 0x1f8)/8); + } + } else { + /* access a stacked register */ + addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32); + nat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) addr < info->regstk.limit + || (unsigned long) addr >= info->regstk.top) + { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside " + "of rbs\n", __FUNCTION__); + return -1; + } + if ((unsigned long) nat_addr >= info->regstk.top) + nat_addr = &info->sw->ar_rnat; + nat_mask = (1UL << ia64_rse_slot_num(addr)); + } + + if (write) { + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __FUNCTION__); + } else { + *addr = *val; + if (*nat) + *nat_addr |= nat_mask; + else + *nat_addr &= ~nat_mask; + } + } else { + if ((*nat_addr & nat_mask) == 0) { + *val = *addr; + *nat = 0; + } else { + *val = 0; /* if register is a NaT, *addr may contain kernel data! */ + *nat = 1; + } + } + return 0; +} +EXPORT_SYMBOL(unw_access_gr); + +int +unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write) +{ + unsigned long *addr; + struct pt_regs *pt; + + switch (regnum) { + /* scratch: */ + case 0: pt = get_scratch_regs(info); addr = &pt->b0; break; + case 6: pt = get_scratch_regs(info); addr = &pt->b6; break; + case 7: pt = get_scratch_regs(info); addr = &pt->b7; break; + + /* preserved: */ + case 1: case 2: case 3: case 4: case 5: + addr = *(&info->b1_loc + (regnum - 1)); + if (!addr) + addr = &info->sw->b1 + (regnum - 1); + break; + + default: + UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n", + __FUNCTION__, regnum); + return -1; + } + if (write) + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __FUNCTION__); + } else + *addr = *val; + else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_br); + +int +unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write) +{ + struct ia64_fpreg *addr = NULL; + struct pt_regs *pt; + + if ((unsigned) (regnum - 2) >= 126) { + UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n", + __FUNCTION__, regnum); + return -1; + } + + if (regnum <= 5) { + addr = *(&info->f2_loc + (regnum - 2)); + if (!addr) + addr = &info->sw->f2 + (regnum - 2); + } else if (regnum <= 15) { + if (regnum <= 11) { + pt = get_scratch_regs(info); + addr = &pt->f6 + (regnum - 6); + } + else + addr = &info->sw->f12 + (regnum - 12); + } else if (regnum <= 31) { + addr = info->fr_loc[regnum - 16]; + if (!addr) + addr = &info->sw->f16 + (regnum - 16); + } else { + struct task_struct *t = info->task; + + if (write) + ia64_sync_fph(t); + else + ia64_flush_fph(t); + addr = t->thread.fph + (regnum - 32); + } + + if (write) + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __FUNCTION__); + } else + *addr = *val; + else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_fr); + +int +unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write) +{ + unsigned long *addr; + struct pt_regs *pt; + + switch (regnum) { + case UNW_AR_BSP: + addr = info->bsp_loc; + if (!addr) + addr = &info->sw->ar_bspstore; + break; + + case UNW_AR_BSPSTORE: + addr = info->bspstore_loc; + if (!addr) + addr = &info->sw->ar_bspstore; + break; + + case UNW_AR_PFS: + addr = info->pfs_loc; + if (!addr) + addr = &info->sw->ar_pfs; + break; + + case UNW_AR_RNAT: + addr = info->rnat_loc; + if (!addr) + addr = &info->sw->ar_rnat; + break; + + case UNW_AR_UNAT: + addr = info->unat_loc; + if (!addr) + addr = &info->sw->ar_unat; + break; + + case UNW_AR_LC: + addr = info->lc_loc; + if (!addr) + addr = &info->sw->ar_lc; + break; + + case UNW_AR_EC: + if (!info->cfm_loc) + return -1; + if (write) + *info->cfm_loc = + (*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52); + else + *val = (*info->cfm_loc >> 52) & 0x3f; + return 0; + + case UNW_AR_FPSR: + addr = info->fpsr_loc; + if (!addr) + addr = &info->sw->ar_fpsr; + break; + + case UNW_AR_RSC: + pt = get_scratch_regs(info); + addr = &pt->ar_rsc; + break; + + case UNW_AR_CCV: + pt = get_scratch_regs(info); + addr = &pt->ar_ccv; + break; + + case UNW_AR_CSD: + pt = get_scratch_regs(info); + addr = &pt->ar_csd; + break; + + case UNW_AR_SSD: + pt = get_scratch_regs(info); + addr = &pt->ar_ssd; + break; + + default: + UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n", + __FUNCTION__, regnum); + return -1; + } + + if (write) { + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __FUNCTION__); + } else + *addr = *val; + } else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_ar); + +int +unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write) +{ + unsigned long *addr; + + addr = info->pr_loc; + if (!addr) + addr = &info->sw->pr; + + if (write) { + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __FUNCTION__); + } else + *addr = *val; + } else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_pr); + + +/* Routines to manipulate the state stack. */ + +static inline void +push (struct unw_state_record *sr) +{ + struct unw_reg_state *rs; + + rs = alloc_reg_state(); + if (!rs) { + printk(KERN_ERR "unwind: cannot stack reg state!\n"); + return; + } + memcpy(rs, &sr->curr, sizeof(*rs)); + sr->curr.next = rs; +} + +static void +pop (struct unw_state_record *sr) +{ + struct unw_reg_state *rs = sr->curr.next; + + if (!rs) { + printk(KERN_ERR "unwind: stack underflow!\n"); + return; + } + memcpy(&sr->curr, rs, sizeof(*rs)); + free_reg_state(rs); +} + +/* Make a copy of the state stack. Non-recursive to avoid stack overflows. */ +static struct unw_reg_state * +dup_state_stack (struct unw_reg_state *rs) +{ + struct unw_reg_state *copy, *prev = NULL, *first = NULL; + + while (rs) { + copy = alloc_reg_state(); + if (!copy) { + printk(KERN_ERR "unwind.dup_state_stack: out of memory\n"); + return NULL; + } + memcpy(copy, rs, sizeof(*copy)); + if (first) + prev->next = copy; + else + first = copy; + rs = rs->next; + prev = copy; + } + return first; +} + +/* Free all stacked register states (but not RS itself). */ +static void +free_state_stack (struct unw_reg_state *rs) +{ + struct unw_reg_state *p, *next; + + for (p = rs->next; p != NULL; p = next) { + next = p->next; + free_reg_state(p); + } + rs->next = NULL; +} + +/* Unwind decoder routines */ + +static enum unw_register_index __attribute_const__ +decode_abreg (unsigned char abreg, int memory) +{ + switch (abreg) { + case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04); + case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22); + case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30); + case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41); + case 0x60: return UNW_REG_PR; + case 0x61: return UNW_REG_PSP; + case 0x62: return memory ? UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR; + case 0x63: return UNW_REG_RP; + case 0x64: return UNW_REG_BSP; + case 0x65: return UNW_REG_BSPSTORE; + case 0x66: return UNW_REG_RNAT; + case 0x67: return UNW_REG_UNAT; + case 0x68: return UNW_REG_FPSR; + case 0x69: return UNW_REG_PFS; + case 0x6a: return UNW_REG_LC; + default: + break; + } + UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __FUNCTION__, abreg); + return UNW_REG_LC; +} + +static void +set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val) +{ + reg->val = val; + reg->where = where; + if (reg->when == UNW_WHEN_NEVER) + reg->when = when; +} + +static void +alloc_spill_area (unsigned long *offp, unsigned long regsize, + struct unw_reg_info *lo, struct unw_reg_info *hi) +{ + struct unw_reg_info *reg; + + for (reg = hi; reg >= lo; --reg) { + if (reg->where == UNW_WHERE_SPILL_HOME) { + reg->where = UNW_WHERE_PSPREL; + *offp -= regsize; + reg->val = *offp; + } + } +} + +static inline void +spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t) +{ + struct unw_reg_info *reg; + + for (reg = *regp; reg <= lim; ++reg) { + if (reg->where == UNW_WHERE_SPILL_HOME) { + reg->when = t; + *regp = reg + 1; + return; + } + } + UNW_DPRINT(0, "unwind.%s: excess spill!\n", __FUNCTION__); +} + +static inline void +finish_prologue (struct unw_state_record *sr) +{ + struct unw_reg_info *reg; + unsigned long off; + int i; + + /* + * First, resolve implicit register save locations (see Section "11.4.2.3 Rules + * for Using Unwind Descriptors", rule 3): + */ + for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) { + reg = sr->curr.reg + unw.save_order[i]; + if (reg->where == UNW_WHERE_GR_SAVE) { + reg->where = UNW_WHERE_GR; + reg->val = sr->gr_save_loc++; + } + } + + /* + * Next, compute when the fp, general, and branch registers get + * saved. This must come before alloc_spill_area() because + * we need to know which registers are spilled to their home + * locations. + */ + if (sr->imask) { + unsigned char kind, mask = 0, *cp = sr->imask; + int t; + static const unsigned char limit[3] = { + UNW_REG_F31, UNW_REG_R7, UNW_REG_B5 + }; + struct unw_reg_info *(regs[3]); + + regs[0] = sr->curr.reg + UNW_REG_F2; + regs[1] = sr->curr.reg + UNW_REG_R4; + regs[2] = sr->curr.reg + UNW_REG_B1; + + for (t = 0; t < sr->region_len; ++t) { + if ((t & 3) == 0) + mask = *cp++; + kind = (mask >> 2*(3-(t & 3))) & 3; + if (kind > 0) + spill_next_when(®s[kind - 1], sr->curr.reg + limit[kind - 1], + sr->region_start + t); + } + } + /* + * Next, lay out the memory stack spill area: + */ + if (sr->any_spills) { + off = sr->spill_offset; + alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31); + alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5); + alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7); + } +} + +/* + * Region header descriptors. + */ + +static void +desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave, + struct unw_state_record *sr) +{ + int i, region_start; + + if (!(sr->in_body || sr->first_region)) + finish_prologue(sr); + sr->first_region = 0; + + /* check if we're done: */ + if (sr->when_target < sr->region_start + sr->region_len) { + sr->done = 1; + return; + } + + region_start = sr->region_start + sr->region_len; + + for (i = 0; i < sr->epilogue_count; ++i) + pop(sr); + sr->epilogue_count = 0; + sr->epilogue_start = UNW_WHEN_NEVER; + + sr->region_start = region_start; + sr->region_len = rlen; + sr->in_body = body; + + if (!body) { + push(sr); + + for (i = 0; i < 4; ++i) { + if (mask & 0x8) + set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, grsave++); + mask <<= 1; + } + sr->gr_save_loc = grsave; + sr->any_spills = 0; + sr->imask = NULL; + sr->spill_offset = 0x10; /* default to psp+16 */ + } +} + +/* + * Prologue descriptors. + */ + +static inline void +desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr) +{ + if (abi == 3 && context == 'i') { + sr->flags |= UNW_FLAG_INTERRUPT_FRAME; + UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __FUNCTION__); + } + else + UNW_DPRINT(0, "unwind%s: ignoring unwabi(abi=0x%x,context=0x%x)\n", + __FUNCTION__, abi, context); +} + +static inline void +desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 5; ++i) { + if (brmask & 1) + set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, gr++); + brmask >>= 1; + } +} + +static inline void +desc_br_mem (unsigned char brmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 5; ++i) { + if (brmask & 1) { + set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + brmask >>= 1; + } +} + +static inline void +desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((grmask & 1) != 0) { + set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + grmask >>= 1; + } + for (i = 0; i < 20; ++i) { + if ((frmask & 1) != 0) { + int base = (i < 4) ? UNW_REG_F2 : UNW_REG_F16 - 4; + set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + frmask >>= 1; + } +} + +static inline void +desc_fr_mem (unsigned char frmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((frmask & 1) != 0) { + set_reg(sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + frmask >>= 1; + } +} + +static inline void +desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((grmask & 1) != 0) + set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, gr++); + grmask >>= 1; + } +} + +static inline void +desc_gr_mem (unsigned char grmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((grmask & 1) != 0) { + set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + grmask >>= 1; + } +} + +static inline void +desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE, + sr->region_start + min_t(int, t, sr->region_len - 1), 16*size); +} + +static inline void +desc_mem_stack_v (unw_word t, struct unw_state_record *sr) +{ + sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1); +} + +static inline void +desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst); +} + +static inline void +desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1, + 0x10 - 4*pspoff); +} + +static inline void +desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1, + 4*spoff); +} + +static inline void +desc_rp_br (unsigned char dst, struct unw_state_record *sr) +{ + sr->return_link_reg = dst; +} + +static inline void +desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr) +{ + struct unw_reg_info *reg = sr->curr.reg + regnum; + + if (reg->where == UNW_WHERE_NONE) + reg->where = UNW_WHERE_GR_SAVE; + reg->when = sr->region_start + min_t(int, t, sr->region_len - 1); +} + +static inline void +desc_spill_base (unw_word pspoff, struct unw_state_record *sr) +{ + sr->spill_offset = 0x10 - 4*pspoff; +} + +static inline unsigned char * +desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr) +{ + sr->imask = imaskp; + return imaskp + (2*sr->region_len + 7)/8; +} + +/* + * Body descriptors. + */ +static inline void +desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr) +{ + sr->epilogue_start = sr->region_start + sr->region_len - 1 - t; + sr->epilogue_count = ecount + 1; +} + +static inline void +desc_copy_state (unw_word label, struct unw_state_record *sr) +{ + struct unw_labeled_state *ls; + + for (ls = sr->labeled_states; ls; ls = ls->next) { + if (ls->label == label) { + free_state_stack(&sr->curr); + memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr)); + sr->curr.next = dup_state_stack(ls->saved_state.next); + return; + } + } + printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label); +} + +static inline void +desc_label_state (unw_word label, struct unw_state_record *sr) +{ + struct unw_labeled_state *ls; + + ls = alloc_labeled_state(); + if (!ls) { + printk(KERN_ERR "unwind.desc_label_state(): out of memory\n"); + return; + } + ls->label = label; + memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state)); + ls->saved_state.next = dup_state_stack(sr->curr.next); + + /* insert into list of labeled states: */ + ls->next = sr->labeled_states; + sr->labeled_states = ls; +} + +/* + * General descriptors. + */ + +static inline int +desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr) +{ + if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1)) + return 0; + if (qp > 0) { + if ((sr->pr_val & (1UL << qp)) == 0) + return 0; + sr->pr_mask |= (1UL << qp); + } + return 1; +} + +static inline void +desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg(abreg, 0); + r->where = UNW_WHERE_NONE; + r->when = UNW_WHEN_NEVER; + r->val = 0; +} + +static inline void +desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x, + unsigned char ytreg, struct unw_state_record *sr) +{ + enum unw_where where = UNW_WHERE_GR; + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + if (x) + where = UNW_WHERE_BR; + else if (ytreg & 0x80) + where = UNW_WHERE_FR; + + r = sr->curr.reg + decode_abreg(abreg, 0); + r->where = where; + r->when = sr->region_start + min_t(int, t, sr->region_len - 1); + r->val = (ytreg & 0x7f); +} + +static inline void +desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff, + struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg(abreg, 1); + r->where = UNW_WHERE_PSPREL; + r->when = sr->region_start + min_t(int, t, sr->region_len - 1); + r->val = 0x10 - 4*pspoff; +} + +static inline void +desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff, + struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg(abreg, 1); + r->where = UNW_WHERE_SPREL; + r->when = sr->region_start + min_t(int, t, sr->region_len - 1); + r->val = 4*spoff; +} + +#define UNW_DEC_BAD_CODE(code) printk(KERN_ERR "unwind: unknown code 0x%02x\n", \ + code); + +/* + * region headers: + */ +#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg) desc_prologue(0,r,m,gr,arg) +#define UNW_DEC_PROLOGUE(fmt,b,r,arg) desc_prologue(b,r,0,32,arg) +/* + * prologue descriptors: + */ +#define UNW_DEC_ABI(fmt,a,c,arg) desc_abi(a,c,arg) +#define UNW_DEC_BR_GR(fmt,b,g,arg) desc_br_gr(b,g,arg) +#define UNW_DEC_BR_MEM(fmt,b,arg) desc_br_mem(b,arg) +#define UNW_DEC_FRGR_MEM(fmt,g,f,arg) desc_frgr_mem(g,f,arg) +#define UNW_DEC_FR_MEM(fmt,f,arg) desc_fr_mem(f,arg) +#define UNW_DEC_GR_GR(fmt,m,g,arg) desc_gr_gr(m,g,arg) +#define UNW_DEC_GR_MEM(fmt,m,arg) desc_gr_mem(m,arg) +#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg) desc_mem_stack_f(t,s,arg) +#define UNW_DEC_MEM_STACK_V(fmt,t,arg) desc_mem_stack_v(t,arg) +#define UNW_DEC_REG_GR(fmt,r,d,arg) desc_reg_gr(r,d,arg) +#define UNW_DEC_REG_PSPREL(fmt,r,o,arg) desc_reg_psprel(r,o,arg) +#define UNW_DEC_REG_SPREL(fmt,r,o,arg) desc_reg_sprel(r,o,arg) +#define UNW_DEC_REG_WHEN(fmt,r,t,arg) desc_reg_when(r,t,arg) +#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg) +#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg) +#define UNW_DEC_PRIUNAT_GR(fmt,r,arg) desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg) +#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg) desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg) +#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg) desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg) +#define UNW_DEC_RP_BR(fmt,d,arg) desc_rp_br(d,arg) +#define UNW_DEC_SPILL_BASE(fmt,o,arg) desc_spill_base(o,arg) +#define UNW_DEC_SPILL_MASK(fmt,m,arg) (m = desc_spill_mask(m,arg)) +/* + * body descriptors: + */ +#define UNW_DEC_EPILOGUE(fmt,t,c,arg) desc_epilogue(t,c,arg) +#define UNW_DEC_COPY_STATE(fmt,l,arg) desc_copy_state(l,arg) +#define UNW_DEC_LABEL_STATE(fmt,l,arg) desc_label_state(l,arg) +/* + * general unwind descriptors: + */ +#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg) desc_spill_reg_p(p,t,a,x,y,arg) +#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg) desc_spill_reg_p(0,t,a,x,y,arg) +#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg) desc_spill_psprel_p(p,t,a,o,arg) +#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg) desc_spill_psprel_p(0,t,a,o,arg) +#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg) desc_spill_sprel_p(p,t,a,o,arg) +#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg) desc_spill_sprel_p(0,t,a,o,arg) +#define UNW_DEC_RESTORE_P(f,p,t,a,arg) desc_restore_p(p,t,a,arg) +#define UNW_DEC_RESTORE(f,t,a,arg) desc_restore_p(0,t,a,arg) + +#include "unwind_decoder.c" + + +/* Unwind scripts. */ + +static inline unw_hash_index_t +hash (unsigned long ip) +{ +# define hashmagic 0x9e3779b97f4a7c16UL /* based on (sqrt(5)/2-1)*2^64 */ + + return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE); +#undef hashmagic +} + +static inline long +cache_match (struct unw_script *script, unsigned long ip, unsigned long pr) +{ + read_lock(&script->lock); + if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0) + /* keep the read lock... */ + return 1; + read_unlock(&script->lock); + return 0; +} + +static inline struct unw_script * +script_lookup (struct unw_frame_info *info) +{ + struct unw_script *script = unw.cache + info->hint; + unsigned short index; + unsigned long ip, pr; + + if (UNW_DEBUG_ON(0)) + return NULL; /* Always regenerate scripts in debug mode */ + + STAT(++unw.stat.cache.lookups); + + ip = info->ip; + pr = info->pr; + + if (cache_match(script, ip, pr)) { + STAT(++unw.stat.cache.hinted_hits); + return script; + } + + index = unw.hash[hash(ip)]; + if (index >= UNW_CACHE_SIZE) + return NULL; + + script = unw.cache + index; + while (1) { + if (cache_match(script, ip, pr)) { + /* update hint; no locking required as single-word writes are atomic */ + STAT(++unw.stat.cache.normal_hits); + unw.cache[info->prev_script].hint = script - unw.cache; + return script; + } + if (script->coll_chain >= UNW_HASH_SIZE) + return NULL; + script = unw.cache + script->coll_chain; + STAT(++unw.stat.cache.collision_chain_traversals); + } +} + +/* + * On returning, a write lock for the SCRIPT is still being held. + */ +static inline struct unw_script * +script_new (unsigned long ip) +{ + struct unw_script *script, *prev, *tmp; + unw_hash_index_t index; + unsigned short head; + + STAT(++unw.stat.script.news); + + /* + * Can't (easily) use cmpxchg() here because of ABA problem + * that is intrinsic in cmpxchg()... + */ + head = unw.lru_head; + script = unw.cache + head; + unw.lru_head = script->lru_chain; + + /* + * We'd deadlock here if we interrupted a thread that is holding a read lock on + * script->lock. Thus, if the write_trylock() fails, we simply bail out. The + * alternative would be to disable interrupts whenever we hold a read-lock, but + * that seems silly. + */ + if (!write_trylock(&script->lock)) + return NULL; + + /* re-insert script at the tail of the LRU chain: */ + unw.cache[unw.lru_tail].lru_chain = head; + unw.lru_tail = head; + + /* remove the old script from the hash table (if it's there): */ + if (script->ip) { + index = hash(script->ip); + tmp = unw.cache + unw.hash[index]; + prev = NULL; + while (1) { + if (tmp == script) { + if (prev) + prev->coll_chain = tmp->coll_chain; + else + unw.hash[index] = tmp->coll_chain; + break; + } else + prev = tmp; + if (tmp->coll_chain >= UNW_CACHE_SIZE) + /* old script wasn't in the hash-table */ + break; + tmp = unw.cache + tmp->coll_chain; + } + } + + /* enter new script in the hash table */ + index = hash(ip); + script->coll_chain = unw.hash[index]; + unw.hash[index] = script - unw.cache; + + script->ip = ip; /* set new IP while we're holding the locks */ + + STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions); + + script->flags = 0; + script->hint = 0; + script->count = 0; + return script; +} + +static void +script_finalize (struct unw_script *script, struct unw_state_record *sr) +{ + script->pr_mask = sr->pr_mask; + script->pr_val = sr->pr_val; + /* + * We could down-grade our write-lock on script->lock here but + * the rwlock API doesn't offer atomic lock downgrading, so + * we'll just keep the write-lock and release it later when + * we're done using the script. + */ +} + +static inline void +script_emit (struct unw_script *script, struct unw_insn insn) +{ + if (script->count >= UNW_MAX_SCRIPT_LEN) { + UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n", + __FUNCTION__, UNW_MAX_SCRIPT_LEN); + return; + } + script->insn[script->count++] = insn; +} + +static inline void +emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script) +{ + struct unw_reg_info *r = sr->curr.reg + i; + enum unw_insn_opcode opc; + struct unw_insn insn; + unsigned long val = 0; + + switch (r->where) { + case UNW_WHERE_GR: + if (r->val >= 32) { + /* register got spilled to a stacked register */ + opc = UNW_INSN_SETNAT_TYPE; + val = UNW_NAT_REGSTK; + } else + /* register got spilled to a scratch register */ + opc = UNW_INSN_SETNAT_MEMSTK; + break; + + case UNW_WHERE_FR: + opc = UNW_INSN_SETNAT_TYPE; + val = UNW_NAT_VAL; + break; + + case UNW_WHERE_BR: + opc = UNW_INSN_SETNAT_TYPE; + val = UNW_NAT_NONE; + break; + + case UNW_WHERE_PSPREL: + case UNW_WHERE_SPREL: + opc = UNW_INSN_SETNAT_MEMSTK; + break; + + default: + UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n", + __FUNCTION__, r->where); + return; + } + insn.opc = opc; + insn.dst = unw.preg_index[i]; + insn.val = val; + script_emit(script, insn); +} + +static void +compile_reg (struct unw_state_record *sr, int i, struct unw_script *script) +{ + struct unw_reg_info *r = sr->curr.reg + i; + enum unw_insn_opcode opc; + unsigned long val, rval; + struct unw_insn insn; + long need_nat_info; + + if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target) + return; + + opc = UNW_INSN_MOVE; + val = rval = r->val; + need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7); + + switch (r->where) { + case UNW_WHERE_GR: + if (rval >= 32) { + opc = UNW_INSN_MOVE_STACKED; + val = rval - 32; + } else if (rval >= 4 && rval <= 7) { + if (need_nat_info) { + opc = UNW_INSN_MOVE2; + need_nat_info = 0; + } + val = unw.preg_index[UNW_REG_R4 + (rval - 4)]; + } else if (rval == 0) { + opc = UNW_INSN_MOVE_CONST; + val = 0; + } else { + /* register got spilled to a scratch register */ + opc = UNW_INSN_MOVE_SCRATCH; + val = pt_regs_off(rval); + } + break; + + case UNW_WHERE_FR: + if (rval <= 5) + val = unw.preg_index[UNW_REG_F2 + (rval - 2)]; + else if (rval >= 16 && rval <= 31) + val = unw.preg_index[UNW_REG_F16 + (rval - 16)]; + else { + opc = UNW_INSN_MOVE_SCRATCH; + if (rval <= 11) + val = offsetof(struct pt_regs, f6) + 16*(rval - 6); + else + UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n", + __FUNCTION__, rval); + } + break; + + case UNW_WHERE_BR: + if (rval >= 1 && rval <= 5) + val = unw.preg_index[UNW_REG_B1 + (rval - 1)]; + else { + opc = UNW_INSN_MOVE_SCRATCH; + if (rval == 0) + val = offsetof(struct pt_regs, b0); + else if (rval == 6) + val = offsetof(struct pt_regs, b6); + else + val = offsetof(struct pt_regs, b7); + } + break; + + case UNW_WHERE_SPREL: + opc = UNW_INSN_ADD_SP; + break; + + case UNW_WHERE_PSPREL: + opc = UNW_INSN_ADD_PSP; + break; + + default: + UNW_DPRINT(0, "unwind%s: register %u has unexpected `where' value of %u\n", + __FUNCTION__, i, r->where); + break; + } + insn.opc = opc; + insn.dst = unw.preg_index[i]; + insn.val = val; + script_emit(script, insn); + if (need_nat_info) + emit_nat_info(sr, i, script); + + if (i == UNW_REG_PSP) { + /* + * info->psp must contain the _value_ of the previous + * sp, not it's save location. We get this by + * dereferencing the value we just stored in + * info->psp: + */ + insn.opc = UNW_INSN_LOAD; + insn.dst = insn.val = unw.preg_index[UNW_REG_PSP]; + script_emit(script, insn); + } +} + +static inline const struct unw_table_entry * +lookup (struct unw_table *table, unsigned long rel_ip) +{ + const struct unw_table_entry *e = NULL; + unsigned long lo, hi, mid; + + /* do a binary search for right entry: */ + for (lo = 0, hi = table->length; lo < hi; ) { + mid = (lo + hi) / 2; + e = &table->array[mid]; + if (rel_ip < e->start_offset) + hi = mid; + else if (rel_ip >= e->end_offset) + lo = mid + 1; + else + break; + } + if (rel_ip < e->start_offset || rel_ip >= e->end_offset) + return NULL; + return e; +} + +/* + * Build an unwind script that unwinds from state OLD_STATE to the + * entrypoint of the function that called OLD_STATE. + */ +static inline struct unw_script * +build_script (struct unw_frame_info *info) +{ + const struct unw_table_entry *e = NULL; + struct unw_script *script = NULL; + struct unw_labeled_state *ls, *next; + unsigned long ip = info->ip; + struct unw_state_record sr; + struct unw_table *table; + struct unw_reg_info *r; + struct unw_insn insn; + u8 *dp, *desc_end; + u64 hdr; + int i; + STAT(unsigned long start, parse_start;) + + STAT(++unw.stat.script.builds; start = ia64_get_itc()); + + /* build state record */ + memset(&sr, 0, sizeof(sr)); + for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) + r->when = UNW_WHEN_NEVER; + sr.pr_val = info->pr; + + UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __FUNCTION__, ip); + script = script_new(ip); + if (!script) { + UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __FUNCTION__); + STAT(unw.stat.script.build_time += ia64_get_itc() - start); + return NULL; + } + unw.cache[info->prev_script].hint = script - unw.cache; + + /* search the kernels and the modules' unwind tables for IP: */ + + STAT(parse_start = ia64_get_itc()); + + for (table = unw.tables; table; table = table->next) { + if (ip >= table->start && ip < table->end) { + e = lookup(table, ip - table->segment_base); + break; + } + } + if (!e) { + /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */ + UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n", + __FUNCTION__, ip, unw.cache[info->prev_script].ip); + sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; + sr.curr.reg[UNW_REG_RP].when = -1; + sr.curr.reg[UNW_REG_RP].val = 0; + compile_reg(&sr, UNW_REG_RP, script); + script_finalize(script, &sr); + STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); + STAT(unw.stat.script.build_time += ia64_get_itc() - start); + return script; + } + + sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16 + + (ip & 0xfUL)); + hdr = *(u64 *) (table->segment_base + e->info_offset); + dp = (u8 *) (table->segment_base + e->info_offset + 8); + desc_end = dp + 8*UNW_LENGTH(hdr); + + while (!sr.done && dp < desc_end) + dp = unw_decode(dp, sr.in_body, &sr); + + if (sr.when_target > sr.epilogue_start) { + /* + * sp has been restored and all values on the memory stack below + * psp also have been restored. + */ + sr.curr.reg[UNW_REG_PSP].val = 0; + sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE; + sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER; + for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) + if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10) + || r->where == UNW_WHERE_SPREL) + { + r->val = 0; + r->where = UNW_WHERE_NONE; + r->when = UNW_WHEN_NEVER; + } + } + + script->flags = sr.flags; + + /* + * If RP did't get saved, generate entry for the return link + * register. + */ + if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) { + sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; + sr.curr.reg[UNW_REG_RP].when = -1; + sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg; + UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n", + __FUNCTION__, ip, sr.curr.reg[UNW_REG_RP].where, + sr.curr.reg[UNW_REG_RP].val); + } + +#ifdef UNW_DEBUG + UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n", + __FUNCTION__, table->segment_base + e->start_offset, sr.when_target); + for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) { + if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) { + UNW_DPRINT(1, " %s <- ", unw.preg_name[r - sr.curr.reg]); + switch (r->where) { + case UNW_WHERE_GR: UNW_DPRINT(1, "r%lu", r->val); break; + case UNW_WHERE_FR: UNW_DPRINT(1, "f%lu", r->val); break; + case UNW_WHERE_BR: UNW_DPRINT(1, "b%lu", r->val); break; + case UNW_WHERE_SPREL: UNW_DPRINT(1, "[sp+0x%lx]", r->val); break; + case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break; + case UNW_WHERE_NONE: + UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val); + break; + + default: + UNW_DPRINT(1, "BADWHERE(%d)", r->where); + break; + } + UNW_DPRINT(1, "\t\t%d\n", r->when); + } + } +#endif + + STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); + + /* translate state record into unwinder instructions: */ + + /* + * First, set psp if we're dealing with a fixed-size frame; + * subsequent instructions may depend on this value. + */ + if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when + && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE) + && sr.curr.reg[UNW_REG_PSP].val != 0) { + /* new psp is sp plus frame size */ + insn.opc = UNW_INSN_ADD; + insn.dst = offsetof(struct unw_frame_info, psp)/8; + insn.val = sr.curr.reg[UNW_REG_PSP].val; /* frame size */ + script_emit(script, insn); + } + + /* determine where the primary UNaT is: */ + if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) + i = UNW_REG_PRI_UNAT_MEM; + else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when) + i = UNW_REG_PRI_UNAT_GR; + else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) + i = UNW_REG_PRI_UNAT_MEM; + else + i = UNW_REG_PRI_UNAT_GR; + + compile_reg(&sr, i, script); + + for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i) + compile_reg(&sr, i, script); + + /* free labeled register states & stack: */ + + STAT(parse_start = ia64_get_itc()); + for (ls = sr.labeled_states; ls; ls = next) { + next = ls->next; + free_state_stack(&ls->saved_state); + free_labeled_state(ls); + } + free_state_stack(&sr.curr); + STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); + + script_finalize(script, &sr); + STAT(unw.stat.script.build_time += ia64_get_itc() - start); + return script; +} + +/* + * Apply the unwinding actions represented by OPS and update SR to + * reflect the state that existed upon entry to the function that this + * unwinder represents. + */ +static inline void +run_script (struct unw_script *script, struct unw_frame_info *state) +{ + struct unw_insn *ip, *limit, next_insn; + unsigned long opc, dst, val, off; + unsigned long *s = (unsigned long *) state; + STAT(unsigned long start;) + + STAT(++unw.stat.script.runs; start = ia64_get_itc()); + state->flags = script->flags; + ip = script->insn; + limit = script->insn + script->count; + next_insn = *ip; + + while (ip++ < limit) { + opc = next_insn.opc; + dst = next_insn.dst; + val = next_insn.val; + next_insn = *ip; + + redo: + switch (opc) { + case UNW_INSN_ADD: + s[dst] += val; + break; + + case UNW_INSN_MOVE2: + if (!s[val]) + goto lazy_init; + s[dst+1] = s[val+1]; + s[dst] = s[val]; + break; + + case UNW_INSN_MOVE: + if (!s[val]) + goto lazy_init; + s[dst] = s[val]; + break; + + case UNW_INSN_MOVE_SCRATCH: + if (state->pt) { + s[dst] = (unsigned long) get_scratch_regs(state) + val; + } else { + s[dst] = 0; + UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n", + __FUNCTION__, dst, val); + } + break; + + case UNW_INSN_MOVE_CONST: + if (val == 0) + s[dst] = (unsigned long) &unw.r0; + else { + s[dst] = 0; + UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n", + __FUNCTION__, val); + } + break; + + + case UNW_INSN_MOVE_STACKED: + s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp, + val); + break; + + case UNW_INSN_ADD_PSP: + s[dst] = state->psp + val; + break; + + case UNW_INSN_ADD_SP: + s[dst] = state->sp + val; + break; + + case UNW_INSN_SETNAT_MEMSTK: + if (!state->pri_unat_loc) + state->pri_unat_loc = &state->sw->ar_unat; + /* register off. is a multiple of 8, so the least 3 bits (type) are 0 */ + s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK; + break; + + case UNW_INSN_SETNAT_TYPE: + s[dst+1] = val; + break; + + case UNW_INSN_LOAD: +#ifdef UNW_DEBUG + if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0 + || s[val] < TASK_SIZE) + { + UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n", + __FUNCTION__, s[val]); + break; + } +#endif + s[dst] = *(unsigned long *) s[val]; + break; + } + } + STAT(unw.stat.script.run_time += ia64_get_itc() - start); + return; + + lazy_init: + off = unw.sw_off[val]; + s[val] = (unsigned long) state->sw + off; + if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7)) + /* + * We're initializing a general register: init NaT info, too. Note that + * the offset is a multiple of 8 which gives us the 3 bits needed for + * the type field. + */ + s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK; + goto redo; +} + +static int +find_save_locs (struct unw_frame_info *info) +{ + int have_write_lock = 0; + struct unw_script *scr; + unsigned long flags = 0; + + if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) { + /* don't let obviously bad addresses pollute the cache */ + /* FIXME: should really be level 0 but it occurs too often. KAO */ + UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __FUNCTION__, info->ip); + info->rp_loc = NULL; + return -1; + } + + scr = script_lookup(info); + if (!scr) { + spin_lock_irqsave(&unw.lock, flags); + scr = build_script(info); + if (!scr) { + spin_unlock_irqrestore(&unw.lock, flags); + UNW_DPRINT(0, + "unwind.%s: failed to locate/build unwind script for ip %lx\n", + __FUNCTION__, info->ip); + return -1; + } + have_write_lock = 1; + } + info->hint = scr->hint; + info->prev_script = scr - unw.cache; + + run_script(scr, info); + + if (have_write_lock) { + write_unlock(&scr->lock); + spin_unlock_irqrestore(&unw.lock, flags); + } else + read_unlock(&scr->lock); + return 0; +} + +int +unw_unwind (struct unw_frame_info *info) +{ + unsigned long prev_ip, prev_sp, prev_bsp; + unsigned long ip, pr, num_regs; + STAT(unsigned long start, flags;) + int retval; + + STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc()); + + prev_ip = info->ip; + prev_sp = info->sp; + prev_bsp = info->bsp; + + /* restore the ip */ + if (!info->rp_loc) { + /* FIXME: should really be level 0 but it occurs too often. KAO */ + UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n", + __FUNCTION__, info->ip); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + ip = info->ip = *info->rp_loc; + if (ip < GATE_ADDR) { + UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __FUNCTION__, ip); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + /* restore the cfm: */ + if (!info->pfs_loc) { + UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __FUNCTION__); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + info->cfm_loc = info->pfs_loc; + + /* restore the bsp: */ + pr = info->pr; + num_regs = 0; + if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) { + info->pt = info->sp + 16; + if ((pr & (1UL << PRED_NON_SYSCALL)) != 0) + num_regs = *info->cfm_loc & 0x7f; /* size of frame */ + info->pfs_loc = + (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs)); + UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __FUNCTION__, info->pt); + } else + num_regs = (*info->cfm_loc >> 7) & 0x7f; /* size of locals */ + info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs); + if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) { + UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n", + __FUNCTION__, info->bsp, info->regstk.limit, info->regstk.top); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + /* restore the sp: */ + info->sp = info->psp; + if (info->sp < info->memstk.top || info->sp > info->memstk.limit) { + UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n", + __FUNCTION__, info->sp, info->memstk.top, info->memstk.limit); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) { + UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n", + __FUNCTION__, ip); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + /* as we unwind, the saved ar.unat becomes the primary unat: */ + info->pri_unat_loc = info->unat_loc; + + /* finally, restore the predicates: */ + unw_get_pr(info, &info->pr); + + retval = find_save_locs(info); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return retval; +} +EXPORT_SYMBOL(unw_unwind); + +int +unw_unwind_to_user (struct unw_frame_info *info) +{ + unsigned long ip, sp; + + while (unw_unwind(info) >= 0) { + if (unw_get_rp(info, &ip) < 0) { + unw_get_ip(info, &ip); + UNW_DPRINT(0, "unwind.%s: failed to read return pointer (ip=0x%lx)\n", + __FUNCTION__, ip); + return -1; + } + unw_get_sp(info, &sp); + if (sp >= (unsigned long)info->task + IA64_STK_OFFSET) + break; + if (ip < FIXADDR_USER_END) + return 0; + } + unw_get_ip(info, &ip); + UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip); + return -1; +} +EXPORT_SYMBOL(unw_unwind_to_user); + +static void +init_frame_info (struct unw_frame_info *info, struct task_struct *t, + struct switch_stack *sw, unsigned long stktop) +{ + unsigned long rbslimit, rbstop, stklimit; + STAT(unsigned long start, flags;) + + STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc()); + + /* + * Subtle stuff here: we _could_ unwind through the switch_stack frame but we + * don't want to do that because it would be slow as each preserved register would + * have to be processed. Instead, what we do here is zero out the frame info and + * start the unwind process at the function that created the switch_stack frame. + * When a preserved value in switch_stack needs to be accessed, run_script() will + * initialize the appropriate pointer on demand. + */ + memset(info, 0, sizeof(*info)); + + rbslimit = (unsigned long) t + IA64_RBS_OFFSET; + rbstop = sw->ar_bspstore; + if (rbstop - (unsigned long) t >= IA64_STK_OFFSET) + rbstop = rbslimit; + + stklimit = (unsigned long) t + IA64_STK_OFFSET; + if (stktop <= rbstop) + stktop = rbstop; + + info->regstk.limit = rbslimit; + info->regstk.top = rbstop; + info->memstk.limit = stklimit; + info->memstk.top = stktop; + info->task = t; + info->sw = sw; + info->sp = info->psp = stktop; + info->pr = sw->pr; + UNW_DPRINT(3, "unwind.%s:\n" + " task 0x%lx\n" + " rbs = [0x%lx-0x%lx)\n" + " stk = [0x%lx-0x%lx)\n" + " pr 0x%lx\n" + " sw 0x%lx\n" + " sp 0x%lx\n", + __FUNCTION__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit, + info->pr, (unsigned long) info->sw, info->sp); + STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags)); +} + +void +unw_init_from_interruption (struct unw_frame_info *info, struct task_struct *t, + struct pt_regs *pt, struct switch_stack *sw) +{ + unsigned long sof; + + init_frame_info(info, t, sw, pt->r12); + info->cfm_loc = &pt->cr_ifs; + info->unat_loc = &pt->ar_unat; + info->pfs_loc = &pt->ar_pfs; + sof = *info->cfm_loc & 0x7f; + info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sof); + info->ip = pt->cr_iip + ia64_psr(pt)->ri; + info->pt = (unsigned long) pt; + UNW_DPRINT(3, "unwind.%s:\n" + " bsp 0x%lx\n" + " sof 0x%lx\n" + " ip 0x%lx\n", + __FUNCTION__, info->bsp, sof, info->ip); + find_save_locs(info); +} + +void +unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw) +{ + unsigned long sol; + + init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16); + info->cfm_loc = &sw->ar_pfs; + sol = (*info->cfm_loc >> 7) & 0x7f; + info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol); + info->ip = sw->b0; + UNW_DPRINT(3, "unwind.%s:\n" + " bsp 0x%lx\n" + " sol 0x%lx\n" + " ip 0x%lx\n", + __FUNCTION__, info->bsp, sol, info->ip); + find_save_locs(info); +} + +EXPORT_SYMBOL(unw_init_frame_info); + +void +unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t) +{ + struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16); + + UNW_DPRINT(1, "unwind.%s\n", __FUNCTION__); + unw_init_frame_info(info, t, sw); +} +EXPORT_SYMBOL(unw_init_from_blocked_task); + +static void +init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base, + unsigned long gp, const void *table_start, const void *table_end) +{ + const struct unw_table_entry *start = table_start, *end = table_end; + + table->name = name; + table->segment_base = segment_base; + table->gp = gp; + table->start = segment_base + start[0].start_offset; + table->end = segment_base + end[-1].end_offset; + table->array = start; + table->length = end - start; +} + +void * +unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp, + const void *table_start, const void *table_end) +{ + const struct unw_table_entry *start = table_start, *end = table_end; + struct unw_table *table; + unsigned long flags; + + if (end - start <= 0) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n", + __FUNCTION__); + return NULL; + } + + table = kmalloc(sizeof(*table), GFP_USER); + if (!table) + return NULL; + + init_unwind_table(table, name, segment_base, gp, table_start, table_end); + + spin_lock_irqsave(&unw.lock, flags); + { + /* keep kernel unwind table at the front (it's searched most commonly): */ + table->next = unw.tables->next; + unw.tables->next = table; + } + spin_unlock_irqrestore(&unw.lock, flags); + + return table; +} + +void +unw_remove_unwind_table (void *handle) +{ + struct unw_table *table, *prev; + struct unw_script *tmp; + unsigned long flags; + long index; + + if (!handle) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n", + __FUNCTION__); + return; + } + + table = handle; + if (table == &unw.kernel_table) { + UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a " + "no-can-do!\n", __FUNCTION__); + return; + } + + spin_lock_irqsave(&unw.lock, flags); + { + /* first, delete the table: */ + + for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next) + if (prev->next == table) + break; + if (!prev) { + UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n", + __FUNCTION__, (void *) table); + spin_unlock_irqrestore(&unw.lock, flags); + return; + } + prev->next = table->next; + } + spin_unlock_irqrestore(&unw.lock, flags); + + /* next, remove hash table entries for this table */ + + for (index = 0; index <= UNW_HASH_SIZE; ++index) { + tmp = unw.cache + unw.hash[index]; + if (unw.hash[index] >= UNW_CACHE_SIZE + || tmp->ip < table->start || tmp->ip >= table->end) + continue; + + write_lock(&tmp->lock); + { + if (tmp->ip >= table->start && tmp->ip < table->end) { + unw.hash[index] = tmp->coll_chain; + tmp->ip = 0; + } + } + write_unlock(&tmp->lock); + } + + kfree(table); +} + +static int __init +create_gate_table (void) +{ + const struct unw_table_entry *entry, *start, *end; + unsigned long *lp, segbase = GATE_ADDR; + size_t info_size, size; + char *info; + Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr) + if (phdr->p_type == PT_IA_64_UNWIND) { + punw = phdr; + break; + } + + if (!punw) { + printk("%s: failed to find gate DSO's unwind table!\n", __FUNCTION__); + return 0; + } + + start = (const struct unw_table_entry *) punw->p_vaddr; + end = (struct unw_table_entry *) ((char *) start + punw->p_memsz); + size = 0; + + unw_add_unwind_table("linux-gate.so", segbase, 0, start, end); + + for (entry = start; entry < end; ++entry) + size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); + size += 8; /* reserve space for "end of table" marker */ + + unw.gate_table = kmalloc(size, GFP_KERNEL); + if (!unw.gate_table) { + unw.gate_table_size = 0; + printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __FUNCTION__); + return 0; + } + unw.gate_table_size = size; + + lp = unw.gate_table; + info = (char *) unw.gate_table + size; + + for (entry = start; entry < end; ++entry, lp += 3) { + info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); + info -= info_size; + memcpy(info, (char *) segbase + entry->info_offset, info_size); + + lp[0] = segbase + entry->start_offset; /* start */ + lp[1] = segbase + entry->end_offset; /* end */ + lp[2] = info - (char *) unw.gate_table; /* info */ + } + *lp = 0; /* end-of-table marker */ + return 0; +} + +__initcall(create_gate_table); + +void __init +unw_init (void) +{ + extern char __gp[]; + extern void unw_hash_index_t_is_too_narrow (void); + long i, off; + + if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE) + unw_hash_index_t_is_too_narrow(); + + unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(AR_UNAT); + unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE); + unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_UNAT); + unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0); + unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(AR_UNAT); + unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR); + unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC); + unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR); + for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8) + unw.sw_off[unw.preg_index[i]] = off; + for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8) + unw.sw_off[unw.preg_index[i]] = off; + for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16) + unw.sw_off[unw.preg_index[i]] = off; + for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16) + unw.sw_off[unw.preg_index[i]] = off; + + for (i = 0; i < UNW_CACHE_SIZE; ++i) { + if (i > 0) + unw.cache[i].lru_chain = (i - 1); + unw.cache[i].coll_chain = -1; + rwlock_init(&unw.cache[i].lock); + } + unw.lru_head = UNW_CACHE_SIZE - 1; + unw.lru_tail = 0; + + init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) __gp, + __start_unwind, __end_unwind); +} + +/* + * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED + * + * This system call has been deprecated. The new and improved way to get + * at the kernel's unwind info is via the gate DSO. The address of the + * ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR. + * + * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED + * + * This system call copies the unwind data into the buffer pointed to by BUF and returns + * the size of the unwind data. If BUF_SIZE is smaller than the size of the unwind data + * or if BUF is NULL, nothing is copied, but the system call still returns the size of the + * unwind data. + * + * The first portion of the unwind data contains an unwind table and rest contains the + * associated unwind info (in no particular order). The unwind table consists of a table + * of entries of the form: + * + * u64 start; (64-bit address of start of function) + * u64 end; (64-bit address of start of function) + * u64 info; (BUF-relative offset to unwind info) + * + * The end of the unwind table is indicated by an entry with a START address of zero. + * + * Please see the IA-64 Software Conventions and Runtime Architecture manual for details + * on the format of the unwind info. + * + * ERRORS + * EFAULT BUF points outside your accessible address space. + */ +asmlinkage long +sys_getunwind (void __user *buf, size_t buf_size) +{ + if (buf && buf_size >= unw.gate_table_size) + if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0) + return -EFAULT; + return unw.gate_table_size; +} diff --git a/arch/ia64/kernel/unwind_decoder.c b/arch/ia64/kernel/unwind_decoder.c new file mode 100644 index 000000000000..50ac2d82f9bf --- /dev/null +++ b/arch/ia64/kernel/unwind_decoder.c @@ -0,0 +1,459 @@ +/* + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Generic IA-64 unwind info decoder. + * + * This file is used both by the Linux kernel and objdump. Please keep + * the two copies of this file in sync. + * + * You need to customize the decoder by defining the following + * macros/constants before including this file: + * + * Types: + * unw_word Unsigned integer type with at least 64 bits + * + * Register names: + * UNW_REG_BSP + * UNW_REG_BSPSTORE + * UNW_REG_FPSR + * UNW_REG_LC + * UNW_REG_PFS + * UNW_REG_PR + * UNW_REG_RNAT + * UNW_REG_PSP + * UNW_REG_RP + * UNW_REG_UNAT + * + * Decoder action macros: + * UNW_DEC_BAD_CODE(code) + * UNW_DEC_ABI(fmt,abi,context,arg) + * UNW_DEC_BR_GR(fmt,brmask,gr,arg) + * UNW_DEC_BR_MEM(fmt,brmask,arg) + * UNW_DEC_COPY_STATE(fmt,label,arg) + * UNW_DEC_EPILOGUE(fmt,t,ecount,arg) + * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg) + * UNW_DEC_FR_MEM(fmt,frmask,arg) + * UNW_DEC_GR_GR(fmt,grmask,gr,arg) + * UNW_DEC_GR_MEM(fmt,grmask,arg) + * UNW_DEC_LABEL_STATE(fmt,label,arg) + * UNW_DEC_MEM_STACK_F(fmt,t,size,arg) + * UNW_DEC_MEM_STACK_V(fmt,t,arg) + * UNW_DEC_PRIUNAT_GR(fmt,r,arg) + * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) + * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) + * UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg) + * UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg) + * UNW_DEC_PROLOGUE(fmt,body,rlen,arg) + * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg) + * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg) + * UNW_DEC_REG_REG(fmt,src,dst,arg) + * UNW_DEC_REG_SPREL(fmt,reg,spoff,arg) + * UNW_DEC_REG_WHEN(fmt,reg,t,arg) + * UNW_DEC_RESTORE(fmt,t,abreg,arg) + * UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg) + * UNW_DEC_SPILL_BASE(fmt,pspoff,arg) + * UNW_DEC_SPILL_MASK(fmt,imaskp,arg) + * UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg) + * UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg) + * UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg) + * UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg) + * UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg) + * UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg) + */ + +static unw_word +unw_decode_uleb128 (unsigned char **dpp) +{ + unsigned shift = 0; + unw_word byte, result = 0; + unsigned char *bp = *dpp; + + while (1) + { + byte = *bp++; + result |= (byte & 0x7f) << shift; + if ((byte & 0x80) == 0) + break; + shift += 7; + } + *dpp = bp; + return result; +} + +static unsigned char * +unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, abreg; + unw_word t, off; + + byte1 = *dp++; + t = unw_decode_uleb128 (&dp); + off = unw_decode_uleb128 (&dp); + abreg = (byte1 & 0x7f); + if (byte1 & 0x80) + UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg); + else + UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg); + return dp; +} + +static unsigned char * +unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, byte2, abreg, x, ytreg; + unw_word t; + + byte1 = *dp++; byte2 = *dp++; + t = unw_decode_uleb128 (&dp); + abreg = (byte1 & 0x7f); + ytreg = byte2; + x = (byte1 >> 7) & 1; + if ((byte1 & 0x80) == 0 && ytreg == 0) + UNW_DEC_RESTORE(X2, t, abreg, arg); + else + UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg); + return dp; +} + +static unsigned char * +unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, byte2, abreg, qp; + unw_word t, off; + + byte1 = *dp++; byte2 = *dp++; + t = unw_decode_uleb128 (&dp); + off = unw_decode_uleb128 (&dp); + + qp = (byte1 & 0x3f); + abreg = (byte2 & 0x7f); + + if (byte1 & 0x80) + UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg); + else + UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg); + return dp; +} + +static unsigned char * +unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg; + unw_word t; + + byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; + t = unw_decode_uleb128 (&dp); + + qp = (byte1 & 0x3f); + abreg = (byte2 & 0x7f); + x = (byte2 >> 7) & 1; + ytreg = byte3; + + if ((byte2 & 0x80) == 0 && byte3 == 0) + UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg); + else + UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg); + return dp; +} + +static unsigned char * +unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg) +{ + int body = (code & 0x20) != 0; + unw_word rlen; + + rlen = (code & 0x1f); + UNW_DEC_PROLOGUE(R1, body, rlen, arg); + return dp; +} + +static unsigned char * +unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, mask, grsave; + unw_word rlen; + + byte1 = *dp++; + + mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); + grsave = (byte1 & 0x7f); + rlen = unw_decode_uleb128 (&dp); + UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg); + return dp; +} + +static unsigned char * +unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word rlen; + + rlen = unw_decode_uleb128 (&dp); + UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg); + return dp; +} + +static unsigned char * +unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char brmask = (code & 0x1f); + + UNW_DEC_BR_MEM(P1, brmask, arg); + return dp; +} + +static unsigned char * +unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg) +{ + if ((code & 0x10) == 0) + { + unsigned char byte1 = *dp++; + + UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1), + (byte1 & 0x7f), arg); + } + else if ((code & 0x08) == 0) + { + unsigned char byte1 = *dp++, r, dst; + + r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); + dst = (byte1 & 0x7f); + switch (r) + { + case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break; + case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break; + case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break; + case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break; + case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break; + case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break; + case 6: UNW_DEC_RP_BR(P3, dst, arg); break; + case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break; + case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break; + case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break; + case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break; + case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break; + default: UNW_DEC_BAD_CODE(r); break; + } + } + else if ((code & 0x7) == 0) + UNW_DEC_SPILL_MASK(P4, dp, arg); + else if ((code & 0x7) == 1) + { + unw_word grmask, frmask, byte1, byte2, byte3; + + byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; + grmask = ((byte1 >> 4) & 0xf); + frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3; + UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg); + } + else + UNW_DEC_BAD_CODE(code); + return dp; +} + +static unsigned char * +unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg) +{ + int gregs = (code & 0x10) != 0; + unsigned char mask = (code & 0x0f); + + if (gregs) + UNW_DEC_GR_MEM(P6, mask, arg); + else + UNW_DEC_FR_MEM(P6, mask, arg); + return dp; +} + +static unsigned char * +unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char r, byte1, byte2; + unw_word t, size; + + if ((code & 0x10) == 0) + { + r = (code & 0xf); + t = unw_decode_uleb128 (&dp); + switch (r) + { + case 0: + size = unw_decode_uleb128 (&dp); + UNW_DEC_MEM_STACK_F(P7, t, size, arg); + break; + + case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break; + case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break; + case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break; + case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break; + case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break; + case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break; + case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break; + case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break; + case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break; + case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break; + case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break; + case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break; + case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break; + case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break; + case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break; + default: UNW_DEC_BAD_CODE(r); break; + } + } + else + { + switch (code & 0xf) + { + case 0x0: /* p8 */ + { + r = *dp++; + t = unw_decode_uleb128 (&dp); + switch (r) + { + case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break; + case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break; + case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break; + case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break; + case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break; + case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break; + case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break; + case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break; + case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break; + case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break; + case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break; + case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break; + case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break; + case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break; + case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break; + case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break; + case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break; + case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break; + case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break; + default: UNW_DEC_BAD_CODE(r); break; + } + } + break; + + case 0x1: + byte1 = *dp++; byte2 = *dp++; + UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg); + break; + + case 0xf: /* p10 */ + byte1 = *dp++; byte2 = *dp++; + UNW_DEC_ABI(P10, byte1, byte2, arg); + break; + + case 0x9: + return unw_decode_x1 (dp, code, arg); + + case 0xa: + return unw_decode_x2 (dp, code, arg); + + case 0xb: + return unw_decode_x3 (dp, code, arg); + + case 0xc: + return unw_decode_x4 (dp, code, arg); + + default: + UNW_DEC_BAD_CODE(code); + break; + } + } + return dp; +} + +static unsigned char * +unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word label = (code & 0x1f); + + if ((code & 0x20) != 0) + UNW_DEC_COPY_STATE(B1, label, arg); + else + UNW_DEC_LABEL_STATE(B1, label, arg); + return dp; +} + +static unsigned char * +unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word t; + + t = unw_decode_uleb128 (&dp); + UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg); + return dp; +} + +static unsigned char * +unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word t, ecount, label; + + if ((code & 0x10) == 0) + { + t = unw_decode_uleb128 (&dp); + ecount = unw_decode_uleb128 (&dp); + UNW_DEC_EPILOGUE(B3, t, ecount, arg); + } + else if ((code & 0x07) == 0) + { + label = unw_decode_uleb128 (&dp); + if ((code & 0x08) != 0) + UNW_DEC_COPY_STATE(B4, label, arg); + else + UNW_DEC_LABEL_STATE(B4, label, arg); + } + else + switch (code & 0x7) + { + case 1: return unw_decode_x1 (dp, code, arg); + case 2: return unw_decode_x2 (dp, code, arg); + case 3: return unw_decode_x3 (dp, code, arg); + case 4: return unw_decode_x4 (dp, code, arg); + default: UNW_DEC_BAD_CODE(code); break; + } + return dp; +} + +typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *); + +static unw_decoder unw_decode_table[2][8] = +{ + /* prologue table: */ + { + unw_decode_r1, /* 0 */ + unw_decode_r1, + unw_decode_r2, + unw_decode_r3, + unw_decode_p1, /* 4 */ + unw_decode_p2_p5, + unw_decode_p6, + unw_decode_p7_p10 + }, + { + unw_decode_r1, /* 0 */ + unw_decode_r1, + unw_decode_r2, + unw_decode_r3, + unw_decode_b1, /* 4 */ + unw_decode_b1, + unw_decode_b2, + unw_decode_b3_x4 + } +}; + +/* + * Decode one descriptor and return address of next descriptor. + */ +static inline unsigned char * +unw_decode (unsigned char *dp, int inside_body, void *arg) +{ + unw_decoder decoder; + unsigned char code; + + code = *dp++; + decoder = unw_decode_table[inside_body][code >> 5]; + dp = (*decoder) (dp, code, arg); + return dp; +} diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h new file mode 100644 index 000000000000..96693a6ae370 --- /dev/null +++ b/arch/ia64/kernel/unwind_i.h @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * Kernel unwind support. + */ + +#define UNW_VER(x) ((x) >> 48) +#define UNW_FLAG_MASK 0x0000ffff00000000 +#define UNW_FLAG_OSMASK 0x0000f00000000000 +#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L) +#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L) +#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL) + +enum unw_register_index { + /* primary unat: */ + UNW_REG_PRI_UNAT_GR, + UNW_REG_PRI_UNAT_MEM, + + /* register stack */ + UNW_REG_BSP, /* register stack pointer */ + UNW_REG_BSPSTORE, + UNW_REG_PFS, /* previous function state */ + UNW_REG_RNAT, + /* memory stack */ + UNW_REG_PSP, /* previous memory stack pointer */ + /* return pointer: */ + UNW_REG_RP, + + /* preserved registers: */ + UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7, + UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR, + UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5, + UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5, + UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19, + UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23, + UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27, + UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31, + UNW_NUM_REGS +}; + +struct unw_info_block { + u64 header; + u64 desc[0]; /* unwind descriptors */ + /* personality routine and language-specific data follow behind descriptors */ +}; + +struct unw_table { + struct unw_table *next; /* must be first member! */ + const char *name; + unsigned long gp; /* global pointer for this load-module */ + unsigned long segment_base; /* base for offsets in the unwind table entries */ + unsigned long start; + unsigned long end; + const struct unw_table_entry *array; + unsigned long length; +}; + +enum unw_where { + UNW_WHERE_NONE, /* register isn't saved at all */ + UNW_WHERE_GR, /* register is saved in a general register */ + UNW_WHERE_FR, /* register is saved in a floating-point register */ + UNW_WHERE_BR, /* register is saved in a branch register */ + UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */ + UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */ + /* + * At the end of each prologue these locations get resolved to + * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively: + */ + UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */ + UNW_WHERE_GR_SAVE /* register is saved in next general register */ +}; + +#define UNW_WHEN_NEVER 0x7fffffff + +struct unw_reg_info { + unsigned long val; /* save location: register number or offset */ + enum unw_where where; /* where the register gets saved */ + int when; /* when the register gets saved */ +}; + +struct unw_reg_state { + struct unw_reg_state *next; /* next (outer) element on state stack */ + struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations */ +}; + +struct unw_labeled_state { + struct unw_labeled_state *next; /* next labeled state (or NULL) */ + unsigned long label; /* label for this state */ + struct unw_reg_state saved_state; +}; + +struct unw_state_record { + unsigned int first_region : 1; /* is this the first region? */ + unsigned int done : 1; /* are we done scanning descriptors? */ + unsigned int any_spills : 1; /* got any register spills? */ + unsigned int in_body : 1; /* are we inside a body (as opposed to a prologue)? */ + unsigned long flags; /* see UNW_FLAG_* in unwind.h */ + + u8 *imask; /* imask of spill_mask record or NULL */ + unsigned long pr_val; /* predicate values */ + unsigned long pr_mask; /* predicate mask */ + long spill_offset; /* psp-relative offset for spill base */ + int region_start; + int region_len; + int epilogue_start; + int epilogue_count; + int when_target; + + u8 gr_save_loc; /* next general register to use for saving a register */ + u8 return_link_reg; /* branch register in which the return link is passed */ + + struct unw_labeled_state *labeled_states; /* list of all labeled states */ + struct unw_reg_state curr; /* current state */ +}; + +enum unw_nat_type { + UNW_NAT_NONE, /* NaT not represented */ + UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */ + UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */ + UNW_NAT_REGSTK /* NaT is in rnat */ +}; + +enum unw_insn_opcode { + UNW_INSN_ADD, /* s[dst] += val */ + UNW_INSN_ADD_PSP, /* s[dst] = (s.psp + val) */ + UNW_INSN_ADD_SP, /* s[dst] = (s.sp + val) */ + UNW_INSN_MOVE, /* s[dst] = s[val] */ + UNW_INSN_MOVE2, /* s[dst] = s[val]; s[dst+1] = s[val+1] */ + UNW_INSN_MOVE_STACKED, /* s[dst] = ia64_rse_skip(*s.bsp, val) */ + UNW_INSN_SETNAT_MEMSTK, /* s[dst+1].nat.type = MEMSTK; + s[dst+1].nat.off = *s.pri_unat - s[dst] */ + UNW_INSN_SETNAT_TYPE, /* s[dst+1].nat.type = val */ + UNW_INSN_LOAD, /* s[dst] = *s[val] */ + UNW_INSN_MOVE_SCRATCH, /* s[dst] = scratch reg "val" */ + UNW_INSN_MOVE_CONST, /* s[dst] = constant reg "val" */ +}; + +struct unw_insn { + unsigned int opc : 4; + unsigned int dst : 9; + signed int val : 19; +}; + +/* + * Preserved general static registers (r4-r7) give rise to two script + * instructions; everything else yields at most one instruction; at + * the end of the script, the psp gets popped, accounting for one more + * instruction. + */ +#define UNW_MAX_SCRIPT_LEN (UNW_NUM_REGS + 5) + +struct unw_script { + unsigned long ip; /* ip this script is for */ + unsigned long pr_mask; /* mask of predicates script depends on */ + unsigned long pr_val; /* predicate values this script is for */ + rwlock_t lock; + unsigned int flags; /* see UNW_FLAG_* in unwind.h */ + unsigned short lru_chain; /* used for least-recently-used chain */ + unsigned short coll_chain; /* used for hash collisions */ + unsigned short hint; /* hint for next script to try (or -1) */ + unsigned short count; /* number of instructions in script */ + struct unw_insn insn[UNW_MAX_SCRIPT_LEN]; +}; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..b9f0db4c1b04 --- /dev/null +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -0,0 +1,251 @@ +#include <linux/config.h> + +#include <asm/cache.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/pgtable.h> + +#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) +#include <asm-generic/vmlinux.lds.h> + +OUTPUT_FORMAT("elf64-ia64-little") +OUTPUT_ARCH(ia64) +ENTRY(phys_start) +jiffies = jiffies_64; +PHDRS { + code PT_LOAD; + percpu PT_LOAD; + data PT_LOAD; +} +SECTIONS +{ + /* Sections to be discarded */ + /DISCARD/ : { + *(.exit.text) + *(.exit.data) + *(.exitcall.exit) + *(.IA_64.unwind.exit.text) + *(.IA_64.unwind_info.exit.text) + } + + v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ + phys_start = _start - LOAD_OFFSET; + + code : { } :code + . = KERNEL_START; + + _text = .; + _stext = .; + + .text : AT(ADDR(.text) - LOAD_OFFSET) + { + *(.text.ivt) + *(.text) + SCHED_TEXT + LOCK_TEXT + *(.gnu.linkonce.t*) + } + .text2 : AT(ADDR(.text2) - LOAD_OFFSET) + { *(.text2) } +#ifdef CONFIG_SMP + .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) + { *(.text.lock) } +#endif + _etext = .; + + /* Read-only data */ + + /* Exception table */ + . = ALIGN(16); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) + { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET) + { + __start___vtop_patchlist = .; + *(.data.patch.vtop) + __end___vtop_patchlist = .; + } + + .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET) + { + __start___mckinley_e9_bundles = .; + *(.data.patch.mckinley_e9) + __end___mckinley_e9_bundles = .; + } + + /* Global data */ + _data = .; + +#if defined(CONFIG_IA64_GENERIC) + /* Machine Vector */ + . = ALIGN(16); + .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) + { + machvec_start = .; + *(.machvec) + machvec_end = .; + } +#endif + + /* Unwind info & table: */ + . = ALIGN(8); + .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) + { *(.IA_64.unwind_info*) } + .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) + { + __start_unwind = .; + *(.IA_64.unwind*) + __end_unwind = .; + } + + RODATA + + .opd : AT(ADDR(.opd) - LOAD_OFFSET) + { *(.opd) } + + /* Initialization code and data: */ + + . = ALIGN(PAGE_SIZE); + __init_begin = .; + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) + { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) + { *(.init.data) } + + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) + { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } + + . = ALIGN(16); + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) + { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) + { + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + } + __con_initcall_start = .; + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) + { *(.con_initcall.init) } + __con_initcall_end = .; + __security_initcall_start = .; + .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) + { *(.security_initcall.init) } + __security_initcall_end = .; + . = ALIGN(PAGE_SIZE); + __init_end = .; + + /* The initial task and kernel stack */ + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) + { *(.data.init_task) } + + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) + { *(__special_page_section) + __start_gate_section = .; + *(.data.gate) + __stop_gate_section = .; + } + . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */ + + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) + { *(.data.cacheline_aligned) } + + /* Per-cpu data: */ + percpu : { } :percpu + . = ALIGN(PERCPU_PAGE_SIZE); + __phys_per_cpu_start = .; + .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) + { + __per_cpu_start = .; + *(.data.percpu) + __per_cpu_end = .; + } + . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */ + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) + { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) + { *(.got.plt) *(.got) } + __gp = ADDR(.got) + 0x200000; + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) + { *(.sdata) *(.sdata1) *(.srdata) } + _edata = .; + _bss = .; + .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) + { *(.sbss) *(.scommon) } + .bss : AT(ADDR(.bss) - LOAD_OFFSET) + { *(.bss) *(COMMON) } + + _end = .; + + code : { } :code + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* These must appear regardless of . */ + /* Discard them for now since Intel SoftSDV cannot handle them. + .comment 0 : { *(.comment) } + .note 0 : { *(.note) } + */ + /DISCARD/ : { *(.comment) } + /DISCARD/ : { *(.note) } +} diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile new file mode 100644 index 000000000000..1902c3c2ef92 --- /dev/null +++ b/arch/ia64/lib/Makefile @@ -0,0 +1,52 @@ +# +# Makefile for ia64-specific library routines.. +# + +obj-y := io.o + +lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ + __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ + bitop.o checksum.o clear_page.o csum_partial_copy.o copy_page.o \ + clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ + flush.o ip_fast_csum.o do_csum.o \ + memset.o strlen.o swiotlb.o + +lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o +lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o +lib-$(CONFIG_PERFMON) += carta_random.o +lib-$(CONFIG_MD_RAID5) += xor.o +lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o + +AFLAGS___divdi3.o = +AFLAGS___udivdi3.o = -DUNSIGNED +AFLAGS___moddi3.o = -DMODULO +AFLAGS___umoddi3.o = -DUNSIGNED -DMODULO + +AFLAGS___divsi3.o = +AFLAGS___udivsi3.o = -DUNSIGNED +AFLAGS___modsi3.o = -DMODULO +AFLAGS___umodsi3.o = -DUNSIGNED -DMODULO + +$(obj)/__divdi3.o: $(src)/idiv64.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__udivdi3.o: $(src)/idiv64.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__moddi3.o: $(src)/idiv64.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__umoddi3.o: $(src)/idiv64.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__divsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__udivsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__modsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__umodsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_dep,as_o_S) diff --git a/arch/ia64/lib/bitop.c b/arch/ia64/lib/bitop.c new file mode 100644 index 000000000000..82e299c8464e --- /dev/null +++ b/arch/ia64/lib/bitop.c @@ -0,0 +1,88 @@ +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/intrinsics.h> +#include <linux/module.h> +#include <linux/bitops.h> + +/* + * Find next zero bit in a bitmap reasonably efficiently.. + */ + +int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset) +{ + unsigned long *p = ((unsigned long *) addr) + (offset >> 6); + unsigned long result = offset & ~63UL; + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 63UL; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (64-offset); + if (size < 64) + goto found_first; + if (~tmp) + goto found_middle; + size -= 64; + result += 64; + } + while (size & ~63UL) { + if (~(tmp = *(p++))) + goto found_middle; + result += 64; + size -= 64; + } + if (!size) + return result; + tmp = *p; +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* any bits zero? */ + return result + size; /* nope */ +found_middle: + return result + ffz(tmp); +} +EXPORT_SYMBOL(__find_next_zero_bit); + +/* + * Find next bit in a bitmap reasonably efficiently.. + */ +int __find_next_bit(const void *addr, unsigned long size, unsigned long offset) +{ + unsigned long *p = ((unsigned long *) addr) + (offset >> 6); + unsigned long result = offset & ~63UL; + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 63UL; + if (offset) { + tmp = *(p++); + tmp &= ~0UL << offset; + if (size < 64) + goto found_first; + if (tmp) + goto found_middle; + size -= 64; + result += 64; + } + while (size & ~63UL) { + if ((tmp = *(p++))) + goto found_middle; + result += 64; + size -= 64; + } + if (!size) + return result; + tmp = *p; + found_first: + tmp &= ~0UL >> (64-size); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ + found_middle: + return result + __ffs(tmp); +} +EXPORT_SYMBOL(__find_next_bit); diff --git a/arch/ia64/lib/carta_random.S b/arch/ia64/lib/carta_random.S new file mode 100644 index 000000000000..d0674c360364 --- /dev/null +++ b/arch/ia64/lib/carta_random.S @@ -0,0 +1,54 @@ +/* + * Fast, simple, yet decent quality random number generator based on + * a paper by David G. Carta ("Two Fast Implementations of the + * `Minimal Standard' Random Number Generator," Communications of the + * ACM, January, 1990). + * + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <asm/asmmacro.h> + +#define a r2 +#define m r3 +#define lo r8 +#define hi r9 +#define t0 r16 +#define t1 r17 +#define seed r32 + +GLOBAL_ENTRY(carta_random32) + movl a = (16807 << 16) | 16807 + ;; + pmpyshr2.u t0 = a, seed, 0 + pmpyshr2.u t1 = a, seed, 16 + ;; + unpack2.l t0 = t1, t0 + dep m = -1, r0, 0, 31 + ;; + zxt4 lo = t0 + shr.u hi = t0, 32 + ;; + dep t0 = 0, hi, 15, 49 // t0 = (hi & 0x7fff) + ;; + shl t0 = t0, 16 // t0 = (hi & 0x7fff) << 16 + shr t1 = hi, 15 // t1 = (hi >> 15) + ;; + add lo = lo, t0 + ;; + cmp.gtu p6, p0 = lo, m + ;; +(p6) and lo = lo, m + ;; +(p6) add lo = 1, lo + ;; + add lo = lo, t1 + ;; + cmp.gtu p6, p0 = lo, m + ;; +(p6) and lo = lo, m + ;; +(p6) add lo = 1, lo + br.ret.sptk.many rp +END(carta_random32) diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c new file mode 100644 index 000000000000..beb11721d9f5 --- /dev/null +++ b/arch/ia64/lib/checksum.c @@ -0,0 +1,102 @@ +/* + * Network checksum routines + * + * Copyright (C) 1999, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * + * Most of the code coming from arch/alpha/lib/checksum.c + * + * This file contains network checksum routines that are better done + * in an architecture-specific manner due to speed.. + */ + +#include <linux/module.h> +#include <linux/string.h> + +#include <asm/byteorder.h> + +static inline unsigned short +from64to16 (unsigned long x) +{ + /* add up 32-bit words for 33 bits */ + x = (x & 0xffffffff) + (x >> 32); + /* add up 16-bit and 17-bit words for 17+c bits */ + x = (x & 0xffff) + (x >> 16); + /* add up 16-bit and 2-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented. + */ +unsigned short int +csum_tcpudp_magic (unsigned long saddr, unsigned long daddr, unsigned short len, + unsigned short proto, unsigned int sum) +{ + return ~from64to16(saddr + daddr + sum + ((unsigned long) ntohs(len) << 16) + + ((unsigned long) proto << 8)); +} + +EXPORT_SYMBOL(csum_tcpudp_magic); + +unsigned int +csum_tcpudp_nofold (unsigned long saddr, unsigned long daddr, unsigned short len, + unsigned short proto, unsigned int sum) +{ + unsigned long result; + + result = (saddr + daddr + sum + + ((unsigned long) ntohs(len) << 16) + + ((unsigned long) proto << 8)); + + /* Fold down to 32-bits so we don't lose in the typedef-less network stack. */ + /* 64 to 33 */ + result = (result & 0xffffffff) + (result >> 32); + /* 33 to 32 */ + result = (result & 0xffffffff) + (result >> 32); + return result; +} + +extern unsigned long do_csum (const unsigned char *, long); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +unsigned int +csum_partial (const unsigned char * buff, int len, unsigned int sum) +{ + unsigned long result = do_csum(buff, len); + + /* add in old sum, and carry.. */ + result += sum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return result; +} + +EXPORT_SYMBOL(csum_partial); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +unsigned short +ip_compute_csum (unsigned char * buff, int len) +{ + return ~do_csum(buff,len); +} + +EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S new file mode 100644 index 000000000000..d4987061dda7 --- /dev/null +++ b/arch/ia64/lib/clear_page.S @@ -0,0 +1,77 @@ +/* + * Copyright (C) 1999-2002 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> + * + * 1/06/01 davidm Tuned for Itanium. + * 2/12/02 kchen Tuned for both Itanium and McKinley + * 3/08/02 davidm Some more tweaking + */ +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/page.h> + +#ifdef CONFIG_ITANIUM +# define L3_LINE_SIZE 64 // Itanium L3 line size +# define PREFETCH_LINES 9 // magic number +#else +# define L3_LINE_SIZE 128 // McKinley L3 line size +# define PREFETCH_LINES 12 // magic number +#endif + +#define saved_lc r2 +#define dst_fetch r3 +#define dst1 r8 +#define dst2 r9 +#define dst3 r10 +#define dst4 r11 + +#define dst_last r31 + +GLOBAL_ENTRY(clear_page) + .prologue + .regstk 1,0,0,0 + mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until + .save ar.lc, saved_lc + mov saved_lc = ar.lc + + .body + mov ar.lc = (PREFETCH_LINES - 1) + mov dst_fetch = in0 + adds dst1 = 16, in0 + adds dst2 = 32, in0 + ;; +.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE + adds dst3 = 48, in0 // executing this multiple times is harmless + br.cloop.sptk.few .fetch + ;; + addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch + mov ar.lc = r16 // one L3 line per iteration + adds dst4 = 64, in0 + ;; +#ifdef CONFIG_ITANIUM + // Optimized for Itanium +1: stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 + cmp.lt p8,p0=dst_fetch, dst_last + ;; +#else + // Optimized for McKinley +1: stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 + stf.spill.nta [dst3] = f0, 64 + stf.spill.nta [dst4] = f0, 128 + cmp.lt p8,p0=dst_fetch, dst_last + ;; + stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 +#endif + stf.spill.nta [dst3] = f0, 64 +(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE + br.cloop.sptk.few 1b + ;; + mov ar.lc = saved_lc // restore lc + br.ret.sptk.many rp +END(clear_page) diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S new file mode 100644 index 000000000000..eecd8577b209 --- /dev/null +++ b/arch/ia64/lib/clear_user.S @@ -0,0 +1,209 @@ +/* + * This routine clears to zero a linear memory buffer in user space. + * + * Inputs: + * in0: address of buffer + * in1: length of buffer in bytes + * Outputs: + * r8: number of bytes that didn't get cleared due to a fault + * + * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + */ + +#include <asm/asmmacro.h> + +// +// arguments +// +#define buf r32 +#define len r33 + +// +// local registers +// +#define cnt r16 +#define buf2 r17 +#define saved_lc r18 +#define saved_pfs r19 +#define tmp r20 +#define len2 r21 +#define len3 r22 + +// +// Theory of operations: +// - we check whether or not the buffer is small, i.e., less than 17 +// in which case we do the byte by byte loop. +// +// - Otherwise we go progressively from 1 byte store to 8byte store in +// the head part, the body is a 16byte store loop and we finish we the +// tail for the last 15 bytes. +// The good point about this breakdown is that the long buffer handling +// contains only 2 branches. +// +// The reason for not using shifting & masking for both the head and the +// tail is to stay semantically correct. This routine is not supposed +// to write bytes outside of the buffer. While most of the time this would +// be ok, we can't tolerate a mistake. A classical example is the case +// of multithreaded code were to the extra bytes touched is actually owned +// by another thread which runs concurrently to ours. Another, less likely, +// example is with device drivers where reading an I/O mapped location may +// have side effects (same thing for writing). +// + +GLOBAL_ENTRY(__do_clear_user) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,2,0,0,0 + cmp.eq p6,p0=r0,len // check for zero length + .save ar.lc, saved_lc + mov saved_lc=ar.lc // preserve ar.lc (slow) + .body + ;; // avoid WAW on CFM + adds tmp=-1,len // br.ctop is repeat/until + mov ret0=len // return value is length at this point +(p6) br.ret.spnt.many rp + ;; + cmp.lt p6,p0=16,len // if len > 16 then long memset + mov ar.lc=tmp // initialize lc for small count +(p6) br.cond.dptk .long_do_clear + ;; // WAR on ar.lc + // + // worst case 16 iterations, avg 8 iterations + // + // We could have played with the predicates to use the extra + // M slot for 2 stores/iteration but the cost the initialization + // the various counters compared to how long the loop is supposed + // to last on average does not make this solution viable. + // +1: + EX( .Lexit1, st1 [buf]=r0,1 ) + adds len=-1,len // countdown length using len + br.cloop.dptk 1b + ;; // avoid RAW on ar.lc + // + // .Lexit4: comes from byte by byte loop + // len contains bytes left +.Lexit1: + mov ret0=len // faster than using ar.lc + mov ar.lc=saved_lc + br.ret.sptk.many rp // end of short clear_user + + + // + // At this point we know we have more than 16 bytes to copy + // so we focus on alignment (no branches required) + // + // The use of len/len2 for countdown of the number of bytes left + // instead of ret0 is due to the fact that the exception code + // changes the values of r8. + // +.long_do_clear: + tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear) + ;; + EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned +(p6) adds len=-1,len;; // sync because buf is modified + tbit.nz p6,p0=buf,1 + ;; + EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned +(p6) adds len=-2,len;; + tbit.nz p6,p0=buf,2 + ;; + EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned +(p6) adds len=-4,len;; + tbit.nz p6,p0=buf,3 + ;; + EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned +(p6) adds len=-8,len;; + shr.u cnt=len,4 // number of 128-bit (2x64bit) words + ;; + cmp.eq p6,p0=r0,cnt + adds tmp=-1,cnt +(p6) br.cond.dpnt .dotail // we have less than 16 bytes left + ;; + adds buf2=8,buf // setup second base pointer + mov ar.lc=tmp + ;; + + // + // 16bytes/iteration core loop + // + // The second store can never generate a fault because + // we come into the loop only when we are 16-byte aligned. + // This means that if we cross a page then it will always be + // in the first store and never in the second. + // + // + // We need to keep track of the remaining length. A possible (optimistic) + // way would be to use ar.lc and derive how many byte were left by + // doing : left= 16*ar.lc + 16. this would avoid the addition at + // every iteration. + // However we need to keep the synchronization point. A template + // M;;MB does not exist and thus we can keep the addition at no + // extra cycle cost (use a nop slot anyway). It also simplifies the + // (unlikely) error recovery code + // + +2: EX(.Lexit3, st8 [buf]=r0,16 ) + ;; // needed to get len correct when error + st8 [buf2]=r0,16 + adds len=-16,len + br.cloop.dptk 2b + ;; + mov ar.lc=saved_lc + // + // tail correction based on len only + // + // We alternate the use of len3,len2 to allow parallelism and correct + // error handling. We also reuse p6/p7 to return correct value. + // The addition of len2/len3 does not cost anything more compared to + // the regular memset as we had empty slots. + // +.dotail: + mov len2=len // for parallelization of error handling + mov len3=len + tbit.nz p6,p0=len,3 + ;; + EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes +(p6) adds len3=-8,len2 + tbit.nz p7,p6=len,2 + ;; + EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes +(p7) adds len2=-4,len3 + tbit.nz p6,p7=len,1 + ;; + EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes +(p6) adds len3=-2,len2 + tbit.nz p7,p6=len,0 + ;; + EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left + mov ret0=r0 // success + br.ret.sptk.many rp // end of most likely path + + // + // Outlined error handling code + // + + // + // .Lexit3: comes from core loop, need restore pr/lc + // len contains bytes left + // + // + // .Lexit2: + // if p6 -> coming from st8 or st2 : len2 contains what's left + // if p7 -> coming from st4 or st1 : len3 contains what's left + // We must restore lc/pr even though might not have been used. +.Lexit2: + .pred.rel "mutex", p6, p7 +(p6) mov len=len2 +(p7) mov len=len3 + ;; + // + // .Lexit4: comes from head, need not restore pr/lc + // len contains bytes left + // +.Lexit3: + mov ret0=len + mov ar.lc=saved_lc + br.ret.sptk.many rp +END(__do_clear_user) diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S new file mode 100644 index 000000000000..127d1d050d78 --- /dev/null +++ b/arch/ia64/lib/copy_page.S @@ -0,0 +1,98 @@ +/* + * + * Optimized version of the standard copy_page() function + * + * Inputs: + * in0: address of target page + * in1: address of source page + * Output: + * no return value + * + * Copyright (C) 1999, 2001 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger <davidm@hpl.hp.com> + * + * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies. + */ +#include <asm/asmmacro.h> +#include <asm/page.h> + +#define PIPE_DEPTH 3 +#define EPI p[PIPE_DEPTH-1] + +#define lcount r16 +#define saved_pr r17 +#define saved_lc r18 +#define saved_pfs r19 +#define src1 r20 +#define src2 r21 +#define tgt1 r22 +#define tgt2 r23 +#define srcf r24 +#define tgtf r25 +#define tgt_last r26 + +#define Nrot ((8*PIPE_DEPTH+7)&~7) + +GLOBAL_ENTRY(copy_page) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot + + .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \ + t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH] + .rotp p[PIPE_DEPTH] + + .save ar.lc, saved_lc + mov saved_lc=ar.lc + mov ar.ec=PIPE_DEPTH + + mov lcount=PAGE_SIZE/64-1 + .save pr, saved_pr + mov saved_pr=pr + mov pr.rot=1<<16 + + .body + + mov src1=in1 + adds src2=8,in1 + mov tgt_last = PAGE_SIZE + ;; + adds tgt2=8,in0 + add srcf=512,in1 + mov ar.lc=lcount + mov tgt1=in0 + add tgtf=512,in0 + add tgt_last = tgt_last, in0 + ;; +1: +(p[0]) ld8 t1[0]=[src1],16 +(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 +(p[0]) ld8 t2[0]=[src2],16 +(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 + cmp.ltu p6,p0 = tgtf, tgt_last + ;; +(p[0]) ld8 t3[0]=[src1],16 +(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16 +(p[0]) ld8 t4[0]=[src2],16 +(EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16 + ;; +(p[0]) ld8 t5[0]=[src1],16 +(EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16 +(p[0]) ld8 t6[0]=[src2],16 +(EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16 + ;; +(p[0]) ld8 t7[0]=[src1],16 +(EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16 +(p[0]) ld8 t8[0]=[src2],16 +(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16 + +(p6) lfetch [srcf], 64 +(p6) lfetch [tgtf], 64 + br.ctop.sptk.few 1b + ;; + mov pr=saved_pr,0xffffffffffff0000 // restore predicates + mov ar.pfs=saved_pfs + mov ar.lc=saved_lc + br.ret.sptk.many rp +END(copy_page) diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S new file mode 100644 index 000000000000..3c45d60a81b4 --- /dev/null +++ b/arch/ia64/lib/copy_page_mck.S @@ -0,0 +1,185 @@ +/* + * McKinley-optimized version of copy_page(). + * + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger <davidm@hpl.hp.com> + * + * Inputs: + * in0: address of target page + * in1: address of source page + * Output: + * no return value + * + * General idea: + * - use regular loads and stores to prefetch data to avoid consuming M-slot just for + * lfetches => good for in-cache performance + * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single + * cycle + * + * Principle of operation: + * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes. + * To avoid secondary misses in L2, we prefetch both source and destination with a line-size + * of 128 bytes. When both of these lines are in the L2 and the first half of the + * source line is in L1, we start copying the remaining words. The second half of the + * source line is prefetched in an earlier iteration, so that by the time we start + * accessing it, it's also present in the L1. + * + * We use a software-pipelined loop to control the overall operation. The pipeline + * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching + * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination + * cache-lines, the last K stages are used to copy the cache-line words not copied by + * the prefetches. The four relevant points in the pipelined are called A, B, C, D: + * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line + * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought + * into L1D and p[D] is TRUE if a cacheline needs to be copied. + * + * This all sounds very complicated, but thanks to the modulo-scheduled loop support, + * the resulting code is very regular and quite easy to follow (once you get the idea). + * + * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented + * as the separate .prefetch_loop. Logically, this loop performs exactly like the + * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed, + * so that each loop iteration is faster (again, good for cached case). + * + * When reading the code, it helps to keep the following picture in mind: + * + * word 0 word 1 + * +------+------+--- + * | v[x] | t1 | ^ + * | t2 | t3 | | + * | t4 | t5 | | + * | t6 | t7 | | 128 bytes + * | n[y] | t9 | | (L2 cache line) + * | t10 | t11 | | + * | t12 | t13 | | + * | t14 | t15 | v + * +------+------+--- + * + * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C] + * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in + * an order that avoids bank conflicts. + */ +#include <asm/asmmacro.h> +#include <asm/page.h> + +#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st) + +#define src0 r2 +#define src1 r3 +#define dst0 r9 +#define dst1 r10 +#define src_pre_mem r11 +#define dst_pre_mem r14 +#define src_pre_l2 r15 +#define dst_pre_l2 r16 +#define t1 r17 +#define t2 r18 +#define t3 r19 +#define t4 r20 +#define t5 t1 // alias! +#define t6 t2 // alias! +#define t7 t3 // alias! +#define t9 t5 // alias! +#define t10 t4 // alias! +#define t11 t7 // alias! +#define t12 t6 // alias! +#define t14 t10 // alias! +#define t13 r21 +#define t15 r22 + +#define saved_lc r23 +#define saved_pr r24 + +#define A 0 +#define B (PREFETCH_DIST) +#define C (B + PREFETCH_DIST) +#define D (C + 3) +#define N (D + 1) +#define Nrot ((N + 7) & ~7) + +GLOBAL_ENTRY(copy_page) + .prologue + alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot + + .rotr v[2*PREFETCH_DIST], n[D-C+1] + .rotp p[N] + + .save ar.lc, saved_lc + mov saved_lc = ar.lc + .save pr, saved_pr + mov saved_pr = pr + .body + + mov src_pre_mem = in1 + mov pr.rot = 0x10000 + mov ar.ec = 1 // special unrolled loop + + mov dst_pre_mem = in0 + mov ar.lc = 2*PREFETCH_DIST - 1 + + add src_pre_l2 = 8*8, in1 + add dst_pre_l2 = 8*8, in0 + add src0 = 8, in1 // first t1 src + add src1 = 3*8, in1 // first t3 src + add dst0 = 8, in0 // first t1 dst + add dst1 = 3*8, in0 // first t3 dst + mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1 + nop.m 0 + nop.i 0 + ;; + // same as .line_copy loop, but with all predicated-off instructions removed: +.prefetch_loop: +(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 +(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 + br.ctop.sptk .prefetch_loop + ;; + cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero) + mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits! + mov ar.ec = N // # of stages in pipeline + ;; +.line_copy: +(p[D]) ld8 t2 = [src0], 3*8 // M0 +(p[D]) ld8 t4 = [src1], 3*8 // M1 +(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory +(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2 + ;; +(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory +(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2 +(p[D]) st8 [dst0] = t1, 8 // M2 +(p[D]) st8 [dst1] = t3, 8 // M3 + ;; +(p[D]) ld8 t5 = [src0], 8 +(p[D]) ld8 t7 = [src1], 3*8 +(p[D]) st8 [dst0] = t2, 3*8 +(p[D]) st8 [dst1] = t4, 3*8 + ;; +(p[D]) ld8 t6 = [src0], 3*8 +(p[D]) ld8 t10 = [src1], 8 +(p[D]) st8 [dst0] = t5, 8 +(p[D]) st8 [dst1] = t7, 3*8 + ;; +(p[D]) ld8 t9 = [src0], 3*8 +(p[D]) ld8 t11 = [src1], 3*8 +(p[D]) st8 [dst0] = t6, 3*8 +(p[D]) st8 [dst1] = t10, 8 + ;; +(p[D]) ld8 t12 = [src0], 8 +(p[D]) ld8 t14 = [src1], 8 +(p[D]) st8 [dst0] = t9, 3*8 +(p[D]) st8 [dst1] = t11, 3*8 + ;; +(p[D]) ld8 t13 = [src0], 4*8 +(p[D]) ld8 t15 = [src1], 4*8 +(p[D]) st8 [dst0] = t12, 8 +(p[D]) st8 [dst1] = t14, 8 + ;; +(p[D-1])ld8 t1 = [src0], 8 +(p[D-1])ld8 t3 = [src1], 8 +(p[D]) st8 [dst0] = t13, 4*8 +(p[D]) st8 [dst1] = t15, 4*8 + br.ctop.sptk .line_copy + ;; + mov ar.lc = saved_lc + mov pr = saved_pr, -1 + br.ret.sptk.many rp +END(copy_page) diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S new file mode 100644 index 000000000000..c952bdc6a093 --- /dev/null +++ b/arch/ia64/lib/copy_user.S @@ -0,0 +1,610 @@ +/* + * + * Optimized version of the copy_user() routine. + * It is used to copy date across the kernel/user boundary. + * + * The source and destination are always on opposite side of + * the boundary. When reading from user space we must catch + * faults on loads. When writing to user space we must catch + * errors on stores. Note that because of the nature of the copy + * we don't need to worry about overlapping regions. + * + * + * Inputs: + * in0 address of source buffer + * in1 address of destination buffer + * in2 number of bytes to copy + * + * Outputs: + * ret0 0 in case of success. The number of bytes NOT copied in + * case of error. + * + * Copyright (C) 2000-2001 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * + * Fixme: + * - handle the case where we have more than 16 bytes and the alignment + * are different. + * - more benchmarking + * - fix extraneous stop bit introduced by the EX() macro. + */ + +#include <asm/asmmacro.h> + +// +// Tuneable parameters +// +#define COPY_BREAK 16 // we do byte copy below (must be >=16) +#define PIPE_DEPTH 21 // pipe depth + +#define EPI p[PIPE_DEPTH-1] + +// +// arguments +// +#define dst in0 +#define src in1 +#define len in2 + +// +// local registers +// +#define t1 r2 // rshift in bytes +#define t2 r3 // lshift in bytes +#define rshift r14 // right shift in bits +#define lshift r15 // left shift in bits +#define word1 r16 +#define word2 r17 +#define cnt r18 +#define len2 r19 +#define saved_lc r20 +#define saved_pr r21 +#define tmp r22 +#define val r23 +#define src1 r24 +#define dst1 r25 +#define src2 r26 +#define dst2 r27 +#define len1 r28 +#define enddst r29 +#define endsrc r30 +#define saved_pfs r31 + +GLOBAL_ENTRY(__copy_user) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7) + + .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH] + .rotp p[PIPE_DEPTH] + + adds len2=-1,len // br.ctop is repeat/until + mov ret0=r0 + + ;; // RAW of cfm when len=0 + cmp.eq p8,p0=r0,len // check for zero length + .save ar.lc, saved_lc + mov saved_lc=ar.lc // preserve ar.lc (slow) +(p8) br.ret.spnt.many rp // empty mempcy() + ;; + add enddst=dst,len // first byte after end of source + add endsrc=src,len // first byte after end of destination + .save pr, saved_pr + mov saved_pr=pr // preserve predicates + + .body + + mov dst1=dst // copy because of rotation + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + + mov src1=src // copy because of rotation + mov ar.lc=len2 // initialize lc for small count + cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy + + xor tmp=src,dst // same alignment test prepare +(p10) br.cond.dptk .long_copy_user + ;; // RAW pr.rot/p16 ? + // + // Now we do the byte by byte loop with software pipeline + // + // p7 is necessarily false by now +1: + EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) + EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 1b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,0xffffffffffff0000 + mov ar.pfs=saved_pfs // restore ar.ec + br.ret.sptk.many rp // end of short memcpy + + // + // Not 8-byte aligned + // +.diff_align_copy_user: + // At this point we know we have more than 16 bytes to copy + // and also that src and dest do _not_ have the same alignment. + and src2=0x7,src1 // src offset + and dst2=0x7,dst1 // dst offset + ;; + // The basic idea is that we copy byte-by-byte at the head so + // that we can reach 8-byte alignment for both src1 and dst1. + // Then copy the body using software pipelined 8-byte copy, + // shifting the two back-to-back words right and left, then copy + // the tail by copying byte-by-byte. + // + // Fault handling. If the byte-by-byte at the head fails on the + // load, then restart and finish the pipleline by copying zeros + // to the dst1. Then copy zeros for the rest of dst1. + // If 8-byte software pipeline fails on the load, do the same as + // failure_in3 does. If the byte-by-byte at the tail fails, it is + // handled simply by failure_in_pipe1. + // + // The case p14 represents the source has more bytes in the + // the first word (by the shifted part), whereas the p15 needs to + // copy some bytes from the 2nd word of the source that has the + // tail of the 1st of the destination. + // + + // + // Optimization. If dst1 is 8-byte aligned (quite common), we don't need + // to copy the head to dst1, to start 8-byte copy software pipeline. + // We know src1 is not 8-byte aligned in this case. + // + cmp.eq p14,p15=r0,dst2 +(p15) br.cond.spnt 1f + ;; + sub t1=8,src2 + mov t2=src2 + ;; + shl rshift=t2,3 + sub len1=len,t1 // set len1 + ;; + sub lshift=64,rshift + ;; + br.cond.spnt .word_copy_user + ;; +1: + cmp.leu p14,p15=src2,dst2 + sub t1=dst2,src2 + ;; + .pred.rel "mutex", p14, p15 +(p14) sub word1=8,src2 // (8 - src offset) +(p15) sub t1=r0,t1 // absolute value +(p15) sub word1=8,dst2 // (8 - dst offset) + ;; + // For the case p14, we don't need to copy the shifted part to + // the 1st word of destination. + sub t2=8,t1 +(p14) sub word1=word1,t1 + ;; + sub len1=len,word1 // resulting len +(p15) shl rshift=t1,3 // in bits +(p14) shl rshift=t2,3 + ;; +(p14) sub len1=len1,t1 + adds cnt=-1,word1 + ;; + sub lshift=64,rshift + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + mov ar.lc=cnt + ;; +2: + EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1) + EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 2b + ;; + clrrrb + ;; +.word_copy_user: + cmp.gtu p9,p0=16,len1 +(p9) br.cond.spnt 4f // if (16 > len1) skip 8-byte copy + ;; + shr.u cnt=len1,3 // number of 64-bit words + ;; + adds cnt=-1,cnt + ;; + .pred.rel "mutex", p14, p15 +(p14) sub src1=src1,t2 +(p15) sub src1=src1,t1 + // + // Now both src1 and dst1 point to an 8-byte aligned address. And + // we have more than 8 bytes to copy. + // + mov ar.lc=cnt + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + ;; +3: + // + // The pipleline consists of 3 stages: + // 1 (p16): Load a word from src1 + // 2 (EPI_1): Shift right pair, saving to tmp + // 3 (EPI): Store tmp to dst1 + // + // To make it simple, use at least 2 (p16) loops to set up val1[n] + // because we need 2 back-to-back val1[] to get tmp. + // Note that this implies EPI_2 must be p18 or greater. + // + +#define EPI_1 p[PIPE_DEPTH-2] +#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift +#define CASE(pred, shift) \ + (pred) br.cond.spnt .copy_user_bit##shift +#define BODY(rshift) \ +.copy_user_bit##rshift: \ +1: \ + EX(.failure_out,(EPI) st8 [dst1]=tmp,8); \ +(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ + EX(3f,(p16) ld8 val1[1]=[src1],8); \ +(p16) mov val1[0]=r0; \ + br.ctop.dptk 1b; \ + ;; \ + br.cond.sptk.many .diff_align_do_tail; \ +2: \ +(EPI) st8 [dst1]=tmp,8; \ +(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ +3: \ +(p16) mov val1[1]=r0; \ +(p16) mov val1[0]=r0; \ + br.ctop.dptk 2b; \ + ;; \ + br.cond.sptk.many .failure_in2 + + // + // Since the instruction 'shrp' requires a fixed 128-bit value + // specifying the bits to shift, we need to provide 7 cases + // below. + // + SWITCH(p6, 8) + SWITCH(p7, 16) + SWITCH(p8, 24) + SWITCH(p9, 32) + SWITCH(p10, 40) + SWITCH(p11, 48) + SWITCH(p12, 56) + ;; + CASE(p6, 8) + CASE(p7, 16) + CASE(p8, 24) + CASE(p9, 32) + CASE(p10, 40) + CASE(p11, 48) + CASE(p12, 56) + ;; + BODY(8) + BODY(16) + BODY(24) + BODY(32) + BODY(40) + BODY(48) + BODY(56) + ;; +.diff_align_do_tail: + .pred.rel "mutex", p14, p15 +(p14) sub src1=src1,t1 +(p14) adds dst1=-8,dst1 +(p15) sub dst1=dst1,t1 + ;; +4: + // Tail correction. + // + // The problem with this piplelined loop is that the last word is not + // loaded and thus parf of the last word written is not correct. + // To fix that, we simply copy the tail byte by byte. + + sub len1=endsrc,src1,1 + clrrrb + ;; + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + mov ar.lc=len1 + ;; +5: + EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) + EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 5b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,0xffffffffffff0000 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // Beginning of long mempcy (i.e. > 16 bytes) + // +.long_copy_user: + tbit.nz p6,p7=src1,0 // odd alignment + and tmp=7,tmp + ;; + cmp.eq p10,p8=r0,tmp + mov len1=len // copy because of rotation +(p8) br.cond.dpnt .diff_align_copy_user + ;; + // At this point we know we have more than 16 bytes to copy + // and also that both src and dest have the same alignment + // which may not be the one we want. So for now we must move + // forward slowly until we reach 16byte alignment: no need to + // worry about reaching the end of buffer. + // + EX(.failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned +(p6) adds len1=-1,len1;; + tbit.nz p7,p0=src1,1 + ;; + EX(.failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned +(p7) adds len1=-2,len1;; + tbit.nz p8,p0=src1,2 + ;; + // + // Stop bit not required after ld4 because if we fail on ld4 + // we have never executed the ld1, therefore st1 is not executed. + // + EX(.failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned + ;; + EX(.failure_out,(p6) st1 [dst1]=val1[0],1) + tbit.nz p9,p0=src1,3 + ;; + // + // Stop bit not required after ld8 because if we fail on ld8 + // we have never executed the ld2, therefore st2 is not executed. + // + EX(.failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned + EX(.failure_out,(p7) st2 [dst1]=val1[1],2) +(p8) adds len1=-4,len1 + ;; + EX(.failure_out, (p8) st4 [dst1]=val2[0],4) +(p9) adds len1=-8,len1;; + shr.u cnt=len1,4 // number of 128-bit (2x64bit) words + ;; + EX(.failure_out, (p9) st8 [dst1]=val2[1],8) + tbit.nz p6,p0=len1,3 + cmp.eq p7,p0=r0,cnt + adds tmp=-1,cnt // br.ctop is repeat/until +(p7) br.cond.dpnt .dotail // we have less than 16 bytes left + ;; + adds src2=8,src1 + adds dst2=8,dst1 + mov ar.lc=tmp + ;; + // + // 16bytes/iteration + // +2: + EX(.failure_in3,(p16) ld8 val1[0]=[src1],16) +(p16) ld8 val2[0]=[src2],16 + + EX(.failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16) +(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 + br.ctop.dptk 2b + ;; // RAW on src1 when fall through from loop + // + // Tail correction based on len only + // + // No matter where we come from (loop or test) the src1 pointer + // is 16 byte aligned AND we have less than 16 bytes to copy. + // +.dotail: + EX(.failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes + tbit.nz p7,p0=len1,2 + ;; + EX(.failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes + tbit.nz p8,p0=len1,1 + ;; + EX(.failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes + tbit.nz p9,p0=len1,0 + ;; + EX(.failure_out, (p6) st8 [dst1]=val1[0],8) + ;; + EX(.failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left + mov ar.lc=saved_lc + ;; + EX(.failure_out,(p7) st4 [dst1]=val1[1],4) + mov pr=saved_pr,0xffffffffffff0000 + ;; + EX(.failure_out, (p8) st2 [dst1]=val2[0],2) + mov ar.pfs=saved_pfs + ;; + EX(.failure_out, (p9) st1 [dst1]=val2[1]) + br.ret.sptk.many rp + + + // + // Here we handle the case where the byte by byte copy fails + // on the load. + // Several factors make the zeroing of the rest of the buffer kind of + // tricky: + // - the pipeline: loads/stores are not in sync (pipeline) + // + // In the same loop iteration, the dst1 pointer does not directly + // reflect where the faulty load was. + // + // - pipeline effect + // When you get a fault on load, you may have valid data from + // previous loads not yet store in transit. Such data must be + // store normally before moving onto zeroing the rest. + // + // - single/multi dispersal independence. + // + // solution: + // - we don't disrupt the pipeline, i.e. data in transit in + // the software pipeline will be eventually move to memory. + // We simply replace the load with a simple mov and keep the + // pipeline going. We can't really do this inline because + // p16 is always reset to 1 when lc > 0. + // +.failure_in_pipe1: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied +1: +(p16) mov val1[0]=r0 +(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 + br.ctop.dptk 1b + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // This is the case where the byte by byte copy fails on the load + // when we copy the head. We need to finish the pipeline and copy + // zeros for the rest of the destination. Since this happens + // at the top we still need to fill the body and tail. +.failure_in_pipe2: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied +2: +(p16) mov val1[0]=r0 +(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 + br.ctop.dptk 2b + ;; + sub len=enddst,dst1,1 // precompute len + br.cond.dptk.many .failure_in1bis + ;; + + // + // Here we handle the head & tail part when we check for alignment. + // The following code handles only the load failures. The + // main diffculty comes from the fact that loads/stores are + // scheduled. So when you fail on a load, the stores corresponding + // to previous successful loads must be executed. + // + // However some simplifications are possible given the way + // things work. + // + // 1) HEAD + // Theory of operation: + // + // Page A | Page B + // ---------|----- + // 1|8 x + // 1 2|8 x + // 4|8 x + // 1 4|8 x + // 2 4|8 x + // 1 2 4|8 x + // |1 + // |2 x + // |4 x + // + // page_size >= 4k (2^12). (x means 4, 2, 1) + // Here we suppose Page A exists and Page B does not. + // + // As we move towards eight byte alignment we may encounter faults. + // The numbers on each page show the size of the load (current alignment). + // + // Key point: + // - if you fail on 1, 2, 4 then you have never executed any smaller + // size loads, e.g. failing ld4 means no ld1 nor ld2 executed + // before. + // + // This allows us to simplify the cleanup code, because basically you + // only have to worry about "pending" stores in the case of a failing + // ld8(). Given the way the code is written today, this means only + // worry about st2, st4. There we can use the information encapsulated + // into the predicates. + // + // Other key point: + // - if you fail on the ld8 in the head, it means you went straight + // to it, i.e. 8byte alignment within an unexisting page. + // Again this comes from the fact that if you crossed just for the ld8 then + // you are 8byte aligned but also 16byte align, therefore you would + // either go for the 16byte copy loop OR the ld8 in the tail part. + // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible + // because it would mean you had 15bytes to copy in which case you + // would have defaulted to the byte by byte copy. + // + // + // 2) TAIL + // Here we now we have less than 16 bytes AND we are either 8 or 16 byte + // aligned. + // + // Key point: + // This means that we either: + // - are right on a page boundary + // OR + // - are at more than 16 bytes from a page boundary with + // at most 15 bytes to copy: no chance of crossing. + // + // This allows us to assume that if we fail on a load we haven't possibly + // executed any of the previous (tail) ones, so we don't need to do + // any stores. For instance, if we fail on ld2, this means we had + // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4. + // + // This means that we are in a situation similar the a fault in the + // head part. That's nice! + // +.failure_in1: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied + sub len=endsrc,src1,1 + // + // we know that ret0 can never be zero at this point + // because we failed why trying to do a load, i.e. there is still + // some work to do. + // The failure_in1bis and length problem is taken care of at the + // calling side. + // + ;; +.failure_in1bis: // from (.failure_in3) + mov ar.lc=len // Continue with a stupid byte store. + ;; +5: + st1 [dst1]=r0,1 + br.cloop.dptk 5b + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // Here we simply restart the loop but instead + // of doing loads we fill the pipeline with zeroes + // We can't simply store r0 because we may have valid + // data in transit in the pipeline. + // ar.lc and ar.ec are setup correctly at this point + // + // we MUST use src1/endsrc here and not dst1/enddst because + // of the pipeline effect. + // +.failure_in3: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied + ;; +2: +(p16) mov val1[0]=r0 +(p16) mov val2[0]=r0 +(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16 +(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 + br.ctop.dptk 2b + ;; + cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? + sub len=enddst,dst1,1 // precompute len +(p6) br.cond.dptk .failure_in1bis + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + +.failure_in2: + sub ret0=endsrc,src1 + cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? + sub len=enddst,dst1,1 // precompute len +(p6) br.cond.dptk .failure_in1bis + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // handling of failures on stores: that's the easy part + // +.failure_out: + sub ret0=enddst,dst1 + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + + mov ar.pfs=saved_pfs + br.ret.sptk.many rp +END(__copy_user) diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c new file mode 100644 index 000000000000..36866e8a5d2b --- /dev/null +++ b/arch/ia64/lib/csum_partial_copy.c @@ -0,0 +1,151 @@ +/* + * Network Checksum & Copy routine + * + * Copyright (C) 1999, 2003-2004 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * + * Most of the code has been imported from Linux/Alpha + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> + +#include <asm/uaccess.h> + +/* + * XXX Fixme: those 2 inlines are meant for debugging and will go away + */ +static inline unsigned +short from64to16(unsigned long x) +{ + /* add up 32-bit words for 33 bits */ + x = (x & 0xffffffff) + (x >> 32); + /* add up 16-bit and 17-bit words for 17+c bits */ + x = (x & 0xffff) + (x >> 16); + /* add up 16-bit and 2-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +static inline +unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum) +{ + int odd, count; + unsigned long result = (unsigned long)psum; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { + result = *buff << 8; + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + if (4 & (unsigned long) buff) { + result += *(unsigned int *) buff; + count--; + len -= 4; + buff += 4; + } + count >>= 1; /* nr of 64-bit words.. */ + if (count) { + unsigned long carry = 0; + do { + unsigned long w = *(unsigned long *) buff; + count--; + buff += 8; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffffffff) + (result >> 32); + } + if (len & 4) { + result += *(unsigned int *) buff; + buff += 4; + } + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += *buff; + + result = from64to16(result); + + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); + +out: + return result; +} + +/* + * XXX Fixme + * + * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS. + * But it's very tricky to get right even in C. + */ +extern unsigned long do_csum(const unsigned char *, long); + +static unsigned int +do_csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst, + int len, unsigned int psum, int *errp) +{ + unsigned long result; + + /* XXX Fixme + * for now we separate the copy from checksum for obvious + * alignment difficulties. Look at the Alpha code and you'll be + * scared. + */ + + if (__copy_from_user(dst, src, len) != 0 && errp) + *errp = -EFAULT; + + result = do_csum(dst, len); + + /* add in old sum, and carry.. */ + result += psum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return result; +} + +unsigned int +csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst, + int len, unsigned int sum, int *errp) +{ + if (!access_ok(VERIFY_READ, src, len)) { + *errp = -EFAULT; + memset(dst, 0, len); + return sum; + } + + return do_csum_partial_copy_from_user(src, dst, len, sum, errp); +} + +unsigned int +csum_partial_copy_nocheck(const unsigned char __user *src, unsigned char *dst, + int len, unsigned int sum) +{ + return do_csum_partial_copy_from_user(src, dst, len, sum, NULL); +} + +EXPORT_SYMBOL(csum_partial_copy_nocheck); diff --git a/arch/ia64/lib/dec_and_lock.c b/arch/ia64/lib/dec_and_lock.c new file mode 100644 index 000000000000..c7ce92f968f1 --- /dev/null +++ b/arch/ia64/lib/dec_and_lock.c @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2003 Jerome Marchand, Bull S.A. + * Cleaned up by David Mosberger-Tang <davidm@hpl.hp.com> + * + * This file is released under the GPLv2, or at your option any later version. + * + * ia64 version of "atomic_dec_and_lock()" using the atomic "cmpxchg" instruction. This + * code is an adaptation of the x86 version of "atomic_dec_and_lock()". + */ + +#include <linux/compiler.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <asm/atomic.h> + +/* + * Decrement REFCOUNT and if the count reaches zero, acquire the spinlock. Both of these + * operations have to be done atomically, so that the count doesn't drop to zero without + * acquiring the spinlock first. + */ +int +_atomic_dec_and_lock (atomic_t *refcount, spinlock_t *lock) +{ + int old, new; + + do { + old = atomic_read(refcount); + new = old - 1; + + if (unlikely (old == 1)) { + /* oops, we may be decrementing to zero, do it the slow way... */ + spin_lock(lock); + if (atomic_dec_and_test(refcount)) + return 1; + spin_unlock(lock); + return 0; + } + } while (cmpxchg(&refcount->counter, old, new) != old); + return 0; +} + +EXPORT_SYMBOL(_atomic_dec_and_lock); diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S new file mode 100644 index 000000000000..6bec2fc9f5b2 --- /dev/null +++ b/arch/ia64/lib/do_csum.S @@ -0,0 +1,323 @@ +/* + * + * Optmized version of the standard do_csum() function + * + * Return: a 64bit quantity containing the 16bit Internet checksum + * + * Inputs: + * in0: address of buffer to checksum (char *) + * in1: length of the buffer (int) + * + * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * + * 02/04/22 Ken Chen <kenneth.w.chen@intel.com> + * Data locality study on the checksum buffer. + * More optimization cleanup - remove excessive stop bits. + * 02/04/08 David Mosberger <davidm@hpl.hp.com> + * More cleanup and tuning. + * 01/04/18 Jun Nakajima <jun.nakajima@intel.com> + * Clean up and optimize and the software pipeline, loading two + * back-to-back 8-byte words per loop. Clean up the initialization + * for the loop. Support the cases where load latency = 1 or 2. + * Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default). + */ + +#include <asm/asmmacro.h> + +// +// Theory of operations: +// The goal is to go as quickly as possible to the point where +// we can checksum 16 bytes/loop. Before reaching that point we must +// take care of incorrect alignment of first byte. +// +// The code hereafter also takes care of the "tail" part of the buffer +// before entering the core loop, if any. The checksum is a sum so it +// allows us to commute operations. So we do the "head" and "tail" +// first to finish at full speed in the body. Once we get the head and +// tail values, we feed them into the pipeline, very handy initialization. +// +// Of course we deal with the special case where the whole buffer fits +// into one 8 byte word. In this case we have only one entry in the pipeline. +// +// We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for +// possible load latency and also to accommodate for head and tail. +// +// The end of the function deals with folding the checksum from 64bits +// down to 16bits taking care of the carry. +// +// This version avoids synchronization in the core loop by also using a +// pipeline for the accumulation of the checksum in resultx[] (x=1,2). +// +// wordx[] (x=1,2) +// |---| +// | | 0 : new value loaded in pipeline +// |---| +// | | - : in transit data +// |---| +// | | LOAD_LATENCY : current value to add to checksum +// |---| +// | | LOAD_LATENCY+1 : previous value added to checksum +// |---| (previous iteration) +// +// resultx[] (x=1,2) +// |---| +// | | 0 : initial value +// |---| +// | | LOAD_LATENCY-1 : new checksum +// |---| +// | | LOAD_LATENCY : previous value of checksum +// |---| +// | | LOAD_LATENCY+1 : final checksum when out of the loop +// |---| +// +// +// See RFC1071 "Computing the Internet Checksum" for various techniques for +// calculating the Internet checksum. +// +// NOT YET DONE: +// - Maybe another algorithm which would take care of the folding at the +// end in a different manner +// - Work with people more knowledgeable than me on the network stack +// to figure out if we could not split the function depending on the +// type of packet or alignment we get. Like the ip_fast_csum() routine +// where we know we have at least 20bytes worth of data to checksum. +// - Do a better job of handling small packets. +// - Note on prefetching: it was found that under various load, i.e. ftp read/write, +// nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8% +// on the data that buffer points to (partly because the checksum is often preceded by +// a copy_from_user()). This finding indiate that lfetch will not be beneficial since +// the data is already in the cache. +// + +#define saved_pfs r11 +#define hmask r16 +#define tmask r17 +#define first1 r18 +#define firstval r19 +#define firstoff r20 +#define last r21 +#define lastval r22 +#define lastoff r23 +#define saved_lc r24 +#define saved_pr r25 +#define tmp1 r26 +#define tmp2 r27 +#define tmp3 r28 +#define carry1 r29 +#define carry2 r30 +#define first2 r31 + +#define buf in0 +#define len in1 + +#define LOAD_LATENCY 2 // XXX fix me + +#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2) +# error "Only 1 or 2 is supported/tested for LOAD_LATENCY." +#endif + +#define PIPE_DEPTH (LOAD_LATENCY+2) +#define ELD p[LOAD_LATENCY] // end of load +#define ELD_1 p[LOAD_LATENCY+1] // and next stage + +// unsigned long do_csum(unsigned char *buf,long len) + +GLOBAL_ENTRY(do_csum) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,2,16,0,16 + .rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2] + .rotp p[PIPE_DEPTH], pC1[2], pC2[2] + mov ret0=r0 // in case we have zero length + cmp.lt p0,p6=r0,len // check for zero length or negative (32bit len) + ;; + add tmp1=buf,len // last byte's address + .save pr, saved_pr + mov saved_pr=pr // preserve predicates (rotation) +(p6) br.ret.spnt.many rp // return if zero or negative length + + mov hmask=-1 // initialize head mask + tbit.nz p15,p0=buf,0 // is buf an odd address? + and first1=-8,buf // 8-byte align down address of first1 element + + and firstoff=7,buf // how many bytes off for first1 element + mov tmask=-1 // initialize tail mask + + ;; + adds tmp2=-1,tmp1 // last-1 + and lastoff=7,tmp1 // how many bytes off for last element + ;; + sub tmp1=8,lastoff // complement to lastoff + and last=-8,tmp2 // address of word containing last byte + ;; + sub tmp3=last,first1 // tmp3=distance from first1 to last + .save ar.lc, saved_lc + mov saved_lc=ar.lc // save lc + cmp.eq p8,p9=last,first1 // everything fits in one word ? + + ld8 firstval=[first1],8 // load, ahead of time, "first1" word + and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0 + shl tmp2=firstoff,3 // number of bits + ;; +(p9) ld8 lastval=[last] // load, ahead of time, "last" word, if needed + shl tmp1=tmp1,3 // number of bits +(p9) adds tmp3=-8,tmp3 // effectively loaded + ;; +(p8) mov lastval=r0 // we don't need lastval if first1==last + shl hmask=hmask,tmp2 // build head mask, mask off [0,first1off[ + shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff] + ;; + .body +#define count tmp3 + +(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only +(p9) and word2[0]=lastval,tmask // mask last it as appropriate + shr.u count=count,3 // how many 8-byte? + ;; + // If count is odd, finish this 8-byte word so that we can + // load two back-to-back 8-byte words per loop thereafter. + and word1[0]=firstval,hmask // and mask it as appropriate + tbit.nz p10,p11=count,0 // if (count is odd) + ;; +(p8) mov result1[0]=word1[0] +(p9) add result1[0]=word1[0],word2[0] + ;; + cmp.ltu p6,p0=result1[0],word1[0] // check the carry + cmp.eq.or.andcm p8,p0=0,count // exit if zero 8-byte + ;; +(p6) adds result1[0]=1,result1[0] +(p8) br.cond.dptk .do_csum_exit // if (within an 8-byte word) +(p11) br.cond.dptk .do_csum16 // if (count is even) + + // Here count is odd. + ld8 word1[1]=[first1],8 // load an 8-byte word + cmp.eq p9,p10=1,count // if (count == 1) + adds count=-1,count // loaded an 8-byte word + ;; + add result1[0]=result1[0],word1[1] + ;; + cmp.ltu p6,p0=result1[0],word1[1] + ;; +(p6) adds result1[0]=1,result1[0] +(p9) br.cond.sptk .do_csum_exit // if (count == 1) exit + // Fall through to caluculate the checksum, feeding result1[0] as + // the initial value in result1[0]. + // + // Calculate the checksum loading two 8-byte words per loop. + // +.do_csum16: + add first2=8,first1 + shr.u count=count,1 // we do 16 bytes per loop + ;; + adds count=-1,count + mov carry1=r0 + mov carry2=r0 + brp.loop.imp 1f,2f + ;; + mov ar.ec=PIPE_DEPTH + mov ar.lc=count // set lc + mov pr.rot=1<<16 + // result1[0] must be initialized in advance. + mov result2[0]=r0 + ;; + .align 32 +1: +(ELD_1) cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1] +(pC1[1])adds carry1=1,carry1 +(ELD_1) cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1] +(pC2[1])adds carry2=1,carry2 +(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY] +(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY] +2: +(p[0]) ld8 word1[0]=[first1],16 +(p[0]) ld8 word2[0]=[first2],16 + br.ctop.sptk 1b + ;; + // Since len is a 32-bit value, carry cannot be larger than a 64-bit value. +(pC1[1])adds carry1=1,carry1 // since we miss the last one +(pC2[1])adds carry2=1,carry2 + ;; + add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1 + add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2 + ;; + cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1 + cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2 + ;; +(p6) adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1] +(p7) adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1] + ;; + add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1] + ;; + cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1] + ;; +(p6) adds result1[0]=1,result1[0] + ;; +.do_csum_exit: + // + // now fold 64 into 16 bits taking care of carry + // that's not very good because it has lots of sequentiality + // + mov tmp3=0xffff + zxt4 tmp1=result1[0] + shr.u tmp2=result1[0],32 + ;; + add result1[0]=tmp1,tmp2 + ;; + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 + ;; + add result1[0]=tmp1,tmp2 + ;; + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 + ;; + add result1[0]=tmp1,tmp2 + ;; + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 + ;; + add ret0=tmp1,tmp2 + mov pr=saved_pr,0xffffffffffff0000 + ;; + // if buf was odd then swap bytes + mov ar.pfs=saved_pfs // restore ar.ec +(p15) mux1 ret0=ret0,@rev // reverse word + ;; + mov ar.lc=saved_lc +(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes + br.ret.sptk.many rp + +// I (Jun Nakajima) wrote an equivalent code (see below), but it was +// not much better than the original. So keep the original there so that +// someone else can challenge. +// +// shr.u word1[0]=result1[0],32 +// zxt4 result1[0]=result1[0] +// ;; +// add result1[0]=result1[0],word1[0] +// ;; +// zxt2 result2[0]=result1[0] +// extr.u word1[0]=result1[0],16,16 +// shr.u carry1=result1[0],32 +// ;; +// add result2[0]=result2[0],word1[0] +// ;; +// add result2[0]=result2[0],carry1 +// ;; +// extr.u ret0=result2[0],16,16 +// ;; +// add ret0=ret0,result2[0] +// ;; +// zxt2 ret0=ret0 +// mov ar.pfs=saved_pfs // restore ar.ec +// mov pr=saved_pr,0xffffffffffff0000 +// ;; +// // if buf was odd then swap bytes +// mov ar.lc=saved_lc +//(p15) mux1 ret0=ret0,@rev // reverse word +// ;; +//(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes +// br.ret.sptk.many rp + +END(do_csum) diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S new file mode 100644 index 000000000000..29c802b19669 --- /dev/null +++ b/arch/ia64/lib/flush.S @@ -0,0 +1,39 @@ +/* + * Cache flushing routines. + * + * Copyright (C) 1999-2001 Hewlett-Packard Co + * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <asm/asmmacro.h> +#include <asm/page.h> + + /* + * flush_icache_range(start,end) + * Must flush range from start to end-1 but nothing else (need to + * be careful not to touch addresses that may be unmapped). + */ +GLOBAL_ENTRY(flush_icache_range) + .prologue + alloc r2=ar.pfs,2,0,0,0 + sub r8=in1,in0,1 + ;; + shr.u r8=r8,5 // we flush 32 bytes per iteration + .save ar.lc, r3 + mov r3=ar.lc // save ar.lc + ;; + + .body + + mov ar.lc=r8 + ;; +.Loop: fc in0 // issuable on M0 only + add in0=32,in0 + br.cloop.sptk.few .Loop + ;; + sync.i + ;; + srlz.i + ;; + mov ar.lc=r3 // restore ar.lc + br.ret.sptk.many rp +END(flush_icache_range) diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S new file mode 100644 index 000000000000..2ac28bf0a662 --- /dev/null +++ b/arch/ia64/lib/idiv32.S @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 32-bit integer division. + * + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture". This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ + * + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.hp.com/go/retailbooks/) + */ + +#include <asm/asmmacro.h> + +#ifdef MODULO +# define OP mod +#else +# define OP div +#endif + +#ifdef UNSIGNED +# define SGN u +# define EXTEND zxt4 +# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b +#else +# define SGN +# define EXTEND sxt4 +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3)) + +GLOBAL_ENTRY(NAME) + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias) + EXTEND in0 = in0 // in0 = a + EXTEND in1 = in1 // in1 = b + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + INT_TO_FP(f8, f8) + INT_TO_FP(f9, f9) + ;; + setf.exp f7 = r2 // f7 = 2^-34 + frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b) + ;; +(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1 + ;; +#ifdef MODULO + setf.sig f9 = in1 // f9 = -b +#endif +(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0 +(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34 + ;; +#ifdef MODULO + setf.sig f7 = in0 +#endif +(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1 + ;; + FP_TO_INT(f6, f6) // q = trunc(q2) + ;; +#ifdef MODULO + xma.l f6 = f6, f9, f7 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f6 // transfer result to result register + br.ret.sptk.many rp +END(NAME) diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S new file mode 100644 index 000000000000..f69bd2b0987a --- /dev/null +++ b/arch/ia64/lib/idiv64.S @@ -0,0 +1,80 @@ +/* + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 64-bit integer division. + * + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture". This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ + * + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.hp.com/go/retailbooks/) + */ + +#include <asm/asmmacro.h> + +#ifdef MODULO +# define OP mod +#else +# define OP div +#endif + +#ifdef UNSIGNED +# define SGN u +# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b +#else +# define SGN +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3)) + +GLOBAL_ENTRY(NAME) + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + INT_TO_FP(f8, f8) + INT_TO_FP(f9, f9) + ;; + frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b) + ;; +(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1 + ;; +(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0 +(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0 + ;; +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif +(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1 +(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0 + ;; +(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1 +(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a + ;; +#ifdef MODULO + setf.sig f8 = in0 // f8 = a + setf.sig f9 = in1 // f9 = -b +#endif +(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2 + ;; + FP_TO_INT(f11, f11) // q = trunc(q3) + ;; +#ifdef MODULO + xma.l f11 = f11, f9, f8 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f11 // transfer result to result register + br.ret.sptk.many rp +END(NAME) diff --git a/arch/ia64/lib/io.c b/arch/ia64/lib/io.c new file mode 100644 index 000000000000..8949e44091ac --- /dev/null +++ b/arch/ia64/lib/io.c @@ -0,0 +1,165 @@ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> + +#include <asm/io.h> + +/* + * Copy data from IO memory space to "real" memory space. + * This needs to be optimized. + */ +void memcpy_fromio(void *to, const volatile void __iomem *from, long count) +{ + char *dst = to; + + while (count) { + count--; + *dst++ = readb(from++); + } +} +EXPORT_SYMBOL(memcpy_fromio); + +/* + * Copy data from "real" memory space to IO memory space. + * This needs to be optimized. + */ +void memcpy_toio(volatile void __iomem *to, const void *from, long count) +{ + const char *src = from; + + while (count) { + count--; + writeb(*src++, to++); + } +} +EXPORT_SYMBOL(memcpy_toio); + +/* + * "memset" on IO memory space. + * This needs to be optimized. + */ +void memset_io(volatile void __iomem *dst, int c, long count) +{ + unsigned char ch = (char)(c & 0xff); + + while (count) { + count--; + writeb(ch, dst); + dst++; + } +} +EXPORT_SYMBOL(memset_io); + +#ifdef CONFIG_IA64_GENERIC + +#undef __ia64_inb +#undef __ia64_inw +#undef __ia64_inl +#undef __ia64_outb +#undef __ia64_outw +#undef __ia64_outl +#undef __ia64_readb +#undef __ia64_readw +#undef __ia64_readl +#undef __ia64_readq +#undef __ia64_readb_relaxed +#undef __ia64_readw_relaxed +#undef __ia64_readl_relaxed +#undef __ia64_readq_relaxed +#undef __ia64_writeb +#undef __ia64_writew +#undef __ia64_writel +#undef __ia64_writeq +#undef __ia64_mmiowb + +unsigned int +__ia64_inb (unsigned long port) +{ + return ___ia64_inb(port); +} + +unsigned int +__ia64_inw (unsigned long port) +{ + return ___ia64_inw(port); +} + +unsigned int +__ia64_inl (unsigned long port) +{ + return ___ia64_inl(port); +} + +void +__ia64_outb (unsigned char val, unsigned long port) +{ + ___ia64_outb(val, port); +} + +void +__ia64_outw (unsigned short val, unsigned long port) +{ + ___ia64_outw(val, port); +} + +void +__ia64_outl (unsigned int val, unsigned long port) +{ + ___ia64_outl(val, port); +} + +unsigned char +__ia64_readb (void __iomem *addr) +{ + return ___ia64_readb (addr); +} + +unsigned short +__ia64_readw (void __iomem *addr) +{ + return ___ia64_readw (addr); +} + +unsigned int +__ia64_readl (void __iomem *addr) +{ + return ___ia64_readl (addr); +} + +unsigned long +__ia64_readq (void __iomem *addr) +{ + return ___ia64_readq (addr); +} + +unsigned char +__ia64_readb_relaxed (void __iomem *addr) +{ + return ___ia64_readb (addr); +} + +unsigned short +__ia64_readw_relaxed (void __iomem *addr) +{ + return ___ia64_readw (addr); +} + +unsigned int +__ia64_readl_relaxed (void __iomem *addr) +{ + return ___ia64_readl (addr); +} + +unsigned long +__ia64_readq_relaxed (void __iomem *addr) +{ + return ___ia64_readq (addr); +} + +void +__ia64_mmiowb(void) +{ + ___ia64_mmiowb(); +} + +#endif /* CONFIG_IA64_GENERIC */ diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S new file mode 100644 index 000000000000..19674ca2acfc --- /dev/null +++ b/arch/ia64/lib/ip_fast_csum.S @@ -0,0 +1,90 @@ +/* + * Optmized version of the ip_fast_csum() function + * Used for calculating IP header checksum + * + * Return: 16bit checksum, complemented + * + * Inputs: + * in0: address of buffer to checksum (char *) + * in1: length of the buffer (int) + * + * Copyright (C) 2002 Intel Corp. + * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> + */ + +#include <asm/asmmacro.h> + +/* + * Since we know that most likely this function is called with buf aligned + * on 4-byte boundary and 20 bytes in length, we can execution rather quickly + * versus calling generic version of do_csum, which has lots of overhead in + * handling various alignments and sizes. However, due to lack of constrains + * put on the function input argument, cases with alignment not on 4-byte or + * size not equal to 20 bytes will be handled by the generic do_csum function. + */ + +#define in0 r32 +#define in1 r33 +#define ret0 r8 + +GLOBAL_ENTRY(ip_fast_csum) + .prologue + .body + cmp.ne p6,p7=5,in1 // size other than 20 byte? + and r14=3,in0 // is it aligned on 4-byte? + add r15=4,in0 // second source pointer + ;; + cmp.ne.or.andcm p6,p7=r14,r0 + ;; +(p7) ld4 r20=[in0],8 +(p7) ld4 r21=[r15],8 +(p6) br.spnt .generic + ;; + ld4 r22=[in0],8 + ld4 r23=[r15],8 + ;; + ld4 r24=[in0] + add r20=r20,r21 + add r22=r22,r23 + ;; + add r20=r20,r22 + ;; + add r20=r20,r24 + ;; + shr.u ret0=r20,16 // now need to add the carry + zxt2 r20=r20 + ;; + add r20=ret0,r20 + ;; + shr.u ret0=r20,16 // add carry again + zxt2 r20=r20 + ;; + add r20=ret0,r20 + ;; + shr.u ret0=r20,16 + zxt2 r20=r20 + ;; + add r20=ret0,r20 + ;; + andcm ret0=-1,r20 + .restore sp // reset frame state + br.ret.sptk.many b0 + ;; + +.generic: + .prologue + .save ar.pfs, r35 + alloc r35=ar.pfs,2,2,2,0 + .save rp, r34 + mov r34=b0 + .body + dep.z out1=in1,2,30 + mov out0=in0 + ;; + br.call.sptk.many b0=do_csum + ;; + andcm ret0=-1,ret0 + mov ar.pfs=r35 + mov b0=r34 + br.ret.sptk.many b0 +END(ip_fast_csum) diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S new file mode 100644 index 000000000000..448908d80b69 --- /dev/null +++ b/arch/ia64/lib/memcpy.S @@ -0,0 +1,301 @@ +/* + * + * Optimized version of the standard memcpy() function + * + * Inputs: + * in0: destination address + * in1: source address + * in2: number of bytes to copy + * Output: + * no return value + * + * Copyright (C) 2000-2001 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <asm/asmmacro.h> + +GLOBAL_ENTRY(memcpy) + +# define MEM_LAT 21 /* latency to memory */ + +# define dst r2 +# define src r3 +# define retval r8 +# define saved_pfs r9 +# define saved_lc r10 +# define saved_pr r11 +# define cnt r16 +# define src2 r17 +# define t0 r18 +# define t1 r19 +# define t2 r20 +# define t3 r21 +# define t4 r22 +# define src_end r23 + +# define N (MEM_LAT + 4) +# define Nrot ((N + 7) & ~7) + + /* + * First, check if everything (src, dst, len) is a multiple of eight. If + * so, we handle everything with no taken branches (other than the loop + * itself) and a small icache footprint. Otherwise, we jump off to + * the more general copy routine handling arbitrary + * sizes/alignment etc. + */ + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot + .save ar.lc, saved_lc + mov saved_lc=ar.lc + or t0=in0,in1 + ;; + + or t0=t0,in2 + .save pr, saved_pr + mov saved_pr=pr + + .body + + cmp.eq p6,p0=in2,r0 // zero length? + mov retval=in0 // return dst +(p6) br.ret.spnt.many rp // zero length, return immediately + ;; + + mov dst=in0 // copy because of rotation + shr.u cnt=in2,3 // number of 8-byte words to copy + mov pr.rot=1<<16 + ;; + + adds cnt=-1,cnt // br.ctop is repeat/until + cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? + mov ar.ec=N + ;; + + and t0=0x7,t0 + mov ar.lc=cnt + ;; + cmp.ne p6,p0=t0,r0 + + mov src=in1 // copy because of rotation +(p7) br.cond.spnt.few .memcpy_short +(p6) br.cond.spnt.few .memcpy_long + ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; + .rotr val[N] + .rotp p[N] + .align 32 +1: { .mib +(p[0]) ld8 val[0]=[src],8 + nop.i 0 + brp.loop.imp 1b, 2f +} +2: { .mfb +(p[N-1])st8 [dst]=val[N-1],8 + nop.f 0 + br.ctop.dptk.few 1b +} + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + /* + * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time + * copy loop. This performs relatively poorly on Itanium, but it doesn't + * get used very often (gcc inlines small copies) and due to atomicity + * issues, we want to avoid read-modify-write of entire words. + */ + .align 32 +.memcpy_short: + adds cnt=-1,in2 // br.ctop is repeat/until + mov ar.ec=MEM_LAT + brp.loop.imp 1f, 2f + ;; + mov ar.lc=cnt + ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; + nop.m 0 + ;; + /* + * It is faster to put a stop bit in the loop here because it makes + * the pipeline shorter (and latency is what matters on short copies). + */ + .align 32 +1: { .mib +(p[0]) ld1 val[0]=[src],1 + nop.i 0 + brp.loop.imp 1b, 2f +} ;; +2: { .mfb +(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 + nop.f 0 + br.ctop.dptk.few 1b +} ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + /* + * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't + * an overriding concern here, but throughput is. We first do + * sub-word copying until the destination is aligned, then we check + * if the source is also aligned. If so, we do a simple load/store-loop + * until there are less than 8 bytes left over and then we do the tail, + * by storing the last few bytes using sub-word copying. If the source + * is not aligned, we branch off to the non-congruent loop. + * + * stage: op: + * 0 ld + * : + * MEM_LAT+3 shrp + * MEM_LAT+4 st + * + * On Itanium, the pipeline itself runs without stalls. However, br.ctop + * seems to introduce an unavoidable bubble in the pipeline so the overall + * latency is 2 cycles/iteration. This gives us a _copy_ throughput + * of 4 byte/cycle. Still not bad. + */ +# undef N +# undef Nrot +# define N (MEM_LAT + 5) /* number of stages */ +# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */ + +#define LOG_LOOP_SIZE 6 + +.memcpy_long: + alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame + and t0=-8,src // t0 = src & ~7 + and t2=7,src // t2 = src & 7 + ;; + ld8 t0=[t0] // t0 = 1st source word + adds src2=7,src // src2 = (src + 7) + sub t4=r0,dst // t4 = -dst + ;; + and src2=-8,src2 // src2 = (src + 7) & ~7 + shl t2=t2,3 // t2 = 8*(src & 7) + shl t4=t4,3 // t4 = 8*(dst & 7) + ;; + ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise + sub t3=64,t2 // t3 = 64-8*(src & 7) + shr.u t0=t0,t2 + ;; + add src_end=src,in2 + shl t1=t1,t3 + mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7) + ;; + or t0=t0,t1 + mov cnt=r0 + adds src_end=-1,src_end + ;; +(p3) st1 [dst]=t0,1 +(p3) shr.u t0=t0,8 +(p3) adds cnt=1,cnt + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 +(p4) adds cnt=2,cnt + ;; +(p5) st4 [dst]=t0,4 +(p5) adds cnt=4,cnt + and src_end=-8,src_end // src_end = last word of source buffer + ;; + + // At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy: + +1:{ add src=cnt,src // make src point to remainder of source buffer + sub cnt=in2,cnt // cnt = number of bytes left to copy + mov t4=ip + } ;; + and src2=-8,src // align source pointer + adds t4=.memcpy_loops-1b,t4 + mov ar.ec=N + + and t0=7,src // t0 = src & 7 + shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy + shl cnt=cnt,3 // move bits 0-2 to 3-5 + ;; + + .rotr val[N+1], w[2] + .rotp p[N] + + cmp.ne p6,p0=t0,r0 // is src aligned, too? + shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7) + adds t2=-1,t2 // br.ctop is repeat/until + ;; + add t4=t0,t4 + mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy + mov ar.lc=t2 + ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; +(p6) ld8 val[1]=[src2],8 // prime the pump... + mov b6=t4 + br.sptk.few b6 + ;; + +.memcpy_tail: + // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is + // less than 8) and t0 contains the last few bytes of the src buffer: +(p5) st4 [dst]=t0,4 +(p5) shr.u t0=t0,32 + mov ar.lc=saved_lc + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 + mov ar.pfs=saved_pfs + ;; +(p3) st1 [dst]=t0 + mov pr=saved_pr,-1 + br.ret.sptk.many rp + +/////////////////////////////////////////////////////// + .align 64 + +#define COPY(shift,index) \ + 1: { .mib \ + (p[0]) ld8 val[0]=[src2],8; \ + (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ + brp.loop.imp 1b, 2f \ + }; \ + 2: { .mfb \ + (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ + nop.f 0; \ + br.ctop.dptk.few 1b; \ + }; \ + ;; \ + ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ + ;; \ + shrp t0=val[N-1],val[N-index],shift; \ + br .memcpy_tail +.memcpy_loops: + COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */ + COPY(8, 0) + COPY(16, 0) + COPY(24, 0) + COPY(32, 0) + COPY(40, 0) + COPY(48, 0) + COPY(56, 0) + +END(memcpy) diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S new file mode 100644 index 000000000000..6f26ef7cc236 --- /dev/null +++ b/arch/ia64/lib/memcpy_mck.S @@ -0,0 +1,661 @@ +/* + * Itanium 2-optimized version of memcpy and copy_user function + * + * Inputs: + * in0: destination address + * in1: source address + * in2: number of bytes to copy + * Output: + * 0 if success, or number of byte NOT copied if error occurred. + * + * Copyright (C) 2002 Intel Corp. + * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> + */ +#include <linux/config.h> +#include <asm/asmmacro.h> +#include <asm/page.h> + +#define EK(y...) EX(y) + +/* McKinley specific optimization */ + +#define retval r8 +#define saved_pfs r31 +#define saved_lc r10 +#define saved_pr r11 +#define saved_in0 r14 +#define saved_in1 r15 +#define saved_in2 r16 + +#define src0 r2 +#define src1 r3 +#define dst0 r17 +#define dst1 r18 +#define cnt r9 + +/* r19-r30 are temp for each code section */ +#define PREFETCH_DIST 8 +#define src_pre_mem r19 +#define dst_pre_mem r20 +#define src_pre_l2 r21 +#define dst_pre_l2 r22 +#define t1 r23 +#define t2 r24 +#define t3 r25 +#define t4 r26 +#define t5 t1 // alias! +#define t6 t2 // alias! +#define t7 t3 // alias! +#define n8 r27 +#define t9 t5 // alias! +#define t10 t4 // alias! +#define t11 t7 // alias! +#define t12 t6 // alias! +#define t14 t10 // alias! +#define t13 r28 +#define t15 r29 +#define tmp r30 + +/* defines for long_copy block */ +#define A 0 +#define B (PREFETCH_DIST) +#define C (B + PREFETCH_DIST) +#define D (C + 1) +#define N (D + 1) +#define Nrot ((N + 7) & ~7) + +/* alias */ +#define in0 r32 +#define in1 r33 +#define in2 r34 + +GLOBAL_ENTRY(memcpy) + and r28=0x7,in0 + and r29=0x7,in1 + mov f6=f0 + br.cond.sptk .common_code + ;; +GLOBAL_ENTRY(__copy_user) + .prologue +// check dest alignment + and r28=0x7,in0 + and r29=0x7,in1 + mov f6=f1 + mov saved_in0=in0 // save dest pointer + mov saved_in1=in1 // save src pointer + mov saved_in2=in2 // save len + ;; +.common_code: + cmp.gt p15,p0=8,in2 // check for small size + cmp.ne p13,p0=0,r28 // check dest alignment + cmp.ne p14,p0=0,r29 // check src alignment + add src0=0,in1 + sub r30=8,r28 // for .align_dest + mov retval=r0 // initialize return value + ;; + add dst0=0,in0 + add dst1=1,in0 // dest odd index + cmp.le p6,p0 = 1,r30 // for .align_dest +(p15) br.cond.dpnt .memcpy_short +(p13) br.cond.dpnt .align_dest +(p14) br.cond.dpnt .unaligned_src + ;; + +// both dest and src are aligned on 8-byte boundary +.aligned_src: + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot + .save pr, saved_pr + mov saved_pr=pr + + shr.u cnt=in2,7 // this much cache line + ;; + cmp.lt p6,p0=2*PREFETCH_DIST,cnt + cmp.lt p7,p8=1,cnt + .save ar.lc, saved_lc + mov saved_lc=ar.lc + .body + add cnt=-1,cnt + add src_pre_mem=0,in1 // prefetch src pointer + add dst_pre_mem=0,in0 // prefetch dest pointer + ;; +(p7) mov ar.lc=cnt // prefetch count +(p8) mov ar.lc=r0 +(p6) br.cond.dpnt .long_copy + ;; + +.prefetch: + lfetch.fault [src_pre_mem], 128 + lfetch.fault.excl [dst_pre_mem], 128 + br.cloop.dptk.few .prefetch + ;; + +.medium_copy: + and tmp=31,in2 // copy length after iteration + shr.u r29=in2,5 // number of 32-byte iteration + add dst1=8,dst0 // 2nd dest pointer + ;; + add cnt=-1,r29 // ctop iteration adjustment + cmp.eq p10,p0=r29,r0 // do we really need to loop? + add src1=8,src0 // 2nd src pointer + cmp.le p6,p0=8,tmp + ;; + cmp.le p7,p0=16,tmp + mov ar.lc=cnt // loop setup + cmp.eq p16,p17 = r0,r0 + mov ar.ec=2 +(p10) br.dpnt.few .aligned_src_tail + ;; + TEXT_ALIGN(32) +1: +EX(.ex_handler, (p16) ld8 r34=[src0],16) +EK(.ex_handler, (p16) ld8 r38=[src1],16) +EX(.ex_handler, (p17) st8 [dst0]=r33,16) +EK(.ex_handler, (p17) st8 [dst1]=r37,16) + ;; +EX(.ex_handler, (p16) ld8 r32=[src0],16) +EK(.ex_handler, (p16) ld8 r36=[src1],16) +EX(.ex_handler, (p16) st8 [dst0]=r34,16) +EK(.ex_handler, (p16) st8 [dst1]=r38,16) + br.ctop.dptk.few 1b + ;; + +.aligned_src_tail: +EX(.ex_handler, (p6) ld8 t1=[src0]) + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs +EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8) + cmp.le p8,p0=24,tmp + and r21=-8,tmp + ;; +EX(.ex_hndlr_s, (p8) ld8 t3=[src1]) +EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1 + and in2=7,tmp // remaining length +EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2 + add src0=src0,r21 // setting up src pointer + add dst0=dst0,r21 // setting up dest pointer + ;; +EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3 + mov pr=saved_pr,-1 + br.dptk.many .memcpy_short + ;; + +/* code taken from copy_page_mck */ +.long_copy: + .rotr v[2*PREFETCH_DIST] + .rotp p[N] + + mov src_pre_mem = src0 + mov pr.rot = 0x10000 + mov ar.ec = 1 // special unrolled loop + + mov dst_pre_mem = dst0 + + add src_pre_l2 = 8*8, src0 + add dst_pre_l2 = 8*8, dst0 + ;; + add src0 = 8, src_pre_mem // first t1 src + mov ar.lc = 2*PREFETCH_DIST - 1 + shr.u cnt=in2,7 // number of lines + add src1 = 3*8, src_pre_mem // first t3 src + add dst0 = 8, dst_pre_mem // first t1 dst + add dst1 = 3*8, dst_pre_mem // first t3 dst + ;; + and tmp=127,in2 // remaining bytes after this block + add cnt = -(2*PREFETCH_DIST) - 1, cnt + // same as .line_copy loop, but with all predicated-off instructions removed: +.prefetch_loop: +EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 +EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 + br.ctop.sptk .prefetch_loop + ;; + cmp.eq p16, p0 = r0, r0 // reset p16 to 1 + mov ar.lc = cnt + mov ar.ec = N // # of stages in pipeline + ;; +.line_copy: +EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0 +EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1 +EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory +EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2 + ;; +EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory +EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2 +EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2 +EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3 + ;; +EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8) +EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8) +EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8) +EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8) +EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8) + ;; +EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8) +EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8) +EK(.ex_handler, (p[D]) ld8 t15 = [src1], 4*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8) +EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8) + ;; +EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8) +EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) + br.ctop.sptk .line_copy + ;; + + add dst0=-8,dst0 + add src0=-8,src0 + mov in2=tmp + .restore sp + br.sptk.many .medium_copy + ;; + +#define BLOCK_SIZE 128*32 +#define blocksize r23 +#define curlen r24 + +// dest is on 8-byte boundary, src is not. We need to do +// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle. +.unaligned_src: + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,5,0,8 + .save ar.lc, saved_lc + mov saved_lc=ar.lc + .save pr, saved_pr + mov saved_pr=pr + .body +.4k_block: + mov saved_in0=dst0 // need to save all input arguments + mov saved_in2=in2 + mov blocksize=BLOCK_SIZE + ;; + cmp.lt p6,p7=blocksize,in2 + mov saved_in1=src0 + ;; +(p6) mov in2=blocksize + ;; + shr.u r21=in2,7 // this much cache line + shr.u r22=in2,4 // number of 16-byte iteration + and curlen=15,in2 // copy length after iteration + and r30=7,src0 // source alignment + ;; + cmp.lt p7,p8=1,r21 + add cnt=-1,r21 + ;; + + add src_pre_mem=0,src0 // prefetch src pointer + add dst_pre_mem=0,dst0 // prefetch dest pointer + and src0=-8,src0 // 1st src pointer +(p7) mov ar.lc = r21 +(p8) mov ar.lc = r0 + ;; + TEXT_ALIGN(32) +1: lfetch.fault [src_pre_mem], 128 + lfetch.fault.excl [dst_pre_mem], 128 + br.cloop.dptk.few 1b + ;; + + shladd dst1=r22,3,dst0 // 2nd dest pointer + shladd src1=r22,3,src0 // 2nd src pointer + cmp.eq p8,p9=r22,r0 // do we really need to loop? + cmp.le p6,p7=8,curlen; // have at least 8 byte remaining? + add cnt=-1,r22 // ctop iteration adjustment + ;; +EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer +EK(.ex_handler, (p9) ld8 r37=[src1],8) +(p8) br.dpnt.few .noloop + ;; + +// The jump address is calculated based on src alignment. The COPYU +// macro below need to confine its size to power of two, so an entry +// can be caulated using shl instead of an expensive multiply. The +// size is then hard coded by the following #define to match the +// actual size. This make it somewhat tedious when COPYU macro gets +// changed and this need to be adjusted to match. +#define LOOP_SIZE 6 +1: + mov r29=ip // jmp_table thread + mov ar.lc=cnt + ;; + add r29=.jump_table - 1b - (.jmp1-.jump_table), r29 + shl r28=r30, LOOP_SIZE // jmp_table thread + mov ar.ec=2 // loop setup + ;; + add r29=r29,r28 // jmp_table thread + cmp.eq p16,p17=r0,r0 + ;; + mov b6=r29 // jmp_table thread + ;; + br.cond.sptk.few b6 + +// for 8-15 byte case +// We will skip the loop, but need to replicate the side effect +// that the loop produces. +.noloop: +EX(.ex_handler, (p6) ld8 r37=[src1],8) + add src0=8,src0 +(p6) shl r25=r30,3 + ;; +EX(.ex_handler, (p6) ld8 r27=[src1]) +(p6) shr.u r28=r37,r25 +(p6) sub r26=64,r25 + ;; +(p6) shl r27=r27,r26 + ;; +(p6) or r21=r28,r27 + +.unaligned_src_tail: +/* check if we have more than blocksize to copy, if so go back */ + cmp.gt p8,p0=saved_in2,blocksize + ;; +(p8) add dst0=saved_in0,blocksize +(p8) add src0=saved_in1,blocksize +(p8) sub in2=saved_in2,blocksize +(p8) br.dpnt .4k_block + ;; + +/* we have up to 15 byte to copy in the tail. + * part of work is already done in the jump table code + * we are at the following state. + * src side: + * + * xxxxxx xx <----- r21 has xxxxxxxx already + * -------- -------- -------- + * 0 8 16 + * ^ + * | + * src1 + * + * dst + * -------- -------- -------- + * ^ + * | + * dst1 + */ +EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy +(p6) add curlen=-8,curlen // update length + mov ar.pfs=saved_pfs + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov in2=curlen // remaining length + mov dst0=dst1 // dest pointer + add src0=src1,r30 // forward by src alignment + ;; + +// 7 byte or smaller. +.memcpy_short: + cmp.le p8,p9 = 1,in2 + cmp.le p10,p11 = 2,in2 + cmp.le p12,p13 = 3,in2 + cmp.le p14,p15 = 4,in2 + add src1=1,src0 // second src pointer + add dst1=1,dst0 // second dest pointer + ;; + +EX(.ex_handler_short, (p8) ld1 t1=[src0],2) +EK(.ex_handler_short, (p10) ld1 t2=[src1],2) +(p9) br.ret.dpnt rp // 0 byte copy + ;; + +EX(.ex_handler_short, (p8) st1 [dst0]=t1,2) +EK(.ex_handler_short, (p10) st1 [dst1]=t2,2) +(p11) br.ret.dpnt rp // 1 byte copy + +EX(.ex_handler_short, (p12) ld1 t3=[src0],2) +EK(.ex_handler_short, (p14) ld1 t4=[src1],2) +(p13) br.ret.dpnt rp // 2 byte copy + ;; + + cmp.le p6,p7 = 5,in2 + cmp.le p8,p9 = 6,in2 + cmp.le p10,p11 = 7,in2 + +EX(.ex_handler_short, (p12) st1 [dst0]=t3,2) +EK(.ex_handler_short, (p14) st1 [dst1]=t4,2) +(p15) br.ret.dpnt rp // 3 byte copy + ;; + +EX(.ex_handler_short, (p6) ld1 t5=[src0],2) +EK(.ex_handler_short, (p8) ld1 t6=[src1],2) +(p7) br.ret.dpnt rp // 4 byte copy + ;; + +EX(.ex_handler_short, (p6) st1 [dst0]=t5,2) +EK(.ex_handler_short, (p8) st1 [dst1]=t6,2) +(p9) br.ret.dptk rp // 5 byte copy + +EX(.ex_handler_short, (p10) ld1 t7=[src0],2) +(p11) br.ret.dptk rp // 6 byte copy + ;; + +EX(.ex_handler_short, (p10) st1 [dst0]=t7,2) + br.ret.dptk rp // done all cases + + +/* Align dest to nearest 8-byte boundary. We know we have at + * least 7 bytes to copy, enough to crawl to 8-byte boundary. + * Actual number of byte to crawl depend on the dest alignment. + * 7 byte or less is taken care at .memcpy_short + + * src0 - source even index + * src1 - source odd index + * dst0 - dest even index + * dst1 - dest odd index + * r30 - distance to 8-byte boundary + */ + +.align_dest: + add src1=1,in1 // source odd index + cmp.le p7,p0 = 2,r30 // for .align_dest + cmp.le p8,p0 = 3,r30 // for .align_dest +EX(.ex_handler_short, (p6) ld1 t1=[src0],2) + cmp.le p9,p0 = 4,r30 // for .align_dest + cmp.le p10,p0 = 5,r30 + ;; +EX(.ex_handler_short, (p7) ld1 t2=[src1],2) +EK(.ex_handler_short, (p8) ld1 t3=[src0],2) + cmp.le p11,p0 = 6,r30 +EX(.ex_handler_short, (p6) st1 [dst0] = t1,2) + cmp.le p12,p0 = 7,r30 + ;; +EX(.ex_handler_short, (p9) ld1 t4=[src1],2) +EK(.ex_handler_short, (p10) ld1 t5=[src0],2) +EX(.ex_handler_short, (p7) st1 [dst1] = t2,2) +EK(.ex_handler_short, (p8) st1 [dst0] = t3,2) + ;; +EX(.ex_handler_short, (p11) ld1 t6=[src1],2) +EK(.ex_handler_short, (p12) ld1 t7=[src0],2) + cmp.eq p6,p7=r28,r29 +EX(.ex_handler_short, (p9) st1 [dst1] = t4,2) +EK(.ex_handler_short, (p10) st1 [dst0] = t5,2) + sub in2=in2,r30 + ;; +EX(.ex_handler_short, (p11) st1 [dst1] = t6,2) +EK(.ex_handler_short, (p12) st1 [dst0] = t7) + add dst0=in0,r30 // setup arguments + add src0=in1,r30 +(p6) br.cond.dptk .aligned_src +(p7) br.cond.dpnt .unaligned_src + ;; + +/* main loop body in jump table format */ +#define COPYU(shift) \ +1: \ +EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \ +EK(.ex_handler, (p16) ld8 r36=[src1],8); \ + (p17) shrp r35=r33,r34,shift;; /* 1 */ \ +EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \ + nop.m 0; \ + (p16) shrp r38=r36,r37,shift; \ +EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \ +EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ + br.ctop.dptk.few 1b;; \ + (p7) add src1=-8,src1; /* back out for <8 byte case */ \ + shrp r21=r22,r38,shift; /* speculative work */ \ + br.sptk.few .unaligned_src_tail /* branch out of jump table */ \ + ;; + TEXT_ALIGN(32) +.jump_table: + COPYU(8) // unaligned cases +.jmp1: + COPYU(16) + COPYU(24) + COPYU(32) + COPYU(40) + COPYU(48) + COPYU(56) + +#undef A +#undef B +#undef C +#undef D +END(memcpy) + +/* + * Due to lack of local tag support in gcc 2.x assembler, it is not clear which + * instruction failed in the bundle. The exception algorithm is that we + * first figure out the faulting address, then detect if there is any + * progress made on the copy, if so, redo the copy from last known copied + * location up to the faulting address (exclusive). In the copy_from_user + * case, remaining byte in kernel buffer will be zeroed. + * + * Take copy_from_user as an example, in the code there are multiple loads + * in a bundle and those multiple loads could span over two pages, the + * faulting address is calculated as page_round_down(max(src0, src1)). + * This is based on knowledge that if we can access one byte in a page, we + * can access any byte in that page. + * + * predicate used in the exception handler: + * p6-p7: direction + * p10-p11: src faulting addr calculation + * p12-p13: dst faulting addr calculation + */ + +#define A r19 +#define B r20 +#define C r21 +#define D r22 +#define F r28 + +#define memset_arg0 r32 +#define memset_arg2 r33 + +#define saved_retval loc0 +#define saved_rtlink loc1 +#define saved_pfs_stack loc2 + +.ex_hndlr_s: + add src0=8,src0 + br.sptk .ex_handler + ;; +.ex_hndlr_d: + add dst0=8,dst0 + br.sptk .ex_handler + ;; +.ex_hndlr_lcpy_1: + mov src1=src_pre_mem + mov dst1=dst_pre_mem + cmp.gtu p10,p11=src_pre_mem,saved_in1 + cmp.gtu p12,p13=dst_pre_mem,saved_in0 + ;; +(p10) add src0=8,saved_in1 +(p11) mov src0=saved_in1 +(p12) add dst0=8,saved_in0 +(p13) mov dst0=saved_in0 + br.sptk .ex_handler +.ex_handler_lcpy: + // in line_copy block, the preload addresses should always ahead + // of the other two src/dst pointers. Furthermore, src1/dst1 should + // always ahead of src0/dst0. + mov src1=src_pre_mem + mov dst1=dst_pre_mem +.ex_handler: + mov pr=saved_pr,-1 // first restore pr, lc, and pfs + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + ;; +.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs + cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction + cmp.ltu p10,p11=src0,src1 + cmp.ltu p12,p13=dst0,dst1 + fcmp.eq p8,p0=f6,f0 // is it memcpy? + mov tmp = dst0 + ;; +(p11) mov src1 = src0 // pick the larger of the two +(p13) mov dst0 = dst1 // make dst0 the smaller one +(p13) mov dst1 = tmp // and dst1 the larger one + ;; +(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary +(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary + ;; +(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store +(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load + mov retval=saved_in2 +(p8) ld1 tmp=[src1] // force an oops for memcpy call +(p8) st1 [dst1]=r0 // force an oops for memcpy call +(p14) br.ret.sptk.many rp + +/* + * The remaining byte to copy is calculated as: + * + * A = (faulting_addr - orig_src) -> len to faulting ld address + * or + * (faulting_addr - orig_dst) -> len to faulting st address + * B = (cur_dst - orig_dst) -> len copied so far + * C = A - B -> len need to be copied + * D = orig_len - A -> len need to be zeroed + */ +(p6) sub A = F, saved_in0 +(p7) sub A = F, saved_in1 + clrrrb + ;; + alloc saved_pfs_stack=ar.pfs,3,3,3,0 + sub B = dst0, saved_in0 // how many byte copied so far + ;; + sub C = A, B + sub D = saved_in2, A + ;; + cmp.gt p8,p0=C,r0 // more than 1 byte? + add memset_arg0=saved_in0, A +(p6) mov memset_arg2=0 // copy_to_user should not call memset +(p7) mov memset_arg2=D // copy_from_user need to have kbuf zeroed + mov r8=0 + mov saved_retval = D + mov saved_rtlink = b0 + + add out0=saved_in0, B + add out1=saved_in1, B + mov out2=C +(p8) br.call.sptk.few b0=__copy_user // recursive call + ;; + + add saved_retval=saved_retval,r8 // above might return non-zero value + cmp.gt p8,p0=memset_arg2,r0 // more than 1 byte? + mov out0=memset_arg0 // *s + mov out1=r0 // c + mov out2=memset_arg2 // n +(p8) br.call.sptk.few b0=memset + ;; + + mov retval=saved_retval + mov ar.pfs=saved_pfs_stack + mov b0=saved_rtlink + br.ret.sptk.many rp + +/* end of McKinley specific optimization */ +END(__copy_user) diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S new file mode 100644 index 000000000000..bd8cf907fe22 --- /dev/null +++ b/arch/ia64/lib/memset.S @@ -0,0 +1,362 @@ +/* Optimized version of the standard memset() function. + + Copyright (c) 2002 Hewlett-Packard Co/CERN + Sverre Jarp <Sverre.Jarp@cern.ch> + + Return: dest + + Inputs: + in0: dest + in1: value + in2: count + + The algorithm is fairly straightforward: set byte by byte until we + we get to a 16B-aligned address, then loop on 128 B chunks using an + early store as prefetching, then loop on 32B chucks, then clear remaining + words, finally clear remaining bytes. + Since a stf.spill f0 can store 16B in one go, we use this instruction + to get peak speed when value = 0. */ + +#include <asm/asmmacro.h> +#undef ret + +#define dest in0 +#define value in1 +#define cnt in2 + +#define tmp r31 +#define save_lc r30 +#define ptr0 r29 +#define ptr1 r28 +#define ptr2 r27 +#define ptr3 r26 +#define ptr9 r24 +#define loopcnt r23 +#define linecnt r22 +#define bytecnt r21 + +#define fvalue f6 + +// This routine uses only scratch predicate registers (p6 - p15) +#define p_scr p6 // default register for same-cycle branches +#define p_nz p7 +#define p_zr p8 +#define p_unalgn p9 +#define p_y p11 +#define p_n p12 +#define p_yy p13 +#define p_nn p14 + +#define MIN1 15 +#define MIN1P1HALF 8 +#define LINE_SIZE 128 +#define LSIZE_SH 7 // shift amount +#define PREF_AHEAD 8 + +GLOBAL_ENTRY(memset) +{ .mmi + .prologue + alloc tmp = ar.pfs, 3, 0, 0, 0 + .body + lfetch.nt1 [dest] // + .save ar.lc, save_lc + mov.i save_lc = ar.lc +} { .mmi + mov ret0 = dest // return value + cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero + cmp.eq p_scr, p0 = cnt, r0 +;; } +{ .mmi + and ptr2 = -(MIN1+1), dest // aligned address + and tmp = MIN1, dest // prepare to check for correct alignment + tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) +} { .mib + mov ptr1 = dest + mux1 value = value, @brcst // create 8 identical bytes in word +(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 +;; } +{ .mib + cmp.ne p_unalgn, p0 = tmp, r0 // +} { .mib + sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt + cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? +(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +;; } +{ .mmi +(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +;; } +{ .mib +(p_y) add cnt = -8, cnt // +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +} { .mib +(p_y) st8 [ptr2] = value,-4 // +(p_n) add ptr2 = 4, ptr2 // +;; } +{ .mib +(p_yy) add cnt = -4, cnt // +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +} { .mib +(p_yy) st4 [ptr2] = value,-2 // +(p_nn) add ptr2 = 2, ptr2 // +;; } +{ .mmi + mov tmp = LINE_SIZE+1 // for compare +(p_y) add cnt = -2, cnt // +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +} { .mmi + setf.sig fvalue=value // transfer value to FLP side +(p_y) st2 [ptr2] = value,-1 // +(p_n) add ptr2 = 1, ptr2 // +;; } + +{ .mmi +(p_yy) st1 [ptr2] = value // + cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? +} { .mbb +(p_yy) add cnt = -1, cnt // +(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +;; } + +{ .mib + nop.m 0 + shr.u linecnt = cnt, LSIZE_SH +(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill +;; } + + TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later +{ .mmi + and tmp = -(LINE_SIZE), cnt // compute end of range + mov ptr9 = ptr1 // used for prefetching + and cnt = (LINE_SIZE-1), cnt // remainder +} { .mmi + mov loopcnt = PREF_AHEAD-1 // default prefetch loop + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value +;; } +{ .mmi +(p_scr) add loopcnt = -1, linecnt // + add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores) + add ptr1 = tmp, ptr1 // first address beyond total range +;; } +{ .mmi + add tmp = -1, linecnt // next loop count + mov.i ar.lc = loopcnt // +;; } +.pref_l1a: +{ .mib + stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart + nop.i 0 + br.cloop.dptk.few .pref_l1a +;; } +{ .mmi + add ptr0 = 16, ptr2 // Two stores in parallel + mov.i ar.lc = tmp // +;; } +.l1ax: + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 32 + cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + ;; } +{ .mmb + stf8 [ptr2] = fvalue, 24 +(p_scr) stf8 [ptr9] = fvalue, 128 + br.cloop.dptk.few .l1ax +;; } +{ .mbb + cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 + br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 +;; } + + TEXT_ALIGN(32) +.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later +{ .mmi + and tmp = -(LINE_SIZE), cnt // compute end of range + mov ptr9 = ptr1 // used for prefetching + and cnt = (LINE_SIZE-1), cnt // remainder +} { .mmi + mov loopcnt = PREF_AHEAD-1 // default prefetch loop + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value +;; } +{ .mmi +(p_scr) add loopcnt = -1, linecnt + add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) + add ptr1 = tmp, ptr1 // first address beyond total range +;; } +{ .mmi + add tmp = -1, linecnt // next loop count + mov.i ar.lc = loopcnt +;; } +.pref_l1b: +{ .mib + stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + nop.i 0 + br.cloop.dptk.few .pref_l1b +;; } +{ .mmi + add ptr0 = 16, ptr2 // Two stores in parallel + mov.i ar.lc = tmp +;; } +.l1bx: + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 32 + ;; } + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 32 + ;; } + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 64 + cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + ;; } +{ .mmb + stf.spill [ptr2] = f0, 32 +(p_scr) stf.spill [ptr9] = f0, 128 + br.cloop.dptk.few .l1bx +;; } +{ .mib + cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // +;; } + +.fraction_of_line: +{ .mib + add ptr2 = 16, ptr1 + shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 +;; } +{ .mib + cmp.eq p_scr, p0 = loopcnt, r0 + add loopcnt = -1, loopcnt +(p_scr) br.cond.dpnt.many .store_words +;; } +{ .mib + and cnt = 0x1f, cnt // compute the remaining cnt + mov.i ar.lc = loopcnt +;; } + TEXT_ALIGN(32) +.l2: // ------------------------------------ // L2A: store 32B in 2 cycles +{ .mmb + stf8 [ptr1] = fvalue, 8 + stf8 [ptr2] = fvalue, 8 +;; } { .mmb + stf8 [ptr1] = fvalue, 24 + stf8 [ptr2] = fvalue, 24 + br.cloop.dptk.many .l2 +;; } +.store_words: +{ .mib + cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch +;; } + +{ .mmi + stf8 [ptr1] = fvalue, 8 // store + cmp.le p_y, p_n = 16, cnt + add cnt = -8, cnt // subtract +;; } +{ .mmi +(p_y) stf8 [ptr1] = fvalue, 8 // store +(p_y) cmp.le.unc p_yy, p_nn = 16, cnt +(p_y) add cnt = -8, cnt // subtract +;; } +{ .mmi // store +(p_yy) stf8 [ptr1] = fvalue, 8 +(p_yy) add cnt = -8, cnt // subtract +;; } + +.move_bytes_from_alignment: +{ .mib + cmp.eq p_scr, p0 = cnt, r0 + tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? +(p_scr) br.cond.dpnt.few .restore_and_exit +;; } +{ .mib +(p_y) st4 [ptr1] = value,4 + tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? +;; } +{ .mib +(p_yy) st2 [ptr1] = value,2 + tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? +;; } + +{ .mib +(p_y) st1 [ptr1] = value +;; } +.restore_and_exit: +{ .mib + nop.m 0 + mov.i ar.lc = save_lc + br.ret.sptk.many rp +;; } + +.move_bytes_unaligned: +{ .mmi + .pred.rel "mutex",p_y, p_n + .pred.rel "mutex",p_yy, p_nn +(p_n) cmp.le p_yy, p_nn = 4, cnt +(p_y) cmp.le p_yy, p_nn = 5, cnt +(p_n) add ptr2 = 2, ptr1 +} { .mmi +(p_y) add ptr2 = 3, ptr1 +(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left] +(p_y) add cnt = -1, cnt +;; } +{ .mmi +(p_yy) cmp.le.unc p_y, p0 = 8, cnt + add ptr3 = ptr1, cnt // prepare last store + mov.i ar.lc = save_lc +} { .mmi +(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (o less) left] +(p_yy) add cnt = -4, cnt +;; } +{ .mmi +(p_y) cmp.le.unc p_yy, p0 = 8, cnt + add ptr3 = -1, ptr3 // last store + tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? +} { .mmi +(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left] +(p_y) add cnt = -4, cnt +;; } +{ .mmi +(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left] + tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? +} { .mmi +(p_yy) add cnt = -4, cnt +;; } +{ .mmb +(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes +(p_y) st1 [ptr3] = value // fill last byte (using ptr3) + br.ret.sptk.many rp +} +END(memset) diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S new file mode 100644 index 000000000000..e0cdac0a85b8 --- /dev/null +++ b/arch/ia64/lib/strlen.S @@ -0,0 +1,192 @@ +/* + * + * Optimized version of the standard strlen() function + * + * + * Inputs: + * in0 address of string + * + * Outputs: + * ret0 the number of characters in the string (0 if empty string) + * does not count the \0 + * + * Copyright (C) 1999, 2001 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * + * 09/24/99 S.Eranian add speculation recovery code + */ + +#include <asm/asmmacro.h> + +// +// +// This is an enhanced version of the basic strlen. it includes a combination +// of compute zero index (czx), parallel comparisons, speculative loads and +// loop unroll using rotating registers. +// +// General Ideas about the algorithm: +// The goal is to look at the string in chunks of 8 bytes. +// so we need to do a few extra checks at the beginning because the +// string may not be 8-byte aligned. In this case we load the 8byte +// quantity which includes the start of the string and mask the unused +// bytes with 0xff to avoid confusing czx. +// We use speculative loads and software pipelining to hide memory +// latency and do read ahead safely. This way we defer any exception. +// +// Because we don't want the kernel to be relying on particular +// settings of the DCR register, we provide recovery code in case +// speculation fails. The recovery code is going to "redo" the work using +// only normal loads. If we still get a fault then we generate a +// kernel panic. Otherwise we return the strlen as usual. +// +// The fact that speculation may fail can be caused, for instance, by +// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., +// a NaT bit will be set if the translation is not present. The normal +// load, on the other hand, will cause the translation to be inserted +// if the mapping exists. +// +// It should be noted that we execute recovery code only when we need +// to use the data that has been speculatively loaded: we don't execute +// recovery code on pure read ahead data. +// +// Remarks: +// - the cmp r0,r0 is used as a fast way to initialize a predicate +// register to 1. This is required to make sure that we get the parallel +// compare correct. +// +// - we don't use the epilogue counter to exit the loop but we need to set +// it to zero beforehand. +// +// - after the loop we must test for Nat values because neither the +// czx nor cmp instruction raise a NaT consumption fault. We must be +// careful not to look too far for a Nat for which we don't care. +// For instance we don't need to look at a NaT in val2 if the zero byte +// was in val1. +// +// - Clearly performance tuning is required. +// +// +// +#define saved_pfs r11 +#define tmp r10 +#define base r16 +#define orig r17 +#define saved_pr r18 +#define src r19 +#define mask r20 +#define val r21 +#define val1 r22 +#define val2 r23 + +GLOBAL_ENTRY(strlen) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8 + + .rotr v[2], w[2] // declares our 4 aliases + + extr.u tmp=in0,0,3 // tmp=least significant 3 bits + mov orig=in0 // keep trackof initial byte address + dep src=0,in0,0,3 // src=8byte-aligned in0 address + .save pr, saved_pr + mov saved_pr=pr // preserve predicates (rotation) + ;; + + .body + + ld8 v[1]=[src],8 // must not speculate: can fail here + shl tmp=tmp,3 // multiply by 8bits/byte + mov mask=-1 // our mask + ;; + ld8.s w[1]=[src],8 // speculatively load next + cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and + sub tmp=64,tmp // how many bits to shift our mask on the right + ;; + shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part + mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) + ;; + add base=-16,src // keep track of aligned base + or v[1]=v[1],mask // now we have a safe initial byte pattern + ;; +1: + ld8.s v[0]=[src],8 // speculatively load next + czx1.r val1=v[1] // search 0 byte from right + czx1.r val2=w[1] // search 0 byte from right following 8bytes + ;; + ld8.s w[0]=[src],8 // speculatively load next to next + cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 + cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 +(p6) br.wtop.dptk 1b // loop until p6 == 0 + ;; + // + // We must return try the recovery code iff + // val1_is_nat || (val1==8 && val2_is_nat) + // + // XXX Fixme + // - there must be a better way of doing the test + // + cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) + tnat.nz p6,p7=val1 // test NaT on val1 +(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT + ;; + // + // if we come here p7 is true, i.e., initialized for // cmp + // + cmp.eq.and p7,p0=8,val1// val1==8? + tnat.nz.and p7,p0=val2 // test NaT if val2 +(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT + ;; +(p8) mov val1=val2 // the other test got us out of the loop +(p8) adds src=-16,src // correct position when 3 ahead +(p9) adds src=-24,src // correct position when 4 ahead + ;; + sub ret0=src,orig // distance from base + sub tmp=8,val1 // which byte in word + mov pr=saved_pr,0xffffffffffff0000 + ;; + sub ret0=ret0,tmp // adjust + mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what + br.ret.sptk.many rp // end of normal execution + + // + // Outlined recovery code when speculation failed + // + // This time we don't use speculation and rely on the normal exception + // mechanism. that's why the loop is not as good as the previous one + // because read ahead is not possible + // + // IMPORTANT: + // Please note that in the case of strlen() as opposed to strlen_user() + // we don't use the exception mechanism, as this function is not + // supposed to fail. If that happens it means we have a bug and the + // code will cause of kernel fault. + // + // XXX Fixme + // - today we restart from the beginning of the string instead + // of trying to continue where we left off. + // +.recover: + ld8 val=[base],8 // will fail if unrecoverable fault + ;; + or val=val,mask // remask first bytes + cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop + ;; + // + // ar.ec is still zero here + // +2: +(p6) ld8 val=[base],8 // will fail if unrecoverable fault + ;; + czx1.r val1=val // search 0 byte from right + ;; + cmp.eq p6,p0=8,val1 // val1==8 ? +(p6) br.wtop.dptk 2b // loop until p6 == 0 + ;; // (avoid WAW on p63) + sub ret0=base,orig // distance from base + sub tmp=8,val1 + mov pr=saved_pr,0xffffffffffff0000 + ;; + sub ret0=ret0,tmp // length=now - back -1 + mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what + br.ret.sptk.many rp // end of successful recovery code +END(strlen) diff --git a/arch/ia64/lib/strlen_user.S b/arch/ia64/lib/strlen_user.S new file mode 100644 index 000000000000..c71eded4285e --- /dev/null +++ b/arch/ia64/lib/strlen_user.S @@ -0,0 +1,198 @@ +/* + * Optimized version of the strlen_user() function + * + * Inputs: + * in0 address of buffer + * + * Outputs: + * ret0 0 in case of fault, strlen(buffer)+1 otherwise + * + * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * + * 01/19/99 S.Eranian heavily enhanced version (see details below) + * 09/24/99 S.Eranian added speculation recovery code + */ + +#include <asm/asmmacro.h> + +// +// int strlen_user(char *) +// ------------------------ +// Returns: +// - length of string + 1 +// - 0 in case an exception is raised +// +// This is an enhanced version of the basic strlen_user. it includes a +// combination of compute zero index (czx), parallel comparisons, speculative +// loads and loop unroll using rotating registers. +// +// General Ideas about the algorithm: +// The goal is to look at the string in chunks of 8 bytes. +// so we need to do a few extra checks at the beginning because the +// string may not be 8-byte aligned. In this case we load the 8byte +// quantity which includes the start of the string and mask the unused +// bytes with 0xff to avoid confusing czx. +// We use speculative loads and software pipelining to hide memory +// latency and do read ahead safely. This way we defer any exception. +// +// Because we don't want the kernel to be relying on particular +// settings of the DCR register, we provide recovery code in case +// speculation fails. The recovery code is going to "redo" the work using +// only normal loads. If we still get a fault then we return an +// error (ret0=0). Otherwise we return the strlen+1 as usual. +// The fact that speculation may fail can be caused, for instance, by +// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., +// a NaT bit will be set if the translation is not present. The normal +// load, on the other hand, will cause the translation to be inserted +// if the mapping exists. +// +// It should be noted that we execute recovery code only when we need +// to use the data that has been speculatively loaded: we don't execute +// recovery code on pure read ahead data. +// +// Remarks: +// - the cmp r0,r0 is used as a fast way to initialize a predicate +// register to 1. This is required to make sure that we get the parallel +// compare correct. +// +// - we don't use the epilogue counter to exit the loop but we need to set +// it to zero beforehand. +// +// - after the loop we must test for Nat values because neither the +// czx nor cmp instruction raise a NaT consumption fault. We must be +// careful not to look too far for a Nat for which we don't care. +// For instance we don't need to look at a NaT in val2 if the zero byte +// was in val1. +// +// - Clearly performance tuning is required. +// + +#define saved_pfs r11 +#define tmp r10 +#define base r16 +#define orig r17 +#define saved_pr r18 +#define src r19 +#define mask r20 +#define val r21 +#define val1 r22 +#define val2 r23 + +GLOBAL_ENTRY(__strlen_user) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,11,0,0,8 + + .rotr v[2], w[2] // declares our 4 aliases + + extr.u tmp=in0,0,3 // tmp=least significant 3 bits + mov orig=in0 // keep trackof initial byte address + dep src=0,in0,0,3 // src=8byte-aligned in0 address + .save pr, saved_pr + mov saved_pr=pr // preserve predicates (rotation) + ;; + + .body + + ld8.s v[1]=[src],8 // load the initial 8bytes (must speculate) + shl tmp=tmp,3 // multiply by 8bits/byte + mov mask=-1 // our mask + ;; + ld8.s w[1]=[src],8 // load next 8 bytes in 2nd pipeline + cmp.eq p6,p0=r0,r0 // sets p6 (required because of // cmp.and) + sub tmp=64,tmp // how many bits to shift our mask on the right + ;; + shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part + mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) + ;; + add base=-16,src // keep track of aligned base + chk.s v[1], .recover // if already NaT, then directly skip to recover + or v[1]=v[1],mask // now we have a safe initial byte pattern + ;; +1: + ld8.s v[0]=[src],8 // speculatively load next + czx1.r val1=v[1] // search 0 byte from right + czx1.r val2=w[1] // search 0 byte from right following 8bytes + ;; + ld8.s w[0]=[src],8 // speculatively load next to next + cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 + cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 +(p6) br.wtop.dptk.few 1b // loop until p6 == 0 + ;; + // + // We must return try the recovery code iff + // val1_is_nat || (val1==8 && val2_is_nat) + // + // XXX Fixme + // - there must be a better way of doing the test + // + cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) + tnat.nz p6,p7=val1 // test NaT on val1 +(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT + ;; + // + // if we come here p7 is true, i.e., initialized for // cmp + // + cmp.eq.and p7,p0=8,val1// val1==8? + tnat.nz.and p7,p0=val2 // test NaT if val2 +(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT + ;; +(p8) mov val1=val2 // val2 contains the value +(p8) adds src=-16,src // correct position when 3 ahead +(p9) adds src=-24,src // correct position when 4 ahead + ;; + sub ret0=src,orig // distance from origin + sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1 + mov pr=saved_pr,0xffffffffffff0000 + ;; + sub ret0=ret0,tmp // length=now - back -1 + mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what + br.ret.sptk.many rp // end of normal execution + + // + // Outlined recovery code when speculation failed + // + // This time we don't use speculation and rely on the normal exception + // mechanism. that's why the loop is not as good as the previous one + // because read ahead is not possible + // + // XXX Fixme + // - today we restart from the beginning of the string instead + // of trying to continue where we left off. + // +.recover: + EX(.Lexit1, ld8 val=[base],8) // load the initial bytes + ;; + or val=val,mask // remask first bytes + cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop + ;; + // + // ar.ec is still zero here + // +2: + EX(.Lexit1, (p6) ld8 val=[base],8) + ;; + czx1.r val1=val // search 0 byte from right + ;; + cmp.eq p6,p0=8,val1 // val1==8 ? +(p6) br.wtop.dptk.few 2b // loop until p6 == 0 + ;; + sub ret0=base,orig // distance from base + sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1 + mov pr=saved_pr,0xffffffffffff0000 + ;; + sub ret0=ret0,tmp // length=now - back -1 + mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what + br.ret.sptk.many rp // end of successful recovery code + + // + // We failed even on the normal load (called from exception handler) + // +.Lexit1: + mov ret0=0 + mov pr=saved_pr,0xffffffffffff0000 + mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what + br.ret.sptk.many rp +END(__strlen_user) diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S new file mode 100644 index 000000000000..a504381f31eb --- /dev/null +++ b/arch/ia64/lib/strncpy_from_user.S @@ -0,0 +1,44 @@ +/* + * Just like strncpy() except that if a fault occurs during copying, + * -EFAULT is returned. + * + * Inputs: + * in0: address of destination buffer + * in1: address of string to be copied + * in2: length of buffer in bytes + * Outputs: + * r8: -EFAULT in case of fault or number of bytes copied if no fault + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 00/03/06 D. Mosberger Fixed to return proper return value (bug found by + * by Andreas Schwab <schwab@suse.de>). + */ + +#include <asm/asmmacro.h> + +GLOBAL_ENTRY(__strncpy_from_user) + alloc r2=ar.pfs,3,0,0,0 + mov r8=0 + mov r9=in1 + ;; + add r10=in1,in2 + cmp.eq p6,p0=r0,in2 +(p6) br.ret.spnt.many rp + + // XXX braindead copy loop---this needs to be optimized +.Loop1: + EX(.Lexit, ld1 r8=[in1],1) + ;; + EX(.Lexit, st1 [in0]=r8,1) + cmp.ne p6,p7=r8,r0 + ;; +(p6) cmp.ne.unc p8,p0=in1,r10 +(p8) br.cond.dpnt.few .Loop1 + ;; +(p6) mov r8=in2 // buffer filled up---return buffer length +(p7) sub r8=in1,r9,1 // return string length (excluding NUL character) +[.Lexit:] + br.ret.sptk.many rp +END(__strncpy_from_user) diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S new file mode 100644 index 000000000000..d09066b1e49d --- /dev/null +++ b/arch/ia64/lib/strnlen_user.S @@ -0,0 +1,45 @@ +/* + * Returns 0 if exception before NUL or reaching the supplied limit (N), + * a value greater than N if the string is longer than the limit, else + * strlen. + * + * Inputs: + * in0: address of buffer + * in1: string length limit N + * Outputs: + * r8: 0 in case of fault, strlen(buffer)+1 otherwise + * + * Copyright (C) 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <asm/asmmacro.h> + +GLOBAL_ENTRY(__strnlen_user) + .prologue + alloc r2=ar.pfs,2,0,0,0 + .save ar.lc, r16 + mov r16=ar.lc // preserve ar.lc + + .body + + add r3=-1,in1 + ;; + mov ar.lc=r3 + mov r9=0 + ;; + // XXX braindead strlen loop---this needs to be optimized +.Loop1: + EXCLR(.Lexit, ld1 r8=[in0],1) + add r9=1,r9 + ;; + cmp.eq p6,p0=r8,r0 +(p6) br.cond.dpnt .Lexit + br.cloop.dptk.few .Loop1 + + add r9=1,in1 // NUL not found---return N+1 + ;; +.Lexit: + mov r8=r9 + mov ar.lc=r16 // restore ar.lc + br.ret.sptk.many rp +END(__strnlen_user) diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c new file mode 100644 index 000000000000..ab7b3ad99a7f --- /dev/null +++ b/arch/ia64/lib/swiotlb.c @@ -0,0 +1,658 @@ +/* + * Dynamic DMA mapping support. + * + * This implementation is for IA-64 platforms that do not support + * I/O TLBs (aka DMA address translation hardware). + * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> + * Copyright (C) 2000, 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. + * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid + * unnecessary i-cache flushing. + * 04/07/.. ak Better overflow handling. Assorted fixes. + */ + +#include <linux/cache.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ctype.h> + +#include <asm/io.h> +#include <asm/pci.h> +#include <asm/dma.h> + +#include <linux/init.h> +#include <linux/bootmem.h> + +#define OFFSET(val,align) ((unsigned long) \ + ( (val) & ( (align) - 1))) + +#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) +#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) + +/* + * Maximum allowable number of contiguous slabs to map, + * must be a power of 2. What is the appropriate value ? + * The complexity of {map,unmap}_single is linearly dependent on this value. + */ +#define IO_TLB_SEGSIZE 128 + +/* + * log of the size of each IO TLB slab. The number of slabs is command line + * controllable. + */ +#define IO_TLB_SHIFT 11 + +int swiotlb_force; + +/* + * Used to do a quick range check in swiotlb_unmap_single and + * swiotlb_sync_single_*, to see if the memory was in fact allocated by this + * API. + */ +static char *io_tlb_start, *io_tlb_end; + +/* + * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and + * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. + */ +static unsigned long io_tlb_nslabs; + +/* + * When the IOMMU overflows we return a fallback buffer. This sets the size. + */ +static unsigned long io_tlb_overflow = 32*1024; + +void *io_tlb_overflow_buffer; + +/* + * This is a free list describing the number of free entries available from + * each index + */ +static unsigned int *io_tlb_list; +static unsigned int io_tlb_index; + +/* + * We need to save away the original address corresponding to a mapped entry + * for the sync operations. + */ +static unsigned char **io_tlb_orig_addr; + +/* + * Protect the above data structures in the map and unmap calls + */ +static DEFINE_SPINLOCK(io_tlb_lock); + +static int __init +setup_io_tlb_npages(char *str) +{ + if (isdigit(*str)) { + io_tlb_nslabs = simple_strtoul(str, &str, 0) << + (PAGE_SHIFT - IO_TLB_SHIFT); + /* avoid tail segment of size < IO_TLB_SEGSIZE */ + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + if (*str == ',') + ++str; + if (!strcmp(str, "force")) + swiotlb_force = 1; + return 1; +} +__setup("swiotlb=", setup_io_tlb_npages); +/* make io_tlb_overflow tunable too? */ + +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the PCI DMA API. + */ +void +swiotlb_init_with_default_size (size_t default_size) +{ + unsigned long i; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> PAGE_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * + (1 << IO_TLB_SHIFT)); + if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); + io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE + * between io_tlb_start and io_tlb_end. + */ + io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); + for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_index = 0; + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); + + /* + * Get the overflow emergency buffer + */ + io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); + printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", + virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end)); +} + +void +swiotlb_init (void) +{ + swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ +} + +static inline int +address_needs_mapping(struct device *hwdev, dma_addr_t addr) +{ + dma_addr_t mask = 0xffffffff; + /* If the device has a mask, use it, otherwise default to 32 bits */ + if (hwdev && hwdev->dma_mask) + mask = *hwdev->dma_mask; + return (addr & ~mask) != 0; +} + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ +static void * +map_single(struct device *hwdev, char *buffer, size_t size, int dir) +{ + unsigned long flags; + char *dma_addr; + unsigned int nslots, stride, index, wrap; + int i; + + /* + * For mappings greater than a page, we limit the stride (and + * hence alignment) to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + if (size > PAGE_SIZE) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = 1; + + if (!nslots) + BUG(); + + /* + * Find suitable number of IO TLB entries size that will fit this + * request and allocate a buffer from that IO TLB pool. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + wrap = index = ALIGN(io_tlb_index, stride); + + if (index >= io_tlb_nslabs) + wrap = index = 0; + + do { + /* + * If we find a slot that indicates we have 'nslots' + * number of contiguous buffers, we allocate the + * buffers from that slot and mark the entries as '0' + * indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + int count = 0; + + for (i = index; i < (int) (index + nslots); i++) + io_tlb_list[i] = 0; + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in + * the next round. + */ + io_tlb_index = ((index + nslots) < io_tlb_nslabs + ? (index + nslots) : 0); + + goto found; + } + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + + spin_unlock_irqrestore(&io_tlb_lock, flags); + return NULL; + } + found: + spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ + io_tlb_orig_addr[index] = buffer; + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + memcpy(dma_addr, buffer, size); + + return dma_addr; +} + +/* + * dma_addr is the kernel virtual address of the bounce buffer to unmap. + */ +static void +unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +{ + unsigned long flags; + int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + /* + * First, sync the memory before unmapping the entry + */ + if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + /* + * bounce... copy the data back into the original buffer * and + * delete the bounce buffer. + */ + memcpy(buffer, dma_addr, size); + + /* + * Return the buffer to the free list by setting the corresponding + * entries to indicate the number of contigous entries available. + * While returning the entries to the free list, we merge the entries + * with slots below and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? + io_tlb_list[index + nslots] : 0); + /* + * Step 1: return the slots to the free list, merging the + * slots with superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) + io_tlb_list[i] = ++count; + /* + * Step 2: merge the returned slots with the preceding slots, + * if available (non zero) + */ + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + } + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +static void +sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +{ + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + /* + * bounce... copy the data back into/from the original buffer + * XXX How do you handle DMA_BIDIRECTIONAL here ? + */ + if (dir == DMA_FROM_DEVICE) + memcpy(buffer, dma_addr, size); + else if (dir == DMA_TO_DEVICE) + memcpy(dma_addr, buffer, size); + else + BUG(); +} + +void * +swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, int flags) +{ + unsigned long dev_addr; + void *ret; + int order = get_order(size); + + /* + * XXX fix me: the DMA API should pass us an explicit DMA mask + * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32 + * bit range instead of a 16MB one). + */ + flags |= GFP_DMA; + + ret = (void *)__get_free_pages(flags, order); + if (ret && address_needs_mapping(hwdev, virt_to_phys(ret))) { + /* + * The allocated memory isn't reachable by the device. + * Fall back on swiotlb_map_single(). + */ + free_pages((unsigned long) ret, order); + ret = NULL; + } + if (!ret) { + /* + * We are either out of memory or the device can't DMA + * to GFP_DMA memory; fall back on + * swiotlb_map_single(), which will grab memory from + * the lowest available address range. + */ + dma_addr_t handle; + handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE); + if (dma_mapping_error(handle)) + return NULL; + + ret = phys_to_virt(handle); + } + + memset(ret, 0, size); + dev_addr = virt_to_phys(ret); + + /* Confirm address can be DMA'd by device */ + if (address_needs_mapping(hwdev, dev_addr)) { + printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n", + (unsigned long long)*hwdev->dma_mask, dev_addr); + panic("swiotlb_alloc_coherent: allocated memory is out of " + "range for device"); + } + *dma_handle = dev_addr; + return ret; +} + +void +swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + if (!(vaddr >= (void *)io_tlb_start + && vaddr < (void *)io_tlb_end)) + free_pages((unsigned long) vaddr, get_order(size)); + else + /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ + swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE); +} + +static void +swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) +{ + /* + * Ran out of IOMMU space for this operation. This is very bad. + * Unfortunately the drivers cannot handle this operation properly. + * unless they check for pci_dma_mapping_error (most don't) + * When the mapping is small enough return a static buffer to limit + * the damage, or panic when the transfer is too big. + */ + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " + "device %s\n", size, dev ? dev->bus_id : "?"); + + if (size > io_tlb_overflow && do_panic) { + if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic("PCI-DMA: Memory would be corrupted\n"); + if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic("PCI-DMA: Random memory would be DMAed\n"); + } +} + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. The + * PCI address to use is returned. + * + * Once the device is given the dma address, the device owns this memory until + * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. + */ +dma_addr_t +swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) +{ + unsigned long dev_addr = virt_to_phys(ptr); + void *map; + + if (dir == DMA_NONE) + BUG(); + /* + * If the pointer passed in happens to be in the device's DMA window, + * we can safely return the device addr and not worry about bounce + * buffering it. + */ + if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force) + return dev_addr; + + /* + * Oh well, have to allocate and map a bounce buffer. + */ + map = map_single(hwdev, ptr, size, dir); + if (!map) { + swiotlb_full(hwdev, size, dir, 1); + map = io_tlb_overflow_buffer; + } + + dev_addr = virt_to_phys(map); + + /* + * Ensure that the address returned is DMA'ble + */ + if (address_needs_mapping(hwdev, dev_addr)) + panic("map_single: bounce buffer is not DMA'ble"); + + return dev_addr; +} + +/* + * Since DMA is i-cache coherent, any (complete) pages that were written via + * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to + * flush them when they get mapped into an executable vm-area. + */ +static void +mark_clean(void *addr, size_t size) +{ + unsigned long pg_addr, end; + + pg_addr = PAGE_ALIGN((unsigned long) addr); + end = (unsigned long) addr + size; + while (pg_addr + PAGE_SIZE <= end) { + struct page *page = virt_to_page(pg_addr); + set_bit(PG_arch_1, &page->flags); + pg_addr += PAGE_SIZE; + } +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size must + * match what was provided for in a previous swiotlb_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +void +swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, + int dir) +{ + char *dma_addr = phys_to_virt(dev_addr); + + if (dir == DMA_NONE) + BUG(); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + unmap_single(hwdev, dma_addr, size, dir); + else if (dir == DMA_FROM_DEVICE) + mark_clean(dma_addr, size); +} + +/* + * Make physical memory consistent for a single streaming mode DMA translation + * after a transfer. + * + * If you perform a swiotlb_map_single() but wish to interrogate the buffer + * using the cpu, yet do not wish to teardown the PCI dma mapping, you must + * call this function before doing so. At the next point you give the PCI dma + * address back to the card, you must first perform a + * swiotlb_dma_sync_for_device, and then the device again owns the buffer + */ +void +swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) +{ + char *dma_addr = phys_to_virt(dev_addr); + + if (dir == DMA_NONE) + BUG(); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + sync_single(hwdev, dma_addr, size, dir); + else if (dir == DMA_FROM_DEVICE) + mark_clean(dma_addr, size); +} + +void +swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) +{ + char *dma_addr = phys_to_virt(dev_addr); + + if (dir == DMA_NONE) + BUG(); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + sync_single(hwdev, dma_addr, size, dir); + else if (dir == DMA_FROM_DEVICE) + mark_clean(dma_addr, size); +} + +/* + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the above swiotlb_map_single + * interface. Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for swiotlb_map_single are the + * same here. + */ +int +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, + int dir) +{ + void *addr; + unsigned long dev_addr; + int i; + + if (dir == DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) { + addr = SG_ENT_VIRT_ADDRESS(sg); + dev_addr = virt_to_phys(addr); + if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { + sg->dma_address = (dma_addr_t) virt_to_phys(map_single(hwdev, addr, sg->length, dir)); + if (!sg->dma_address) { + /* Don't panic here, we expect map_sg users + to do proper error handling. */ + swiotlb_full(hwdev, sg->length, dir, 0); + swiotlb_unmap_sg(hwdev, sg - i, i, dir); + sg[0].dma_length = 0; + return 0; + } + } else + sg->dma_address = dev_addr; + sg->dma_length = sg->length; + } + return nelems; +} + +/* + * Unmap a set of streaming mode DMA translations. Again, cpu read rules + * concerning calls here are the same as for swiotlb_unmap_single() above. + */ +void +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, + int dir) +{ + int i; + + if (dir == DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir); + else if (dir == DMA_FROM_DEVICE) + mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); +} + +/* + * Make physical memory consistent for a set of streaming mode DMA translations + * after a transfer. + * + * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules + * and usage. + */ +void +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir) +{ + int i; + + if (dir == DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + sync_single(hwdev, (void *) sg->dma_address, + sg->dma_length, dir); +} + +void +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir) +{ + int i; + + if (dir == DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + sync_single(hwdev, (void *) sg->dma_address, + sg->dma_length, dir); +} + +int +swiotlb_dma_mapping_error(dma_addr_t dma_addr) +{ + return (dma_addr == virt_to_phys(io_tlb_overflow_buffer)); +} + +/* + * Return whether the given PCI device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during PCI bus mastering, then you would pass 0x00ffffff as the mask to + * this function. + */ +int +swiotlb_dma_supported (struct device *hwdev, u64 mask) +{ + return (virt_to_phys (io_tlb_end) - 1) <= mask; +} + +EXPORT_SYMBOL(swiotlb_init); +EXPORT_SYMBOL(swiotlb_map_single); +EXPORT_SYMBOL(swiotlb_unmap_single); +EXPORT_SYMBOL(swiotlb_map_sg); +EXPORT_SYMBOL(swiotlb_unmap_sg); +EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); +EXPORT_SYMBOL(swiotlb_sync_single_for_device); +EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); +EXPORT_SYMBOL(swiotlb_sync_sg_for_device); +EXPORT_SYMBOL(swiotlb_dma_mapping_error); +EXPORT_SYMBOL(swiotlb_alloc_coherent); +EXPORT_SYMBOL(swiotlb_free_coherent); +EXPORT_SYMBOL(swiotlb_dma_supported); diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S new file mode 100644 index 000000000000..54e3f7eab8e9 --- /dev/null +++ b/arch/ia64/lib/xor.S @@ -0,0 +1,184 @@ +/* + * arch/ia64/lib/xor.S + * + * Optimized RAID-5 checksumming functions for IA-64. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <asm/asmmacro.h> + +GLOBAL_ENTRY(xor_ia64_2) + .prologue + .fframe 0 + .save ar.pfs, r31 + alloc r31 = ar.pfs, 3, 0, 13, 16 + .save ar.lc, r30 + mov r30 = ar.lc + .save pr, r29 + mov r29 = pr + ;; + .body + mov r8 = in1 + mov ar.ec = 6 + 2 + shr in0 = in0, 3 + ;; + adds in0 = -1, in0 + mov r16 = in1 + mov r17 = in2 + ;; + mov ar.lc = in0 + mov pr.rot = 1 << 16 + ;; + .rotr s1[6+1], s2[6+1], d[2] + .rotp p[6+2] +0: +(p[0]) ld8.nta s1[0] = [r16], 8 +(p[0]) ld8.nta s2[0] = [r17], 8 +(p[6]) xor d[0] = s1[6], s2[6] +(p[6+1])st8.nta [r8] = d[1], 8 + nop.f 0 + br.ctop.dptk.few 0b + ;; + mov ar.lc = r30 + mov pr = r29, -1 + br.ret.sptk.few rp +END(xor_ia64_2) + +GLOBAL_ENTRY(xor_ia64_3) + .prologue + .fframe 0 + .save ar.pfs, r31 + alloc r31 = ar.pfs, 4, 0, 20, 24 + .save ar.lc, r30 + mov r30 = ar.lc + .save pr, r29 + mov r29 = pr + ;; + .body + mov r8 = in1 + mov ar.ec = 6 + 2 + shr in0 = in0, 3 + ;; + adds in0 = -1, in0 + mov r16 = in1 + mov r17 = in2 + ;; + mov r18 = in3 + mov ar.lc = in0 + mov pr.rot = 1 << 16 + ;; + .rotr s1[6+1], s2[6+1], s3[6+1], d[2] + .rotp p[6+2] +0: +(p[0]) ld8.nta s1[0] = [r16], 8 +(p[0]) ld8.nta s2[0] = [r17], 8 +(p[6]) xor d[0] = s1[6], s2[6] + ;; +(p[0]) ld8.nta s3[0] = [r18], 8 +(p[6+1])st8.nta [r8] = d[1], 8 +(p[6]) xor d[0] = d[0], s3[6] + br.ctop.dptk.few 0b + ;; + mov ar.lc = r30 + mov pr = r29, -1 + br.ret.sptk.few rp +END(xor_ia64_3) + +GLOBAL_ENTRY(xor_ia64_4) + .prologue + .fframe 0 + .save ar.pfs, r31 + alloc r31 = ar.pfs, 5, 0, 27, 32 + .save ar.lc, r30 + mov r30 = ar.lc + .save pr, r29 + mov r29 = pr + ;; + .body + mov r8 = in1 + mov ar.ec = 6 + 2 + shr in0 = in0, 3 + ;; + adds in0 = -1, in0 + mov r16 = in1 + mov r17 = in2 + ;; + mov r18 = in3 + mov ar.lc = in0 + mov pr.rot = 1 << 16 + mov r19 = in4 + ;; + .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] + .rotp p[6+2] +0: +(p[0]) ld8.nta s1[0] = [r16], 8 +(p[0]) ld8.nta s2[0] = [r17], 8 +(p[6]) xor d[0] = s1[6], s2[6] +(p[0]) ld8.nta s3[0] = [r18], 8 +(p[0]) ld8.nta s4[0] = [r19], 8 +(p[6]) xor r20 = s3[6], s4[6] + ;; +(p[6+1])st8.nta [r8] = d[1], 8 +(p[6]) xor d[0] = d[0], r20 + br.ctop.dptk.few 0b + ;; + mov ar.lc = r30 + mov pr = r29, -1 + br.ret.sptk.few rp +END(xor_ia64_4) + +GLOBAL_ENTRY(xor_ia64_5) + .prologue + .fframe 0 + .save ar.pfs, r31 + alloc r31 = ar.pfs, 6, 0, 34, 40 + .save ar.lc, r30 + mov r30 = ar.lc + .save pr, r29 + mov r29 = pr + ;; + .body + mov r8 = in1 + mov ar.ec = 6 + 2 + shr in0 = in0, 3 + ;; + adds in0 = -1, in0 + mov r16 = in1 + mov r17 = in2 + ;; + mov r18 = in3 + mov ar.lc = in0 + mov pr.rot = 1 << 16 + mov r19 = in4 + mov r20 = in5 + ;; + .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] + .rotp p[6+2] +0: +(p[0]) ld8.nta s1[0] = [r16], 8 +(p[0]) ld8.nta s2[0] = [r17], 8 +(p[6]) xor d[0] = s1[6], s2[6] +(p[0]) ld8.nta s3[0] = [r18], 8 +(p[0]) ld8.nta s4[0] = [r19], 8 +(p[6]) xor r21 = s3[6], s4[6] + ;; +(p[0]) ld8.nta s5[0] = [r20], 8 +(p[6+1])st8.nta [r8] = d[1], 8 +(p[6]) xor d[0] = d[0], r21 + ;; +(p[6]) xor d[0] = d[0], s5[6] + nop.f 0 + br.ctop.dptk.few 0b + ;; + mov ar.lc = r30 + mov pr = r29, -1 + br.ret.sptk.few rp +END(xor_ia64_5) diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile new file mode 100644 index 000000000000..7078f67887ec --- /dev/null +++ b/arch/ia64/mm/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the ia64-specific parts of the memory manager. +# + +obj-y := init.o fault.o tlb.o extable.o + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_DISCONTIGMEM) += discontig.o +ifndef CONFIG_DISCONTIGMEM +obj-y += contig.o +endif diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c new file mode 100644 index 000000000000..6daf15ac8940 --- /dev/null +++ b/arch/ia64/mm/contig.c @@ -0,0 +1,299 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved. + * + * Routines used by ia64 machines with contiguous (or virtually contiguous) + * memory. + */ +#include <linux/config.h> +#include <linux/bootmem.h> +#include <linux/efi.h> +#include <linux/mm.h> +#include <linux/swap.h> + +#include <asm/meminit.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/mca.h> + +#ifdef CONFIG_VIRTUAL_MEM_MAP +static unsigned long num_dma_physpages; +#endif + +/** + * show_mem - display a memory statistics summary + * + * Just walks the pages in the system and describes where they're allocated. + */ +void +show_mem (void) +{ + int i, total = 0, reserved = 0; + int shared = 0, cached = 0; + + printk("Mem-info:\n"); + show_free_areas(); + + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + i = max_mapnr; + while (i-- > 0) { + if (!pfn_valid(i)) + continue; + total++; + if (PageReserved(mem_map+i)) + reserved++; + else if (PageSwapCache(mem_map+i)) + cached++; + else if (page_count(mem_map + i)) + shared += page_count(mem_map + i) - 1; + } + printk("%d pages of RAM\n", total); + printk("%d reserved pages\n", reserved); + printk("%d pages shared\n", shared); + printk("%d pages swap cached\n", cached); + printk("%ld pages in page table cache\n", pgtable_cache_size); +} + +/* physical address where the bootmem map is located */ +unsigned long bootmap_start; + +/** + * find_max_pfn - adjust the maximum page number callback + * @start: start of range + * @end: end of range + * @arg: address of pointer to global max_pfn variable + * + * Passed as a callback function to efi_memmap_walk() to determine the highest + * available page frame number in the system. + */ +int +find_max_pfn (unsigned long start, unsigned long end, void *arg) +{ + unsigned long *max_pfnp = arg, pfn; + + pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT; + if (pfn > *max_pfnp) + *max_pfnp = pfn; + return 0; +} + +/** + * find_bootmap_location - callback to find a memory area for the bootmap + * @start: start of region + * @end: end of region + * @arg: unused callback data + * + * Find a place to put the bootmap and return its starting address in + * bootmap_start. This address must be page-aligned. + */ +int +find_bootmap_location (unsigned long start, unsigned long end, void *arg) +{ + unsigned long needed = *(unsigned long *)arg; + unsigned long range_start, range_end, free_start; + int i; + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + start += PAGE_SIZE; + if (start >= end) + return 0; + } +#endif + + free_start = PAGE_OFFSET; + + for (i = 0; i < num_rsvd_regions; i++) { + range_start = max(start, free_start); + range_end = min(end, rsvd_region[i].start & PAGE_MASK); + + free_start = PAGE_ALIGN(rsvd_region[i].end); + + if (range_end <= range_start) + continue; /* skip over empty range */ + + if (range_end - range_start >= needed) { + bootmap_start = __pa(range_start); + return -1; /* done */ + } + + /* nothing more available in this segment */ + if (range_end == end) + return 0; + } + return 0; +} + +/** + * find_memory - setup memory map + * + * Walk the EFI memory map and find usable memory for the system, taking + * into account reserved areas. + */ +void +find_memory (void) +{ + unsigned long bootmap_size; + + reserve_memory(); + + /* first find highest page frame number */ + max_pfn = 0; + efi_memmap_walk(find_max_pfn, &max_pfn); + + /* how many bytes to cover all the pages */ + bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT; + + /* look for a location to hold the bootmap */ + bootmap_start = ~0UL; + efi_memmap_walk(find_bootmap_location, &bootmap_size); + if (bootmap_start == ~0UL) + panic("Cannot find %ld bytes for bootmap\n", bootmap_size); + + bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); + + /* Free all available memory, then mark bootmem-map as being in use. */ + efi_memmap_walk(filter_rsvd_memory, free_bootmem); + reserve_bootmem(bootmap_start, bootmap_size); + + find_initrd(); +} + +#ifdef CONFIG_SMP +/** + * per_cpu_init - setup per-cpu variables + * + * Allocate and setup per-cpu data areas. + */ +void * +per_cpu_init (void) +{ + void *cpu_data; + int cpu; + + /* + * get_free_pages() cannot be used before cpu_init() done. BSP + * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls + * get_zeroed_page(). + */ + if (smp_processor_id() == 0) { + cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, + PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start; + cpu_data += PERCPU_PAGE_SIZE; + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + } + } + return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; +} +#endif /* CONFIG_SMP */ + +static int +count_pages (u64 start, u64 end, void *arg) +{ + unsigned long *count = arg; + + *count += (end - start) >> PAGE_SHIFT; + return 0; +} + +#ifdef CONFIG_VIRTUAL_MEM_MAP +static int +count_dma_pages (u64 start, u64 end, void *arg) +{ + unsigned long *count = arg; + + if (start < MAX_DMA_ADDRESS) + *count += (min(end, MAX_DMA_ADDRESS) - start) >> PAGE_SHIFT; + return 0; +} +#endif + +/* + * Set up the page tables. + */ + +void +paging_init (void) +{ + unsigned long max_dma; + unsigned long zones_size[MAX_NR_ZONES]; +#ifdef CONFIG_VIRTUAL_MEM_MAP + unsigned long zholes_size[MAX_NR_ZONES]; + unsigned long max_gap; +#endif + + /* initialize mem_map[] */ + + memset(zones_size, 0, sizeof(zones_size)); + + num_physpages = 0; + efi_memmap_walk(count_pages, &num_physpages); + + max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + +#ifdef CONFIG_VIRTUAL_MEM_MAP + memset(zholes_size, 0, sizeof(zholes_size)); + + num_dma_physpages = 0; + efi_memmap_walk(count_dma_pages, &num_dma_physpages); + + if (max_low_pfn < max_dma) { + zones_size[ZONE_DMA] = max_low_pfn; + zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages; + } else { + zones_size[ZONE_DMA] = max_dma; + zholes_size[ZONE_DMA] = max_dma - num_dma_physpages; + if (num_physpages > num_dma_physpages) { + zones_size[ZONE_NORMAL] = max_low_pfn - max_dma; + zholes_size[ZONE_NORMAL] = + ((max_low_pfn - max_dma) - + (num_physpages - num_dma_physpages)); + } + } + + max_gap = 0; + efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); + if (max_gap < LARGE_GAP) { + vmem_map = (struct page *) 0; + free_area_init_node(0, &contig_page_data, zones_size, 0, + zholes_size); + } else { + unsigned long map_size; + + /* allocate virtual_mem_map */ + + map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page)); + vmalloc_end -= map_size; + vmem_map = (struct page *) vmalloc_end; + efi_memmap_walk(create_mem_map_page_table, NULL); + + NODE_DATA(0)->node_mem_map = vmem_map; + free_area_init_node(0, &contig_page_data, zones_size, + 0, zholes_size); + + printk("Virtual mem_map starts at 0x%p\n", mem_map); + } +#else /* !CONFIG_VIRTUAL_MEM_MAP */ + if (max_low_pfn < max_dma) + zones_size[ZONE_DMA] = max_low_pfn; + else { + zones_size[ZONE_DMA] = max_dma; + zones_size[ZONE_NORMAL] = max_low_pfn - max_dma; + } + free_area_init(zones_size); +#endif /* !CONFIG_VIRTUAL_MEM_MAP */ + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); +} diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c new file mode 100644 index 000000000000..3456a9b6971e --- /dev/null +++ b/arch/ia64/mm/discontig.c @@ -0,0 +1,737 @@ +/* + * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2001 Intel Corp. + * Copyright (c) 2001 Tony Luck <tony.luck@intel.com> + * Copyright (c) 2002 NEC Corp. + * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com> + * Copyright (c) 2004 Silicon Graphics, Inc + * Russ Anderson <rja@sgi.com> + * Jesse Barnes <jbarnes@sgi.com> + * Jack Steiner <steiner@sgi.com> + */ + +/* + * Platform initialization for Discontig Memory + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/bootmem.h> +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/nodemask.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/meminit.h> +#include <asm/numa.h> +#include <asm/sections.h> + +/* + * Track per-node information needed to setup the boot memory allocator, the + * per-node areas, and the real VM. + */ +struct early_node_data { + struct ia64_node_data *node_data; + pg_data_t *pgdat; + unsigned long pernode_addr; + unsigned long pernode_size; + struct bootmem_data bootmem_data; + unsigned long num_physpages; + unsigned long num_dma_physpages; + unsigned long min_pfn; + unsigned long max_pfn; +}; + +static struct early_node_data mem_data[MAX_NUMNODES] __initdata; + +/** + * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node + * + * This function will move nodes with only CPUs (no memory) + * to a node with memory which is at the minimum numa_slit distance. + * Any reassigments will result in the compression of the nodes + * and renumbering the nid values where appropriate. + * The static declarations below are to avoid large stack size which + * makes the code not re-entrant. + */ +static void __init reassign_cpu_only_nodes(void) +{ + struct node_memblk_s *p; + int i, j, k, nnode, nid, cpu, cpunid, pxm; + u8 cslit, slit; + static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata; + static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata; + static int node_flip[MAX_NUMNODES] __initdata; + static int old_nid_map[NR_CPUS] __initdata; + + for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++) + if (!test_bit(p->nid, (void *) nodes_with_mem)) { + set_bit(p->nid, (void *) nodes_with_mem); + nnode++; + } + + /* + * All nids with memory. + */ + if (nnode == num_online_nodes()) + return; + + /* + * Change nids and attempt to migrate CPU-only nodes + * to the best numa_slit (closest neighbor) possible. + * For reassigned CPU nodes a nid can't be arrived at + * until after this loop because the target nid's new + * identity might not have been established yet. So + * new nid values are fabricated above num_online_nodes() and + * mapped back later to their true value. + */ + /* MCD - This code is a bit complicated, but may be unnecessary now. + * We can now handle much more interesting node-numbering. + * The old requirement that 0 <= nid <= numnodes <= MAX_NUMNODES + * and that there be no holes in the numbering 0..numnodes + * has become simply 0 <= nid <= MAX_NUMNODES. + */ + nid = 0; + for_each_online_node(i) { + if (test_bit(i, (void *) nodes_with_mem)) { + /* + * Save original nid value for numa_slit + * fixup and node_cpuid reassignments. + */ + node_flip[nid] = i; + + if (i == nid) { + nid++; + continue; + } + + for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++) + if (p->nid == i) + p->nid = nid; + + cpunid = nid; + nid++; + } else + cpunid = MAX_NUMNODES; + + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (node_cpuid[cpu].nid == i) { + /* + * For nodes not being reassigned just + * fix the cpu's nid and reverse pxm map + */ + if (cpunid < MAX_NUMNODES) { + pxm = nid_to_pxm_map[i]; + pxm_to_nid_map[pxm] = + node_cpuid[cpu].nid = cpunid; + continue; + } + + /* + * For nodes being reassigned, find best node by + * numa_slit information and then make a temporary + * nid value based on current nid and num_online_nodes(). + */ + slit = 0xff; + k = 2*num_online_nodes(); + for_each_online_node(j) { + if (i == j) + continue; + else if (test_bit(j, (void *) nodes_with_mem)) { + cslit = numa_slit[i * num_online_nodes() + j]; + if (cslit < slit) { + k = num_online_nodes() + j; + slit = cslit; + } + } + } + + /* save old nid map so we can update the pxm */ + old_nid_map[cpu] = node_cpuid[cpu].nid; + node_cpuid[cpu].nid = k; + } + } + + /* + * Fixup temporary nid values for CPU-only nodes. + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (node_cpuid[cpu].nid == (2*num_online_nodes())) { + pxm = nid_to_pxm_map[old_nid_map[cpu]]; + pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1; + } else { + for (i = 0; i < nnode; i++) { + if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes())) + continue; + + pxm = nid_to_pxm_map[old_nid_map[cpu]]; + pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i; + break; + } + } + + /* + * Fix numa_slit by compressing from larger + * nid array to reduced nid array. + */ + for (i = 0; i < nnode; i++) + for (j = 0; j < nnode; j++) + numa_slit_fix[i * nnode + j] = + numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]]; + + memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit)); + + nodes_clear(node_online_map); + for (i = 0; i < nnode; i++) + node_set_online(i); + + return; +} + +/* + * To prevent cache aliasing effects, align per-node structures so that they + * start at addresses that are strided by node number. + */ +#define NODEDATA_ALIGN(addr, node) \ + ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + (node)*PERCPU_PAGE_SIZE) + +/** + * build_node_maps - callback to setup bootmem structs for each node + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * We allocate a struct bootmem_data for each piece of memory that we wish to + * treat as a virtually contiguous block (i.e. each node). Each such block + * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down + * if necessary. Any non-existent pages will simply be part of the virtual + * memmap. We also update min_low_pfn and max_low_pfn here as we receive + * memory ranges from the caller. + */ +static int __init build_node_maps(unsigned long start, unsigned long len, + int node) +{ + unsigned long cstart, epfn, end = start + len; + struct bootmem_data *bdp = &mem_data[node].bootmem_data; + + epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT; + cstart = GRANULEROUNDDOWN(start); + + if (!bdp->node_low_pfn) { + bdp->node_boot_start = cstart; + bdp->node_low_pfn = epfn; + } else { + bdp->node_boot_start = min(cstart, bdp->node_boot_start); + bdp->node_low_pfn = max(epfn, bdp->node_low_pfn); + } + + min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT); + max_low_pfn = max(max_low_pfn, bdp->node_low_pfn); + + return 0; +} + +/** + * early_nr_phys_cpus_node - return number of physical cpus on a given node + * @node: node to check + * + * Count the number of physical cpus on @node. These are cpus that actually + * exist. We can't use nr_cpus_node() yet because + * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been + * called yet. + */ +static int early_nr_phys_cpus_node(int node) +{ + int cpu, n = 0; + + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (node == node_cpuid[cpu].nid) + if ((cpu == 0) || node_cpuid[cpu].phys_id) + n++; + + return n; +} + + +/** + * early_nr_cpus_node - return number of cpus on a given node + * @node: node to check + * + * Count the number of cpus on @node. We can't use nr_cpus_node() yet because + * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been + * called yet. Note that node 0 will also count all non-existent cpus. + */ +static int early_nr_cpus_node(int node) +{ + int cpu, n = 0; + + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (node == node_cpuid[cpu].nid) + n++; + + return n; +} + +/** + * find_pernode_space - allocate memory for memory map and per-node structures + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * This routine reserves space for the per-cpu data struct, the list of + * pg_data_ts and the per-node data struct. Each node will have something like + * the following in the first chunk of addr. space large enough to hold it. + * + * ________________________ + * | | + * |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first + * | PERCPU_PAGE_SIZE * | start and length big enough + * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus. + * |------------------------| + * | local pg_data_t * | + * |------------------------| + * | local ia64_node_data | + * |------------------------| + * | ??? | + * |________________________| + * + * Once this space has been set aside, the bootmem maps are initialized. We + * could probably move the allocation of the per-cpu and ia64_node_data space + * outside of this function and use alloc_bootmem_node(), but doing it here + * is straightforward and we get the alignments we want so... + */ +static int __init find_pernode_space(unsigned long start, unsigned long len, + int node) +{ + unsigned long epfn, cpu, cpus, phys_cpus; + unsigned long pernodesize = 0, pernode, pages, mapsize; + void *cpu_data; + struct bootmem_data *bdp = &mem_data[node].bootmem_data; + + epfn = (start + len) >> PAGE_SHIFT; + + pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT); + mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT; + + /* + * Make sure this memory falls within this node's usable memory + * since we may have thrown some away in build_maps(). + */ + if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn) + return 0; + + /* Don't setup this node's local space twice... */ + if (mem_data[node].pernode_addr) + return 0; + + /* + * Calculate total size needed, incl. what's necessary + * for good alignment and alias prevention. + */ + cpus = early_nr_cpus_node(node); + phys_cpus = early_nr_phys_cpus_node(node); + pernodesize += PERCPU_PAGE_SIZE * cpus; + pernodesize += node * L1_CACHE_BYTES; + pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); + pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + pernodesize = PAGE_ALIGN(pernodesize); + pernode = NODEDATA_ALIGN(start, node); + + /* Is this range big enough for what we want to store here? */ + if (start + len > (pernode + pernodesize + mapsize)) { + mem_data[node].pernode_addr = pernode; + mem_data[node].pernode_size = pernodesize; + memset(__va(pernode), 0, pernodesize); + + cpu_data = (void *)pernode; + pernode += PERCPU_PAGE_SIZE * cpus; + pernode += node * L1_CACHE_BYTES; + + mem_data[node].pgdat = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + mem_data[node].node_data = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + + mem_data[node].pgdat->bdata = bdp; + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + /* + * Copy the static per-cpu data into the region we + * just set aside and then setup __per_cpu_offset + * for each CPU on this node. + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (node == node_cpuid[cpu].nid) { + memcpy(__va(cpu_data), __phys_per_cpu_start, + __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char*)__va(cpu_data) - + __per_cpu_start; + cpu_data += PERCPU_PAGE_SIZE; + } + } + } + + return 0; +} + +/** + * free_node_bootmem - free bootmem allocator memory for use + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * Simply calls the bootmem allocator to free the specified ranged from + * the given pg_data_t's bdata struct. After this function has been called + * for all the entries in the EFI memory map, the bootmem allocator will + * be ready to service allocation requests. + */ +static int __init free_node_bootmem(unsigned long start, unsigned long len, + int node) +{ + free_bootmem_node(mem_data[node].pgdat, start, len); + + return 0; +} + +/** + * reserve_pernode_space - reserve memory for per-node space + * + * Reserve the space used by the bootmem maps & per-node space in the boot + * allocator so that when we actually create the real mem maps we don't + * use their memory. + */ +static void __init reserve_pernode_space(void) +{ + unsigned long base, size, pages; + struct bootmem_data *bdp; + int node; + + for_each_online_node(node) { + pg_data_t *pdp = mem_data[node].pgdat; + + bdp = pdp->bdata; + + /* First the bootmem_map itself */ + pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT); + size = bootmem_bootmap_pages(pages) << PAGE_SHIFT; + base = __pa(bdp->node_bootmem_map); + reserve_bootmem_node(pdp, base, size); + + /* Now the per-node space */ + size = mem_data[node].pernode_size; + base = __pa(mem_data[node].pernode_addr); + reserve_bootmem_node(pdp, base, size); + } +} + +/** + * initialize_pernode_data - fixup per-cpu & per-node pointers + * + * Each node's per-node area has a copy of the global pg_data_t list, so + * we copy that to each node here, as well as setting the per-cpu pointer + * to the local node data structure. The active_cpus field of the per-node + * structure gets setup by the platform_cpu_init() function later. + */ +static void __init initialize_pernode_data(void) +{ + int cpu, node; + pg_data_t *pgdat_list[MAX_NUMNODES]; + + for_each_online_node(node) + pgdat_list[node] = mem_data[node].pgdat; + + /* Copy the pg_data_t list to each node and init the node field */ + for_each_online_node(node) { + memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list, + sizeof(pgdat_list)); + } + + /* Set the node_data pointer for each per-cpu struct */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + node = node_cpuid[cpu].nid; + per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data; + } +} + +/** + * find_memory - walk the EFI memory map and setup the bootmem allocator + * + * Called early in boot to setup the bootmem allocator, and to + * allocate the per-cpu and per-node structures. + */ +void __init find_memory(void) +{ + int node; + + reserve_memory(); + + if (num_online_nodes() == 0) { + printk(KERN_ERR "node info missing!\n"); + node_set_online(0); + } + + min_low_pfn = -1; + max_low_pfn = 0; + + if (num_online_nodes() > 1) + reassign_cpu_only_nodes(); + + /* These actually end up getting called by call_pernode_memory() */ + efi_memmap_walk(filter_rsvd_memory, build_node_maps); + efi_memmap_walk(filter_rsvd_memory, find_pernode_space); + + /* + * Initialize the boot memory maps in reverse order since that's + * what the bootmem allocator expects + */ + for (node = MAX_NUMNODES - 1; node >= 0; node--) { + unsigned long pernode, pernodesize, map; + struct bootmem_data *bdp; + + if (!node_online(node)) + continue; + + bdp = &mem_data[node].bootmem_data; + pernode = mem_data[node].pernode_addr; + pernodesize = mem_data[node].pernode_size; + map = pernode + pernodesize; + + /* Sanity check... */ + if (!pernode) + panic("pernode space for node %d " + "could not be allocated!", node); + + init_bootmem_node(mem_data[node].pgdat, + map>>PAGE_SHIFT, + bdp->node_boot_start>>PAGE_SHIFT, + bdp->node_low_pfn); + } + + efi_memmap_walk(filter_rsvd_memory, free_node_bootmem); + + reserve_pernode_space(); + initialize_pernode_data(); + + max_pfn = max_low_pfn; + + find_initrd(); +} + +/** + * per_cpu_init - setup per-cpu variables + * + * find_pernode_space() does most of this already, we just need to set + * local_per_cpu_offset + */ +void *per_cpu_init(void) +{ + int cpu; + + if (smp_processor_id() == 0) { + for (cpu = 0; cpu < NR_CPUS; cpu++) { + per_cpu(local_per_cpu_offset, cpu) = + __per_cpu_offset[cpu]; + } + } + + return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; +} + +/** + * show_mem - give short summary of memory stats + * + * Shows a simple page count of reserved and used pages in the system. + * For discontig machines, it does this on a per-pgdat basis. + */ +void show_mem(void) +{ + int i, total_reserved = 0; + int total_shared = 0, total_cached = 0; + unsigned long total_present = 0; + pg_data_t *pgdat; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + for_each_pgdat(pgdat) { + unsigned long present = pgdat->node_present_pages; + int shared = 0, cached = 0, reserved = 0; + printk("Node ID: %d\n", pgdat->node_id); + for(i = 0; i < pgdat->node_spanned_pages; i++) { + if (!ia64_pfn_valid(pgdat->node_start_pfn+i)) + continue; + if (PageReserved(pgdat->node_mem_map+i)) + reserved++; + else if (PageSwapCache(pgdat->node_mem_map+i)) + cached++; + else if (page_count(pgdat->node_mem_map+i)) + shared += page_count(pgdat->node_mem_map+i)-1; + } + total_present += present; + total_reserved += reserved; + total_cached += cached; + total_shared += shared; + printk("\t%ld pages of RAM\n", present); + printk("\t%d reserved pages\n", reserved); + printk("\t%d pages shared\n", shared); + printk("\t%d pages swap cached\n", cached); + } + printk("%ld pages of RAM\n", total_present); + printk("%d reserved pages\n", total_reserved); + printk("%d pages shared\n", total_shared); + printk("%d pages swap cached\n", total_cached); + printk("Total of %ld pages in page table cache\n", pgtable_cache_size); + printk("%d free buffer pages\n", nr_free_buffer_pages()); +} + +/** + * call_pernode_memory - use SRAT to call callback functions with node info + * @start: physical start of range + * @len: length of range + * @arg: function to call for each range + * + * efi_memmap_walk() knows nothing about layout of memory across nodes. Find + * out to which node a block of memory belongs. Ignore memory that we cannot + * identify, and split blocks that run across multiple nodes. + * + * Take this opportunity to round the start address up and the end address + * down to page boundaries. + */ +void call_pernode_memory(unsigned long start, unsigned long len, void *arg) +{ + unsigned long rs, re, end = start + len; + void (*func)(unsigned long, unsigned long, int); + int i; + + start = PAGE_ALIGN(start); + end &= PAGE_MASK; + if (start >= end) + return; + + func = arg; + + if (!num_node_memblks) { + /* No SRAT table, so assume one node (node 0) */ + if (start < end) + (*func)(start, end - start, 0); + return; + } + + for (i = 0; i < num_node_memblks; i++) { + rs = max(start, node_memblk[i].start_paddr); + re = min(end, node_memblk[i].start_paddr + + node_memblk[i].size); + + if (rs < re) + (*func)(rs, re - rs, node_memblk[i].nid); + + if (re == end) + break; + } +} + +/** + * count_node_pages - callback to build per-node memory info structures + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * Each node has it's own number of physical pages, DMAable pages, start, and + * end page frame number. This routine will be called by call_pernode_memory() + * for each piece of usable memory and will setup these values for each node. + * Very similar to build_maps(). + */ +static __init int count_node_pages(unsigned long start, unsigned long len, int node) +{ + unsigned long end = start + len; + + mem_data[node].num_physpages += len >> PAGE_SHIFT; + if (start <= __pa(MAX_DMA_ADDRESS)) + mem_data[node].num_dma_physpages += + (min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT; + start = GRANULEROUNDDOWN(start); + start = ORDERROUNDDOWN(start); + end = GRANULEROUNDUP(end); + mem_data[node].max_pfn = max(mem_data[node].max_pfn, + end >> PAGE_SHIFT); + mem_data[node].min_pfn = min(mem_data[node].min_pfn, + start >> PAGE_SHIFT); + + return 0; +} + +/** + * paging_init - setup page tables + * + * paging_init() sets up the page tables for each node of the system and frees + * the bootmem allocator memory for general use. + */ +void __init paging_init(void) +{ + unsigned long max_dma; + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zholes_size[MAX_NR_ZONES]; + unsigned long pfn_offset = 0; + int node; + + max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + + /* so min() will work in count_node_pages */ + for_each_online_node(node) + mem_data[node].min_pfn = ~0UL; + + efi_memmap_walk(filter_rsvd_memory, count_node_pages); + + for_each_online_node(node) { + memset(zones_size, 0, sizeof(zones_size)); + memset(zholes_size, 0, sizeof(zholes_size)); + + num_physpages += mem_data[node].num_physpages; + + if (mem_data[node].min_pfn >= max_dma) { + /* All of this node's memory is above ZONE_DMA */ + zones_size[ZONE_NORMAL] = mem_data[node].max_pfn - + mem_data[node].min_pfn; + zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn - + mem_data[node].min_pfn - + mem_data[node].num_physpages; + } else if (mem_data[node].max_pfn < max_dma) { + /* All of this node's memory is in ZONE_DMA */ + zones_size[ZONE_DMA] = mem_data[node].max_pfn - + mem_data[node].min_pfn; + zholes_size[ZONE_DMA] = mem_data[node].max_pfn - + mem_data[node].min_pfn - + mem_data[node].num_dma_physpages; + } else { + /* This node has memory in both zones */ + zones_size[ZONE_DMA] = max_dma - + mem_data[node].min_pfn; + zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - + mem_data[node].num_dma_physpages; + zones_size[ZONE_NORMAL] = mem_data[node].max_pfn - + max_dma; + zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] - + (mem_data[node].num_physpages - + mem_data[node].num_dma_physpages); + } + + if (node == 0) { + vmalloc_end -= + PAGE_ALIGN(max_low_pfn * sizeof(struct page)); + vmem_map = (struct page *) vmalloc_end; + + efi_memmap_walk(create_mem_map_page_table, NULL); + printk("Virtual mem_map starts at 0x%p\n", vmem_map); + } + + pfn_offset = mem_data[node].min_pfn; + + NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset; + free_area_init_node(node, NODE_DATA(node), zones_size, + pfn_offset, zholes_size); + } + + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); +} diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c new file mode 100644 index 000000000000..6d259e34f359 --- /dev/null +++ b/arch/ia64/mm/extable.c @@ -0,0 +1,90 @@ +/* + * Kernel exception handling table support. Derived from arch/alpha/mm/extable.c. + * + * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <linux/config.h> +#include <linux/sort.h> + +#include <asm/uaccess.h> +#include <asm/module.h> + +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *l = a, *r = b; + u64 lip = (u64) &l->addr + l->addr; + u64 rip = (u64) &r->addr + r->addr; + + /* avoid overflow */ + if (lip > rip) + return 1; + if (lip < rip) + return -1; + return 0; +} + +static void swap_ex(void *a, void *b, int size) +{ + struct exception_table_entry *l = a, *r = b, tmp; + u64 delta = (u64) r - (u64) l; + + tmp = *l; + l->addr = r->addr + delta; + l->cont = r->cont + delta; + r->addr = tmp.addr - delta; + r->cont = tmp.cont - delta; +} + +/* + * Sort the exception table. It's usually already sorted, but there + * may be unordered entries due to multiple text sections (such as the + * .init text section). Note that the exception-table-entries contain + * location-relative addresses, which requires a bit of care during + * sorting to avoid overflows in the offset members (e.g., it would + * not be safe to make a temporary copy of an exception-table entry on + * the stack, because the stack may be more than 2GB away from the + * exception-table). + */ +void sort_extable (struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + sort(start, finish - start, sizeof(struct exception_table_entry), + cmp_ex, swap_ex); +} + +const struct exception_table_entry * +search_extable (const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long ip) +{ + const struct exception_table_entry *mid; + unsigned long mid_ip; + long diff; + + while (first <= last) { + mid = &first[(last - first)/2]; + mid_ip = (u64) &mid->addr + mid->addr; + diff = mid_ip - ip; + if (diff == 0) + return mid; + else if (diff < 0) + first = mid + 1; + else + last = mid - 1; + } + return NULL; +} + +void +ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e) +{ + long fix = (u64) &e->cont + e->cont; + + regs->r8 = -EFAULT; + if (fix & 4) + regs->r9 = 0; + regs->cr_iip = fix & ~0xf; + ia64_psr(regs)->ri = fix & 0x3; /* set continuation slot number */ +} diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c new file mode 100644 index 000000000000..da859125aaef --- /dev/null +++ b/arch/ia64/mm/fault.c @@ -0,0 +1,261 @@ +/* + * MMU fault handling support. + * + * Copyright (C) 1998-2002 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> + +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/uaccess.h> + +extern void die (char *, struct pt_regs *, long); + +/* + * This routine is analogous to expand_stack() but instead grows the + * register backing store (which grows towards higher addresses). + * Since the register backing store is access sequentially, we + * disallow growing the RBS by more than a page at a time. Note that + * the VM_GROWSUP flag can be set on any VM area but that's fine + * because the total process size is still limited by RLIMIT_STACK and + * RLIMIT_AS. + */ +static inline long +expand_backing_store (struct vm_area_struct *vma, unsigned long address) +{ + unsigned long grow; + + grow = PAGE_SIZE >> PAGE_SHIFT; + if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur + || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur)) + return -ENOMEM; + vma->vm_end += PAGE_SIZE; + vma->vm_mm->total_vm += grow; + if (vma->vm_flags & VM_LOCKED) + vma->vm_mm->locked_vm += grow; + __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow); + return 0; +} + +/* + * Return TRUE if ADDRESS points at a page in the kernel's mapped segment + * (inside region 5, on ia64) and that page is present. + */ +static int +mapped_kernel_page_is_present (unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; + + pgd = pgd_offset_k(address); + if (pgd_none(*pgd) || pgd_bad(*pgd)) + return 0; + + pud = pud_offset(pgd, address); + if (pud_none(*pud) || pud_bad(*pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return 0; + + ptep = pte_offset_kernel(pmd, address); + if (!ptep) + return 0; + + pte = *ptep; + return pte_present(pte); +} + +void +ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) +{ + int signal = SIGSEGV, code = SEGV_MAPERR; + struct vm_area_struct *vma, *prev_vma; + struct mm_struct *mm = current->mm; + struct siginfo si; + unsigned long mask; + + /* + * If we're in an interrupt or have no user context, we must not take the fault.. + */ + if (in_atomic() || !mm) + goto no_context; + +#ifdef CONFIG_VIRTUAL_MEM_MAP + /* + * If fault is in region 5 and we are in the kernel, we may already + * have the mmap_sem (pfn_valid macro is called during mmap). There + * is no vma for region 5 addr's anyway, so skip getting the semaphore + * and go directly to the exception handling code. + */ + + if ((REGION_NUMBER(address) == 5) && !user_mode(regs)) + goto bad_area_no_up; +#endif + + down_read(&mm->mmap_sem); + + vma = find_vma_prev(mm, address, &prev_vma); + if (!vma) + goto bad_area; + + /* find_vma_prev() returns vma such that address < vma->vm_end or NULL */ + if (address < vma->vm_start) + goto check_expansion; + + good_area: + code = SEGV_ACCERR; + + /* OK, we've got a good vm_area for this memory area. Check the access permissions: */ + +# define VM_READ_BIT 0 +# define VM_WRITE_BIT 1 +# define VM_EXEC_BIT 2 + +# if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \ + || (1 << VM_EXEC_BIT) != VM_EXEC) +# error File is out of sync with <linux/mm.h>. Please update. +# endif + + mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) + | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT) + | (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT)); + + if ((vma->vm_flags & mask) != mask) + goto bad_area; + + survive: + /* + * If for any reason at all we couldn't handle the fault, make + * sure we exit gracefully rather than endlessly redo the + * fault. + */ + switch (handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0)) { + case VM_FAULT_MINOR: + ++current->min_flt; + break; + case VM_FAULT_MAJOR: + ++current->maj_flt; + break; + case VM_FAULT_SIGBUS: + /* + * We ran out of memory, or some other thing happened + * to us that made us unable to handle the page fault + * gracefully. + */ + signal = SIGBUS; + goto bad_area; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + up_read(&mm->mmap_sem); + return; + + check_expansion: + if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start) + || REGION_OFFSET(address) >= RGN_MAP_LIMIT) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + } else { + vma = prev_vma; + if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start) + || REGION_OFFSET(address) >= RGN_MAP_LIMIT) + goto bad_area; + if (expand_backing_store(vma, address)) + goto bad_area; + } + goto good_area; + + bad_area: + up_read(&mm->mmap_sem); +#ifdef CONFIG_VIRTUAL_MEM_MAP + bad_area_no_up: +#endif + if ((isr & IA64_ISR_SP) + || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) + { + /* + * This fault was due to a speculative load or lfetch.fault, set the "ed" + * bit in the psr to ensure forward progress. (Target register will get a + * NaT for ld.s, lfetch will be canceled.) + */ + ia64_psr(regs)->ed = 1; + return; + } + if (user_mode(regs)) { + si.si_signo = signal; + si.si_errno = 0; + si.si_code = code; + si.si_addr = (void __user *) address; + si.si_isr = isr; + si.si_flags = __ISR_VALID; + force_sig_info(signal, &si, current); + return; + } + + no_context: + if (isr & IA64_ISR_SP) { + /* + * This fault was due to a speculative load set the "ed" bit in the psr to + * ensure forward progress (target register will get a NaT). + */ + ia64_psr(regs)->ed = 1; + return; + } + + if (ia64_done_with_exception(regs)) + return; + + /* + * Since we have no vma's for region 5, we might get here even if the address is + * valid, due to the VHPT walker inserting a non present translation that becomes + * stale. If that happens, the non present fault handler already purged the stale + * translation, which fixed the problem. So, we check to see if the translation is + * valid, and return if it is. + */ + if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to terminate things + * with extreme prejudice. + */ + bust_spinlocks(1); + + if (address < PAGE_SIZE) + printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address); + else + printk(KERN_ALERT "Unable to handle kernel paging request at " + "virtual address %016lx\n", address); + die("Oops", regs, isr); + bust_spinlocks(0); + do_exit(SIGKILL); + return; + + out_of_memory: + up_read(&mm->mmap_sem); + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk(KERN_CRIT "VM: killing process %s\n", current->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + goto no_context; +} diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c new file mode 100644 index 000000000000..40ad8328ffd5 --- /dev/null +++ b/arch/ia64/mm/hugetlbpage.c @@ -0,0 +1,357 @@ +/* + * IA-64 Huge TLB Page Support for Kernel. + * + * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com> + * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com> + * + * Sep, 2003: add numa support + * Feb, 2004: dynamic hugetlb page size via boot parameter + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/smp_lock.h> +#include <linux/slab.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT; + +static pte_t * +huge_pte_alloc (struct mm_struct *mm, unsigned long addr) +{ + unsigned long taddr = htlbpage_to_page(addr); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, taddr); + pud = pud_alloc(mm, pgd, taddr); + if (pud) { + pmd = pmd_alloc(mm, pud, taddr); + if (pmd) + pte = pte_alloc_map(mm, pmd, taddr); + } + return pte; +} + +static pte_t * +huge_pte_offset (struct mm_struct *mm, unsigned long addr) +{ + unsigned long taddr = htlbpage_to_page(addr); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, taddr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, taddr); + if (pud_present(*pud)) { + pmd = pmd_offset(pud, taddr); + if (pmd_present(*pmd)) + pte = pte_offset_map(pmd, taddr); + } + } + + return pte; +} + +#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } + +static void +set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma, + struct page *page, pte_t * page_table, int write_access) +{ + pte_t entry; + + add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); + if (write_access) { + entry = + pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + } else + entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); + entry = pte_mkyoung(entry); + mk_pte_huge(entry); + set_pte(page_table, entry); + return; +} +/* + * This function checks for proper alignment of input addr and len parameters. + */ +int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + if (REGION_NUMBER(addr) != REGION_HPAGE) + return -EINVAL; + + return 0; +} + +int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma) +{ + pte_t *src_pte, *dst_pte, entry; + struct page *ptepage; + unsigned long addr = vma->vm_start; + unsigned long end = vma->vm_end; + + while (addr < end) { + dst_pte = huge_pte_alloc(dst, addr); + if (!dst_pte) + goto nomem; + src_pte = huge_pte_offset(src, addr); + entry = *src_pte; + ptepage = pte_page(entry); + get_page(ptepage); + set_pte(dst_pte, entry); + add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); + addr += HPAGE_SIZE; + } + return 0; +nomem: + return -ENOMEM; +} + +int +follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, + struct page **pages, struct vm_area_struct **vmas, + unsigned long *st, int *length, int i) +{ + pte_t *ptep, pte; + unsigned long start = *st; + unsigned long pstart; + int len = *length; + struct page *page; + + do { + pstart = start & HPAGE_MASK; + ptep = huge_pte_offset(mm, start); + pte = *ptep; + +back1: + page = pte_page(pte); + if (pages) { + page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT); + get_page(page); + pages[i] = page; + } + if (vmas) + vmas[i] = vma; + i++; + len--; + start += PAGE_SIZE; + if (((start & HPAGE_MASK) == pstart) && len && + (start < vma->vm_end)) + goto back1; + } while (len && start < vma->vm_end); + *length = len; + *st = start; + return i; +} + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write) +{ + struct page *page; + pte_t *ptep; + + if (REGION_NUMBER(addr) != REGION_HPAGE) + return ERR_PTR(-EINVAL); + + ptep = huge_pte_offset(mm, addr); + if (!ptep || pte_none(*ptep)) + return NULL; + page = pte_page(*ptep); + page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT); + return page; +} +int pmd_huge(pmd_t pmd) +{ + return 0; +} +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) +{ + return NULL; +} + +/* + * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset + * are hugetlb region specific. + */ +void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev, + unsigned long start, unsigned long end) +{ + unsigned long first = start & HUGETLB_PGDIR_MASK; + unsigned long last = end + HUGETLB_PGDIR_SIZE - 1; + struct mm_struct *mm = tlb->mm; + + if (!prev) { + prev = mm->mmap; + if (!prev) + goto no_mmaps; + if (prev->vm_end > start) { + if (last > prev->vm_start) + last = prev->vm_start; + goto no_mmaps; + } + } + for (;;) { + struct vm_area_struct *next = prev->vm_next; + + if (next) { + if (next->vm_start < start) { + prev = next; + continue; + } + if (last > next->vm_start) + last = next->vm_start; + } + if (prev->vm_end > first) + first = prev->vm_end; + break; + } +no_mmaps: + if (last < first) /* for arches with discontiguous pgd indices */ + return; + clear_page_range(tlb, first, last); +} + +void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte; + struct page *page; + + BUG_ON(start & (HPAGE_SIZE - 1)); + BUG_ON(end & (HPAGE_SIZE - 1)); + + for (address = start; address < end; address += HPAGE_SIZE) { + pte = huge_pte_offset(mm, address); + if (pte_none(*pte)) + continue; + page = pte_page(*pte); + put_page(page); + pte_clear(mm, address, pte); + } + add_mm_counter(mm, rss, - ((end - start) >> PAGE_SHIFT)); + flush_tlb_range(vma, start, end); +} + +int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) +{ + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret = 0; + + BUG_ON(vma->vm_start & ~HPAGE_MASK); + BUG_ON(vma->vm_end & ~HPAGE_MASK); + + spin_lock(&mm->page_table_lock); + for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { + unsigned long idx; + pte_t *pte = huge_pte_alloc(mm, addr); + struct page *page; + + if (!pte) { + ret = -ENOMEM; + goto out; + } + if (!pte_none(*pte)) + continue; + + idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) + + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); + page = find_get_page(mapping, idx); + if (!page) { + /* charge the fs quota first */ + if (hugetlb_get_quota(mapping)) { + ret = -ENOMEM; + goto out; + } + page = alloc_huge_page(); + if (!page) { + hugetlb_put_quota(mapping); + ret = -ENOMEM; + goto out; + } + ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); + if (! ret) { + unlock_page(page); + } else { + hugetlb_put_quota(mapping); + page_cache_release(page); + goto out; + } + } + set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); + } +out: + spin_unlock(&mm->page_table_lock); + return ret; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct vm_area_struct *vmm; + + if (len > RGN_MAP_LIMIT) + return -ENOMEM; + if (len & ~HPAGE_MASK) + return -EINVAL; + /* This code assumes that REGION_HPAGE != 0. */ + if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1))) + addr = HPAGE_REGION_BASE; + else + addr = ALIGN(addr, HPAGE_SIZE); + for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { + /* At this point: (!vmm || addr < vmm->vm_end). */ + if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT) + return -ENOMEM; + if (!vmm || (addr + len) <= vmm->vm_start) + return addr; + addr = ALIGN(vmm->vm_end, HPAGE_SIZE); + } +} + +static int __init hugetlb_setup_sz(char *str) +{ + u64 tr_pages; + unsigned long long size; + + if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0) + /* + * shouldn't happen, but just in case. + */ + tr_pages = 0x15557000UL; + + size = memparse(str, &str); + if (*str || (size & (size-1)) || !(tr_pages & size) || + size <= PAGE_SIZE || + size >= (1UL << PAGE_SHIFT << MAX_ORDER)) { + printk(KERN_WARNING "Invalid huge page size specified\n"); + return 1; + } + + hpage_shift = __ffs(size); + /* + * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT + * override here with new page shift. + */ + ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2); + return 1; +} +__setup("hugepagesz=", hugetlb_setup_sz); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c new file mode 100644 index 000000000000..65cf839573ea --- /dev/null +++ b/arch/ia64/mm/init.c @@ -0,0 +1,597 @@ +/* + * Initialize MMU support. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> + +#include <linux/bootmem.h> +#include <linux/efi.h> +#include <linux/elf.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/personality.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/swap.h> +#include <linux/proc_fs.h> +#include <linux/bitops.h> + +#include <asm/a.out.h> +#include <asm/dma.h> +#include <asm/ia32.h> +#include <asm/io.h> +#include <asm/machvec.h> +#include <asm/numa.h> +#include <asm/patch.h> +#include <asm/pgalloc.h> +#include <asm/sal.h> +#include <asm/sections.h> +#include <asm/system.h> +#include <asm/tlb.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> +#include <asm/mca.h> + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +extern void ia64_tlb_init (void); + +unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; + +#ifdef CONFIG_VIRTUAL_MEM_MAP +unsigned long vmalloc_end = VMALLOC_END_INIT; +EXPORT_SYMBOL(vmalloc_end); +struct page *vmem_map; +EXPORT_SYMBOL(vmem_map); +#endif + +static int pgt_cache_water[2] = { 25, 50 }; + +struct page *zero_page_memmap_ptr; /* map entry for zero page */ +EXPORT_SYMBOL(zero_page_memmap_ptr); + +void +check_pgt_cache (void) +{ + int low, high; + + low = pgt_cache_water[0]; + high = pgt_cache_water[1]; + + preempt_disable(); + if (pgtable_cache_size > (u64) high) { + do { + if (pgd_quicklist) + free_page((unsigned long)pgd_alloc_one_fast(NULL)); + if (pmd_quicklist) + free_page((unsigned long)pmd_alloc_one_fast(NULL, 0)); + } while (pgtable_cache_size > (u64) low); + } + preempt_enable(); +} + +void +lazy_mmu_prot_update (pte_t pte) +{ + unsigned long addr; + struct page *page; + + if (!pte_exec(pte)) + return; /* not an executable page... */ + + page = pte_page(pte); + addr = (unsigned long) page_address(page); + + if (test_bit(PG_arch_1, &page->flags)) + return; /* i-cache is already coherent with d-cache */ + + flush_icache_range(addr, addr + PAGE_SIZE); + set_bit(PG_arch_1, &page->flags); /* mark page as clean */ +} + +inline void +ia64_set_rbs_bot (void) +{ + unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16; + + if (stack_size > MAX_USER_STACK_SIZE) + stack_size = MAX_USER_STACK_SIZE; + current->thread.rbs_bot = STACK_TOP - stack_size; +} + +/* + * This performs some platform-dependent address space initialization. + * On IA-64, we want to setup the VM area for the register backing + * store (which grows upwards) and install the gateway page which is + * used for signal trampolines, etc. + */ +void +ia64_init_addr_space (void) +{ + struct vm_area_struct *vma; + + ia64_set_rbs_bot(); + + /* + * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore + * the problem. When the process attempts to write to the register backing store + * for the first time, it will get a SEGFAULT in this case. + */ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = current->thread.rbs_bot & PAGE_MASK; + vma->vm_end = vma->vm_start + PAGE_SIZE; + vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7]; + vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP; + down_write(¤t->mm->mmap_sem); + if (insert_vm_struct(current->mm, vma)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return; + } + up_write(¤t->mm->mmap_sem); + } + + /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ + if (!(current->personality & MMAP_PAGE_ZERO)) { + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_end = PAGE_SIZE; + vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); + vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; + down_write(¤t->mm->mmap_sem); + if (insert_vm_struct(current->mm, vma)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return; + } + up_write(¤t->mm->mmap_sem); + } + } +} + +void +free_initmem (void) +{ + unsigned long addr, eaddr; + + addr = (unsigned long) ia64_imva(__init_begin); + eaddr = (unsigned long) ia64_imva(__init_end); + while (addr < eaddr) { + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + ++totalram_pages; + addr += PAGE_SIZE; + } + printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n", + (__init_end - __init_begin) >> 10); +} + +void +free_initrd_mem (unsigned long start, unsigned long end) +{ + struct page *page; + /* + * EFI uses 4KB pages while the kernel can use 4KB or bigger. + * Thus EFI and the kernel may have different page sizes. It is + * therefore possible to have the initrd share the same page as + * the end of the kernel (given current setup). + * + * To avoid freeing/using the wrong page (kernel sized) we: + * - align up the beginning of initrd + * - align down the end of initrd + * + * | | + * |=============| a000 + * | | + * | | + * | | 9000 + * |/////////////| + * |/////////////| + * |=============| 8000 + * |///INITRD////| + * |/////////////| + * |/////////////| 7000 + * | | + * |KKKKKKKKKKKKK| + * |=============| 6000 + * |KKKKKKKKKKKKK| + * |KKKKKKKKKKKKK| + * K=kernel using 8KB pages + * + * In this example, we must free page 8000 ONLY. So we must align up + * initrd_start and keep initrd_end as is. + */ + start = PAGE_ALIGN(start); + end = end & PAGE_MASK; + + if (start < end) + printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10); + + for (; start < end; start += PAGE_SIZE) { + if (!virt_addr_valid(start)) + continue; + page = virt_to_page(start); + ClearPageReserved(page); + set_page_count(page, 1); + free_page(start); + ++totalram_pages; + } +} + +/* + * This installs a clean page in the kernel's page table. + */ +struct page * +put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (!PageReserved(page)) + printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n", + page_address(page)); + + pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ + + spin_lock(&init_mm.page_table_lock); + { + pud = pud_alloc(&init_mm, pgd, address); + if (!pud) + goto out; + + pmd = pmd_alloc(&init_mm, pud, address); + if (!pmd) + goto out; + pte = pte_alloc_map(&init_mm, pmd, address); + if (!pte) + goto out; + if (!pte_none(*pte)) { + pte_unmap(pte); + goto out; + } + set_pte(pte, mk_pte(page, pgprot)); + pte_unmap(pte); + } + out: spin_unlock(&init_mm.page_table_lock); + /* no need for flush_tlb */ + return page; +} + +static void +setup_gate (void) +{ + struct page *page; + + /* + * Map the gate page twice: once read-only to export the ELF headers etc. and once + * execute-only page to enable privilege-promotion via "epc": + */ + page = virt_to_page(ia64_imva(__start_gate_section)); + put_kernel_page(page, GATE_ADDR, PAGE_READONLY); +#ifdef HAVE_BUGGY_SEGREL + page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE)); + put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE); +#else + put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE); +#endif + ia64_patch_gate(); +} + +void __devinit +ia64_mmu_init (void *my_cpu_data) +{ + unsigned long psr, pta, impl_va_bits; + extern void __devinit tlb_init (void); + +#ifdef CONFIG_DISABLE_VHPT +# define VHPT_ENABLE_BIT 0 +#else +# define VHPT_ENABLE_BIT 1 +#endif + + /* Pin mapping for percpu area into TLB */ + psr = ia64_clear_ic(); + ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR, + pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), + PERCPU_PAGE_SHIFT); + + ia64_set_psr(psr); + ia64_srlz_i(); + + /* + * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped + * address space. The IA-64 architecture guarantees that at least 50 bits of + * virtual address space are implemented but if we pick a large enough page size + * (e.g., 64KB), the mapped address space is big enough that it will overlap with + * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages, + * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a + * problem in practice. Alternatively, we could truncate the top of the mapped + * address space to not permit mappings that would overlap with the VMLPT. + * --davidm 00/12/06 + */ +# define pte_bits 3 +# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT) + /* + * The virtual page table has to cover the entire implemented address space within + * a region even though not all of this space may be mappable. The reason for + * this is that the Access bit and Dirty bit fault handlers perform + * non-speculative accesses to the virtual page table, so the address range of the + * virtual page table itself needs to be covered by virtual page table. + */ +# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) +# define POW2(n) (1ULL << (n)) + + impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61))); + + if (impl_va_bits < 51 || impl_va_bits > 61) + panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1); + + /* place the VMLPT at the end of each page-table mapped region: */ + pta = POW2(61) - POW2(vmlpt_bits); + + if (POW2(mapped_space_bits) >= pta) + panic("mm/init: overlap between virtually mapped linear page table and " + "mapped kernel space!"); + /* + * Set the (virtually mapped linear) page table address. Bit + * 8 selects between the short and long format, bits 2-7 the + * size of the table, and bit 0 whether the VHPT walker is + * enabled. + */ + ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT); + + ia64_tlb_init(); + +#ifdef CONFIG_HUGETLB_PAGE + ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2); + ia64_srlz_d(); +#endif +} + +#ifdef CONFIG_VIRTUAL_MEM_MAP + +int +create_mem_map_page_table (u64 start, u64 end, void *arg) +{ + unsigned long address, start_page, end_page; + struct page *map_start, *map_end; + int node; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + map_start = vmem_map + (__pa(start) >> PAGE_SHIFT); + map_end = vmem_map + (__pa(end) >> PAGE_SHIFT); + + start_page = (unsigned long) map_start & PAGE_MASK; + end_page = PAGE_ALIGN((unsigned long) map_end); + node = paddr_to_nid(__pa(start)); + + for (address = start_page; address < end_page; address += PAGE_SIZE) { + pgd = pgd_offset_k(address); + if (pgd_none(*pgd)) + pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)); + pud = pud_offset(pgd, address); + + if (pud_none(*pud)) + pud_populate(&init_mm, pud, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)); + pmd = pmd_offset(pud, address); + + if (pmd_none(*pmd)) + pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)); + pte = pte_offset_kernel(pmd, address); + + if (pte_none(*pte)) + set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT, + PAGE_KERNEL)); + } + return 0; +} + +struct memmap_init_callback_data { + struct page *start; + struct page *end; + int nid; + unsigned long zone; +}; + +static int +virtual_memmap_init (u64 start, u64 end, void *arg) +{ + struct memmap_init_callback_data *args; + struct page *map_start, *map_end; + + args = (struct memmap_init_callback_data *) arg; + map_start = vmem_map + (__pa(start) >> PAGE_SHIFT); + map_end = vmem_map + (__pa(end) >> PAGE_SHIFT); + + if (map_start < args->start) + map_start = args->start; + if (map_end > args->end) + map_end = args->end; + + /* + * We have to initialize "out of bounds" struct page elements that fit completely + * on the same pages that were allocated for the "in bounds" elements because they + * may be referenced later (and found to be "reserved"). + */ + map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page); + map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end) + / sizeof(struct page)); + + if (map_start < map_end) + memmap_init_zone((unsigned long)(map_end - map_start), + args->nid, args->zone, page_to_pfn(map_start)); + return 0; +} + +void +memmap_init (unsigned long size, int nid, unsigned long zone, + unsigned long start_pfn) +{ + if (!vmem_map) + memmap_init_zone(size, nid, zone, start_pfn); + else { + struct page *start; + struct memmap_init_callback_data args; + + start = pfn_to_page(start_pfn); + args.start = start; + args.end = start + size; + args.nid = nid; + args.zone = zone; + + efi_memmap_walk(virtual_memmap_init, &args); + } +} + +int +ia64_pfn_valid (unsigned long pfn) +{ + char byte; + struct page *pg = pfn_to_page(pfn); + + return (__get_user(byte, (char __user *) pg) == 0) + && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK)) + || (__get_user(byte, (char __user *) (pg + 1) - 1) == 0)); +} +EXPORT_SYMBOL(ia64_pfn_valid); + +int +find_largest_hole (u64 start, u64 end, void *arg) +{ + u64 *max_gap = arg; + + static u64 last_end = PAGE_OFFSET; + + /* NOTE: this algorithm assumes efi memmap table is ordered */ + + if (*max_gap < (start - last_end)) + *max_gap = start - last_end; + last_end = end; + return 0; +} +#endif /* CONFIG_VIRTUAL_MEM_MAP */ + +static int +count_reserved_pages (u64 start, u64 end, void *arg) +{ + unsigned long num_reserved = 0; + unsigned long *count = arg; + + for (; start < end; start += PAGE_SIZE) + if (PageReserved(virt_to_page(start))) + ++num_reserved; + *count += num_reserved; + return 0; +} + +/* + * Boot command-line option "nolwsys" can be used to disable the use of any light-weight + * system call handler. When this option is in effect, all fsyscalls will end up bubbling + * down into the kernel and calling the normal (heavy-weight) syscall handler. This is + * useful for performance testing, but conceivably could also come in handy for debugging + * purposes. + */ + +static int nolwsys; + +static int __init +nolwsys_setup (char *s) +{ + nolwsys = 1; + return 1; +} + +__setup("nolwsys", nolwsys_setup); + +void +mem_init (void) +{ + long reserved_pages, codesize, datasize, initsize; + unsigned long num_pgt_pages; + pg_data_t *pgdat; + int i; + static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel; + +#ifdef CONFIG_PCI + /* + * This needs to be called _after_ the command line has been parsed but _before_ + * any drivers that may need the PCI DMA interface are initialized or bootmem has + * been freed. + */ + platform_dma_init(); +#endif + +#ifndef CONFIG_DISCONTIGMEM + if (!mem_map) + BUG(); + max_mapnr = max_low_pfn; +#endif + + high_memory = __va(max_low_pfn * PAGE_SIZE); + + kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE); + kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); + kclist_add(&kcore_kernel, _stext, _end - _stext); + + for_each_pgdat(pgdat) + totalram_pages += free_all_bootmem_node(pgdat); + + reserved_pages = 0; + efi_memmap_walk(count_reserved_pages, &reserved_pages); + + codesize = (unsigned long) _etext - (unsigned long) _stext; + datasize = (unsigned long) _edata - (unsigned long) _etext; + initsize = (unsigned long) __init_end - (unsigned long) __init_begin; + + printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, " + "%luk data, %luk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10), + num_physpages << (PAGE_SHIFT - 10), codesize >> 10, + reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10); + + /* + * Allow for enough (cached) page table pages so that we can map the entire memory + * at least once. Each task also needs a couple of page tables pages, so add in a + * fudge factor for that (don't use "threads-max" here; that would be wrong!). + * Don't allow the cache to be more than 10% of total memory, though. + */ +# define NUM_TASKS 500 /* typical number of tasks */ + num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS; + if (num_pgt_pages > nr_free_pages() / 10) + num_pgt_pages = nr_free_pages() / 10; + if (num_pgt_pages > (u64) pgt_cache_water[1]) + pgt_cache_water[1] = num_pgt_pages; + + /* + * For fsyscall entrpoints with no light-weight handler, use the ordinary + * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry + * code can tell them apart. + */ + for (i = 0; i < NR_syscalls; ++i) { + extern unsigned long fsyscall_table[NR_syscalls]; + extern unsigned long sys_call_table[NR_syscalls]; + + if (!fsyscall_table[i] || nolwsys) + fsyscall_table[i] = sys_call_table[i] | 1; + } + setup_gate(); + +#ifdef CONFIG_IA32_SUPPORT + ia32_mem_init(); +#endif +} diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c new file mode 100644 index 000000000000..77118bbf3d8b --- /dev/null +++ b/arch/ia64/mm/numa.c @@ -0,0 +1,49 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * This file contains NUMA specific variables and functions which can + * be split away from DISCONTIGMEM and are used on NUMA machines with + * contiguous memory. + * + * 2002/08/07 Erich Focht <efocht@ess.nec.de> + */ + +#include <linux/config.h> +#include <linux/cpu.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/node.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <asm/mmzone.h> +#include <asm/numa.h> + + +/* + * The following structures are usually initialized by ACPI or + * similar mechanisms and describe the NUMA characteristics of the machine. + */ +int num_node_memblks; +struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; +struct node_cpuid_s node_cpuid[NR_CPUS]; +/* + * This is a matrix with "distances" between nodes, they should be + * proportional to the memory access latency ratios. + */ +u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; + +/* Identify which cnode a physical address resides on */ +int +paddr_to_nid(unsigned long paddr) +{ + int i; + + for (i = 0; i < num_node_memblks; i++) + if (paddr >= node_memblk[i].start_paddr && + paddr < node_memblk[i].start_paddr + node_memblk[i].size) + break; + + return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0); +} diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c new file mode 100644 index 000000000000..464557e4ed82 --- /dev/null +++ b/arch/ia64/mm/tlb.c @@ -0,0 +1,190 @@ +/* + * TLB support routines. + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 08/02/00 A. Mallick <asit.k.mallick@intel.com> + * Modified RID allocation for SMP + * Goutham Rao <goutham.rao@intel.com> + * IPI based ptc implementation and A-step IPI implementation. + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> + +#include <asm/delay.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/pal.h> +#include <asm/tlbflush.h> + +static struct { + unsigned long mask; /* mask of supported purge page-sizes */ + unsigned long max_bits; /* log2() of largest supported purge page-size */ +} purge; + +struct ia64_ctx ia64_ctx = { + .lock = SPIN_LOCK_UNLOCKED, + .next = 1, + .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */ + .max_ctx = ~0U +}; + +DEFINE_PER_CPU(u8, ia64_need_tlb_flush); + +/* + * Acquire the ia64_ctx.lock before calling this function! + */ +void +wrap_mmu_context (struct mm_struct *mm) +{ + unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx; + struct task_struct *tsk; + int i; + + if (ia64_ctx.next > max_ctx) + ia64_ctx.next = 300; /* skip daemons */ + ia64_ctx.limit = max_ctx + 1; + + /* + * Scan all the task's mm->context and set proper safe range + */ + + read_lock(&tasklist_lock); + repeat: + for_each_process(tsk) { + if (!tsk->mm) + continue; + tsk_context = tsk->mm->context; + if (tsk_context == ia64_ctx.next) { + if (++ia64_ctx.next >= ia64_ctx.limit) { + /* empty range: reset the range limit and start over */ + if (ia64_ctx.next > max_ctx) + ia64_ctx.next = 300; + ia64_ctx.limit = max_ctx + 1; + goto repeat; + } + } + if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit)) + ia64_ctx.limit = tsk_context; + } + read_unlock(&tasklist_lock); + /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */ + { + int cpu = get_cpu(); /* prevent preemption/migration */ + for (i = 0; i < NR_CPUS; ++i) + if (cpu_online(i) && (i != cpu)) + per_cpu(ia64_need_tlb_flush, i) = 1; + put_cpu(); + } + local_flush_tlb_all(); +} + +void +ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits) +{ + static DEFINE_SPINLOCK(ptcg_lock); + + /* HW requires global serialization of ptc.ga. */ + spin_lock(&ptcg_lock); + { + do { + /* + * Flush ALAT entries also. + */ + ia64_ptcga(start, (nbits<<2)); + ia64_srlz_i(); + start += (1UL << nbits); + } while (start < end); + } + spin_unlock(&ptcg_lock); +} + +void +local_flush_tlb_all (void) +{ + unsigned long i, j, flags, count0, count1, stride0, stride1, addr; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +void +flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long size = end - start; + unsigned long nbits; + + if (mm != current->active_mm) { + /* this does happen, but perhaps it's not worth optimizing for? */ +#ifdef CONFIG_SMP + flush_tlb_all(); +#else + mm->context = 0; +#endif + return; + } + + nbits = ia64_fls(size + 0xfff); + while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) + ++nbits; + if (nbits > purge.max_bits) + nbits = purge.max_bits; + start &= ~((1UL << nbits) - 1); + +# ifdef CONFIG_SMP + platform_global_tlb_purge(start, end, nbits); +# else + do { + ia64_ptcl(start, (nbits<<2)); + start += (1UL << nbits); + } while (start < end); +# endif + + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} +EXPORT_SYMBOL(flush_tlb_range); + +void __devinit +ia64_tlb_init (void) +{ + ia64_ptce_info_t ptce_info; + unsigned long tr_pgbits; + long status; + + if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) { + printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld;" + "defaulting to architected purge page-sizes.\n", status); + purge.mask = 0x115557000UL; + } + purge.max_bits = ia64_fls(purge.mask); + + ia64_get_ptce(&ptce_info); + local_cpu_data->ptce_base = ptce_info.base; + local_cpu_data->ptce_count[0] = ptce_info.count[0]; + local_cpu_data->ptce_count[1] = ptce_info.count[1]; + local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; + local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; + + local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ +} diff --git a/arch/ia64/module.lds b/arch/ia64/module.lds new file mode 100644 index 000000000000..6481f42fbd8c --- /dev/null +++ b/arch/ia64/module.lds @@ -0,0 +1,13 @@ +SECTIONS { + /* Group unwind sections into a single section: */ + .IA_64.unwind_info : { *(.IA_64.unwind_info*) } + .IA_64.unwind : { *(.IA_64.unwind*) } + /* + * Create place-holder sections to hold the PLTs, GOT, and + * official procedure-descriptors (.opd). + */ + .core.plt : { BYTE(0) } + .init.plt : { BYTE(0) } + .got : { BYTE(0) } + .opd : { BYTE(0) } +} diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig new file mode 100644 index 000000000000..56e6f614b04a --- /dev/null +++ b/arch/ia64/oprofile/Kconfig @@ -0,0 +1,26 @@ + +menu "Profiling support" + depends on EXPERIMENTAL + +config PROFILING + bool "Profiling support (EXPERIMENTAL)" + help + Say Y here to enable the extended profiling support mechanisms used + by profilers such as OProfile. + +config OPROFILE + tristate "OProfile system profiling (EXPERIMENTAL)" + depends on PROFILING + help + OProfile is a profiling system capable of profiling the + whole system, include the kernel, kernel modules, libraries, + and applications. + + Due to firmware bugs, you may need to use the "nohalt" boot + option if you're using OProfile with the hardware performance + counters. + + If unsure, say N. + +endmenu + diff --git a/arch/ia64/oprofile/Makefile b/arch/ia64/oprofile/Makefile new file mode 100644 index 000000000000..aad27a718ee0 --- /dev/null +++ b/arch/ia64/oprofile/Makefile @@ -0,0 +1,10 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-$(CONFIG_PERFMON) += perfmon.o diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c new file mode 100644 index 000000000000..b7dabbfb0d61 --- /dev/null +++ b/arch/ia64/oprofile/backtrace.c @@ -0,0 +1,150 @@ +/** + * @file backtrace.c + * + * @remark Copyright 2004 Silicon Graphics Inc. All Rights Reserved. + * @remark Read the file COPYING + * + * @author Greg Banks <gnb@melbourne.sgi.com> + * @author Keith Owens <kaos@melbourne.sgi.com> + * Based on work done for the ia64 port of the SGI kernprof patch, which is + * Copyright (c) 2003-2004 Silicon Graphics Inc. All Rights Reserved. + */ + +#include <linux/oprofile.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <asm/ptrace.h> +#include <asm/system.h> + +/* + * For IA64 we need to perform a complex little dance to get both + * the struct pt_regs and a synthetic struct switch_stack in place + * to allow the unwind code to work. This dance requires our unwind + * using code to be called from a function called from unw_init_running(). + * There we only get a single void* data pointer, so use this struct + * to hold all the data we need during the unwind. + */ +typedef struct +{ + unsigned int depth; + struct pt_regs *regs; + struct unw_frame_info frame; + u64 *prev_pfs_loc; /* state for WAR for old spinlock ool code */ +} ia64_backtrace_t; + +#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +/* + * Returns non-zero if the PC is in the spinlock contention out-of-line code + * with non-standard calling sequence (on older compilers). + */ +static __inline__ int in_old_ool_spinlock_code(unsigned long pc) +{ + extern const char ia64_spinlock_contention_pre3_4[] __attribute__ ((weak)); + extern const char ia64_spinlock_contention_pre3_4_end[] __attribute__ ((weak)); + unsigned long sc_start = (unsigned long)ia64_spinlock_contention_pre3_4; + unsigned long sc_end = (unsigned long)ia64_spinlock_contention_pre3_4_end; + return (sc_start && sc_end && pc >= sc_start && pc < sc_end); +} +#else +/* Newer spinlock code does a proper br.call and works fine with the unwinder */ +#define in_old_ool_spinlock_code(pc) 0 +#endif + +/* Returns non-zero if the PC is in the Interrupt Vector Table */ +static __inline__ int in_ivt_code(unsigned long pc) +{ + extern char ia64_ivt[]; + return (pc >= (u_long)ia64_ivt && pc < (u_long)ia64_ivt+32768); +} + +/* + * Unwind to next stack frame. + */ +static __inline__ int next_frame(ia64_backtrace_t *bt) +{ + /* + * Avoid unsightly console message from unw_unwind() when attempting + * to unwind through the Interrupt Vector Table which has no unwind + * information. + */ + if (in_ivt_code(bt->frame.ip)) + return 0; + + /* + * WAR for spinlock contention from leaf functions. ia64_spinlock_contention_pre3_4 + * has ar.pfs == r0. Leaf functions do not modify ar.pfs so ar.pfs remains + * as 0, stopping the backtrace. Record the previous ar.pfs when the current + * IP is in ia64_spinlock_contention_pre3_4 then unwind, if pfs_loc has not changed + * after unwind then use pt_regs.ar_pfs which is where the real ar.pfs is for + * leaf functions. + */ + if (bt->prev_pfs_loc && bt->regs && bt->frame.pfs_loc == bt->prev_pfs_loc) + bt->frame.pfs_loc = &bt->regs->ar_pfs; + bt->prev_pfs_loc = (in_old_ool_spinlock_code(bt->frame.ip) ? bt->frame.pfs_loc : NULL); + + return unw_unwind(&bt->frame) == 0; +} + + +static void do_ia64_backtrace(struct unw_frame_info *info, void *vdata) +{ + ia64_backtrace_t *bt = vdata; + struct switch_stack *sw; + int count = 0; + u_long pc, sp; + + sw = (struct switch_stack *)(info+1); + /* padding from unw_init_running */ + sw = (struct switch_stack *)(((unsigned long)sw + 15) & ~15); + + unw_init_frame_info(&bt->frame, current, sw); + + /* skip over interrupt frame and oprofile calls */ + do { + unw_get_sp(&bt->frame, &sp); + if (sp >= (u_long)bt->regs) + break; + if (!next_frame(bt)) + return; + } while (count++ < 200); + + /* finally, grab the actual sample */ + while (bt->depth-- && next_frame(bt)) { + unw_get_ip(&bt->frame, &pc); + oprofile_add_trace(pc); + if (unw_is_intr_frame(&bt->frame)) { + /* + * Interrupt received on kernel stack; this can + * happen when timer interrupt fires while processing + * a softirq from the tail end of a hardware interrupt + * which interrupted a system call. Don't laugh, it + * happens! Splice the backtrace into two parts to + * avoid spurious cycles in the gprof output. + */ + /* TODO: split rather than drop the 2nd half */ + break; + } + } +} + +void +ia64_backtrace(struct pt_regs * const regs, unsigned int depth) +{ + ia64_backtrace_t bt; + unsigned long flags; + + /* + * On IA64 there is little hope of getting backtraces from + * user space programs -- the problems of getting the unwind + * information from arbitrary user programs are extreme. + */ + if (user_mode(regs)) + return; + + bt.depth = depth; + bt.regs = regs; + bt.prev_pfs_loc = NULL; + local_irq_save(flags); + unw_init_running(do_ia64_backtrace, &bt); + local_irq_restore(flags); +} diff --git a/arch/ia64/oprofile/init.c b/arch/ia64/oprofile/init.c new file mode 100644 index 000000000000..125a602a660d --- /dev/null +++ b/arch/ia64/oprofile/init.c @@ -0,0 +1,38 @@ +/** + * @file init.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@movementarian.org> + */ + +#include <linux/kernel.h> +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/errno.h> + +extern int perfmon_init(struct oprofile_operations * ops); +extern void perfmon_exit(void); +extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth); + +int __init oprofile_arch_init(struct oprofile_operations * ops) +{ + int ret = -ENODEV; + +#ifdef CONFIG_PERFMON + /* perfmon_init() can fail, but we have no way to report it */ + ret = perfmon_init(ops); +#endif + ops->backtrace = ia64_backtrace; + + return ret; +} + + +void oprofile_arch_exit(void) +{ +#ifdef CONFIG_PERFMON + perfmon_exit(); +#endif +} diff --git a/arch/ia64/oprofile/perfmon.c b/arch/ia64/oprofile/perfmon.c new file mode 100644 index 000000000000..b7975a469fb8 --- /dev/null +++ b/arch/ia64/oprofile/perfmon.c @@ -0,0 +1,100 @@ +/** + * @file perfmon.c + * + * @remark Copyright 2003 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@movementarian.org> + */ + +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/oprofile.h> +#include <linux/sched.h> +#include <asm/perfmon.h> +#include <asm/ptrace.h> +#include <asm/errno.h> + +static int allow_ints; + +static int +perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, + struct pt_regs *regs, unsigned long stamp) +{ + int event = arg->pmd_eventid; + + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; + + /* the owner of the oprofile event buffer may have exited + * without perfmon being shutdown (e.g. SIGSEGV) + */ + if (allow_ints) + oprofile_add_sample(regs, event); + return 0; +} + + +static int perfmon_start(void) +{ + allow_ints = 1; + return 0; +} + + +static void perfmon_stop(void) +{ + allow_ints = 0; +} + + +#define OPROFILE_FMT_UUID { \ + 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c } + +static pfm_buffer_fmt_t oprofile_fmt = { + .fmt_name = "oprofile_format", + .fmt_uuid = OPROFILE_FMT_UUID, + .fmt_handler = perfmon_handler, +}; + + +static char * get_cpu_type(void) +{ + __u8 family = local_cpu_data->family; + + switch (family) { + case 0x07: + return "ia64/itanium"; + case 0x1f: + return "ia64/itanium2"; + default: + return "ia64/ia64"; + } +} + + +/* all the ops are handled via userspace for IA64 perfmon */ + +static int using_perfmon; + +int perfmon_init(struct oprofile_operations * ops) +{ + int ret = pfm_register_buffer_fmt(&oprofile_fmt); + if (ret) + return -ENODEV; + + ops->cpu_type = get_cpu_type(); + ops->start = perfmon_start; + ops->stop = perfmon_stop; + using_perfmon = 1; + printk(KERN_INFO "oprofile: using perfmon.\n"); + return 0; +} + + +void perfmon_exit(void) +{ + if (!using_perfmon) + return; + + pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid); +} diff --git a/arch/ia64/pci/Makefile b/arch/ia64/pci/Makefile new file mode 100644 index 000000000000..e66889e6922a --- /dev/null +++ b/arch/ia64/pci/Makefile @@ -0,0 +1,4 @@ +# +# Makefile for the ia64-specific parts of the pci bus +# +obj-y := pci.o diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c new file mode 100644 index 000000000000..88641e5095b5 --- /dev/null +++ b/arch/ia64/pci/pci.c @@ -0,0 +1,735 @@ +/* + * pci.c - Low-Level PCI Access in IA-64 + * + * Derived from bios32.c of i386 tree. + * + * (c) Copyright 2002, 2005 Hewlett-Packard Development Company, L.P. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Bjorn Helgaas <bjorn.helgaas@hp.com> + * Copyright (C) 2004 Silicon Graphics, Inc. + * + * Note: Above list of copyright holders is incomplete... + */ +#include <linux/config.h> + +#include <linux/acpi.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/slab.h> +#include <linux/smp_lock.h> +#include <linux/spinlock.h> + +#include <asm/machvec.h> +#include <asm/page.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/sal.h> +#include <asm/smp.h> +#include <asm/irq.h> +#include <asm/hw_irq.h> + + +static int pci_routeirq; + +/* + * Low-level SAL-based PCI configuration access functions. Note that SAL + * calls are already serialized (via sal_lock), so we don't need another + * synchronization mechanism here. + */ + +#define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \ + (((u64) seg << 24) | (bus << 16) | (devfn << 8) | (reg)) + +/* SAL 3.2 adds support for extended config space. */ + +#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \ + (((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg)) + +static int +pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *value) +{ + u64 addr, data = 0; + int mode, result; + + if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + if ((seg | reg) <= 255) { + addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); + mode = 0; + } else { + addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); + mode = 1; + } + result = ia64_sal_pci_config_read(addr, mode, len, &data); + if (result != 0) + return -EINVAL; + + *value = (u32) data; + return 0; +} + +static int +pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn, + int reg, int len, u32 value) +{ + u64 addr; + int mode, result; + + if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + if ((seg | reg) <= 255) { + addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); + mode = 0; + } else { + addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); + mode = 1; + } + result = ia64_sal_pci_config_write(addr, mode, len, value); + if (result != 0) + return -EINVAL; + return 0; +} + +static struct pci_raw_ops pci_sal_ops = { + .read = pci_sal_read, + .write = pci_sal_write +}; + +struct pci_raw_ops *raw_pci_ops = &pci_sal_ops; + +static int +pci_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) +{ + return raw_pci_ops->read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +static int +pci_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) +{ + return raw_pci_ops->write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +struct pci_ops pci_root_ops = { + .read = pci_read, + .write = pci_write, +}; + +#ifdef CONFIG_NUMA +extern acpi_status acpi_map_iosapic(acpi_handle, u32, void *, void **); +static void acpi_map_iosapics(void) +{ + acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL); +} +#else +static void acpi_map_iosapics(void) +{ + return; +} +#endif /* CONFIG_NUMA */ + +static int __init +pci_acpi_init (void) +{ + struct pci_dev *dev = NULL; + + printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + + acpi_map_iosapics(); + + if (pci_routeirq) { + /* + * PCI IRQ routing is set up by pci_enable_device(), but we + * also do it here in case there are still broken drivers that + * don't use pci_enable_device(). + */ + printk(KERN_INFO "PCI: Routing interrupts for all devices because \"pci=routeirq\" specified\n"); + for_each_pci_dev(dev) + acpi_pci_irq_enable(dev); + } else + printk(KERN_INFO "PCI: If a device doesn't work, try \"pci=routeirq\". If it helps, post a report\n"); + + return 0; +} + +subsys_initcall(pci_acpi_init); + +/* Called by ACPI when it finds a new root bus. */ + +static struct pci_controller * __devinit +alloc_pci_controller (int seg) +{ + struct pci_controller *controller; + + controller = kmalloc(sizeof(*controller), GFP_KERNEL); + if (!controller) + return NULL; + + memset(controller, 0, sizeof(*controller)); + controller->segment = seg; + return controller; +} + +static u64 __devinit +add_io_space (struct acpi_resource_address64 *addr) +{ + u64 offset; + int sparse = 0; + int i; + + if (addr->address_translation_offset == 0) + return IO_SPACE_BASE(0); /* part of legacy IO space */ + + if (addr->attribute.io.translation_attribute == ACPI_SPARSE_TRANSLATION) + sparse = 1; + + offset = (u64) ioremap(addr->address_translation_offset, 0); + for (i = 0; i < num_io_spaces; i++) + if (io_space[i].mmio_base == offset && + io_space[i].sparse == sparse) + return IO_SPACE_BASE(i); + + if (num_io_spaces == MAX_IO_SPACES) { + printk("Too many IO port spaces\n"); + return ~0; + } + + i = num_io_spaces++; + io_space[i].mmio_base = offset; + io_space[i].sparse = sparse; + + return IO_SPACE_BASE(i); +} + +static acpi_status __devinit +count_window (struct acpi_resource *resource, void *data) +{ + unsigned int *windows = (unsigned int *) data; + struct acpi_resource_address64 addr; + acpi_status status; + + status = acpi_resource_to_address64(resource, &addr); + if (ACPI_SUCCESS(status)) + if (addr.resource_type == ACPI_MEMORY_RANGE || + addr.resource_type == ACPI_IO_RANGE) + (*windows)++; + + return AE_OK; +} + +struct pci_root_info { + struct pci_controller *controller; + char *name; +}; + +static __devinit acpi_status add_window(struct acpi_resource *res, void *data) +{ + struct pci_root_info *info = data; + struct pci_window *window; + struct acpi_resource_address64 addr; + acpi_status status; + unsigned long flags, offset = 0; + struct resource *root; + + status = acpi_resource_to_address64(res, &addr); + if (!ACPI_SUCCESS(status)) + return AE_OK; + + if (!addr.address_length) + return AE_OK; + + if (addr.resource_type == ACPI_MEMORY_RANGE) { + flags = IORESOURCE_MEM; + root = &iomem_resource; + offset = addr.address_translation_offset; + } else if (addr.resource_type == ACPI_IO_RANGE) { + flags = IORESOURCE_IO; + root = &ioport_resource; + offset = add_io_space(&addr); + if (offset == ~0) + return AE_OK; + } else + return AE_OK; + + window = &info->controller->window[info->controller->windows++]; + window->resource.name = info->name; + window->resource.flags = flags; + window->resource.start = addr.min_address_range + offset; + window->resource.end = addr.max_address_range + offset; + window->resource.child = NULL; + window->offset = offset; + + if (insert_resource(root, &window->resource)) { + printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n", + window->resource.start, window->resource.end, + root->name, info->name); + } + + return AE_OK; +} + +static void __devinit +pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl) +{ + int i, j; + + j = 0; + for (i = 0; i < ctrl->windows; i++) { + struct resource *res = &ctrl->window[i].resource; + /* HP's firmware has a hack to work around a Windows bug. + * Ignore these tiny memory ranges */ + if ((res->flags & IORESOURCE_MEM) && + (res->end - res->start < 16)) + continue; + if (j >= PCI_BUS_NUM_RESOURCES) { + printk("Ignoring range [%lx-%lx] (%lx)\n", res->start, + res->end, res->flags); + continue; + } + bus->resource[j++] = res; + } +} + +struct pci_bus * __devinit +pci_acpi_scan_root(struct acpi_device *device, int domain, int bus) +{ + struct pci_root_info info; + struct pci_controller *controller; + unsigned int windows = 0; + struct pci_bus *pbus; + char *name; + + controller = alloc_pci_controller(domain); + if (!controller) + goto out1; + + controller->acpi_handle = device->handle; + + acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, + &windows); + controller->window = kmalloc(sizeof(*controller->window) * windows, + GFP_KERNEL); + if (!controller->window) + goto out2; + + name = kmalloc(16, GFP_KERNEL); + if (!name) + goto out3; + + sprintf(name, "PCI Bus %04x:%02x", domain, bus); + info.controller = controller; + info.name = name; + acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window, + &info); + + pbus = pci_scan_bus(bus, &pci_root_ops, controller); + if (pbus) + pcibios_setup_root_windows(pbus, controller); + + return pbus; + +out3: + kfree(controller->window); +out2: + kfree(controller); +out1: + return NULL; +} + +void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, struct resource *res) +{ + struct pci_controller *controller = PCI_CONTROLLER(dev); + unsigned long offset = 0; + int i; + + for (i = 0; i < controller->windows; i++) { + struct pci_window *window = &controller->window[i]; + if (!(window->resource.flags & res->flags)) + continue; + if (window->resource.start > res->start) + continue; + if (window->resource.end < res->end) + continue; + offset = window->offset; + break; + } + + region->start = res->start - offset; + region->end = res->end - offset; +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +void pcibios_bus_to_resource(struct pci_dev *dev, + struct resource *res, struct pci_bus_region *region) +{ + struct pci_controller *controller = PCI_CONTROLLER(dev); + unsigned long offset = 0; + int i; + + for (i = 0; i < controller->windows; i++) { + struct pci_window *window = &controller->window[i]; + if (!(window->resource.flags & res->flags)) + continue; + if (window->resource.start - window->offset > region->start) + continue; + if (window->resource.end - window->offset < region->end) + continue; + offset = window->offset; + break; + } + + res->start = region->start + offset; + res->end = region->end + offset; +} + +static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) +{ + struct pci_bus_region region; + int i; + int limit = (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) ? \ + PCI_BRIDGE_RESOURCES : PCI_NUM_RESOURCES; + + for (i = 0; i < limit; i++) { + if (!dev->resource[i].flags) + continue; + region.start = dev->resource[i].start; + region.end = dev->resource[i].end; + pcibios_bus_to_resource(dev, &dev->resource[i], ®ion); + pci_claim_resource(dev, i); + } +} + +/* + * Called after each bus is probed, but before its children are examined. + */ +void __devinit +pcibios_fixup_bus (struct pci_bus *b) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &b->devices, bus_list) + pcibios_fixup_device_resources(dev); + + return; +} + +void __devinit +pcibios_update_irq (struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); + + /* ??? FIXME -- record old value for shutdown. */ +} + +static inline int +pcibios_enable_resources (struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + if (!dev) + return -EINVAL; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for (idx=0; idx<6; idx++) { + /* Only set up the desired resources. */ + if (!(mask & (1 << idx))) + continue; + + r = &dev->resource[idx]; + if (!r->start && r->end) { + printk(KERN_ERR + "PCI: Device %s not available because of resource collisions\n", + pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (dev->resource[PCI_ROM_RESOURCE].start) + cmd |= PCI_COMMAND_MEMORY; + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +int +pcibios_enable_device (struct pci_dev *dev, int mask) +{ + int ret; + + ret = pcibios_enable_resources(dev, mask); + if (ret < 0) + return ret; + + return acpi_pci_irq_enable(dev); +} + +#ifdef CONFIG_ACPI_DEALLOCATE_IRQ +void +pcibios_disable_device (struct pci_dev *dev) +{ + acpi_pci_irq_disable(dev); +} +#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */ + +void +pcibios_align_resource (void *data, struct resource *res, + unsigned long size, unsigned long align) +{ +} + +/* + * PCI BIOS setup, always defaults to SAL interface + */ +char * __init +pcibios_setup (char *str) +{ + if (!strcmp(str, "routeirq")) + pci_routeirq = 1; + return NULL; +} + +int +pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + /* + * I/O space cannot be accessed via normal processor loads and + * stores on this platform. + */ + if (mmap_state == pci_mmap_io) + /* + * XXX we could relax this for I/O spaces for which ACPI + * indicates that the space is 1-to-1 mapped. But at the + * moment, we don't support multiple PCI address spaces and + * the legacy I/O space is not 1-to-1 mapped, so this is moot. + */ + return -EINVAL; + + /* + * Leave vm_pgoff as-is, the PCI space address is the physical + * address on this platform. + */ + vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO); + + if (write_combine && efi_range_is_wc(vma->vm_start, + vma->vm_end - vma->vm_start)) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +/** + * ia64_pci_get_legacy_mem - generic legacy mem routine + * @bus: bus to get legacy memory base address for + * + * Find the base of legacy memory for @bus. This is typically the first + * megabyte of bus address space for @bus or is simply 0 on platforms whose + * chipsets support legacy I/O and memory routing. Returns the base address + * or an error pointer if an error occurred. + * + * This is the ia64 generic version of this routine. Other platforms + * are free to override it with a machine vector. + */ +char *ia64_pci_get_legacy_mem(struct pci_bus *bus) +{ + return (char *)__IA64_UNCACHED_OFFSET; +} + +/** + * pci_mmap_legacy_page_range - map legacy memory space to userland + * @bus: bus whose legacy space we're mapping + * @vma: vma passed in by mmap + * + * Map legacy memory space for this device back to userspace using a machine + * vector to get the base address. + */ +int +pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma) +{ + char *addr; + + addr = pci_get_legacy_mem(bus); + if (IS_ERR(addr)) + return PTR_ERR(addr); + + vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO); + + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +/** + * ia64_pci_legacy_read - read from legacy I/O space + * @bus: bus to read + * @port: legacy port value + * @val: caller allocated storage for returned value + * @size: number of bytes to read + * + * Simply reads @size bytes from @port and puts the result in @val. + * + * Again, this (and the write routine) are generic versions that can be + * overridden by the platform. This is necessary on platforms that don't + * support legacy I/O routing or that hard fail on legacy I/O timeouts. + */ +int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) +{ + int ret = size; + + switch (size) { + case 1: + *val = inb(port); + break; + case 2: + *val = inw(port); + break; + case 4: + *val = inl(port); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/** + * ia64_pci_legacy_write - perform a legacy I/O write + * @bus: bus pointer + * @port: port to write + * @val: value to write + * @size: number of bytes to write from @val + * + * Simply writes @size bytes of @val to @port. + */ +int ia64_pci_legacy_write(struct pci_dev *bus, u16 port, u32 val, u8 size) +{ + int ret = 0; + + switch (size) { + case 1: + outb(val, port); + break; + case 2: + outw(val, port); + break; + case 4: + outl(val, port); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/** + * pci_cacheline_size - determine cacheline size for PCI devices + * @dev: void + * + * We want to use the line-size of the outer-most cache. We assume + * that this line-size is the same for all CPUs. + * + * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info(). + * + * RETURNS: An appropriate -ERRNO error value on eror, or zero for success. + */ +static unsigned long +pci_cacheline_size (void) +{ + u64 levels, unique_caches; + s64 status; + pal_cache_config_info_t cci; + static u8 cacheline_size; + + if (cacheline_size) + return cacheline_size; + + status = ia64_pal_cache_summary(&levels, &unique_caches); + if (status != 0) { + printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", + __FUNCTION__, status); + return SMP_CACHE_BYTES; + } + + status = ia64_pal_cache_config_info(levels - 1, /* cache_type (data_or_unified)= */ 2, + &cci); + if (status != 0) { + printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed (status=%ld)\n", + __FUNCTION__, status); + return SMP_CACHE_BYTES; + } + cacheline_size = 1 << cci.pcci_line_size; + return cacheline_size; +} + +/** + * pcibios_prep_mwi - helper function for drivers/pci/pci.c:pci_set_mwi() + * @dev: the PCI device for which MWI is enabled + * + * For ia64, we can get the cacheline sizes from PAL. + * + * RETURNS: An appropriate -ERRNO error value on eror, or zero for success. + */ +int +pcibios_prep_mwi (struct pci_dev *dev) +{ + unsigned long desired_linesize, current_linesize; + int rc = 0; + u8 pci_linesize; + + desired_linesize = pci_cacheline_size(); + + pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &pci_linesize); + current_linesize = 4 * pci_linesize; + if (desired_linesize != current_linesize) { + printk(KERN_WARNING "PCI: slot %s has incorrect PCI cache line size of %lu bytes,", + pci_name(dev), current_linesize); + if (current_linesize > desired_linesize) { + printk(" expected %lu bytes instead\n", desired_linesize); + rc = -EINVAL; + } else { + printk(" correcting to %lu\n", desired_linesize); + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, desired_linesize / 4); + } + } + return rc; +} + +int pci_vector_resources(int last, int nr_released) +{ + int count = nr_released; + + count += (IA64_LAST_DEVICE_VECTOR - last); + + return count; +} diff --git a/arch/ia64/scripts/check-gas b/arch/ia64/scripts/check-gas new file mode 100755 index 000000000000..2499e0b2243d --- /dev/null +++ b/arch/ia64/scripts/check-gas @@ -0,0 +1,15 @@ +#!/bin/sh +dir=$(dirname $0) +CC=$1 +OBJDUMP=$2 +tmp=${TMPDIR:-/tmp} +out=$tmp/out$$.o +$CC -c $dir/check-gas-asm.S -o $out +res=$($OBJDUMP -r --section .data $out | fgrep 00004 | tr -s ' ' |cut -f3 -d' ') +rm -f $out +if [ $res != ".text" ]; then + echo buggy +else + echo good +fi +exit 0 diff --git a/arch/ia64/scripts/check-gas-asm.S b/arch/ia64/scripts/check-gas-asm.S new file mode 100644 index 000000000000..010e1d227e5d --- /dev/null +++ b/arch/ia64/scripts/check-gas-asm.S @@ -0,0 +1,2 @@ +[1:] nop 0 + .xdata4 ".data", 0, 1b-. diff --git a/arch/ia64/scripts/check-model.c b/arch/ia64/scripts/check-model.c new file mode 100644 index 000000000000..e1d4e86e3d63 --- /dev/null +++ b/arch/ia64/scripts/check-model.c @@ -0,0 +1 @@ +int __attribute__ ((__model__ (__small__))) x; diff --git a/arch/ia64/scripts/check-segrel.S b/arch/ia64/scripts/check-segrel.S new file mode 100644 index 000000000000..3be4e3dbeb83 --- /dev/null +++ b/arch/ia64/scripts/check-segrel.S @@ -0,0 +1,4 @@ + .rodata + data4 @segrel(start) + .data +start: diff --git a/arch/ia64/scripts/check-segrel.lds b/arch/ia64/scripts/check-segrel.lds new file mode 100644 index 000000000000..1c2f13e181d0 --- /dev/null +++ b/arch/ia64/scripts/check-segrel.lds @@ -0,0 +1,11 @@ +SECTIONS { + . = SIZEOF_HEADERS; + .rodata : { *(.rodata) } :ro + . = 0xa0000; + .data : { *(.data) } :dat + /DISCARD/ : { *(*) } +} +PHDRS { + ro PT_LOAD FILEHDR PHDRS; + dat PT_LOAD; +} diff --git a/arch/ia64/scripts/check-serialize.S b/arch/ia64/scripts/check-serialize.S new file mode 100644 index 000000000000..0400c106806c --- /dev/null +++ b/arch/ia64/scripts/check-serialize.S @@ -0,0 +1,2 @@ + .serialize.data + .serialize.instruction diff --git a/arch/ia64/scripts/check-text-align.S b/arch/ia64/scripts/check-text-align.S new file mode 100644 index 000000000000..03f586abb734 --- /dev/null +++ b/arch/ia64/scripts/check-text-align.S @@ -0,0 +1,6 @@ + .proc foo + .prologue +foo: .save rp, r2 + nop 0 + .align 64 + .endp foo diff --git a/arch/ia64/scripts/toolchain-flags b/arch/ia64/scripts/toolchain-flags new file mode 100755 index 000000000000..3f0c2adacb70 --- /dev/null +++ b/arch/ia64/scripts/toolchain-flags @@ -0,0 +1,53 @@ +#!/bin/sh +# +# Check whether linker can handle cross-segment @segrel(): +# +CPPFLAGS="" +CC=$1 +OBJDUMP=$2 +READELF=$3 +dir=$(dirname $0) +tmp=${TMPDIR:-/tmp} +out=$tmp/out$$ + +# Check whether cross-segment segment-relative relocs work fine. We need +# that for building the gate DSO: + +$CC -nostdlib -static -Wl,-T$dir/check-segrel.lds $dir/check-segrel.S -o $out +res=$($OBJDUMP --full --section .rodata $out | fgrep 000 | cut -f3 -d' ') +rm -f $out +if [ $res != 00000a00 ]; then + CPPFLAGS="$CPPFLAGS -DHAVE_BUGGY_SEGREL" + cat >&2 <<EOF +warning: your linker cannot handle cross-segment segment-relative relocations. + please upgrade to a newer version (it is safe to use this linker, but + the kernel will be bigger than strictly necessary). +EOF +fi + +# Check whether .align inside a function works as expected. + +$CC -c $dir/check-text-align.S -o $out +$READELF -u $out | fgrep -q 'prologue(rlen=12)' +res=$? +rm -f $out +if [ $res -eq 0 ]; then + CPPFLAGS="$CPPFLAGS -DHAVE_WORKING_TEXT_ALIGN" +fi + +if ! $CC -c $dir/check-model.c -o $out 2>&1 | grep __model__ | grep -q attrib +then + CPPFLAGS="$CPPFLAGS -DHAVE_MODEL_SMALL_ATTRIBUTE" +fi +rm -f $out + +# Check whether assembler supports .serialize.{data,instruction} directive. + +$CC -c $dir/check-serialize.S -o $out 2>/dev/null +res=$? +rm -f $out +if [ $res -eq 0 ]; then + CPPFLAGS="$CPPFLAGS -DHAVE_SERIALIZE_DIRECTIVE" +fi + +echo $CPPFLAGS diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py new file mode 100755 index 000000000000..c27849889e19 --- /dev/null +++ b/arch/ia64/scripts/unwcheck.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# +# Usage: unwcheck.py FILE +# +# This script checks the unwind info of each function in file FILE +# and verifies that the sum of the region-lengths matches the total +# length of the function. +# +# Based on a shell/awk script originally written by Harish Patil, +# which was converted to Perl by Matthew Chapman, which was converted +# to Python by David Mosberger. +# +import os +import re +import sys + +if len(sys.argv) != 2: + print "Usage: %s FILE" % sys.argv[0] + sys.exit(2) + +readelf = os.getenv("READELF", "readelf") + +start_pattern = re.compile("<([^>]*)>: \[0x([0-9a-f]+)-0x([0-9a-f]+)\]") +rlen_pattern = re.compile(".*rlen=([0-9]+)") + +def check_func (func, slots, rlen_sum): + if slots != rlen_sum: + global num_errors + num_errors += 1 + if not func: func = "[%#x-%#x]" % (start, end) + print "ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum) + return + +num_funcs = 0 +num_errors = 0 +func = False +slots = 0 +rlen_sum = 0 +for line in os.popen("%s -u %s" % (readelf, sys.argv[1])): + m = start_pattern.match(line) + if m: + check_func(func, slots, rlen_sum) + + func = m.group(1) + start = long(m.group(2), 16) + end = long(m.group(3), 16) + slots = 3 * (end - start) / 16 + rlen_sum = 0L + num_funcs += 1 + else: + m = rlen_pattern.match(line) + if m: + rlen_sum += long(m.group(1)) +check_func(func, slots, rlen_sum) + +if num_errors == 0: + print "No errors detected in %u functions." % num_funcs +else: + if num_errors > 1: + err="errors" + else: + err="error" + print "%u %s detected in %u functions." % (num_errors, err, num_funcs) + sys.exit(1) diff --git a/arch/ia64/sn/Makefile b/arch/ia64/sn/Makefile new file mode 100644 index 000000000000..a269f6d84c29 --- /dev/null +++ b/arch/ia64/sn/Makefile @@ -0,0 +1,14 @@ +# arch/ia64/sn/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2004 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn ia64 subplatform +# + +CPPFLAGS += -I$(srctree)/arch/ia64/sn/include + +obj-y += kernel/ pci/ diff --git a/arch/ia64/sn/include/ioerror.h b/arch/ia64/sn/include/ioerror.h new file mode 100644 index 000000000000..e68f2b0789a7 --- /dev/null +++ b/arch/ia64/sn/include/ioerror.h @@ -0,0 +1,81 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_IOERROR_H +#define _ASM_IA64_SN_IOERROR_H + +/* + * IO error structure. + * + * This structure would expand to hold the information retrieved from + * all IO related error registers. + * + * This structure is defined to hold all system specific + * information related to a single error. + * + * This serves a couple of purpose. + * - Error handling often involves translating one form of address to other + * form. So, instead of having different data structures at each level, + * we have a single structure, and the appropriate fields get filled in + * at each layer. + * - This provides a way to dump all error related information in any layer + * of erorr handling (debugging aid). + * + * A second possibility is to allow each layer to define its own error + * data structure, and fill in the proper fields. This has the advantage + * of isolating the layers. + * A big concern is the potential stack usage (and overflow), if each layer + * defines these structures on stack (assuming we don't want to do kmalloc. + * + * Any layer wishing to pass extra information to a layer next to it in + * error handling hierarchy, can do so as a separate parameter. + */ + +typedef struct io_error_s { + /* Bit fields indicating which structure fields are valid */ + union { + struct { + unsigned ievb_errortype:1; + unsigned ievb_widgetnum:1; + unsigned ievb_widgetdev:1; + unsigned ievb_srccpu:1; + unsigned ievb_srcnode:1; + unsigned ievb_errnode:1; + unsigned ievb_sysioaddr:1; + unsigned ievb_xtalkaddr:1; + unsigned ievb_busspace:1; + unsigned ievb_busaddr:1; + unsigned ievb_vaddr:1; + unsigned ievb_memaddr:1; + unsigned ievb_epc:1; + unsigned ievb_ef:1; + unsigned ievb_tnum:1; + } iev_b; + unsigned iev_a; + } ie_v; + + short ie_errortype; /* error type: extra info about error */ + short ie_widgetnum; /* Widget number that's in error */ + short ie_widgetdev; /* Device within widget in error */ + cpuid_t ie_srccpu; /* CPU on srcnode generating error */ + cnodeid_t ie_srcnode; /* Node which caused the error */ + cnodeid_t ie_errnode; /* Node where error was noticed */ + iopaddr_t ie_sysioaddr; /* Sys specific IO address */ + iopaddr_t ie_xtalkaddr; /* Xtalk (48bit) addr of Error */ + iopaddr_t ie_busspace; /* Bus specific address space */ + iopaddr_t ie_busaddr; /* Bus specific address */ + caddr_t ie_vaddr; /* Virtual address of error */ + iopaddr_t ie_memaddr; /* Physical memory address */ + caddr_t ie_epc; /* pc when error reported */ + caddr_t ie_ef; /* eframe when error reported */ + short ie_tnum; /* Xtalk TNUM field */ +} ioerror_t; + +#define IOERROR_INIT(e) do { (e)->ie_v.iev_a = 0; } while (0) +#define IOERROR_SETVALUE(e,f,v) do { (e)->ie_ ## f = (v); (e)->ie_v.iev_b.ievb_ ## f = 1; } while (0) + +#endif /* _ASM_IA64_SN_IOERROR_H */ diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/arch/ia64/sn/include/pci/pcibr_provider.h new file mode 100644 index 000000000000..b1f05ffec70b --- /dev/null +++ b/arch/ia64/sn/include/pci/pcibr_provider.h @@ -0,0 +1,149 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H +#define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H + +/* Workarounds */ +#define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */ + +#define BUSTYPE_MASK 0x1 + +/* Macros given a pcibus structure */ +#define IS_PCIX(ps) ((ps)->pbi_bridge_mode & BUSTYPE_MASK) +#define IS_PCI_BRIDGE_ASIC(asic) (asic == PCIIO_ASIC_TYPE_PIC || \ + asic == PCIIO_ASIC_TYPE_TIOCP) +#define IS_PIC_SOFT(ps) (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC) + + +/* + * The different PCI Bridge types supported on the SGI Altix platforms + */ +#define PCIBR_BRIDGETYPE_UNKNOWN -1 +#define PCIBR_BRIDGETYPE_PIC 2 +#define PCIBR_BRIDGETYPE_TIOCP 3 + +/* + * Bridge 64bit Direct Map Attributes + */ +#define PCI64_ATTR_PREF (1ull << 59) +#define PCI64_ATTR_PREC (1ull << 58) +#define PCI64_ATTR_VIRTUAL (1ull << 57) +#define PCI64_ATTR_BAR (1ull << 56) +#define PCI64_ATTR_SWAP (1ull << 55) +#define PCI64_ATTR_VIRTUAL1 (1ull << 54) + +#define PCI32_LOCAL_BASE 0 +#define PCI32_MAPPED_BASE 0x40000000 +#define PCI32_DIRECT_BASE 0x80000000 + +#define IS_PCI32_MAPPED(x) ((uint64_t)(x) < PCI32_DIRECT_BASE && \ + (uint64_t)(x) >= PCI32_MAPPED_BASE) +#define IS_PCI32_DIRECT(x) ((uint64_t)(x) >= PCI32_MAPPED_BASE) + + +/* + * Bridge PMU Address Transaltion Entry Attibutes + */ +#define PCI32_ATE_V (0x1 << 0) +#define PCI32_ATE_CO (0x1 << 1) +#define PCI32_ATE_PREC (0x1 << 2) +#define PCI32_ATE_PREF (0x1 << 3) +#define PCI32_ATE_BAR (0x1 << 4) +#define PCI32_ATE_ADDR_SHFT 12 + +#define MINIMAL_ATES_REQUIRED(addr, size) \ + (IOPG(IOPGOFF(addr) + (size) - 1) == IOPG((size) - 1)) + +#define MINIMAL_ATE_FLAG(addr, size) \ + (MINIMAL_ATES_REQUIRED((uint64_t)addr, size) ? 1 : 0) + +/* bit 29 of the pci address is the SWAP bit */ +#define ATE_SWAPSHIFT 29 +#define ATE_SWAP_ON(x) ((x) |= (1 << ATE_SWAPSHIFT)) +#define ATE_SWAP_OFF(x) ((x) &= ~(1 << ATE_SWAPSHIFT)) + +/* + * I/O page size + */ +#if PAGE_SIZE < 16384 +#define IOPFNSHIFT 12 /* 4K per mapped page */ +#else +#define IOPFNSHIFT 14 /* 16K per mapped page */ +#endif + +#define IOPGSIZE (1 << IOPFNSHIFT) +#define IOPG(x) ((x) >> IOPFNSHIFT) +#define IOPGOFF(x) ((x) & (IOPGSIZE-1)) + +#define PCIBR_DEV_SWAP_DIR (1ull << 19) +#define PCIBR_CTRL_PAGE_SIZE (0x1 << 21) + +/* + * PMU resources. + */ +struct ate_resource{ + uint64_t *ate; + uint64_t num_ate; + uint64_t lowest_free_index; +}; + +struct pcibus_info { + struct pcibus_bussoft pbi_buscommon; /* common header */ + uint32_t pbi_moduleid; + short pbi_bridge_type; + short pbi_bridge_mode; + + struct ate_resource pbi_int_ate_resource; + uint64_t pbi_int_ate_size; + + uint64_t pbi_dir_xbase; + char pbi_hub_xid; + + uint64_t pbi_devreg[8]; + spinlock_t pbi_lock; + + uint32_t pbi_valid_devices; + uint32_t pbi_enabled_devices; +}; + +/* + * pcibus_info structure locking macros + */ +inline static unsigned long +pcibr_lock(struct pcibus_info *pcibus_info) +{ + unsigned long flag; + spin_lock_irqsave(&pcibus_info->pbi_lock, flag); + return(flag); +} +#define pcibr_unlock(pcibus_info, flag) spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag) + +extern void *pcibr_bus_fixup(struct pcibus_bussoft *); +extern uint64_t pcibr_dma_map(struct pcidev_info *, unsigned long, size_t, unsigned int); +extern void pcibr_dma_unmap(struct pcidev_info *, dma_addr_t, int); + +/* + * prototypes for the bridge asic register access routines in pcibr_reg.c + */ +extern void pcireg_control_bit_clr(struct pcibus_info *, uint64_t); +extern void pcireg_control_bit_set(struct pcibus_info *, uint64_t); +extern uint64_t pcireg_tflush_get(struct pcibus_info *); +extern uint64_t pcireg_intr_status_get(struct pcibus_info *); +extern void pcireg_intr_enable_bit_clr(struct pcibus_info *, uint64_t); +extern void pcireg_intr_enable_bit_set(struct pcibus_info *, uint64_t); +extern void pcireg_intr_addr_addr_set(struct pcibus_info *, int, uint64_t); +extern void pcireg_force_intr_set(struct pcibus_info *, int); +extern uint64_t pcireg_wrb_flush_get(struct pcibus_info *, int); +extern void pcireg_int_ate_set(struct pcibus_info *, int, uint64_t); +extern uint64_t * pcireg_int_ate_addr(struct pcibus_info *, int); +extern void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info); +extern void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info); +extern int pcibr_ate_alloc(struct pcibus_info *, int); +extern void pcibr_ate_free(struct pcibus_info *, int); +extern void ate_write(struct pcibus_info *, int, int, uint64_t); +#endif diff --git a/arch/ia64/sn/include/pci/pcibus_provider_defs.h b/arch/ia64/sn/include/pci/pcibus_provider_defs.h new file mode 100644 index 000000000000..07065615bbea --- /dev/null +++ b/arch/ia64/sn/include/pci/pcibus_provider_defs.h @@ -0,0 +1,43 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H +#define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H + +/* + * SN pci asic types. Do not ever renumber these or reuse values. The + * values must agree with what prom thinks they are. + */ + +#define PCIIO_ASIC_TYPE_UNKNOWN 0 +#define PCIIO_ASIC_TYPE_PPB 1 +#define PCIIO_ASIC_TYPE_PIC 2 +#define PCIIO_ASIC_TYPE_TIOCP 3 + +/* + * Common pciio bus provider data. There should be one of these as the + * first field in any pciio based provider soft structure (e.g. pcibr_soft + * tioca_soft, etc). + */ + +struct pcibus_bussoft { + uint32_t bs_asic_type; /* chipset type */ + uint32_t bs_xid; /* xwidget id */ + uint64_t bs_persist_busnum; /* Persistent Bus Number */ + uint64_t bs_legacy_io; /* legacy io pio addr */ + uint64_t bs_legacy_mem; /* legacy mem pio addr */ + uint64_t bs_base; /* widget base */ + struct xwidget_info *bs_xwidget_info; +}; + +/* + * DMA mapping flags + */ + +#define SN_PCIDMA_CONSISTENT 0x0001 + +#endif /* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */ diff --git a/arch/ia64/sn/include/pci/pcidev.h b/arch/ia64/sn/include/pci/pcidev.h new file mode 100644 index 000000000000..81eb95d3bf47 --- /dev/null +++ b/arch/ia64/sn/include/pci/pcidev.h @@ -0,0 +1,54 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_PCI_PCIDEV_H +#define _ASM_IA64_SN_PCI_PCIDEV_H + +#include <linux/pci.h> + +extern struct sn_irq_info **sn_irq; + +#define SN_PCIDEV_INFO(pci_dev) \ + ((struct pcidev_info *)(pci_dev)->sysdata) + +/* + * Given a pci_bus, return the sn pcibus_bussoft struct. Note that + * this only works for root busses, not for busses represented by PPB's. + */ + +#define SN_PCIBUS_BUSSOFT(pci_bus) \ + ((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data)) + +/* + * Given a struct pci_dev, return the sn pcibus_bussoft struct. Note + * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due + * due to possible PPB's in the path. + */ + +#define SN_PCIDEV_BUSSOFT(pci_dev) \ + (SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info) + +#define PCIIO_BUS_NONE 255 /* bus 255 reserved */ +#define PCIIO_SLOT_NONE 255 +#define PCIIO_FUNC_NONE 255 +#define PCIIO_VENDOR_ID_NONE (-1) + +struct pcidev_info { + uint64_t pdi_pio_mapped_addr[7]; /* 6 BARs PLUS 1 ROM */ + uint64_t pdi_slot_host_handle; /* Bus and devfn Host pci_dev */ + + struct pcibus_bussoft *pdi_pcibus_info; /* Kernel common bus soft */ + struct pcidev_info *pdi_host_pcidev_info; /* Kernel Host pci_dev */ + struct pci_dev *pdi_linux_pcidev; /* Kernel pci_dev */ + + struct sn_irq_info *pdi_sn_irq_info; +}; + +extern void sn_irq_fixup(struct pci_dev *pci_dev, + struct sn_irq_info *sn_irq_info); + +#endif /* _ASM_IA64_SN_PCI_PCIDEV_H */ diff --git a/arch/ia64/sn/include/pci/pic.h b/arch/ia64/sn/include/pci/pic.h new file mode 100644 index 000000000000..fd18acecb1e6 --- /dev/null +++ b/arch/ia64/sn/include/pci/pic.h @@ -0,0 +1,261 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_PCI_PIC_H +#define _ASM_IA64_SN_PCI_PIC_H + +/* + * PIC AS DEVICE ZERO + * ------------------ + * + * PIC handles PCI/X busses. PCI/X requires that the 'bridge' (i.e. PIC) + * be designated as 'device 0'. That is a departure from earlier SGI + * PCI bridges. Because of that we use config space 1 to access the + * config space of the first actual PCI device on the bus. + * Here's what the PIC manual says: + * + * The current PCI-X bus specification now defines that the parent + * hosts bus bridge (PIC for example) must be device 0 on bus 0. PIC + * reduced the total number of devices from 8 to 4 and removed the + * device registers and windows, now only supporting devices 0,1,2, and + * 3. PIC did leave all 8 configuration space windows. The reason was + * there was nothing to gain by removing them. Here in lies the problem. + * The device numbering we do using 0 through 3 is unrelated to the device + * numbering which PCI-X requires in configuration space. In the past we + * correlated Configs pace and our device space 0 <-> 0, 1 <-> 1, etc. + * PCI-X requires we start a 1, not 0 and currently the PX brick + * does associate our: + * + * device 0 with configuration space window 1, + * device 1 with configuration space window 2, + * device 2 with configuration space window 3, + * device 3 with configuration space window 4. + * + * The net effect is that all config space access are off-by-one with + * relation to other per-slot accesses on the PIC. + * Here is a table that shows some of that: + * + * Internal Slot# + * | + * | 0 1 2 3 + * ----------|--------------------------------------- + * config | 0x21000 0x22000 0x23000 0x24000 + * | + * even rrb | 0[0] n/a 1[0] n/a [] == implied even/odd + * | + * odd rrb | n/a 0[1] n/a 1[1] + * | + * int dev | 00 01 10 11 + * | + * ext slot# | 1 2 3 4 + * ----------|--------------------------------------- + */ + +#define PIC_ATE_TARGETID_SHFT 8 +#define PIC_HOST_INTR_ADDR 0x0000FFFFFFFFFFFFUL +#define PIC_PCI64_ATTR_TARG_SHFT 60 + + +/***************************************************************************** + *********************** PIC MMR structure mapping *************************** + *****************************************************************************/ + +/* NOTE: PIC WAR. PV#854697. PIC does not allow writes just to [31:0] + * of a 64-bit register. When writing PIC registers, always write the + * entire 64 bits. + */ + +struct pic { + + /* 0x000000-0x00FFFF -- Local Registers */ + + /* 0x000000-0x000057 -- Standard Widget Configuration */ + uint64_t p_wid_id; /* 0x000000 */ + uint64_t p_wid_stat; /* 0x000008 */ + uint64_t p_wid_err_upper; /* 0x000010 */ + uint64_t p_wid_err_lower; /* 0x000018 */ + #define p_wid_err p_wid_err_lower + uint64_t p_wid_control; /* 0x000020 */ + uint64_t p_wid_req_timeout; /* 0x000028 */ + uint64_t p_wid_int_upper; /* 0x000030 */ + uint64_t p_wid_int_lower; /* 0x000038 */ + #define p_wid_int p_wid_int_lower + uint64_t p_wid_err_cmdword; /* 0x000040 */ + uint64_t p_wid_llp; /* 0x000048 */ + uint64_t p_wid_tflush; /* 0x000050 */ + + /* 0x000058-0x00007F -- Bridge-specific Widget Configuration */ + uint64_t p_wid_aux_err; /* 0x000058 */ + uint64_t p_wid_resp_upper; /* 0x000060 */ + uint64_t p_wid_resp_lower; /* 0x000068 */ + #define p_wid_resp p_wid_resp_lower + uint64_t p_wid_tst_pin_ctrl; /* 0x000070 */ + uint64_t p_wid_addr_lkerr; /* 0x000078 */ + + /* 0x000080-0x00008F -- PMU & MAP */ + uint64_t p_dir_map; /* 0x000080 */ + uint64_t _pad_000088; /* 0x000088 */ + + /* 0x000090-0x00009F -- SSRAM */ + uint64_t p_map_fault; /* 0x000090 */ + uint64_t _pad_000098; /* 0x000098 */ + + /* 0x0000A0-0x0000AF -- Arbitration */ + uint64_t p_arb; /* 0x0000A0 */ + uint64_t _pad_0000A8; /* 0x0000A8 */ + + /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */ + uint64_t p_ate_parity_err; /* 0x0000B0 */ + uint64_t _pad_0000B8; /* 0x0000B8 */ + + /* 0x0000C0-0x0000FF -- PCI/GIO */ + uint64_t p_bus_timeout; /* 0x0000C0 */ + uint64_t p_pci_cfg; /* 0x0000C8 */ + uint64_t p_pci_err_upper; /* 0x0000D0 */ + uint64_t p_pci_err_lower; /* 0x0000D8 */ + #define p_pci_err p_pci_err_lower + uint64_t _pad_0000E0[4]; /* 0x0000{E0..F8} */ + + /* 0x000100-0x0001FF -- Interrupt */ + uint64_t p_int_status; /* 0x000100 */ + uint64_t p_int_enable; /* 0x000108 */ + uint64_t p_int_rst_stat; /* 0x000110 */ + uint64_t p_int_mode; /* 0x000118 */ + uint64_t p_int_device; /* 0x000120 */ + uint64_t p_int_host_err; /* 0x000128 */ + uint64_t p_int_addr[8]; /* 0x0001{30,,,68} */ + uint64_t p_err_int_view; /* 0x000170 */ + uint64_t p_mult_int; /* 0x000178 */ + uint64_t p_force_always[8]; /* 0x0001{80,,,B8} */ + uint64_t p_force_pin[8]; /* 0x0001{C0,,,F8} */ + + /* 0x000200-0x000298 -- Device */ + uint64_t p_device[4]; /* 0x0002{00,,,18} */ + uint64_t _pad_000220[4]; /* 0x0002{20,,,38} */ + uint64_t p_wr_req_buf[4]; /* 0x0002{40,,,58} */ + uint64_t _pad_000260[4]; /* 0x0002{60,,,78} */ + uint64_t p_rrb_map[2]; /* 0x0002{80,,,88} */ + #define p_even_resp p_rrb_map[0] /* 0x000280 */ + #define p_odd_resp p_rrb_map[1] /* 0x000288 */ + uint64_t p_resp_status; /* 0x000290 */ + uint64_t p_resp_clear; /* 0x000298 */ + + uint64_t _pad_0002A0[12]; /* 0x0002{A0..F8} */ + + /* 0x000300-0x0003F8 -- Buffer Address Match Registers */ + struct { + uint64_t upper; /* 0x0003{00,,,F0} */ + uint64_t lower; /* 0x0003{08,,,F8} */ + } p_buf_addr_match[16]; + + /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */ + struct { + uint64_t flush_w_touch; /* 0x000{400,,,5C0} */ + uint64_t flush_wo_touch; /* 0x000{408,,,5C8} */ + uint64_t inflight; /* 0x000{410,,,5D0} */ + uint64_t prefetch; /* 0x000{418,,,5D8} */ + uint64_t total_pci_retry; /* 0x000{420,,,5E0} */ + uint64_t max_pci_retry; /* 0x000{428,,,5E8} */ + uint64_t max_latency; /* 0x000{430,,,5F0} */ + uint64_t clear_all; /* 0x000{438,,,5F8} */ + } p_buf_count[8]; + + + /* 0x000600-0x0009FF -- PCI/X registers */ + uint64_t p_pcix_bus_err_addr; /* 0x000600 */ + uint64_t p_pcix_bus_err_attr; /* 0x000608 */ + uint64_t p_pcix_bus_err_data; /* 0x000610 */ + uint64_t p_pcix_pio_split_addr; /* 0x000618 */ + uint64_t p_pcix_pio_split_attr; /* 0x000620 */ + uint64_t p_pcix_dma_req_err_attr; /* 0x000628 */ + uint64_t p_pcix_dma_req_err_addr; /* 0x000630 */ + uint64_t p_pcix_timeout; /* 0x000638 */ + + uint64_t _pad_000640[120]; /* 0x000{640,,,9F8} */ + + /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */ + struct { + uint64_t p_buf_addr; /* 0x000{A00,,,AF0} */ + uint64_t p_buf_attr; /* 0X000{A08,,,AF8} */ + } p_pcix_read_buf_64[16]; + + struct { + uint64_t p_buf_addr; /* 0x000{B00,,,BE0} */ + uint64_t p_buf_attr; /* 0x000{B08,,,BE8} */ + uint64_t p_buf_valid; /* 0x000{B10,,,BF0} */ + uint64_t __pad1; /* 0x000{B18,,,BF8} */ + } p_pcix_write_buf_64[8]; + + /* End of Local Registers -- Start of Address Map space */ + + char _pad_000c00[0x010000 - 0x000c00]; + + /* 0x010000-0x011fff -- Internal ATE RAM (Auto Parity Generation) */ + uint64_t p_int_ate_ram[1024]; /* 0x010000-0x011fff */ + + /* 0x012000-0x013fff -- Internal ATE RAM (Manual Parity Generation) */ + uint64_t p_int_ate_ram_mp[1024]; /* 0x012000-0x013fff */ + + char _pad_014000[0x18000 - 0x014000]; + + /* 0x18000-0x197F8 -- PIC Write Request Ram */ + uint64_t p_wr_req_lower[256]; /* 0x18000 - 0x187F8 */ + uint64_t p_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */ + uint64_t p_wr_req_parity[256]; /* 0x19000 - 0x197F8 */ + + char _pad_019800[0x20000 - 0x019800]; + + /* 0x020000-0x027FFF -- PCI Device Configuration Spaces */ + union { + uint8_t c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */ + uint16_t s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */ + uint32_t l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */ + uint64_t d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */ + union { + uint8_t c[0x100 / 1]; + uint16_t s[0x100 / 2]; + uint32_t l[0x100 / 4]; + uint64_t d[0x100 / 8]; + } f[8]; + } p_type0_cfg_dev[8]; /* 0x02{0000,,,7FFF} */ + + /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */ + union { + uint8_t c[0x1000 / 1]; /* 0x028000-0x029000 */ + uint16_t s[0x1000 / 2]; /* 0x028000-0x029000 */ + uint32_t l[0x1000 / 4]; /* 0x028000-0x029000 */ + uint64_t d[0x1000 / 8]; /* 0x028000-0x029000 */ + union { + uint8_t c[0x100 / 1]; + uint16_t s[0x100 / 2]; + uint32_t l[0x100 / 4]; + uint64_t d[0x100 / 8]; + } f[8]; + } p_type1_cfg; /* 0x028000-0x029000 */ + + char _pad_029000[0x030000-0x029000]; + + /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */ + union { + uint8_t c[8 / 1]; + uint16_t s[8 / 2]; + uint32_t l[8 / 4]; + uint64_t d[8 / 8]; + } p_pci_iack; /* 0x030000-0x030007 */ + + char _pad_030007[0x040000-0x030008]; + + /* 0x040000-0x030007 -- PCIX Special Cycle */ + union { + uint8_t c[8 / 1]; + uint16_t s[8 / 2]; + uint32_t l[8 / 4]; + uint64_t d[8 / 8]; + } p_pcix_cycle; /* 0x040000-0x040007 */ +}; + +#endif /* _ASM_IA64_SN_PCI_PIC_H */ diff --git a/arch/ia64/sn/include/pci/tiocp.h b/arch/ia64/sn/include/pci/tiocp.h new file mode 100644 index 000000000000..f07c83b2bf6e --- /dev/null +++ b/arch/ia64/sn/include/pci/tiocp.h @@ -0,0 +1,256 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003-2004 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_PCI_TIOCP_H +#define _ASM_IA64_SN_PCI_TIOCP_H + +#define TIOCP_HOST_INTR_ADDR 0x003FFFFFFFFFFFFFUL +#define TIOCP_PCI64_CMDTYPE_MEM (0x1ull << 60) + + +/***************************************************************************** + *********************** TIOCP MMR structure mapping *************************** + *****************************************************************************/ + +struct tiocp{ + + /* 0x000000-0x00FFFF -- Local Registers */ + + /* 0x000000-0x000057 -- (Legacy Widget Space) Configuration */ + uint64_t cp_id; /* 0x000000 */ + uint64_t cp_stat; /* 0x000008 */ + uint64_t cp_err_upper; /* 0x000010 */ + uint64_t cp_err_lower; /* 0x000018 */ + #define cp_err cp_err_lower + uint64_t cp_control; /* 0x000020 */ + uint64_t cp_req_timeout; /* 0x000028 */ + uint64_t cp_intr_upper; /* 0x000030 */ + uint64_t cp_intr_lower; /* 0x000038 */ + #define cp_intr cp_intr_lower + uint64_t cp_err_cmdword; /* 0x000040 */ + uint64_t _pad_000048; /* 0x000048 */ + uint64_t cp_tflush; /* 0x000050 */ + + /* 0x000058-0x00007F -- Bridge-specific Configuration */ + uint64_t cp_aux_err; /* 0x000058 */ + uint64_t cp_resp_upper; /* 0x000060 */ + uint64_t cp_resp_lower; /* 0x000068 */ + #define cp_resp cp_resp_lower + uint64_t cp_tst_pin_ctrl; /* 0x000070 */ + uint64_t cp_addr_lkerr; /* 0x000078 */ + + /* 0x000080-0x00008F -- PMU & MAP */ + uint64_t cp_dir_map; /* 0x000080 */ + uint64_t _pad_000088; /* 0x000088 */ + + /* 0x000090-0x00009F -- SSRAM */ + uint64_t cp_map_fault; /* 0x000090 */ + uint64_t _pad_000098; /* 0x000098 */ + + /* 0x0000A0-0x0000AF -- Arbitration */ + uint64_t cp_arb; /* 0x0000A0 */ + uint64_t _pad_0000A8; /* 0x0000A8 */ + + /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */ + uint64_t cp_ate_parity_err; /* 0x0000B0 */ + uint64_t _pad_0000B8; /* 0x0000B8 */ + + /* 0x0000C0-0x0000FF -- PCI/GIO */ + uint64_t cp_bus_timeout; /* 0x0000C0 */ + uint64_t cp_pci_cfg; /* 0x0000C8 */ + uint64_t cp_pci_err_upper; /* 0x0000D0 */ + uint64_t cp_pci_err_lower; /* 0x0000D8 */ + #define cp_pci_err cp_pci_err_lower + uint64_t _pad_0000E0[4]; /* 0x0000{E0..F8} */ + + /* 0x000100-0x0001FF -- Interrupt */ + uint64_t cp_int_status; /* 0x000100 */ + uint64_t cp_int_enable; /* 0x000108 */ + uint64_t cp_int_rst_stat; /* 0x000110 */ + uint64_t cp_int_mode; /* 0x000118 */ + uint64_t cp_int_device; /* 0x000120 */ + uint64_t cp_int_host_err; /* 0x000128 */ + uint64_t cp_int_addr[8]; /* 0x0001{30,,,68} */ + uint64_t cp_err_int_view; /* 0x000170 */ + uint64_t cp_mult_int; /* 0x000178 */ + uint64_t cp_force_always[8]; /* 0x0001{80,,,B8} */ + uint64_t cp_force_pin[8]; /* 0x0001{C0,,,F8} */ + + /* 0x000200-0x000298 -- Device */ + uint64_t cp_device[4]; /* 0x0002{00,,,18} */ + uint64_t _pad_000220[4]; /* 0x0002{20,,,38} */ + uint64_t cp_wr_req_buf[4]; /* 0x0002{40,,,58} */ + uint64_t _pad_000260[4]; /* 0x0002{60,,,78} */ + uint64_t cp_rrb_map[2]; /* 0x0002{80,,,88} */ + #define cp_even_resp cp_rrb_map[0] /* 0x000280 */ + #define cp_odd_resp cp_rrb_map[1] /* 0x000288 */ + uint64_t cp_resp_status; /* 0x000290 */ + uint64_t cp_resp_clear; /* 0x000298 */ + + uint64_t _pad_0002A0[12]; /* 0x0002{A0..F8} */ + + /* 0x000300-0x0003F8 -- Buffer Address Match Registers */ + struct { + uint64_t upper; /* 0x0003{00,,,F0} */ + uint64_t lower; /* 0x0003{08,,,F8} */ + } cp_buf_addr_match[16]; + + /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */ + struct { + uint64_t flush_w_touch; /* 0x000{400,,,5C0} */ + uint64_t flush_wo_touch; /* 0x000{408,,,5C8} */ + uint64_t inflight; /* 0x000{410,,,5D0} */ + uint64_t prefetch; /* 0x000{418,,,5D8} */ + uint64_t total_pci_retry; /* 0x000{420,,,5E0} */ + uint64_t max_pci_retry; /* 0x000{428,,,5E8} */ + uint64_t max_latency; /* 0x000{430,,,5F0} */ + uint64_t clear_all; /* 0x000{438,,,5F8} */ + } cp_buf_count[8]; + + + /* 0x000600-0x0009FF -- PCI/X registers */ + uint64_t cp_pcix_bus_err_addr; /* 0x000600 */ + uint64_t cp_pcix_bus_err_attr; /* 0x000608 */ + uint64_t cp_pcix_bus_err_data; /* 0x000610 */ + uint64_t cp_pcix_pio_split_addr; /* 0x000618 */ + uint64_t cp_pcix_pio_split_attr; /* 0x000620 */ + uint64_t cp_pcix_dma_req_err_attr; /* 0x000628 */ + uint64_t cp_pcix_dma_req_err_addr; /* 0x000630 */ + uint64_t cp_pcix_timeout; /* 0x000638 */ + + uint64_t _pad_000640[24]; /* 0x000{640,,,6F8} */ + + /* 0x000700-0x000737 -- Debug Registers */ + uint64_t cp_ct_debug_ctl; /* 0x000700 */ + uint64_t cp_br_debug_ctl; /* 0x000708 */ + uint64_t cp_mux3_debug_ctl; /* 0x000710 */ + uint64_t cp_mux4_debug_ctl; /* 0x000718 */ + uint64_t cp_mux5_debug_ctl; /* 0x000720 */ + uint64_t cp_mux6_debug_ctl; /* 0x000728 */ + uint64_t cp_mux7_debug_ctl; /* 0x000730 */ + + uint64_t _pad_000738[89]; /* 0x000{738,,,9F8} */ + + /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */ + struct { + uint64_t cp_buf_addr; /* 0x000{A00,,,AF0} */ + uint64_t cp_buf_attr; /* 0X000{A08,,,AF8} */ + } cp_pcix_read_buf_64[16]; + + struct { + uint64_t cp_buf_addr; /* 0x000{B00,,,BE0} */ + uint64_t cp_buf_attr; /* 0x000{B08,,,BE8} */ + uint64_t cp_buf_valid; /* 0x000{B10,,,BF0} */ + uint64_t __pad1; /* 0x000{B18,,,BF8} */ + } cp_pcix_write_buf_64[8]; + + /* End of Local Registers -- Start of Address Map space */ + + char _pad_000c00[0x010000 - 0x000c00]; + + /* 0x010000-0x011FF8 -- Internal ATE RAM (Auto Parity Generation) */ + uint64_t cp_int_ate_ram[1024]; /* 0x010000-0x011FF8 */ + + char _pad_012000[0x14000 - 0x012000]; + + /* 0x014000-0x015FF8 -- Internal ATE RAM (Manual Parity Generation) */ + uint64_t cp_int_ate_ram_mp[1024]; /* 0x014000-0x015FF8 */ + + char _pad_016000[0x18000 - 0x016000]; + + /* 0x18000-0x197F8 -- TIOCP Write Request Ram */ + uint64_t cp_wr_req_lower[256]; /* 0x18000 - 0x187F8 */ + uint64_t cp_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */ + uint64_t cp_wr_req_parity[256]; /* 0x19000 - 0x197F8 */ + + char _pad_019800[0x1C000 - 0x019800]; + + /* 0x1C000-0x1EFF8 -- TIOCP Read Response Ram */ + uint64_t cp_rd_resp_lower[512]; /* 0x1C000 - 0x1CFF8 */ + uint64_t cp_rd_resp_upper[512]; /* 0x1D000 - 0x1DFF8 */ + uint64_t cp_rd_resp_parity[512]; /* 0x1E000 - 0x1EFF8 */ + + char _pad_01F000[0x20000 - 0x01F000]; + + /* 0x020000-0x021FFF -- Host Device (CP) Configuration Space (not used) */ + char _pad_020000[0x021000 - 0x20000]; + + /* 0x021000-0x027FFF -- PCI Device Configuration Spaces */ + union { + uint8_t c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */ + uint16_t s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */ + uint32_t l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */ + uint64_t d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */ + union { + uint8_t c[0x100 / 1]; + uint16_t s[0x100 / 2]; + uint32_t l[0x100 / 4]; + uint64_t d[0x100 / 8]; + } f[8]; + } cp_type0_cfg_dev[7]; /* 0x02{1000,,,7FFF} */ + + /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */ + union { + uint8_t c[0x1000 / 1]; /* 0x028000-0x029000 */ + uint16_t s[0x1000 / 2]; /* 0x028000-0x029000 */ + uint32_t l[0x1000 / 4]; /* 0x028000-0x029000 */ + uint64_t d[0x1000 / 8]; /* 0x028000-0x029000 */ + union { + uint8_t c[0x100 / 1]; + uint16_t s[0x100 / 2]; + uint32_t l[0x100 / 4]; + uint64_t d[0x100 / 8]; + } f[8]; + } cp_type1_cfg; /* 0x028000-0x029000 */ + + char _pad_029000[0x030000-0x029000]; + + /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */ + union { + uint8_t c[8 / 1]; + uint16_t s[8 / 2]; + uint32_t l[8 / 4]; + uint64_t d[8 / 8]; + } cp_pci_iack; /* 0x030000-0x030007 */ + + char _pad_030007[0x040000-0x030008]; + + /* 0x040000-0x040007 -- PCIX Special Cycle */ + union { + uint8_t c[8 / 1]; + uint16_t s[8 / 2]; + uint32_t l[8 / 4]; + uint64_t d[8 / 8]; + } cp_pcix_cycle; /* 0x040000-0x040007 */ + + char _pad_040007[0x200000-0x040008]; + + /* 0x200000-0x7FFFFF -- PCI/GIO Device Spaces */ + union { + uint8_t c[0x100000 / 1]; + uint16_t s[0x100000 / 2]; + uint32_t l[0x100000 / 4]; + uint64_t d[0x100000 / 8]; + } cp_devio_raw[6]; /* 0x200000-0x7FFFFF */ + + #define cp_devio(n) cp_devio_raw[((n)<2)?(n*2):(n+2)] + + char _pad_800000[0xA00000-0x800000]; + + /* 0xA00000-0xBFFFFF -- PCI/GIO Device Spaces w/flush */ + union { + uint8_t c[0x100000 / 1]; + uint16_t s[0x100000 / 2]; + uint32_t l[0x100000 / 4]; + uint64_t d[0x100000 / 8]; + } cp_devio_raw_flush[6]; /* 0xA00000-0xBFFFFF */ + + #define cp_devio_flush(n) cp_devio_raw_flush[((n)<2)?(n*2):(n+2)] + +}; + +#endif /* _ASM_IA64_SN_PCI_TIOCP_H */ diff --git a/arch/ia64/sn/include/tio.h b/arch/ia64/sn/include/tio.h new file mode 100644 index 000000000000..0139124dd54a --- /dev/null +++ b/arch/ia64/sn/include/tio.h @@ -0,0 +1,37 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_TIO_H +#define _ASM_IA64_SN_TIO_H + +#define TIO_MMR_ADDR_MOD + +#define TIO_NODE_ID TIO_MMR_ADDR_MOD(0x0000000090060e80) + +#define TIO_ITTE_BASE 0xb0008800 /* base of translation table entries */ +#define TIO_ITTE(bigwin) (TIO_ITTE_BASE + 8*(bigwin)) + +#define TIO_ITTE_OFFSET_BITS 8 /* size of offset field */ +#define TIO_ITTE_OFFSET_MASK ((1<<TIO_ITTE_OFFSET_BITS)-1) +#define TIO_ITTE_OFFSET_SHIFT 0 + +#define TIO_ITTE_WIDGET_BITS 2 /* size of widget field */ +#define TIO_ITTE_WIDGET_MASK ((1<<TIO_ITTE_WIDGET_BITS)-1) +#define TIO_ITTE_WIDGET_SHIFT 12 +#define TIO_ITTE_VALID_MASK 0x1 +#define TIO_ITTE_VALID_SHIFT 16 + + +#define TIO_ITTE_PUT(nasid, bigwin, widget, addr, valid) \ + REMOTE_HUB_S((nasid), TIO_ITTE(bigwin), \ + (((((addr) >> TIO_BWIN_SIZE_BITS) & \ + TIO_ITTE_OFFSET_MASK) << TIO_ITTE_OFFSET_SHIFT) | \ + (((widget) & TIO_ITTE_WIDGET_MASK) << TIO_ITTE_WIDGET_SHIFT)) | \ + (( (valid) & TIO_ITTE_VALID_MASK) << TIO_ITTE_VALID_SHIFT)) + +#endif /* _ASM_IA64_SN_TIO_H */ diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h new file mode 100644 index 000000000000..868e7ecae84b --- /dev/null +++ b/arch/ia64/sn/include/xtalk/hubdev.h @@ -0,0 +1,67 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_XTALK_HUBDEV_H +#define _ASM_IA64_SN_XTALK_HUBDEV_H + +#define HUB_WIDGET_ID_MAX 0xf +#define DEV_PER_WIDGET (2*2*8) +#define IIO_ITTE_WIDGET_BITS 4 /* size of widget field */ +#define IIO_ITTE_WIDGET_MASK ((1<<IIO_ITTE_WIDGET_BITS)-1) +#define IIO_ITTE_WIDGET_SHIFT 8 + +/* + * Use the top big window as a surrogate for the first small window + */ +#define SWIN0_BIGWIN HUB_NUM_BIG_WINDOW +#define IIO_NUM_ITTES 7 +#define HUB_NUM_BIG_WINDOW (IIO_NUM_ITTES - 1) + +struct sn_flush_device_list { + int sfdl_bus; + int sfdl_slot; + int sfdl_pin; + struct bar_list { + unsigned long start; + unsigned long end; + } sfdl_bar_list[6]; + unsigned long sfdl_force_int_addr; + unsigned long sfdl_flush_value; + volatile unsigned long *sfdl_flush_addr; + uint64_t sfdl_persistent_busnum; + struct pcibus_info *sfdl_pcibus_info; + spinlock_t sfdl_flush_lock; +}; + +/* + * **widget_p - Used as an array[wid_num][device] of sn_flush_device_list. + */ +struct sn_flush_nasid_entry { + struct sn_flush_device_list **widget_p; /* Used as a array of wid_num */ + uint64_t iio_itte[8]; +}; + +struct hubdev_info { + geoid_t hdi_geoid; + short hdi_nasid; + short hdi_peer_nasid; /* Dual Porting Peer */ + + struct sn_flush_nasid_entry hdi_flush_nasid_list; + struct xwidget_info hdi_xwidget_info[HUB_WIDGET_ID_MAX + 1]; + + + void *hdi_nodepda; + void *hdi_node_vertex; + void *hdi_xtalk_vertex; +}; + +extern void hubdev_init_node(nodepda_t *, cnodeid_t); +extern void hub_error_init(struct hubdev_info *); +extern void ice_error_init(struct hubdev_info *); + + +#endif /* _ASM_IA64_SN_XTALK_HUBDEV_H */ diff --git a/arch/ia64/sn/include/xtalk/xbow.h b/arch/ia64/sn/include/xtalk/xbow.h new file mode 100644 index 000000000000..ec56b3432f17 --- /dev/null +++ b/arch/ia64/sn/include/xtalk/xbow.h @@ -0,0 +1,291 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All Rights Reserved. + */ +#ifndef _ASM_IA64_SN_XTALK_XBOW_H +#define _ASM_IA64_SN_XTALK_XBOW_H + +#define XBOW_PORT_8 0x8 +#define XBOW_PORT_C 0xc +#define XBOW_PORT_F 0xf + +#define MAX_XBOW_PORTS 8 /* number of ports on xbow chip */ +#define BASE_XBOW_PORT XBOW_PORT_8 /* Lowest external port */ + +#define XBOW_CREDIT 4 + +#define MAX_XBOW_NAME 16 + +/* Register set for each xbow link */ +typedef volatile struct xb_linkregs_s { +/* + * we access these through synergy unswizzled space, so the address + * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.) + * That's why we put the register first and filler second. + */ + uint32_t link_ibf; + uint32_t filler0; /* filler for proper alignment */ + uint32_t link_control; + uint32_t filler1; + uint32_t link_status; + uint32_t filler2; + uint32_t link_arb_upper; + uint32_t filler3; + uint32_t link_arb_lower; + uint32_t filler4; + uint32_t link_status_clr; + uint32_t filler5; + uint32_t link_reset; + uint32_t filler6; + uint32_t link_aux_status; + uint32_t filler7; +} xb_linkregs_t; + +typedef volatile struct xbow_s { + /* standard widget configuration 0x000000-0x000057 */ + struct widget_cfg xb_widget; /* 0x000000 */ + + /* helper fieldnames for accessing bridge widget */ + +#define xb_wid_id xb_widget.w_id +#define xb_wid_stat xb_widget.w_status +#define xb_wid_err_upper xb_widget.w_err_upper_addr +#define xb_wid_err_lower xb_widget.w_err_lower_addr +#define xb_wid_control xb_widget.w_control +#define xb_wid_req_timeout xb_widget.w_req_timeout +#define xb_wid_int_upper xb_widget.w_intdest_upper_addr +#define xb_wid_int_lower xb_widget.w_intdest_lower_addr +#define xb_wid_err_cmdword xb_widget.w_err_cmd_word +#define xb_wid_llp xb_widget.w_llp_cfg +#define xb_wid_stat_clr xb_widget.w_tflush + +/* + * we access these through synergy unswizzled space, so the address + * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.) + * That's why we put the register first and filler second. + */ + /* xbow-specific widget configuration 0x000058-0x0000FF */ + uint32_t xb_wid_arb_reload; /* 0x00005C */ + uint32_t _pad_000058; + uint32_t xb_perf_ctr_a; /* 0x000064 */ + uint32_t _pad_000060; + uint32_t xb_perf_ctr_b; /* 0x00006c */ + uint32_t _pad_000068; + uint32_t xb_nic; /* 0x000074 */ + uint32_t _pad_000070; + + /* Xbridge only */ + uint32_t xb_w0_rst_fnc; /* 0x00007C */ + uint32_t _pad_000078; + uint32_t xb_l8_rst_fnc; /* 0x000084 */ + uint32_t _pad_000080; + uint32_t xb_l9_rst_fnc; /* 0x00008c */ + uint32_t _pad_000088; + uint32_t xb_la_rst_fnc; /* 0x000094 */ + uint32_t _pad_000090; + uint32_t xb_lb_rst_fnc; /* 0x00009c */ + uint32_t _pad_000098; + uint32_t xb_lc_rst_fnc; /* 0x0000a4 */ + uint32_t _pad_0000a0; + uint32_t xb_ld_rst_fnc; /* 0x0000ac */ + uint32_t _pad_0000a8; + uint32_t xb_le_rst_fnc; /* 0x0000b4 */ + uint32_t _pad_0000b0; + uint32_t xb_lf_rst_fnc; /* 0x0000bc */ + uint32_t _pad_0000b8; + uint32_t xb_lock; /* 0x0000c4 */ + uint32_t _pad_0000c0; + uint32_t xb_lock_clr; /* 0x0000cc */ + uint32_t _pad_0000c8; + /* end of Xbridge only */ + uint32_t _pad_0000d0[12]; + + /* Link Specific Registers, port 8..15 0x000100-0x000300 */ + xb_linkregs_t xb_link_raw[MAX_XBOW_PORTS]; +#define xb_link(p) xb_link_raw[(p) & (MAX_XBOW_PORTS - 1)] + +} xbow_t; + +#define XB_FLAGS_EXISTS 0x1 /* device exists */ +#define XB_FLAGS_MASTER 0x2 +#define XB_FLAGS_SLAVE 0x0 +#define XB_FLAGS_GBR 0x4 +#define XB_FLAGS_16BIT 0x8 +#define XB_FLAGS_8BIT 0x0 + +/* is widget port number valid? (based on version 7.0 of xbow spec) */ +#define XBOW_WIDGET_IS_VALID(wid) ((wid) >= XBOW_PORT_8 && (wid) <= XBOW_PORT_F) + +/* whether to use upper or lower arbitration register, given source widget id */ +#define XBOW_ARB_IS_UPPER(wid) ((wid) >= XBOW_PORT_8 && (wid) <= XBOW_PORT_B) +#define XBOW_ARB_IS_LOWER(wid) ((wid) >= XBOW_PORT_C && (wid) <= XBOW_PORT_F) + +/* offset of arbitration register, given source widget id */ +#define XBOW_ARB_OFF(wid) (XBOW_ARB_IS_UPPER(wid) ? 0x1c : 0x24) + +#define XBOW_WID_ID WIDGET_ID +#define XBOW_WID_STAT WIDGET_STATUS +#define XBOW_WID_ERR_UPPER WIDGET_ERR_UPPER_ADDR +#define XBOW_WID_ERR_LOWER WIDGET_ERR_LOWER_ADDR +#define XBOW_WID_CONTROL WIDGET_CONTROL +#define XBOW_WID_REQ_TO WIDGET_REQ_TIMEOUT +#define XBOW_WID_INT_UPPER WIDGET_INTDEST_UPPER_ADDR +#define XBOW_WID_INT_LOWER WIDGET_INTDEST_LOWER_ADDR +#define XBOW_WID_ERR_CMDWORD WIDGET_ERR_CMD_WORD +#define XBOW_WID_LLP WIDGET_LLP_CFG +#define XBOW_WID_STAT_CLR WIDGET_TFLUSH +#define XBOW_WID_ARB_RELOAD 0x5c +#define XBOW_WID_PERF_CTR_A 0x64 +#define XBOW_WID_PERF_CTR_B 0x6c +#define XBOW_WID_NIC 0x74 + +/* Xbridge only */ +#define XBOW_W0_RST_FNC 0x00007C +#define XBOW_L8_RST_FNC 0x000084 +#define XBOW_L9_RST_FNC 0x00008c +#define XBOW_LA_RST_FNC 0x000094 +#define XBOW_LB_RST_FNC 0x00009c +#define XBOW_LC_RST_FNC 0x0000a4 +#define XBOW_LD_RST_FNC 0x0000ac +#define XBOW_LE_RST_FNC 0x0000b4 +#define XBOW_LF_RST_FNC 0x0000bc +#define XBOW_RESET_FENCE(x) ((x) > 7 && (x) < 16) ? \ + (XBOW_W0_RST_FNC + ((x) - 7) * 8) : \ + ((x) == 0) ? XBOW_W0_RST_FNC : 0 +#define XBOW_LOCK 0x0000c4 +#define XBOW_LOCK_CLR 0x0000cc +/* End of Xbridge only */ + +/* used only in ide, but defined here within the reserved portion */ +/* of the widget0 address space (before 0xf4) */ +#define XBOW_WID_UNDEF 0xe4 + +/* xbow link register set base, legal value for x is 0x8..0xf */ +#define XB_LINK_BASE 0x100 +#define XB_LINK_OFFSET 0x40 +#define XB_LINK_REG_BASE(x) (XB_LINK_BASE + ((x) & (MAX_XBOW_PORTS - 1)) * XB_LINK_OFFSET) + +#define XB_LINK_IBUF_FLUSH(x) (XB_LINK_REG_BASE(x) + 0x4) +#define XB_LINK_CTRL(x) (XB_LINK_REG_BASE(x) + 0xc) +#define XB_LINK_STATUS(x) (XB_LINK_REG_BASE(x) + 0x14) +#define XB_LINK_ARB_UPPER(x) (XB_LINK_REG_BASE(x) + 0x1c) +#define XB_LINK_ARB_LOWER(x) (XB_LINK_REG_BASE(x) + 0x24) +#define XB_LINK_STATUS_CLR(x) (XB_LINK_REG_BASE(x) + 0x2c) +#define XB_LINK_RESET(x) (XB_LINK_REG_BASE(x) + 0x34) +#define XB_LINK_AUX_STATUS(x) (XB_LINK_REG_BASE(x) + 0x3c) + +/* link_control(x) */ +#define XB_CTRL_LINKALIVE_IE 0x80000000 /* link comes alive */ + /* reserved: 0x40000000 */ +#define XB_CTRL_PERF_CTR_MODE_MSK 0x30000000 /* perf counter mode */ +#define XB_CTRL_IBUF_LEVEL_MSK 0x0e000000 /* input packet buffer level */ +#define XB_CTRL_8BIT_MODE 0x01000000 /* force link into 8 bit mode */ +#define XB_CTRL_BAD_LLP_PKT 0x00800000 /* force bad LLP packet */ +#define XB_CTRL_WIDGET_CR_MSK 0x007c0000 /* LLP widget credit mask */ +#define XB_CTRL_WIDGET_CR_SHFT 18 /* LLP widget credit shift */ +#define XB_CTRL_ILLEGAL_DST_IE 0x00020000 /* illegal destination */ +#define XB_CTRL_OALLOC_IBUF_IE 0x00010000 /* overallocated input buffer */ + /* reserved: 0x0000fe00 */ +#define XB_CTRL_BNDWDTH_ALLOC_IE 0x00000100 /* bandwidth alloc */ +#define XB_CTRL_RCV_CNT_OFLOW_IE 0x00000080 /* rcv retry overflow */ +#define XB_CTRL_XMT_CNT_OFLOW_IE 0x00000040 /* xmt retry overflow */ +#define XB_CTRL_XMT_MAX_RTRY_IE 0x00000020 /* max transmit retry */ +#define XB_CTRL_RCV_IE 0x00000010 /* receive */ +#define XB_CTRL_XMT_RTRY_IE 0x00000008 /* transmit retry */ + /* reserved: 0x00000004 */ +#define XB_CTRL_MAXREQ_TOUT_IE 0x00000002 /* maximum request timeout */ +#define XB_CTRL_SRC_TOUT_IE 0x00000001 /* source timeout */ + +/* link_status(x) */ +#define XB_STAT_LINKALIVE XB_CTRL_LINKALIVE_IE + /* reserved: 0x7ff80000 */ +#define XB_STAT_MULTI_ERR 0x00040000 /* multi error */ +#define XB_STAT_ILLEGAL_DST_ERR XB_CTRL_ILLEGAL_DST_IE +#define XB_STAT_OALLOC_IBUF_ERR XB_CTRL_OALLOC_IBUF_IE +#define XB_STAT_BNDWDTH_ALLOC_ID_MSK 0x0000ff00 /* port bitmask */ +#define XB_STAT_RCV_CNT_OFLOW_ERR XB_CTRL_RCV_CNT_OFLOW_IE +#define XB_STAT_XMT_CNT_OFLOW_ERR XB_CTRL_XMT_CNT_OFLOW_IE +#define XB_STAT_XMT_MAX_RTRY_ERR XB_CTRL_XMT_MAX_RTRY_IE +#define XB_STAT_RCV_ERR XB_CTRL_RCV_IE +#define XB_STAT_XMT_RTRY_ERR XB_CTRL_XMT_RTRY_IE + /* reserved: 0x00000004 */ +#define XB_STAT_MAXREQ_TOUT_ERR XB_CTRL_MAXREQ_TOUT_IE +#define XB_STAT_SRC_TOUT_ERR XB_CTRL_SRC_TOUT_IE + +/* link_aux_status(x) */ +#define XB_AUX_STAT_RCV_CNT 0xff000000 +#define XB_AUX_STAT_XMT_CNT 0x00ff0000 +#define XB_AUX_STAT_TOUT_DST 0x0000ff00 +#define XB_AUX_LINKFAIL_RST_BAD 0x00000040 +#define XB_AUX_STAT_PRESENT 0x00000020 +#define XB_AUX_STAT_PORT_WIDTH 0x00000010 + /* reserved: 0x0000000f */ + +/* + * link_arb_upper/link_arb_lower(x), (reg) should be the link_arb_upper + * register if (x) is 0x8..0xb, link_arb_lower if (x) is 0xc..0xf + */ +#define XB_ARB_GBR_MSK 0x1f +#define XB_ARB_RR_MSK 0x7 +#define XB_ARB_GBR_SHFT(x) (((x) & 0x3) * 8) +#define XB_ARB_RR_SHFT(x) (((x) & 0x3) * 8 + 5) +#define XB_ARB_GBR_CNT(reg,x) ((reg) >> XB_ARB_GBR_SHFT(x) & XB_ARB_GBR_MSK) +#define XB_ARB_RR_CNT(reg,x) ((reg) >> XB_ARB_RR_SHFT(x) & XB_ARB_RR_MSK) + +/* XBOW_WID_STAT */ +#define XB_WID_STAT_LINK_INTR_SHFT (24) +#define XB_WID_STAT_LINK_INTR_MASK (0xFF << XB_WID_STAT_LINK_INTR_SHFT) +#define XB_WID_STAT_LINK_INTR(x) (0x1 << (((x)&7) + XB_WID_STAT_LINK_INTR_SHFT)) +#define XB_WID_STAT_WIDGET0_INTR 0x00800000 +#define XB_WID_STAT_SRCID_MASK 0x000003c0 /* Xbridge only */ +#define XB_WID_STAT_REG_ACC_ERR 0x00000020 +#define XB_WID_STAT_RECV_TOUT 0x00000010 /* Xbridge only */ +#define XB_WID_STAT_ARB_TOUT 0x00000008 /* Xbridge only */ +#define XB_WID_STAT_XTALK_ERR 0x00000004 +#define XB_WID_STAT_DST_TOUT 0x00000002 /* Xbridge only */ +#define XB_WID_STAT_MULTI_ERR 0x00000001 + +#define XB_WID_STAT_SRCID_SHFT 6 + +/* XBOW_WID_CONTROL */ +#define XB_WID_CTRL_REG_ACC_IE XB_WID_STAT_REG_ACC_ERR +#define XB_WID_CTRL_RECV_TOUT XB_WID_STAT_RECV_TOUT +#define XB_WID_CTRL_ARB_TOUT XB_WID_STAT_ARB_TOUT +#define XB_WID_CTRL_XTALK_IE XB_WID_STAT_XTALK_ERR + +/* XBOW_WID_INT_UPPER */ +/* defined in xwidget.h for WIDGET_INTDEST_UPPER_ADDR */ + +/* XBOW WIDGET part number, in the ID register */ +#define XBOW_WIDGET_PART_NUM 0x0 /* crossbow */ +#define XXBOW_WIDGET_PART_NUM 0xd000 /* Xbridge */ +#define XBOW_WIDGET_MFGR_NUM 0x0 +#define XXBOW_WIDGET_MFGR_NUM 0x0 +#define PXBOW_WIDGET_PART_NUM 0xd100 /* PIC */ + +#define XBOW_REV_1_0 0x1 /* xbow rev 1.0 is "1" */ +#define XBOW_REV_1_1 0x2 /* xbow rev 1.1 is "2" */ +#define XBOW_REV_1_2 0x3 /* xbow rev 1.2 is "3" */ +#define XBOW_REV_1_3 0x4 /* xbow rev 1.3 is "4" */ +#define XBOW_REV_2_0 0x5 /* xbow rev 2.0 is "5" */ + +#define XXBOW_PART_REV_1_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x1 ) +#define XXBOW_PART_REV_2_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x2 ) + +/* XBOW_WID_ARB_RELOAD */ +#define XBOW_WID_ARB_RELOAD_INT 0x3f /* GBR reload interval */ + +#define IS_XBRIDGE_XBOW(wid) \ + (XWIDGET_PART_NUM(wid) == XXBOW_WIDGET_PART_NUM && \ + XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM) + +#define IS_PIC_XBOW(wid) \ + (XWIDGET_PART_NUM(wid) == PXBOW_WIDGET_PART_NUM && \ + XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM) + +#define XBOW_WAR_ENABLED(pv, widid) ((1 << XWIDGET_REV_NUM(widid)) & pv) + +#endif /* _ASM_IA64_SN_XTALK_XBOW_H */ diff --git a/arch/ia64/sn/include/xtalk/xwidgetdev.h b/arch/ia64/sn/include/xtalk/xwidgetdev.h new file mode 100644 index 000000000000..c5f4bc5cc033 --- /dev/null +++ b/arch/ia64/sn/include/xtalk/xwidgetdev.h @@ -0,0 +1,70 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992-1997,2000-2003 Silicon Graphics, Inc. All Rights Reserved. + */ +#ifndef _ASM_IA64_SN_XTALK_XWIDGET_H +#define _ASM_IA64_SN_XTALK_XWIDGET_H + +/* WIDGET_ID */ +#define WIDGET_REV_NUM 0xf0000000 +#define WIDGET_PART_NUM 0x0ffff000 +#define WIDGET_MFG_NUM 0x00000ffe +#define WIDGET_REV_NUM_SHFT 28 +#define WIDGET_PART_NUM_SHFT 12 +#define WIDGET_MFG_NUM_SHFT 1 + +#define XWIDGET_PART_NUM(widgetid) (((widgetid) & WIDGET_PART_NUM) >> WIDGET_PART_NUM_SHFT) +#define XWIDGET_REV_NUM(widgetid) (((widgetid) & WIDGET_REV_NUM) >> WIDGET_REV_NUM_SHFT) +#define XWIDGET_MFG_NUM(widgetid) (((widgetid) & WIDGET_MFG_NUM) >> WIDGET_MFG_NUM_SHFT) +#define XWIDGET_PART_REV_NUM(widgetid) ((XWIDGET_PART_NUM(widgetid) << 4) | \ + XWIDGET_REV_NUM(widgetid)) +#define XWIDGET_PART_REV_NUM_REV(partrev) (partrev & 0xf) + +/* widget configuration registers */ +struct widget_cfg{ + uint32_t w_id; /* 0x04 */ + uint32_t w_pad_0; /* 0x00 */ + uint32_t w_status; /* 0x0c */ + uint32_t w_pad_1; /* 0x08 */ + uint32_t w_err_upper_addr; /* 0x14 */ + uint32_t w_pad_2; /* 0x10 */ + uint32_t w_err_lower_addr; /* 0x1c */ + uint32_t w_pad_3; /* 0x18 */ + uint32_t w_control; /* 0x24 */ + uint32_t w_pad_4; /* 0x20 */ + uint32_t w_req_timeout; /* 0x2c */ + uint32_t w_pad_5; /* 0x28 */ + uint32_t w_intdest_upper_addr; /* 0x34 */ + uint32_t w_pad_6; /* 0x30 */ + uint32_t w_intdest_lower_addr; /* 0x3c */ + uint32_t w_pad_7; /* 0x38 */ + uint32_t w_err_cmd_word; /* 0x44 */ + uint32_t w_pad_8; /* 0x40 */ + uint32_t w_llp_cfg; /* 0x4c */ + uint32_t w_pad_9; /* 0x48 */ + uint32_t w_tflush; /* 0x54 */ + uint32_t w_pad_10; /* 0x50 */ +}; + +/* + * Crosstalk Widget Hardware Identification, as defined in the Crosstalk spec. + */ +struct xwidget_hwid{ + int mfg_num; + int rev_num; + int part_num; +}; + +struct xwidget_info{ + + struct xwidget_hwid xwi_hwid; /* Widget Identification */ + char xwi_masterxid; /* Hub's Widget Port Number */ + void *xwi_hubinfo; /* Hub's provider private info */ + uint64_t *xwi_hub_provider; /* prom provider functions */ + void *xwi_vertex; +}; + +#endif /* _ASM_IA64_SN_XTALK_XWIDGET_H */ diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile new file mode 100644 index 000000000000..6c7f4d9e8ea0 --- /dev/null +++ b/arch/ia64/sn/kernel/Makefile @@ -0,0 +1,12 @@ +# arch/ia64/sn/kernel/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1999,2001-2003 Silicon Graphics, Inc. All Rights Reserved. +# + +obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ + huberror.o io_init.o iomv.o klconflib.o sn2/ +obj-$(CONFIG_IA64_GENERIC) += machvec.o diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c new file mode 100644 index 000000000000..ce0bc4085eae --- /dev/null +++ b/arch/ia64/sn/kernel/bte.c @@ -0,0 +1,453 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/addrs.h> +#include <asm/sn/arch.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/pda.h> +#include <asm/sn/shubio.h> +#include <asm/nodedata.h> +#include <asm/delay.h> + +#include <linux/bootmem.h> +#include <linux/string.h> +#include <linux/sched.h> + +#include <asm/sn/bte.h> + +#ifndef L1_CACHE_MASK +#define L1_CACHE_MASK (L1_CACHE_BYTES - 1) +#endif + +/* two interfaces on two btes */ +#define MAX_INTERFACES_TO_TRY 4 + +static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) +{ + nodepda_t *tmp_nodepda; + + tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); + return &tmp_nodepda->bte_if[interface]; + +} + +/************************************************************************ + * Block Transfer Engine copy related functions. + * + ***********************************************************************/ + +/* + * bte_copy(src, dest, len, mode, notification) + * + * Use the block transfer engine to move kernel memory from src to dest + * using the assigned mode. + * + * Paramaters: + * src - physical address of the transfer source. + * dest - physical address of the transfer destination. + * len - number of bytes to transfer from source to dest. + * mode - hardware defined. See reference information + * for IBCT0/1 in the SHUB Programmers Reference + * notification - kernel virtual address of the notification cache + * line. If NULL, the default is used and + * the bte_copy is synchronous. + * + * NOTE: This function requires src, dest, and len to + * be cacheline aligned. + */ +bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) +{ + u64 transfer_size; + u64 transfer_stat; + struct bteinfo_s *bte; + bte_result_t bte_status; + unsigned long irq_flags; + unsigned long itc_end = 0; + struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY]; + int bte_if_index; + int bte_pri, bte_sec; + + BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n", + src, dest, len, mode, notification)); + + if (len == 0) { + return BTE_SUCCESS; + } + + BUG_ON((len & L1_CACHE_MASK) || + (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)); + BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT))); + + /* CPU 0 (per node) tries bte0 first, CPU 1 try bte1 first */ + if (cpuid_to_subnode(smp_processor_id()) == 0) { + bte_pri = 0; + bte_sec = 1; + } else { + bte_pri = 1; + bte_sec = 0; + } + + if (mode & BTE_USE_DEST) { + /* try remote then local */ + btes_to_try[0] = bte_if_on_node(NASID_GET(dest), bte_pri); + btes_to_try[1] = bte_if_on_node(NASID_GET(dest), bte_sec); + if (mode & BTE_USE_ANY) { + btes_to_try[2] = bte_if_on_node(get_nasid(), bte_pri); + btes_to_try[3] = bte_if_on_node(get_nasid(), bte_sec); + } else { + btes_to_try[2] = NULL; + btes_to_try[3] = NULL; + } + } else { + /* try local then remote */ + btes_to_try[0] = bte_if_on_node(get_nasid(), bte_pri); + btes_to_try[1] = bte_if_on_node(get_nasid(), bte_sec); + if (mode & BTE_USE_ANY) { + btes_to_try[2] = bte_if_on_node(NASID_GET(dest), bte_pri); + btes_to_try[3] = bte_if_on_node(NASID_GET(dest), bte_sec); + } else { + btes_to_try[2] = NULL; + btes_to_try[3] = NULL; + } + } + +retry_bteop: + do { + local_irq_save(irq_flags); + + bte_if_index = 0; + + /* Attempt to lock one of the BTE interfaces. */ + while (bte_if_index < MAX_INTERFACES_TO_TRY) { + bte = btes_to_try[bte_if_index++]; + + if (bte == NULL) { + continue; + } + + if (spin_trylock(&bte->spinlock)) { + if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) || + (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) { + /* Got the lock but BTE still busy */ + spin_unlock(&bte->spinlock); + } else { + /* we got the lock and it's not busy */ + break; + } + } + bte = NULL; + } + + if (bte != NULL) { + break; + } + + local_irq_restore(irq_flags); + + if (!(mode & BTE_WACQUIRE)) { + return BTEFAIL_NOTAVAIL; + } + } while (1); + + if (notification == NULL) { + /* User does not want to be notified. */ + bte->most_rcnt_na = &bte->notify; + } else { + bte->most_rcnt_na = notification; + } + + /* Calculate the number of cache lines to transfer. */ + transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK); + + /* Initialize the notification to a known value. */ + *bte->most_rcnt_na = BTE_WORD_BUSY; + + /* Set the status reg busy bit and transfer length */ + BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size)); + BTE_LNSTAT_STORE(bte, IBLS_BUSY | transfer_size); + + /* Set the source and destination registers */ + BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src)))); + BTE_SRC_STORE(bte, TO_PHYS(src)); + BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest)))); + BTE_DEST_STORE(bte, TO_PHYS(dest)); + + /* Set the notification register */ + BTE_PRINTKV(("IBNA = 0x%lx)\n", + TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)))); + BTE_NOTIF_STORE(bte, + TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na))); + + /* Initiate the transfer */ + BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); + BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode)); + + itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); + + spin_unlock_irqrestore(&bte->spinlock, irq_flags); + + if (notification != NULL) { + return BTE_SUCCESS; + } + + while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) { + if (ia64_get_itc() > itc_end) { + BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n", + NASID_GET(bte->bte_base_addr), bte->bte_num, + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) ); + bte->bte_error_count++; + bte->bh_error = IBLS_ERROR; + bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode)); + *bte->most_rcnt_na = BTE_WORD_AVAILABLE; + goto retry_bteop; + } + } + + BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n", + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); + + if (transfer_stat & IBLS_ERROR) { + bte_status = transfer_stat & ~IBLS_ERROR; + } else { + bte_status = BTE_SUCCESS; + } + *bte->most_rcnt_na = BTE_WORD_AVAILABLE; + + BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n", + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); + + return bte_status; +} + +EXPORT_SYMBOL(bte_copy); + +/* + * bte_unaligned_copy(src, dest, len, mode) + * + * use the block transfer engine to move kernel + * memory from src to dest using the assigned mode. + * + * Paramaters: + * src - physical address of the transfer source. + * dest - physical address of the transfer destination. + * len - number of bytes to transfer from source to dest. + * mode - hardware defined. See reference information + * for IBCT0/1 in the SGI documentation. + * + * NOTE: If the source, dest, and len are all cache line aligned, + * then it would be _FAR_ preferrable to use bte_copy instead. + */ +bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) +{ + int destFirstCacheOffset; + u64 headBteSource; + u64 headBteLen; + u64 headBcopySrcOffset; + u64 headBcopyDest; + u64 headBcopyLen; + u64 footBteSource; + u64 footBteLen; + u64 footBcopyDest; + u64 footBcopyLen; + bte_result_t rv; + char *bteBlock, *bteBlock_unaligned; + + if (len == 0) { + return BTE_SUCCESS; + } + + /* temporary buffer used during unaligned transfers */ + bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, + GFP_KERNEL | GFP_DMA); + if (bteBlock_unaligned == NULL) { + return BTEFAIL_NOTAVAIL; + } + bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned); + + headBcopySrcOffset = src & L1_CACHE_MASK; + destFirstCacheOffset = dest & L1_CACHE_MASK; + + /* + * At this point, the transfer is broken into + * (up to) three sections. The first section is + * from the start address to the first physical + * cache line, the second is from the first physical + * cache line to the last complete cache line, + * and the third is from the last cache line to the + * end of the buffer. The first and third sections + * are handled by bte copying into a temporary buffer + * and then bcopy'ing the necessary section into the + * final location. The middle section is handled with + * a standard bte copy. + * + * One nasty exception to the above rule is when the + * source and destination are not symetrically + * mis-aligned. If the source offset from the first + * cache line is different from the destination offset, + * we make the first section be the entire transfer + * and the bcopy the entire block into place. + */ + if (headBcopySrcOffset == destFirstCacheOffset) { + + /* + * Both the source and destination are the same + * distance from a cache line boundary so we can + * use the bte to transfer the bulk of the + * data. + */ + headBteSource = src & ~L1_CACHE_MASK; + headBcopyDest = dest; + if (headBcopySrcOffset) { + headBcopyLen = + (len > + (L1_CACHE_BYTES - + headBcopySrcOffset) ? L1_CACHE_BYTES + - headBcopySrcOffset : len); + headBteLen = L1_CACHE_BYTES; + } else { + headBcopyLen = 0; + headBteLen = 0; + } + + if (len > headBcopyLen) { + footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK; + footBteLen = L1_CACHE_BYTES; + + footBteSource = src + len - footBcopyLen; + footBcopyDest = dest + len - footBcopyLen; + + if (footBcopyDest == (headBcopyDest + headBcopyLen)) { + /* + * We have two contigous bcopy + * blocks. Merge them. + */ + headBcopyLen += footBcopyLen; + headBteLen += footBteLen; + } else if (footBcopyLen > 0) { + rv = bte_copy(footBteSource, + ia64_tpa((unsigned long)bteBlock), + footBteLen, mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + memcpy(__va(footBcopyDest), + (char *)bteBlock, footBcopyLen); + } + } else { + footBcopyLen = 0; + footBteLen = 0; + } + + if (len > (headBcopyLen + footBcopyLen)) { + /* now transfer the middle. */ + rv = bte_copy((src + headBcopyLen), + (dest + + headBcopyLen), + (len - headBcopyLen - + footBcopyLen), mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + } + } else { + + /* + * The transfer is not symetric, we will + * allocate a buffer large enough for all the + * data, bte_copy into that buffer and then + * bcopy to the destination. + */ + + /* Add the leader from source */ + headBteLen = len + (src & L1_CACHE_MASK); + /* Add the trailing bytes from footer. */ + headBteLen += L1_CACHE_BYTES - (headBteLen & L1_CACHE_MASK); + headBteSource = src & ~L1_CACHE_MASK; + headBcopySrcOffset = src & L1_CACHE_MASK; + headBcopyDest = dest; + headBcopyLen = len; + } + + if (headBcopyLen > 0) { + rv = bte_copy(headBteSource, + ia64_tpa((unsigned long)bteBlock), headBteLen, + mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + memcpy(__va(headBcopyDest), ((char *)bteBlock + + headBcopySrcOffset), headBcopyLen); + } + kfree(bteBlock_unaligned); + return BTE_SUCCESS; +} + +EXPORT_SYMBOL(bte_unaligned_copy); + +/************************************************************************ + * Block Transfer Engine initialization functions. + * + ***********************************************************************/ + +/* + * bte_init_node(nodepda, cnode) + * + * Initialize the nodepda structure with BTE base addresses and + * spinlocks. + */ +void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode) +{ + int i; + + /* + * Indicate that all the block transfer engines on this node + * are available. + */ + + /* + * Allocate one bte_recover_t structure per node. It holds + * the recovery lock for node. All the bte interface structures + * will point at this one bte_recover structure to get the lock. + */ + spin_lock_init(&mynodepda->bte_recovery_lock); + init_timer(&mynodepda->bte_recovery_timer); + mynodepda->bte_recovery_timer.function = bte_error_handler; + mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda; + + for (i = 0; i < BTES_PER_NODE; i++) { + /* Which link status register should we use? */ + unsigned long link_status = (i == 0 ? IIO_IBLS0 : IIO_IBLS1); + mynodepda->bte_if[i].bte_base_addr = (u64 *) + REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), link_status); + + /* + * Initialize the notification and spinlock + * so the first transfer can occur. + */ + mynodepda->bte_if[i].most_rcnt_na = + &(mynodepda->bte_if[i].notify); + mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE; + spin_lock_init(&mynodepda->bte_if[i].spinlock); + + mynodepda->bte_if[i].bte_cnode = cnode; + mynodepda->bte_if[i].bte_error_count = 0; + mynodepda->bte_if[i].bte_num = i; + mynodepda->bte_if[i].cleanup_active = 0; + mynodepda->bte_if[i].bh_error = 0; + } + +} diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c new file mode 100644 index 000000000000..fd104312c6bd --- /dev/null +++ b/arch/ia64/sn/kernel/bte_error.c @@ -0,0 +1,198 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <asm/sn/sn_sal.h> +#include "ioerror.h" +#include <asm/sn/addrs.h> +#include <asm/sn/shubio.h> +#include <asm/sn/geo.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include <asm/sn/bte.h> +#include <asm/param.h> + +/* + * Bte error handling is done in two parts. The first captures + * any crb related errors. Since there can be multiple crbs per + * interface and multiple interfaces active, we need to wait until + * all active crbs are completed. This is the first job of the + * second part error handler. When all bte related CRBs are cleanly + * completed, it resets the interfaces and gets them ready for new + * transfers to be queued. + */ + +void bte_error_handler(unsigned long); + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. + */ +void bte_error_handler(unsigned long _nodepda) +{ + struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; + spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; + struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; + nasid_t nasid; + int i; + int valid_crbs; + unsigned long irq_flags; + volatile u64 *notify; + bte_result_t bh_error; + ii_imem_u_t imem; /* II IMEM Register */ + ii_icrb0_d_u_t icrbd; /* II CRB Register D */ + ii_ibcr_u_t ibcr; + ii_icmr_u_t icmr; + ii_ieclr_u_t ieclr; + + BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda, + smp_processor_id())); + + spin_lock_irqsave(recovery_lock, irq_flags); + + if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) && + (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) { + BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda, + smp_processor_id())); + spin_unlock_irqrestore(recovery_lock, irq_flags); + return; + } + /* + * Lock all interfaces on this node to prevent new transfers + * from being queued. + */ + for (i = 0; i < BTES_PER_NODE; i++) { + if (err_nodepda->bte_if[i].cleanup_active) { + continue; + } + spin_lock(&err_nodepda->bte_if[i].spinlock); + BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda, + smp_processor_id(), i)); + err_nodepda->bte_if[i].cleanup_active = 1; + } + + /* Determine information about our hub */ + nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); + + /* + * A BTE transfer can use multiple CRBs. We need to make sure + * that all the BTE CRBs are complete (or timed out) before + * attempting to clean up the error. Resetting the BTE while + * there are still BTE CRBs active will hang the BTE. + * We should look at all the CRBs to see if they are allocated + * to the BTE and see if they are still active. When none + * are active, we can continue with the cleanup. + * + * We also want to make sure that the local NI port is up. + * When a router resets the NI port can go down, while it + * goes through the LLP handshake, but then comes back up. + */ + icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR); + if (icmr.ii_icmr_fld_s.i_crb_mark != 0) { + /* + * There are errors which still need to be cleaned up by + * hubiio_crb_error_handler + */ + mod_timer(recovery_timer, HZ * 5); + BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, + smp_processor_id())); + spin_unlock_irqrestore(recovery_lock, irq_flags); + return; + } + if (icmr.ii_icmr_fld_s.i_crb_vld != 0) { + + valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld; + + for (i = 0; i < IIO_NUM_CRBS; i++) { + if (!((1 << i) & valid_crbs)) { + /* This crb was not marked as valid, ignore */ + continue; + } + icrbd.ii_icrb0_d_regval = + REMOTE_HUB_L(nasid, IIO_ICRB_D(i)); + if (icrbd.d_bteop) { + mod_timer(recovery_timer, HZ * 5); + BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n", + err_nodepda, smp_processor_id(), + i)); + spin_unlock_irqrestore(recovery_lock, + irq_flags); + return; + } + } + } + + BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id())); + /* Reenable both bte interfaces */ + imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM); + imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1; + REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval); + + /* Clear BTE0/1 error bits */ + ieclr.ii_ieclr_regval = 0; + if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS) + ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1; + if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS) + ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1; + REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval); + + /* Reinitialize both BTE state machines. */ + ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR); + ibcr.ii_ibcr_fld_s.i_soft_reset = 1; + REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval); + + for (i = 0; i < BTES_PER_NODE; i++) { + bh_error = err_nodepda->bte_if[i].bh_error; + if (bh_error != BTE_SUCCESS) { + /* There is an error which needs to be notified */ + notify = err_nodepda->bte_if[i].most_rcnt_na; + BTE_PRINTK(("cnode %d bte %d error=0x%lx\n", + err_nodepda->bte_if[i].bte_cnode, + err_nodepda->bte_if[i].bte_num, + IBLS_ERROR | (u64) bh_error)); + *notify = IBLS_ERROR | bh_error; + err_nodepda->bte_if[i].bh_error = BTE_SUCCESS; + } + + err_nodepda->bte_if[i].cleanup_active = 0; + BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda, + smp_processor_id(), i)); + spin_unlock(&err_nodepda->bte_if[i].spinlock); + } + + del_timer(recovery_timer); + + spin_unlock_irqrestore(recovery_lock, irq_flags); +} + +/* + * First part error handler. This is called whenever any error CRB interrupt + * is generated by the II. + */ +void +bte_crb_error_handler(cnodeid_t cnode, int btenum, + int crbnum, ioerror_t * ioe, int bteop) +{ + struct bteinfo_s *bte; + + + bte = &(NODEPDA(cnode)->bte_if[btenum]); + + /* + * The caller has already figured out the error type, we save that + * in the bte handle structure for the thread excercising the + * interface to consume. + */ + bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET; + bte->bte_error_count++; + + BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n", + bte->bte_cnode, bte->bte_num, ioe->ie_errortype)); + bte_error_handler((unsigned long) NODEPDA(cnode)); +} + diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c new file mode 100644 index 000000000000..2bdf684c5066 --- /dev/null +++ b/arch/ia64/sn/kernel/huberror.c @@ -0,0 +1,201 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000,2002-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <asm/delay.h> +#include <asm/sn/sn_sal.h> +#include "ioerror.h" +#include <asm/sn/addrs.h> +#include <asm/sn/shubio.h> +#include <asm/sn/geo.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include <asm/sn/bte.h> + +void hubiio_crb_error_handler(struct hubdev_info *hubdev_info); +extern void bte_crb_error_handler(cnodeid_t, int, int, ioerror_t *, + int); +static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep) +{ + struct hubdev_info *hubdev_info; + struct ia64_sal_retval ret_stuff; + nasid_t nasid; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + hubdev_info = (struct hubdev_info *)arg; + nasid = hubdev_info->hdi_nasid; + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, + (u64) nasid, 0, 0, 0, 0, 0, 0); + + if ((int)ret_stuff.v0) + panic("hubii_eint_handler(): Fatal TIO Error"); + + if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ + (void)hubiio_crb_error_handler(hubdev_info); + + return IRQ_HANDLED; +} + +/* + * Free the hub CRB "crbnum" which encountered an error. + * Assumption is, error handling was successfully done, + * and we now want to return the CRB back to Hub for normal usage. + * + * In order to free the CRB, all that's needed is to de-allocate it + * + * Assumption: + * No other processor is mucking around with the hub control register. + * So, upper layer has to single thread this. + */ +void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum) +{ + ii_icrb0_b_u_t icrbb; + + /* + * The hardware does NOT clear the mark bit, so it must get cleared + * here to be sure the error is not processed twice. + */ + icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(hubdev_info->hdi_nasid, + IIO_ICRB_B(crbnum)); + icrbb.b_mark = 0; + REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICRB_B(crbnum), + icrbb.ii_icrb0_b_regval); + /* + * Deallocate the register wait till hub indicates it's done. + */ + REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum)); + while (REMOTE_HUB_L(hubdev_info->hdi_nasid, IIO_ICDR) & IIO_ICDR_PND) + udelay(1); + +} + +/* + * hubiio_crb_error_handler + * + * This routine gets invoked when a hub gets an error + * interrupt. So, the routine is running in interrupt context + * at error interrupt level. + * Action: + * It's responsible for identifying ALL the CRBs that are marked + * with error, and process them. + * + * If you find the CRB that's marked with error, map this to the + * reason it caused error, and invoke appropriate error handler. + * + * XXX Be aware of the information in the context register. + * + * NOTE: + * Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt + * handler can be run on any node. (not necessarily the node + * corresponding to the hub that encountered error). + */ + +void hubiio_crb_error_handler(struct hubdev_info *hubdev_info) +{ + nasid_t nasid; + ii_icrb0_a_u_t icrba; /* II CRB Register A */ + ii_icrb0_b_u_t icrbb; /* II CRB Register B */ + ii_icrb0_c_u_t icrbc; /* II CRB Register C */ + ii_icrb0_d_u_t icrbd; /* II CRB Register D */ + ii_icrb0_e_u_t icrbe; /* II CRB Register D */ + int i; + int num_errors = 0; /* Num of errors handled */ + ioerror_t ioerror; + + nasid = hubdev_info->hdi_nasid; + + /* + * XXX - Add locking for any recovery actions + */ + /* + * Scan through all CRBs in the Hub, and handle the errors + * in any of the CRBs marked. + */ + for (i = 0; i < IIO_NUM_CRBS; i++) { + /* Check this crb entry to see if it is in error. */ + icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i)); + + if (icrbb.b_mark == 0) { + continue; + } + + icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i)); + + IOERROR_INIT(&ioerror); + + /* read other CRB error registers. */ + icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i)); + icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i)); + icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i)); + + IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode); + + /* Check if this error is due to BTE operation, + * and handle it separately. + */ + if (icrbd.d_bteop || + ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 || + icrbb.b_initiator == IIO_ICRB_INIT_BTE1) && + (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE || + icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) { + + int bte_num; + + if (icrbd.d_bteop) + bte_num = icrbc.c_btenum; + else /* b_initiator bit 2 gives BTE number */ + bte_num = (icrbb.b_initiator & 0x4) >> 2; + + hubiio_crb_free(hubdev_info, i); + + bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num, + i, &ioerror, icrbd.d_bteop); + num_errors++; + continue; + } + } +} + +/* + * Function : hub_error_init + * Purpose : initialize the error handling requirements for a given hub. + * Parameters : cnode, the compact nodeid. + * Assumptions : Called only once per hub, either by a local cpu. Or by a + * remote cpu, when this hub is headless.(cpuless) + * Returns : None + */ +void hub_error_init(struct hubdev_info *hubdev_info) +{ + if (request_irq(SGI_II_ERROR, (void *)hub_eint_handler, SA_SHIRQ, + "SN_hub_error", (void *)hubdev_info)) + printk("hub_error_init: Failed to request_irq for 0x%p\n", + hubdev_info); + return; +} + + +/* + * Function : ice_error_init + * Purpose : initialize the error handling requirements for a given tio. + * Parameters : cnode, the compact nodeid. + * Assumptions : Called only once per tio. + * Returns : None + */ +void ice_error_init(struct hubdev_info *hubdev_info) +{ + if (request_irq + (SGI_TIO_ERROR, (void *)hub_eint_handler, SA_SHIRQ, "SN_TIO_error", + (void *)hubdev_info)) + printk("ice_error_init: request_irq() error hubdev_info 0x%p\n", + hubdev_info); + return; +} + diff --git a/arch/ia64/sn/kernel/idle.c b/arch/ia64/sn/kernel/idle.c new file mode 100644 index 000000000000..49d178f022b5 --- /dev/null +++ b/arch/ia64/sn/kernel/idle.c @@ -0,0 +1,30 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2001-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <asm/sn/leds.h> + +void snidle(int state) +{ + if (state) { + if (pda->idle_flag == 0) { + /* + * Turn the activity LED off. + */ + set_led_bits(0, LED_CPU_ACTIVITY); + } + + pda->idle_flag = 1; + } else { + /* + * Turn the activity LED on. + */ + set_led_bits(LED_CPU_ACTIVITY, LED_CPU_ACTIVITY); + + pda->idle_flag = 0; + } +} diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c new file mode 100644 index 000000000000..001880812b7c --- /dev/null +++ b/arch/ia64/sn/kernel/io_init.c @@ -0,0 +1,411 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/bootmem.h> +#include <linux/nodemask.h> +#include <asm/sn/types.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include "pci/pcibus_provider_defs.h" +#include "pci/pcidev.h" +#include "pci/pcibr_provider.h" +#include "xtalk/xwidgetdev.h" +#include <asm/sn/geo.h> +#include "xtalk/hubdev.h" +#include <asm/sn/io.h> +#include <asm/sn/simulator.h> + +char master_baseio_wid; +nasid_t master_nasid = INVALID_NASID; /* Partition Master */ + +struct slab_info { + struct hubdev_info hubdev; +}; + +struct brick { + moduleid_t id; /* Module ID of this module */ + struct slab_info slab_info[MAX_SLABS + 1]; +}; + +int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */ + +/* + * Retrieve the DMA Flush List given nasid. This list is needed + * to implement the WAR - Flush DMA data on PIO Reads. + */ +static inline uint64_t +sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address) +{ + + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST, + (u64) nasid, (u64) widget_num, (u64) address, 0, 0, 0, + 0); + return ret_stuff.v0; + +} + +/* + * Retrieve the hub device info structure for the given nasid. + */ +static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address) +{ + + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_HUBDEV_INFO, + (u64) handle, (u64) address, 0, 0, 0, 0, 0); + return ret_stuff.v0; +} + +/* + * Retrieve the pci bus information given the bus number. + */ +static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address) +{ + + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_PCIBUS_INFO, + (u64) segment, (u64) busnum, (u64) address, 0, 0, 0, 0); + return ret_stuff.v0; +} + +/* + * Retrieve the pci device information given the bus and device|function number. + */ +static inline uint64_t +sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, + u64 sn_irq_info) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_PCIDEV_INFO, + (u64) segment, (u64) bus_number, (u64) devfn, + (u64) pci_dev, + sn_irq_info, 0, 0); + return ret_stuff.v0; +} + +/* + * sn_alloc_pci_sysdata() - This routine allocates a pci controller + * which is expected as the pci_dev and pci_bus sysdata by the Linux + * PCI infrastructure. + */ +static inline struct pci_controller *sn_alloc_pci_sysdata(void) +{ + struct pci_controller *pci_sysdata; + + pci_sysdata = kmalloc(sizeof(*pci_sysdata), GFP_KERNEL); + if (!pci_sysdata) + BUG(); + + memset(pci_sysdata, 0, sizeof(*pci_sysdata)); + return pci_sysdata; +} + +/* + * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for + * each node in the system. + */ +static void sn_fixup_ionodes(void) +{ + + struct sn_flush_device_list *sn_flush_device_list; + struct hubdev_info *hubdev; + uint64_t status; + uint64_t nasid; + int i, widget; + + for (i = 0; i < numionodes; i++) { + hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); + nasid = cnodeid_to_nasid(i); + status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev)); + if (status) + continue; + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) + hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; + + if (!hubdev->hdi_flush_nasid_list.widget_p) + continue; + + hubdev->hdi_flush_nasid_list.widget_p = + kmalloc((HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_list *), GFP_KERNEL); + + memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0, + (HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_list *)); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { + sn_flush_device_list = kmalloc(DEV_PER_WIDGET * + sizeof(struct + sn_flush_device_list), + GFP_KERNEL); + memset(sn_flush_device_list, 0x0, + DEV_PER_WIDGET * + sizeof(struct sn_flush_device_list)); + + status = + sal_get_widget_dmaflush_list(nasid, widget, + (uint64_t) + __pa + (sn_flush_device_list)); + if (status) { + kfree(sn_flush_device_list); + continue; + } + + hubdev->hdi_flush_nasid_list.widget_p[widget] = + sn_flush_device_list; + } + + if (!(i & 1)) + hub_error_init(hubdev); + else + ice_error_init(hubdev); + } + +} + +/* + * sn_pci_fixup_slot() - This routine sets up a slot's resources + * consistent with the Linux PCI abstraction layer. Resources acquired + * from our PCI provider include PIO maps to BAR space and interrupt + * objects. + */ +static void sn_pci_fixup_slot(struct pci_dev *dev) +{ + int idx; + int segment = 0; + uint64_t size; + struct sn_irq_info *sn_irq_info; + struct pci_dev *host_pci_dev; + int status = 0; + + dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL); + if (SN_PCIDEV_INFO(dev) <= 0) + BUG(); /* Cannot afford to run out of memory */ + memset(SN_PCIDEV_INFO(dev), 0, sizeof(struct pcidev_info)); + + sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (sn_irq_info <= 0) + BUG(); /* Cannot afford to run out of memory */ + memset(sn_irq_info, 0, sizeof(struct sn_irq_info)); + + /* Call to retrieve pci device information needed by kernel. */ + status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, + dev->devfn, + (u64) __pa(SN_PCIDEV_INFO(dev)), + (u64) __pa(sn_irq_info)); + if (status) + BUG(); /* Cannot get platform pci device information information */ + + /* Copy over PIO Mapped Addresses */ + for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { + unsigned long start, end, addr; + + if (!SN_PCIDEV_INFO(dev)->pdi_pio_mapped_addr[idx]) + continue; + + start = dev->resource[idx].start; + end = dev->resource[idx].end; + size = end - start; + addr = SN_PCIDEV_INFO(dev)->pdi_pio_mapped_addr[idx]; + addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET; + dev->resource[idx].start = addr; + dev->resource[idx].end = addr + size; + if (dev->resource[idx].flags & IORESOURCE_IO) + dev->resource[idx].parent = &ioport_resource; + else + dev->resource[idx].parent = &iomem_resource; + } + + /* set up host bus linkages */ + host_pci_dev = + pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32, + SN_PCIDEV_INFO(dev)-> + pdi_slot_host_handle & 0xffffffff); + SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info = + SN_PCIDEV_INFO(host_pci_dev); + SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev; + SN_PCIDEV_INFO(dev)->pdi_pcibus_info = SN_PCIBUS_BUSSOFT(dev->bus); + + /* Only set up IRQ stuff if this device has a host bus context */ + if (SN_PCIDEV_BUSSOFT(dev) && sn_irq_info->irq_irq) { + SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info; + dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq; + sn_irq_fixup(dev, sn_irq_info); + } +} + +/* + * sn_pci_controller_fixup() - This routine sets up a bus's resources + * consistent with the Linux PCI abstraction layer. + */ +static void sn_pci_controller_fixup(int segment, int busnum) +{ + int status = 0; + int nasid, cnode; + struct pci_bus *bus; + struct pci_controller *controller; + struct pcibus_bussoft *prom_bussoft_ptr; + struct hubdev_info *hubdev_info; + void *provider_soft; + + status = + sal_get_pcibus_info((u64) segment, (u64) busnum, + (u64) ia64_tpa(&prom_bussoft_ptr)); + if (status > 0) { + return; /* bus # does not exist */ + } + + prom_bussoft_ptr = __va(prom_bussoft_ptr); + controller = sn_alloc_pci_sysdata(); + /* controller non-zero is BUG'd in sn_alloc_pci_sysdata */ + + bus = pci_scan_bus(busnum, &pci_root_ops, controller); + if (bus == NULL) { + return; /* error, or bus already scanned */ + } + + /* + * Per-provider fixup. Copies the contents from prom to local + * area and links SN_PCIBUS_BUSSOFT(). + * + * Note: Provider is responsible for ensuring that prom_bussoft_ptr + * represents an asic-type that it can handle. + */ + + if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) { + return; /* no further fixup necessary */ + } + + provider_soft = pcibr_bus_fixup(prom_bussoft_ptr); + if (provider_soft == NULL) { + return; /* fixup failed or not applicable */ + } + + /* + * Generic bus fixup goes here. Don't reference prom_bussoft_ptr + * after this point. + */ + + bus->sysdata = controller; + PCI_CONTROLLER(bus)->platform_data = provider_soft; + + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = + &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); +} + +/* + * Ugly hack to get PCI setup until we have a proper ACPI namespace. + */ + +#define PCI_BUSES_TO_SCAN 256 + +static int __init sn_pci_init(void) +{ + int i = 0; + struct pci_dev *pci_dev = NULL; + extern void sn_init_cpei_timer(void); +#ifdef CONFIG_PROC_FS + extern void register_sn_procfs(void); +#endif + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_SIMULATOR()) + return 0; + + /* + * This is needed to avoid bounce limit checks in the blk layer + */ + ia64_max_iommu_merge_mask = ~PAGE_MASK; + sn_fixup_ionodes(); + sn_irq = kmalloc(sizeof(struct sn_irq_info *) * NR_IRQS, GFP_KERNEL); + if (sn_irq <= 0) + BUG(); /* Canno afford to run out of memory. */ + memset(sn_irq, 0, sizeof(struct sn_irq_info *) * NR_IRQS); + + sn_init_cpei_timer(); + +#ifdef CONFIG_PROC_FS + register_sn_procfs(); +#endif + + for (i = 0; i < PCI_BUSES_TO_SCAN; i++) { + sn_pci_controller_fixup(0, i); + } + + /* + * Generic Linux PCI Layer has created the pci_bus and pci_dev + * structures - time for us to add our SN PLatform specific + * information. + */ + + while ((pci_dev = + pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) { + sn_pci_fixup_slot(pci_dev); + } + + sn_ioif_inited = 1; /* sn I/O infrastructure now initialized */ + + return 0; +} + +/* + * hubdev_init_node() - Creates the HUB data structure and link them to it's + * own NODE specific data area. + */ +void hubdev_init_node(nodepda_t * npda, cnodeid_t node) +{ + + struct hubdev_info *hubdev_info; + + if (node >= num_online_nodes()) /* Headless/memless IO nodes */ + hubdev_info = + (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(0), + sizeof(struct + hubdev_info)); + else + hubdev_info = + (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(node), + sizeof(struct + hubdev_info)); + npda->pdinfo = (void *)hubdev_info; + +} + +geoid_t +cnodeid_get_geoid(cnodeid_t cnode) +{ + + struct hubdev_info *hubdev; + + hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + return hubdev->hdi_geoid; + +} + +subsys_initcall(sn_pci_init); diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c new file mode 100644 index 000000000000..fec6d8b8237b --- /dev/null +++ b/arch/ia64/sn/kernel/iomv.c @@ -0,0 +1,70 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <asm/io.h> +#include <asm/delay.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/simulator.h> +#include <asm/sn/pda.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/shub_mmr.h> + +/** + * sn_io_addr - convert an in/out port to an i/o address + * @port: port to convert + * + * Legacy in/out instructions are converted to ld/st instructions + * on IA64. This routine will convert a port number into a valid + * SN i/o address. Used by sn_in*() and sn_out*(). + */ +void *sn_io_addr(unsigned long port) +{ + if (!IS_RUNNING_ON_SIMULATOR()) { + /* On sn2, legacy I/O ports don't point at anything */ + if (port < (64 * 1024)) + return NULL; + return ((void *)(port | __IA64_UNCACHED_OFFSET)); + } else { + /* but the simulator uses them... */ + unsigned long addr; + + /* + * word align port, but need more than 10 bits + * for accessing registers in bedrock local block + * (so we don't do port&0xfff) + */ + addr = (is_shub2() ? 0xc00000028c000000UL : 0xc0000087cc000000UL) | ((port >> 2) << 12); + if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port == 0x3f7) + addr |= port; + return (void *)addr; + } +} + +EXPORT_SYMBOL(sn_io_addr); + +/** + * __sn_mmiowb - I/O space memory barrier + * + * See include/asm-ia64/io.h and Documentation/DocBook/deviceiobook.tmpl + * for details. + * + * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear. + * See PV 871084 for details about the WAR about zero value. + * + */ +void __sn_mmiowb(void) +{ + volatile unsigned long *adr = pda->pio_write_status_addr; + unsigned long val = pda->pio_write_status_val; + + while ((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != val) + cpu_relax(); +} + +EXPORT_SYMBOL(__sn_mmiowb); diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c new file mode 100644 index 000000000000..3be44724f6c8 --- /dev/null +++ b/arch/ia64/sn/kernel/irq.c @@ -0,0 +1,431 @@ +/* + * Platform dependent support for SGI SN + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/irq.h> +#include <asm/sn/intr.h> +#include <asm/sn/addrs.h> +#include <asm/sn/arch.h> +#include "xtalk/xwidgetdev.h" +#include "pci/pcibus_provider_defs.h" +#include "pci/pcidev.h" +#include "pci/pcibr_provider.h" +#include <asm/sn/shub_mmr.h> +#include <asm/sn/sn_sal.h> + +static void force_interrupt(int irq); +static void register_intr_pda(struct sn_irq_info *sn_irq_info); +static void unregister_intr_pda(struct sn_irq_info *sn_irq_info); + +extern int sn_force_interrupt_flag; +extern int sn_ioif_inited; +struct sn_irq_info **sn_irq; + +static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget, + u64 sn_irq_info, + int req_irq, nasid_t req_nasid, + int req_slice) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_ALLOC, (u64) local_nasid, + (u64) local_widget, (u64) sn_irq_info, (u64) req_irq, + (u64) req_nasid, (u64) req_slice); + return ret_stuff.status; +} + +static inline void sn_intr_free(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_FREE, (u64) local_nasid, + (u64) local_widget, (u64) sn_irq_info->irq_irq, + (u64) sn_irq_info->irq_cookie, 0, 0); +} + +static unsigned int sn_startup_irq(unsigned int irq) +{ + return 0; +} + +static void sn_shutdown_irq(unsigned int irq) +{ +} + +static void sn_disable_irq(unsigned int irq) +{ +} + +static void sn_enable_irq(unsigned int irq) +{ +} + +static void sn_ack_irq(unsigned int irq) +{ + uint64_t event_occurred, mask = 0; + int nasid; + + irq = irq & 0xff; + nasid = get_nasid(); + event_occurred = + HUB_L((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED)); + if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) { + mask |= (1 << SH_EVENT_OCCURRED_UART_INT_SHFT); + } + if (event_occurred & SH_EVENT_OCCURRED_IPI_INT_MASK) { + mask |= (1 << SH_EVENT_OCCURRED_IPI_INT_SHFT); + } + if (event_occurred & SH_EVENT_OCCURRED_II_INT0_MASK) { + mask |= (1 << SH_EVENT_OCCURRED_II_INT0_SHFT); + } + if (event_occurred & SH_EVENT_OCCURRED_II_INT1_MASK) { + mask |= (1 << SH_EVENT_OCCURRED_II_INT1_SHFT); + } + HUB_S((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED_ALIAS), + mask); + __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs); + + move_irq(irq); +} + +static void sn_end_irq(unsigned int irq) +{ + int nasid; + int ivec; + uint64_t event_occurred; + + ivec = irq & 0xff; + if (ivec == SGI_UART_VECTOR) { + nasid = get_nasid(); + event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR + (nasid, SH_EVENT_OCCURRED)); + /* If the UART bit is set here, we may have received an + * interrupt from the UART that the driver missed. To + * make sure, we IPI ourselves to force us to look again. + */ + if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) { + platform_send_ipi(smp_processor_id(), SGI_UART_VECTOR, + IA64_IPI_DM_INT, 0); + } + } + __clear_bit(ivec, (volatile void *)pda->sn_in_service_ivecs); + if (sn_force_interrupt_flag) + force_interrupt(irq); +} + +static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct sn_irq_info *sn_irq_info = sn_irq[irq]; + struct sn_irq_info *tmp_sn_irq_info; + int cpuid, cpuphys; + nasid_t t_nasid; /* nasid to target */ + int t_slice; /* slice to target */ + + /* allocate a temp sn_irq_info struct to get new target info */ + tmp_sn_irq_info = kmalloc(sizeof(*tmp_sn_irq_info), GFP_KERNEL); + if (!tmp_sn_irq_info) + return; + + cpuid = first_cpu(mask); + cpuphys = cpu_physical_id(cpuid); + t_nasid = cpuid_to_nasid(cpuid); + t_slice = cpuid_to_slice(cpuid); + + while (sn_irq_info) { + int status; + int local_widget; + uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge; + nasid_t local_nasid = NASID_GET(bridge); + + if (!bridge) + break; /* irq is not a device interrupt */ + + if (local_nasid & 1) + local_widget = TIO_SWIN_WIDGETNUM(bridge); + else + local_widget = SWIN_WIDGETNUM(bridge); + + /* Free the old PROM sn_irq_info structure */ + sn_intr_free(local_nasid, local_widget, sn_irq_info); + + /* allocate a new PROM sn_irq_info struct */ + status = sn_intr_alloc(local_nasid, local_widget, + __pa(tmp_sn_irq_info), irq, t_nasid, + t_slice); + + if (status == 0) { + /* Update kernels sn_irq_info with new target info */ + unregister_intr_pda(sn_irq_info); + sn_irq_info->irq_cpuid = cpuid; + sn_irq_info->irq_nasid = t_nasid; + sn_irq_info->irq_slice = t_slice; + sn_irq_info->irq_xtalkaddr = + tmp_sn_irq_info->irq_xtalkaddr; + sn_irq_info->irq_cookie = tmp_sn_irq_info->irq_cookie; + register_intr_pda(sn_irq_info); + + if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type)) { + pcibr_change_devices_irq(sn_irq_info); + } + + sn_irq_info = sn_irq_info->irq_next; + +#ifdef CONFIG_SMP + set_irq_affinity_info((irq & 0xff), cpuphys, 0); +#endif + } else { + break; /* snp_affinity failed the intr_alloc */ + } + } + kfree(tmp_sn_irq_info); +} + +struct hw_interrupt_type irq_type_sn = { + "SN hub", + sn_startup_irq, + sn_shutdown_irq, + sn_enable_irq, + sn_disable_irq, + sn_ack_irq, + sn_end_irq, + sn_set_affinity_irq +}; + +unsigned int sn_local_vector_to_irq(u8 vector) +{ + return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector)); +} + +void sn_irq_init(void) +{ + int i; + irq_desc_t *base_desc = irq_desc; + + for (i = 0; i < NR_IRQS; i++) { + if (base_desc[i].handler == &no_irq_type) { + base_desc[i].handler = &irq_type_sn; + } + } +} + +static void register_intr_pda(struct sn_irq_info *sn_irq_info) +{ + int irq = sn_irq_info->irq_irq; + int cpu = sn_irq_info->irq_cpuid; + + if (pdacpu(cpu)->sn_last_irq < irq) { + pdacpu(cpu)->sn_last_irq = irq; + } + + if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq) { + pdacpu(cpu)->sn_first_irq = irq; + } +} + +static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) +{ + int irq = sn_irq_info->irq_irq; + int cpu = sn_irq_info->irq_cpuid; + struct sn_irq_info *tmp_irq_info; + int i, foundmatch; + + if (pdacpu(cpu)->sn_last_irq == irq) { + foundmatch = 0; + for (i = pdacpu(cpu)->sn_last_irq - 1; i; i--) { + tmp_irq_info = sn_irq[i]; + while (tmp_irq_info) { + if (tmp_irq_info->irq_cpuid == cpu) { + foundmatch++; + break; + } + tmp_irq_info = tmp_irq_info->irq_next; + } + if (foundmatch) { + break; + } + } + pdacpu(cpu)->sn_last_irq = i; + } + + if (pdacpu(cpu)->sn_first_irq == irq) { + foundmatch = 0; + for (i = pdacpu(cpu)->sn_first_irq + 1; i < NR_IRQS; i++) { + tmp_irq_info = sn_irq[i]; + while (tmp_irq_info) { + if (tmp_irq_info->irq_cpuid == cpu) { + foundmatch++; + break; + } + tmp_irq_info = tmp_irq_info->irq_next; + } + if (foundmatch) { + break; + } + } + pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i); + } +} + +struct sn_irq_info *sn_irq_alloc(nasid_t local_nasid, int local_widget, int irq, + nasid_t nasid, int slice) +{ + struct sn_irq_info *sn_irq_info; + int status; + + sn_irq_info = kmalloc(sizeof(*sn_irq_info), GFP_KERNEL); + if (sn_irq_info == NULL) + return NULL; + + memset(sn_irq_info, 0x0, sizeof(*sn_irq_info)); + + status = + sn_intr_alloc(local_nasid, local_widget, __pa(sn_irq_info), irq, + nasid, slice); + + if (status) { + kfree(sn_irq_info); + return NULL; + } else { + return sn_irq_info; + } +} + +void sn_irq_free(struct sn_irq_info *sn_irq_info) +{ + uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge; + nasid_t local_nasid = NASID_GET(bridge); + int local_widget; + + if (local_nasid & 1) /* tio check */ + local_widget = TIO_SWIN_WIDGETNUM(bridge); + else + local_widget = SWIN_WIDGETNUM(bridge); + + sn_intr_free(local_nasid, local_widget, sn_irq_info); + + kfree(sn_irq_info); +} + +void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) +{ + nasid_t nasid = sn_irq_info->irq_nasid; + int slice = sn_irq_info->irq_slice; + int cpu = nasid_slice_to_cpuid(nasid, slice); + + sn_irq_info->irq_cpuid = cpu; + sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev); + + /* link it into the sn_irq[irq] list */ + sn_irq_info->irq_next = sn_irq[sn_irq_info->irq_irq]; + sn_irq[sn_irq_info->irq_irq] = sn_irq_info; + + (void)register_intr_pda(sn_irq_info); +} + +static void force_interrupt(int irq) +{ + struct sn_irq_info *sn_irq_info; + + if (!sn_ioif_inited) + return; + sn_irq_info = sn_irq[irq]; + while (sn_irq_info) { + if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && + (sn_irq_info->irq_bridge != NULL)) { + pcibr_force_interrupt(sn_irq_info); + } + sn_irq_info = sn_irq_info->irq_next; + } +} + +/* + * Check for lost interrupts. If the PIC int_status reg. says that + * an interrupt has been sent, but not handled, and the interrupt + * is not pending in either the cpu irr regs or in the soft irr regs, + * and the interrupt is not in service, then the interrupt may have + * been lost. Force an interrupt on that pin. It is possible that + * the interrupt is in flight, so we may generate a spurious interrupt, + * but we should never miss a real lost interrupt. + */ +static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) +{ + uint64_t regval; + int irr_reg_num; + int irr_bit; + uint64_t irr_reg; + struct pcidev_info *pcidev_info; + struct pcibus_info *pcibus_info; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info-> + pdi_pcibus_info; + regval = pcireg_intr_status_get(pcibus_info); + + irr_reg_num = irq_to_vector(irq) / 64; + irr_bit = irq_to_vector(irq) % 64; + switch (irr_reg_num) { + case 0: + irr_reg = ia64_getreg(_IA64_REG_CR_IRR0); + break; + case 1: + irr_reg = ia64_getreg(_IA64_REG_CR_IRR1); + break; + case 2: + irr_reg = ia64_getreg(_IA64_REG_CR_IRR2); + break; + case 3: + irr_reg = ia64_getreg(_IA64_REG_CR_IRR3); + break; + } + if (!test_bit(irr_bit, &irr_reg)) { + if (!test_bit(irq, pda->sn_soft_irr)) { + if (!test_bit(irq, pda->sn_in_service_ivecs)) { + regval &= 0xff; + if (sn_irq_info->irq_int_bit & regval & + sn_irq_info->irq_last_intr) { + regval &= + ~(sn_irq_info-> + irq_int_bit & regval); + pcibr_force_interrupt(sn_irq_info); + } + } + } + } + sn_irq_info->irq_last_intr = regval; +} + +void sn_lb_int_war_check(void) +{ + int i; + + if (!sn_ioif_inited || pda->sn_first_irq == 0) + return; + for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { + struct sn_irq_info *sn_irq_info = sn_irq[i]; + while (sn_irq_info) { + /* Only call for PCI bridges that are fully initialized. */ + if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && + (sn_irq_info->irq_bridge != NULL)) { + sn_check_intr(i, sn_irq_info); + } + sn_irq_info = sn_irq_info->irq_next; + } + } +} diff --git a/arch/ia64/sn/kernel/klconflib.c b/arch/ia64/sn/kernel/klconflib.c new file mode 100644 index 000000000000..0f11a3299cd2 --- /dev/null +++ b/arch/ia64/sn/kernel/klconflib.c @@ -0,0 +1,108 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <asm/sn/types.h> +#include <asm/sn/module.h> +#include <asm/sn/l1.h> + +char brick_types[MAX_BRICK_TYPES + 1] = "cri.xdpn%#=vo^kjbf890123456789..."; +/* + * Format a module id for printing. + * + * There are three possible formats: + * + * MODULE_FORMAT_BRIEF is the brief 6-character format, including + * the actual brick-type as recorded in the + * moduleid_t, eg. 002c15 for a C-brick, or + * 101#17 for a PX-brick. + * + * MODULE_FORMAT_LONG is the hwgraph format, eg. rack/002/bay/15 + * of rack/101/bay/17 (note that the brick + * type does not appear in this format). + * + * MODULE_FORMAT_LCD is like MODULE_FORMAT_BRIEF, except that it + * ensures that the module id provided appears + * exactly as it would on the LCD display of + * the corresponding brick, eg. still 002c15 + * for a C-brick, but 101p17 for a PX-brick. + * + * maule (9/13/04): Removed top-level check for (fmt == MODULE_FORMAT_LCD) + * making MODULE_FORMAT_LCD equivalent to MODULE_FORMAT_BRIEF. It was + * decided that all callers should assume the returned string should be what + * is displayed on the brick L1 LCD. + */ +void +format_module_id(char *buffer, moduleid_t m, int fmt) +{ + int rack, position; + unsigned char brickchar; + + rack = MODULE_GET_RACK(m); + brickchar = MODULE_GET_BTCHAR(m); + + /* Be sure we use the same brick type character as displayed + * on the brick's LCD + */ + switch (brickchar) + { + case L1_BRICKTYPE_GA: + case L1_BRICKTYPE_OPUS_TIO: + brickchar = L1_BRICKTYPE_C; + break; + + case L1_BRICKTYPE_PX: + case L1_BRICKTYPE_PE: + case L1_BRICKTYPE_PA: + case L1_BRICKTYPE_SA: /* we can move this to the "I's" later + * if that makes more sense + */ + brickchar = L1_BRICKTYPE_P; + break; + + case L1_BRICKTYPE_IX: + case L1_BRICKTYPE_IA: + + brickchar = L1_BRICKTYPE_I; + break; + } + + position = MODULE_GET_BPOS(m); + + if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) { + /* Brief module number format, eg. 002c15 */ + + /* Decompress the rack number */ + *buffer++ = '0' + RACK_GET_CLASS(rack); + *buffer++ = '0' + RACK_GET_GROUP(rack); + *buffer++ = '0' + RACK_GET_NUM(rack); + + /* Add the brick type */ + *buffer++ = brickchar; + } + else if (fmt == MODULE_FORMAT_LONG) { + /* Fuller hwgraph format, eg. rack/002/bay/15 */ + + strcpy(buffer, "rack" "/"); buffer += strlen(buffer); + + *buffer++ = '0' + RACK_GET_CLASS(rack); + *buffer++ = '0' + RACK_GET_GROUP(rack); + *buffer++ = '0' + RACK_GET_NUM(rack); + + strcpy(buffer, "/" "bay" "/"); buffer += strlen(buffer); + } + + /* Add the bay position, using at least two digits */ + if (position < 10) + *buffer++ = '0'; + sprintf(buffer, "%d", position); + +} diff --git a/arch/ia64/sn/kernel/machvec.c b/arch/ia64/sn/kernel/machvec.c new file mode 100644 index 000000000000..02bb9155840c --- /dev/null +++ b/arch/ia64/sn/kernel/machvec.c @@ -0,0 +1,11 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2002-2003 Silicon Graphics, Inc. All Rights Reserved. + */ + +#define MACHVEC_PLATFORM_NAME sn2 +#define MACHVEC_PLATFORM_HEADER <asm/machvec_sn2.h> +#include <asm/machvec_init.h> diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c new file mode 100644 index 000000000000..857774bb2c9a --- /dev/null +++ b/arch/ia64/sn/kernel/mca.c @@ -0,0 +1,135 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/vmalloc.h> +#include <asm/mca.h> +#include <asm/sal.h> +#include <asm/sn/sn_sal.h> + +/* + * Interval for calling SAL to poll for errors that do NOT cause error + * interrupts. SAL will raise a CPEI if any errors are present that + * need to be logged. + */ +#define CPEI_INTERVAL (5*HZ) + +struct timer_list sn_cpei_timer; +void sn_init_cpei_timer(void); + +/* Printing oemdata from mca uses data that is not passed through SAL, it is + * global. Only one user at a time. + */ +static DECLARE_MUTEX(sn_oemdata_mutex); +static u8 **sn_oemdata; +static u64 *sn_oemdata_size, sn_oemdata_bufsize; + +/* + * print_hook + * + * This function is the callback routine that SAL calls to log error + * info for platform errors. buf is appended to sn_oemdata, resizing as + * required. + */ +static int print_hook(const char *fmt, ...) +{ + char buf[400]; + int len; + va_list args; + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + len = strlen(buf); + while (*sn_oemdata_size + len + 1 > sn_oemdata_bufsize) { + u8 *newbuf = vmalloc(sn_oemdata_bufsize += 1000); + if (!newbuf) { + printk(KERN_ERR "%s: unable to extend sn_oemdata\n", + __FUNCTION__); + return 0; + } + memcpy(newbuf, *sn_oemdata, *sn_oemdata_size); + vfree(*sn_oemdata); + *sn_oemdata = newbuf; + } + memcpy(*sn_oemdata + *sn_oemdata_size, buf, len + 1); + *sn_oemdata_size += len; + return 0; +} + +static void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs) +{ + /* + * this function's sole purpose is to call SAL when we receive + * a CE interrupt from SHUB or when the timer routine decides + * we need to call SAL to check for CEs. + */ + + /* CALL SAL_LOG_CE */ + + ia64_sn_plat_cpei_handler(); +} + +static void sn_cpei_timer_handler(unsigned long dummy) +{ + sn_cpei_handler(-1, NULL, NULL); + mod_timer(&sn_cpei_timer, jiffies + CPEI_INTERVAL); +} + +void sn_init_cpei_timer(void) +{ + init_timer(&sn_cpei_timer); + sn_cpei_timer.expires = jiffies + CPEI_INTERVAL; + sn_cpei_timer.function = sn_cpei_timer_handler; + add_timer(&sn_cpei_timer); +} + +static int +sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata, + u64 * oemdata_size) +{ + down(&sn_oemdata_mutex); + sn_oemdata = oemdata; + sn_oemdata_size = oemdata_size; + sn_oemdata_bufsize = 0; + ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header); + up(&sn_oemdata_mutex); + return 0; +} + +/* Callback when userspace salinfo wants to decode oem data via the platform + * kernel and/or prom. + */ +int sn_salinfo_platform_oemdata(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size) +{ + efi_guid_t guid = *(efi_guid_t *)sect_header; + int valid = 0; + *oemdata_size = 0; + vfree(*oemdata); + *oemdata = NULL; + if (efi_guidcmp(guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0) { + sal_log_plat_specific_err_info_t *psei = (sal_log_plat_specific_err_info_t *)sect_header; + valid = psei->valid.oem_data; + } else if (efi_guidcmp(guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) { + sal_log_mem_dev_err_info_t *mdei = (sal_log_mem_dev_err_info_t *)sect_header; + valid = mdei->valid.oem_data; + } + if (valid) + return sn_platform_plat_specific_err_print(sect_header, oemdata, oemdata_size); + else + return 0; +} + +static int __init sn_salinfo_init(void) +{ + salinfo_platform_oemdata = &sn_salinfo_platform_oemdata; + return 0; +} + +module_init(sn_salinfo_init) diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c new file mode 100644 index 000000000000..f0306b516afb --- /dev/null +++ b/arch/ia64/sn/kernel/setup.c @@ -0,0 +1,621 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/kdev_t.h> +#include <linux/string.h> +#include <linux/tty.h> +#include <linux/console.h> +#include <linux/timex.h> +#include <linux/sched.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/serial.h> +#include <linux/irq.h> +#include <linux/bootmem.h> +#include <linux/mmzone.h> +#include <linux/interrupt.h> +#include <linux/acpi.h> +#include <linux/compiler.h> +#include <linux/sched.h> +#include <linux/root_dev.h> +#include <linux/nodemask.h> + +#include <asm/io.h> +#include <asm/sal.h> +#include <asm/machvec.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/sn/arch.h> +#include <asm/sn/addrs.h> +#include <asm/sn/pda.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/simulator.h> +#include <asm/sn/leds.h> +#include <asm/sn/bte.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/clksupport.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/geo.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include <asm/sn/klconfig.h> + + +DEFINE_PER_CPU(struct pda_s, pda_percpu); + +#define MAX_PHYS_MEMORY (1UL << 49) /* 1 TB */ + +lboard_t *root_lboard[MAX_COMPACT_NODES]; + +extern void bte_init_node(nodepda_t *, cnodeid_t); + +extern void sn_timer_init(void); +extern unsigned long last_time_offset; +extern void (*ia64_mark_idle) (int); +extern void snidle(int); +extern unsigned char acpi_kbd_controller_present; + +unsigned long sn_rtc_cycles_per_second; +EXPORT_SYMBOL(sn_rtc_cycles_per_second); + +DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); +EXPORT_PER_CPU_SYMBOL(__sn_hub_info); + +partid_t sn_partid = -1; +EXPORT_SYMBOL(sn_partid); +char sn_system_serial_number_string[128]; +EXPORT_SYMBOL(sn_system_serial_number_string); +u64 sn_partition_serial_number; +EXPORT_SYMBOL(sn_partition_serial_number); +u8 sn_partition_id; +EXPORT_SYMBOL(sn_partition_id); +u8 sn_system_size; +EXPORT_SYMBOL(sn_system_size); +u8 sn_sharing_domain_size; +EXPORT_SYMBOL(sn_sharing_domain_size); +u8 sn_coherency_id; +EXPORT_SYMBOL(sn_coherency_id); +u8 sn_region_size; +EXPORT_SYMBOL(sn_region_size); + +short physical_node_map[MAX_PHYSNODE_ID]; + +EXPORT_SYMBOL(physical_node_map); + +int numionodes; + +static void sn_init_pdas(char **); +static void scan_for_ionodes(void); + +static nodepda_t *nodepdaindr[MAX_COMPACT_NODES]; + +/* + * The format of "screen_info" is strange, and due to early i386-setup + * code. This is just enough to make the console code think we're on a + * VGA color display. + */ +struct screen_info sn_screen_info = { + .orig_x = 0, + .orig_y = 0, + .orig_video_mode = 3, + .orig_video_cols = 80, + .orig_video_ega_bx = 3, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; + +/* + * This is here so we can use the CMOS detection in ide-probe.c to + * determine what drives are present. In theory, we don't need this + * as the auto-detection could be done via ide-probe.c:do_probe() but + * in practice that would be much slower, which is painful when + * running in the simulator. Note that passing zeroes in DRIVE_INFO + * is sufficient (the IDE driver will autodetect the drive geometry). + */ +#ifdef CONFIG_IA64_GENERIC +extern char drive_info[4 * 16]; +#else +char drive_info[4 * 16]; +#endif + +/* + * Get nasid of current cpu early in boot before nodepda is initialized + */ +static int +boot_get_nasid(void) +{ + int nasid; + + if (ia64_sn_get_sapic_info(get_sapicid(), &nasid, NULL, NULL)) + BUG(); + return nasid; +} + +/* + * This routine can only be used during init, since + * smp_boot_data is an init data structure. + * We have to use smp_boot_data.cpu_phys_id to find + * the physical id of the processor because the normal + * cpu_physical_id() relies on data structures that + * may not be initialized yet. + */ + +static int __init pxm_to_nasid(int pxm) +{ + int i; + int nid; + + nid = pxm_to_nid_map[pxm]; + for (i = 0; i < num_node_memblks; i++) { + if (node_memblk[i].nid == nid) { + return NASID_GET(node_memblk[i].start_paddr); + } + } + return -1; +} + +/** + * early_sn_setup - early setup routine for SN platforms + * + * Sets up an initial console to aid debugging. Intended primarily + * for bringup. See start_kernel() in init/main.c. + */ + +void __init early_sn_setup(void) +{ + efi_system_table_t *efi_systab; + efi_config_table_t *config_tables; + struct ia64_sal_systab *sal_systab; + struct ia64_sal_desc_entry_point *ep; + char *p; + int i, j; + + /* + * Parse enough of the SAL tables to locate the SAL entry point. Since, console + * IO on SN2 is done via SAL calls, early_printk won't work without this. + * + * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c. + * Any changes to those file may have to be made hereas well. + */ + efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab); + config_tables = __va(efi_systab->tables); + for (i = 0; i < efi_systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == + 0) { + sal_systab = __va(config_tables[i].table); + p = (char *)(sal_systab + 1); + for (j = 0; j < sal_systab->entry_count; j++) { + if (*p == SAL_DESC_ENTRY_POINT) { + ep = (struct ia64_sal_desc_entry_point + *)p; + ia64_sal_handler_init(__va + (ep->sal_proc), + __va(ep->gp)); + return; + } + p += SAL_DESC_SIZE(*p); + } + } + } + /* Uh-oh, SAL not available?? */ + printk(KERN_ERR "failed to find SAL entry point\n"); +} + +extern int platform_intr_list[]; +extern nasid_t master_nasid; +static int shub_1_1_found __initdata; + +/* + * sn_check_for_wars + * + * Set flag for enabling shub specific wars + */ + +static inline int __init is_shub_1_1(int nasid) +{ + unsigned long id; + int rev; + + if (is_shub2()) + return 0; + id = REMOTE_HUB_L(nasid, SH1_SHUB_ID); + rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT; + return rev <= 2; +} + +static void __init sn_check_for_wars(void) +{ + int cnode; + + if (is_shub2()) { + /* none yet */ + } else { + for_each_online_node(cnode) { + if (is_shub_1_1(cnodeid_to_nasid(cnode))) + sn_hub_info->shub_1_1_found = 1; + } + } +} + +/** + * sn_setup - SN platform setup routine + * @cmdline_p: kernel command line + * + * Handles platform setup for SN machines. This includes determining + * the RTC frequency (via a SAL call), initializing secondary CPUs, and + * setting up per-node data areas. The console is also initialized here. + */ +void __init sn_setup(char **cmdline_p) +{ + long status, ticks_per_sec, drift; + int pxm; + int major = sn_sal_rev_major(), minor = sn_sal_rev_minor(); + extern void sn_cpu_init(void); + + /* + * If the generic code has enabled vga console support - lets + * get rid of it again. This is a kludge for the fact that ACPI + * currtently has no way of informing us if legacy VGA is available + * or not. + */ +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) + if (conswitchp == &vga_con) { + printk(KERN_DEBUG "SGI: Disabling VGA console\n"); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#else + conswitchp = NULL; +#endif /* CONFIG_DUMMY_CONSOLE */ + } +#endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */ + + MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY; + + memset(physical_node_map, -1, sizeof(physical_node_map)); + for (pxm = 0; pxm < MAX_PXM_DOMAINS; pxm++) + if (pxm_to_nid_map[pxm] != -1) + physical_node_map[pxm_to_nasid(pxm)] = + pxm_to_nid_map[pxm]; + + /* + * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard + * support here so we don't have to listen to failed keyboard probe + * messages. + */ + if ((major < 2 || (major == 2 && minor <= 9)) && + acpi_kbd_controller_present) { + printk(KERN_INFO "Disabling legacy keyboard support as prom " + "is too old and doesn't provide FADT\n"); + acpi_kbd_controller_present = 0; + } + + printk("SGI SAL version %x.%02x\n", major, minor); + + /* + * Confirm the SAL we're running on is recent enough... + */ + if ((major < SN_SAL_MIN_MAJOR) || (major == SN_SAL_MIN_MAJOR && + minor < SN_SAL_MIN_MINOR)) { + printk(KERN_ERR "This kernel needs SGI SAL version >= " + "%x.%02x\n", SN_SAL_MIN_MAJOR, SN_SAL_MIN_MINOR); + panic("PROM version too old\n"); + } + + master_nasid = boot_get_nasid(); + + status = + ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, + &drift); + if (status != 0 || ticks_per_sec < 100000) { + printk(KERN_WARNING + "unable to determine platform RTC clock frequency, guessing.\n"); + /* PROM gives wrong value for clock freq. so guess */ + sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; + } else + sn_rtc_cycles_per_second = ticks_per_sec; + + platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR; + + /* + * we set the default root device to /dev/hda + * to make simulation easy + */ + ROOT_DEV = Root_HDA1; + + /* + * Create the PDAs and NODEPDAs for all the cpus. + */ + sn_init_pdas(cmdline_p); + + ia64_mark_idle = &snidle; + + /* + * For the bootcpu, we do this here. All other cpus will make the + * call as part of cpu_init in slave cpu initialization. + */ + sn_cpu_init(); + +#ifdef CONFIG_SMP + init_smp_config(); +#endif + screen_info = sn_screen_info; + + sn_timer_init(); +} + +/** + * sn_init_pdas - setup node data areas + * + * One time setup for Node Data Area. Called by sn_setup(). + */ +static void __init sn_init_pdas(char **cmdline_p) +{ + cnodeid_t cnode; + + memset(pda->cnodeid_to_nasid_table, -1, + sizeof(pda->cnodeid_to_nasid_table)); + for_each_online_node(cnode) + pda->cnodeid_to_nasid_table[cnode] = + pxm_to_nasid(nid_to_pxm_map[cnode]); + + numionodes = num_online_nodes(); + scan_for_ionodes(); + + /* + * Allocate & initalize the nodepda for each node. + */ + for_each_online_node(cnode) { + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t)); + memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); + memset(nodepdaindr[cnode]->phys_cpuid, -1, + sizeof(nodepdaindr[cnode]->phys_cpuid)); + } + + /* + * Allocate & initialize nodepda for TIOs. For now, put them on node 0. + */ + for (cnode = num_online_nodes(); cnode < numionodes; cnode++) { + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t)); + memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); + } + + /* + * Now copy the array of nodepda pointers to each nodepda. + */ + for (cnode = 0; cnode < numionodes; cnode++) + memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr, + sizeof(nodepdaindr)); + + /* + * Set up IO related platform-dependent nodepda fields. + * The following routine actually sets up the hubinfo struct + * in nodepda. + */ + for_each_online_node(cnode) { + bte_init_node(nodepdaindr[cnode], cnode); + } + + /* + * Initialize the per node hubdev. This includes IO Nodes and + * headless/memless nodes. + */ + for (cnode = 0; cnode < numionodes; cnode++) { + hubdev_init_node(nodepdaindr[cnode], cnode); + } +} + +/** + * sn_cpu_init - initialize per-cpu data areas + * @cpuid: cpuid of the caller + * + * Called during cpu initialization on each cpu as it starts. + * Currently, initializes the per-cpu data area for SNIA. + * Also sets up a few fields in the nodepda. Also known as + * platform_cpu_init() by the ia64 machvec code. + */ +void __init sn_cpu_init(void) +{ + int cpuid; + int cpuphyid; + int nasid; + int subnode; + int slice; + int cnode; + int i; + static int wars_have_been_checked; + + memset(pda, 0, sizeof(pda)); + if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, &sn_hub_info->nasid_bitmask, &sn_hub_info->nasid_shift, + &sn_system_size, &sn_sharing_domain_size, &sn_partition_id, + &sn_coherency_id, &sn_region_size)) + BUG(); + sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2; + + /* + * The boot cpu makes this call again after platform initialization is + * complete. + */ + if (nodepdaindr[0] == NULL) + return; + + cpuid = smp_processor_id(); + cpuphyid = get_sapicid(); + + if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice)) + BUG(); + + for (i=0; i < MAX_NUMNODES; i++) { + if (nodepdaindr[i]) { + nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid; + nodepdaindr[i]->phys_cpuid[cpuid].slice = slice; + nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode; + } + } + + cnode = nasid_to_cnodeid(nasid); + + pda->p_nodepda = nodepdaindr[cnode]; + pda->led_address = + (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT)); + pda->led_state = LED_ALWAYS_SET; + pda->hb_count = HZ / 2; + pda->hb_state = 0; + pda->idle_flag = 0; + + if (cpuid != 0) { + memcpy(pda->cnodeid_to_nasid_table, + pdacpu(0)->cnodeid_to_nasid_table, + sizeof(pda->cnodeid_to_nasid_table)); + } + + /* + * Check for WARs. + * Only needs to be done once, on BSP. + * Has to be done after loop above, because it uses pda.cnodeid_to_nasid_table[i]. + * Has to be done before assignment below. + */ + if (!wars_have_been_checked) { + sn_check_for_wars(); + wars_have_been_checked = 1; + } + sn_hub_info->shub_1_1_found = shub_1_1_found; + + /* + * Set up addresses of PIO/MEM write status registers. + */ + { + u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0}; + u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_1, + SH2_PIO_WRITE_STATUS_2, SH2_PIO_WRITE_STATUS_3}; + u64 *pio; + pio = is_shub1() ? pio1 : pio2; + pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]); + pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; + } + + /* + * WAR addresses for SHUB 1.x. + */ + if (local_node_data->active_cpu_count++ == 0 && is_shub1()) { + int buddy_nasid; + buddy_nasid = + cnodeid_to_nasid(numa_node_id() == + num_online_nodes() - 1 ? 0 : numa_node_id() + 1); + pda->pio_shub_war_cam_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, + SH1_PI_CAM_CONTROL); + } +} + +/* + * Scan klconfig for ionodes. Add the nasids to the + * physical_node_map and the pda and increment numionodes. + */ + +static void __init scan_for_ionodes(void) +{ + int nasid = 0; + lboard_t *brd; + + /* Setup ionodes with memory */ + for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { + char *klgraph_header; + cnodeid_t cnodeid; + + if (physical_node_map[nasid] == -1) + continue; + + cnodeid = -1; + klgraph_header = __va(ia64_sn_get_klconfig_addr(nasid)); + if (!klgraph_header) { + if (IS_RUNNING_ON_SIMULATOR()) + continue; + BUG(); /* All nodes must have klconfig tables! */ + } + cnodeid = nasid_to_cnodeid(nasid); + root_lboard[cnodeid] = (lboard_t *) + NODE_OFFSET_TO_LBOARD((nasid), + ((kl_config_hdr_t + *) (klgraph_header))-> + ch_board_info); + } + + /* Scan headless/memless IO Nodes. */ + for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { + /* if there's no nasid, don't try to read the klconfig on the node */ + if (physical_node_map[nasid] == -1) + continue; + brd = find_lboard_any((lboard_t *) + root_lboard[nasid_to_cnodeid(nasid)], + KLTYPE_SNIA); + if (brd) { + brd = KLCF_NEXT_ANY(brd); /* Skip this node's lboard */ + if (!brd) + continue; + } + + brd = find_lboard_any(brd, KLTYPE_SNIA); + + while (brd) { + pda->cnodeid_to_nasid_table[numionodes] = + brd->brd_nasid; + physical_node_map[brd->brd_nasid] = numionodes; + root_lboard[numionodes] = brd; + numionodes++; + brd = KLCF_NEXT_ANY(brd); + if (!brd) + break; + + brd = find_lboard_any(brd, KLTYPE_SNIA); + } + } + + /* Scan for TIO nodes. */ + for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { + /* if there's no nasid, don't try to read the klconfig on the node */ + if (physical_node_map[nasid] == -1) + continue; + brd = find_lboard_any((lboard_t *) + root_lboard[nasid_to_cnodeid(nasid)], + KLTYPE_TIO); + while (brd) { + pda->cnodeid_to_nasid_table[numionodes] = + brd->brd_nasid; + physical_node_map[brd->brd_nasid] = numionodes; + root_lboard[numionodes] = brd; + numionodes++; + brd = KLCF_NEXT_ANY(brd); + if (!brd) + break; + + brd = find_lboard_any(brd, KLTYPE_TIO); + } + } + +} + +int +nasid_slice_to_cpuid(int nasid, int slice) +{ + long cpu; + + for (cpu=0; cpu < NR_CPUS; cpu++) + if (nodepda->phys_cpuid[cpu].nasid == nasid && nodepda->phys_cpuid[cpu].slice == slice) + return cpu; + + return -1; +} diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile new file mode 100644 index 000000000000..170bde4549da --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/Makefile @@ -0,0 +1,13 @@ +# arch/ia64/sn/kernel/sn2/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1999,2001-2002 Silicon Graphics, Inc. All rights reserved. +# +# sn2 specific kernel files +# + +obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \ + prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c new file mode 100644 index 000000000000..bc3cfa17cd0f --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/cache.c @@ -0,0 +1,34 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2003 Silicon Graphics, Inc. All rights reserved. + * + */ +#include <linux/module.h> +#include <asm/pgalloc.h> + +/** + * sn_flush_all_caches - flush a range of address from all caches (incl. L4) + * @flush_addr: identity mapped region 7 address to start flushing + * @bytes: number of bytes to flush + * + * Flush a range of addresses from all caches including L4. + * All addresses fully or partially contained within + * @flush_addr to @flush_addr + @bytes are flushed + * from the all caches. + */ +void +sn_flush_all_caches(long flush_addr, long bytes) +{ + flush_icache_range(flush_addr, flush_addr+bytes); + /* + * The last call may have returned before the caches + * were actually flushed, so we call it again to make + * sure. + */ + flush_icache_range(flush_addr, flush_addr+bytes); + mb(); +} +EXPORT_SYMBOL(sn_flush_all_caches); diff --git a/arch/ia64/sn/kernel/sn2/io.c b/arch/ia64/sn/kernel/sn2/io.c new file mode 100644 index 000000000000..a12c0586de38 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/io.c @@ -0,0 +1,101 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved. + * + * The generic kernel requires function pointers to these routines, so + * we wrap the inlines from asm/ia64/sn/sn2/io.h here. + */ + +#include <asm/sn/io.h> + +#ifdef CONFIG_IA64_GENERIC + +#undef __sn_inb +#undef __sn_inw +#undef __sn_inl +#undef __sn_outb +#undef __sn_outw +#undef __sn_outl +#undef __sn_readb +#undef __sn_readw +#undef __sn_readl +#undef __sn_readq +#undef __sn_readb_relaxed +#undef __sn_readw_relaxed +#undef __sn_readl_relaxed +#undef __sn_readq_relaxed + +unsigned int __sn_inb(unsigned long port) +{ + return ___sn_inb(port); +} + +unsigned int __sn_inw(unsigned long port) +{ + return ___sn_inw(port); +} + +unsigned int __sn_inl(unsigned long port) +{ + return ___sn_inl(port); +} + +void __sn_outb(unsigned char val, unsigned long port) +{ + ___sn_outb(val, port); +} + +void __sn_outw(unsigned short val, unsigned long port) +{ + ___sn_outw(val, port); +} + +void __sn_outl(unsigned int val, unsigned long port) +{ + ___sn_outl(val, port); +} + +unsigned char __sn_readb(void __iomem *addr) +{ + return ___sn_readb(addr); +} + +unsigned short __sn_readw(void __iomem *addr) +{ + return ___sn_readw(addr); +} + +unsigned int __sn_readl(void __iomem *addr) +{ + return ___sn_readl(addr); +} + +unsigned long __sn_readq(void __iomem *addr) +{ + return ___sn_readq(addr); +} + +unsigned char __sn_readb_relaxed(void __iomem *addr) +{ + return ___sn_readb_relaxed(addr); +} + +unsigned short __sn_readw_relaxed(void __iomem *addr) +{ + return ___sn_readw_relaxed(addr); +} + +unsigned int __sn_readl_relaxed(void __iomem *addr) +{ + return ___sn_readl_relaxed(addr); +} + +unsigned long __sn_readq_relaxed(void __iomem *addr) +{ + return ___sn_readq_relaxed(addr); +} + +#endif diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c new file mode 100644 index 000000000000..81c63b2f8ae9 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -0,0 +1,279 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * Module to export the system's Firmware Interface Tables, including + * PROM revision numbers and banners, in /proc + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/proc_fs.h> +#include <linux/nodemask.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/addrs.h> + +MODULE_DESCRIPTION("PROM version reporting for /proc"); +MODULE_AUTHOR("Chad Talbott"); +MODULE_LICENSE("GPL"); + +/* Standard Intel FIT entry types */ +#define FIT_ENTRY_FIT_HEADER 0x00 /* FIT header entry */ +#define FIT_ENTRY_PAL_B 0x01 /* PAL_B entry */ +/* Entries 0x02 through 0x0D reserved by Intel */ +#define FIT_ENTRY_PAL_A_PROC 0x0E /* Processor-specific PAL_A entry */ +#define FIT_ENTRY_PAL_A 0x0F /* PAL_A entry, same as... */ +#define FIT_ENTRY_PAL_A_GEN 0x0F /* ...Generic PAL_A entry */ +#define FIT_ENTRY_UNUSED 0x7F /* Unused (reserved by Intel?) */ +/* OEM-defined entries range from 0x10 to 0x7E. */ +#define FIT_ENTRY_SAL_A 0x10 /* SAL_A entry */ +#define FIT_ENTRY_SAL_B 0x11 /* SAL_B entry */ +#define FIT_ENTRY_SALRUNTIME 0x12 /* SAL runtime entry */ +#define FIT_ENTRY_EFI 0x1F /* EFI entry */ +#define FIT_ENTRY_FPSWA 0x20 /* embedded fpswa entry */ +#define FIT_ENTRY_VMLINUX 0x21 /* embedded vmlinux entry */ + +#define FIT_MAJOR_SHIFT (32 + 8) +#define FIT_MAJOR_MASK ((1 << 8) - 1) +#define FIT_MINOR_SHIFT 32 +#define FIT_MINOR_MASK ((1 << 8) - 1) + +#define FIT_MAJOR(q) \ + ((unsigned) ((q) >> FIT_MAJOR_SHIFT) & FIT_MAJOR_MASK) +#define FIT_MINOR(q) \ + ((unsigned) ((q) >> FIT_MINOR_SHIFT) & FIT_MINOR_MASK) + +#define FIT_TYPE_SHIFT (32 + 16) +#define FIT_TYPE_MASK ((1 << 7) - 1) + +#define FIT_TYPE(q) \ + ((unsigned) ((q) >> FIT_TYPE_SHIFT) & FIT_TYPE_MASK) + +struct fit_type_map_t { + unsigned char type; + const char *name; +}; + +static const struct fit_type_map_t fit_entry_types[] = { + {FIT_ENTRY_FIT_HEADER, "FIT Header"}, + {FIT_ENTRY_PAL_A_GEN, "Generic PAL_A"}, + {FIT_ENTRY_PAL_A_PROC, "Processor-specific PAL_A"}, + {FIT_ENTRY_PAL_A, "PAL_A"}, + {FIT_ENTRY_PAL_B, "PAL_B"}, + {FIT_ENTRY_SAL_A, "SAL_A"}, + {FIT_ENTRY_SAL_B, "SAL_B"}, + {FIT_ENTRY_SALRUNTIME, "SAL runtime"}, + {FIT_ENTRY_EFI, "EFI"}, + {FIT_ENTRY_VMLINUX, "Embedded Linux"}, + {FIT_ENTRY_FPSWA, "Embedded FPSWA"}, + {FIT_ENTRY_UNUSED, "Unused"}, + {0xff, "Error"}, +}; + +static const char *fit_type_name(unsigned char type) +{ + struct fit_type_map_t const *mapp; + + for (mapp = fit_entry_types; mapp->type != 0xff; mapp++) + if (type == mapp->type) + return mapp->name; + + if ((type > FIT_ENTRY_PAL_A) && (type < FIT_ENTRY_UNUSED)) + return "OEM type"; + if ((type > FIT_ENTRY_PAL_B) && (type < FIT_ENTRY_PAL_A)) + return "Reserved"; + + return "Unknown type"; +} + +static int +get_fit_entry(unsigned long nasid, int index, unsigned long *fentry, + char *banner, int banlen) +{ + return ia64_sn_get_fit_compt(nasid, index, fentry, banner, banlen); +} + + +/* + * These two routines display the FIT table for each node. + */ +static int dump_fit_entry(char *page, unsigned long *fentry) +{ + unsigned type; + + type = FIT_TYPE(fentry[1]); + return sprintf(page, "%02x %-25s %x.%02x %016lx %u\n", + type, + fit_type_name(type), + FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1]), + fentry[0], + /* mult by sixteen to get size in bytes */ + (unsigned)(fentry[1] & 0xffffff) * 16); +} + + +/* + * We assume that the fit table will be small enough that we can print + * the whole thing into one page. (This is true for our default 16kB + * pages -- each entry is about 60 chars wide when printed.) I read + * somewhere that the maximum size of the FIT is 128 entries, so we're + * OK except for 4kB pages (and no one is going to do that on SN + * anyway). + */ +static int +dump_fit(char *page, unsigned long nasid) +{ + unsigned long fentry[2]; + int index; + char *p; + + p = page; + for (index=0;;index++) { + BUG_ON(index * 60 > PAGE_SIZE); + if (get_fit_entry(nasid, index, fentry, NULL, 0)) + break; + p += dump_fit_entry(p, fentry); + } + + return p - page; +} + +static int +dump_version(char *page, unsigned long nasid) +{ + unsigned long fentry[2]; + char banner[128]; + int index; + int len; + + for (index = 0; ; index++) { + if (get_fit_entry(nasid, index, fentry, banner, + sizeof(banner))) + return 0; + if (FIT_TYPE(fentry[1]) == FIT_ENTRY_SAL_A) + break; + } + + len = sprintf(page, "%x.%02x\n", FIT_MAJOR(fentry[1]), + FIT_MINOR(fentry[1])); + page += len; + + if (banner[0]) + len += snprintf(page, PAGE_SIZE-len, "%s\n", banner); + + return len; +} + +/* same as in proc_misc.c */ +static int +proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, + int len) +{ + if (len <= off + count) + *eof = 1; + *start = page + off; + len -= off; + if (len > count) + len = count; + if (len < 0) + len = 0; + return len; +} + +static int +read_version_entry(char *page, char **start, off_t off, int count, int *eof, + void *data) +{ + int len = 0; + + /* data holds the NASID of the node */ + len = dump_version(page, (unsigned long)data); + len = proc_calc_metrics(page, start, off, count, eof, len); + return len; +} + +static int +read_fit_entry(char *page, char **start, off_t off, int count, int *eof, + void *data) +{ + int len = 0; + + /* data holds the NASID of the node */ + len = dump_fit(page, (unsigned long)data); + len = proc_calc_metrics(page, start, off, count, eof, len); + + return len; +} + +/* module entry points */ +int __init prominfo_init(void); +void __exit prominfo_exit(void); + +module_init(prominfo_init); +module_exit(prominfo_exit); + +static struct proc_dir_entry **proc_entries; +static struct proc_dir_entry *sgi_prominfo_entry; + +#define NODE_NAME_LEN 11 + +int __init prominfo_init(void) +{ + struct proc_dir_entry **entp; + struct proc_dir_entry *p; + cnodeid_t cnodeid; + unsigned long nasid; + char name[NODE_NAME_LEN]; + + if (!ia64_platform_is("sn2")) + return 0; + + proc_entries = kmalloc(num_online_nodes() * sizeof(struct proc_dir_entry *), + GFP_KERNEL); + + sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL); + + entp = proc_entries; + for_each_online_node(cnodeid) { + sprintf(name, "node%d", cnodeid); + *entp = proc_mkdir(name, sgi_prominfo_entry); + nasid = cnodeid_to_nasid(cnodeid); + p = create_proc_read_entry( + "fit", 0, *entp, read_fit_entry, + (void *)nasid); + if (p) + p->owner = THIS_MODULE; + p = create_proc_read_entry( + "version", 0, *entp, read_version_entry, + (void *)nasid); + if (p) + p->owner = THIS_MODULE; + entp++; + } + + return 0; +} + +void __exit prominfo_exit(void) +{ + struct proc_dir_entry **entp; + unsigned cnodeid; + char name[NODE_NAME_LEN]; + + entp = proc_entries; + for_each_online_node(cnodeid) { + remove_proc_entry("fit", *entp); + remove_proc_entry("version", *entp); + sprintf(name, "node%d", cnodeid); + remove_proc_entry(name, sgi_prominfo_entry); + entp++; + } + remove_proc_entry("sgi_prominfo", NULL); + kfree(proc_entries); +} diff --git a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S new file mode 100644 index 000000000000..7947312801ec --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S @@ -0,0 +1,82 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <asm/sn/shub_mmr.h> + +#define DEADLOCKBIT SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT +#define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK +#define ALIAS_OFFSET (SH1_PIO_WRITE_STATUS_0_ALIAS-SH1_PIO_WRITE_STATUS_0) + + + .global sn2_ptc_deadlock_recovery_core + .proc sn2_ptc_deadlock_recovery_core + +sn2_ptc_deadlock_recovery_core: + .regstk 6,0,0,0 + + ptc0 = in0 + data0 = in1 + ptc1 = in2 + data1 = in3 + piowc = in4 + zeroval = in5 + piowcphy = r30 + psrsave = r2 + scr1 = r16 + scr2 = r17 + mask = r18 + + + extr.u piowcphy=piowc,0,61;; // Convert piowc to uncached physical address + dep piowcphy=-1,piowcphy,63,1 + movl mask=WRITECOUNTMASK + +1: + add scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register + mov scr1=7;; // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR + st8.rel [scr2]=scr1;; + +5: ld8.acq scr1=[piowc];; // Wait for PIOs to complete. + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b + + + + ////////////// BEGIN PHYSICAL MODE //////////////////// + mov psrsave=psr // Disable IC (no PMIs) + rsm psr.i | psr.dt | psr.ic;; + srlz.i;; + + st8.rel [ptc0]=data0 // Write PTC0 & wait for completion. + +5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b;; + + tbit.nz p8,p7=scr1,DEADLOCKBIT;;// Test for DEADLOCK +(p7) cmp.ne p7,p0=r0,ptc1;; // Test for non-null ptc1 + +(p7) st8.rel [ptc1]=data1;; // Now write PTC1. + +5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b + + tbit.nz p8,p0=scr1,DEADLOCKBIT;;// Test for DEADLOCK + + mov psr.l=psrsave;; // Reenable IC + srlz.i;; + ////////////// END PHYSICAL MODE //////////////////// + +(p8) br.cond.spnt 1b;; // Repeat if DEADLOCK occurred. + + br.ret.sptk rp + .endp sn2_ptc_deadlock_recovery_core diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c new file mode 100644 index 000000000000..7af05a7ac743 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -0,0 +1,295 @@ +/* + * SN2 Platform specific SMP Support + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/threads.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/nodemask.h> + +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/tlb.h> +#include <asm/numa.h> +#include <asm/hw_irq.h> +#include <asm/current.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/rw_mmr.h> + +void sn2_ptc_deadlock_recovery(volatile unsigned long *, unsigned long data0, + volatile unsigned long *, unsigned long data1); + +static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); + +static unsigned long sn2_ptc_deadlock_count; + +static inline unsigned long wait_piowc(void) +{ + volatile unsigned long *piows, zeroval; + unsigned long ws; + + piows = pda->pio_write_status_addr; + zeroval = pda->pio_write_status_val; + do { + cpu_relax(); + } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval); + return ws; +} + +void sn_tlb_migrate_finish(struct mm_struct *mm) +{ + if (mm == current->mm) + flush_tlb_mm(mm); +} + +/** + * sn2_global_tlb_purge - globally purge translation cache of virtual address range + * @start: start of virtual address range + * @end: end of virtual address range + * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) + * + * Purges the translation caches of all processors of the given virtual address + * range. + * + * Note: + * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. + * - cpu_vm_mask is converted into a nodemask of the nodes containing the + * cpus in cpu_vm_mask. + * - if only one bit is set in cpu_vm_mask & it is the current cpu, + * then only the local TLB needs to be flushed. This flushing can be done + * using ptc.l. This is the common case & avoids the global spinlock. + * - if multiple cpus have loaded the context, then flushing has to be + * done with ptc.g/MMRs under protection of the global ptc_lock. + */ + +void +sn2_global_tlb_purge(unsigned long start, unsigned long end, + unsigned long nbits) +{ + int i, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; + volatile unsigned long *ptc0, *ptc1; + unsigned long flags = 0, data0 = 0, data1 = 0; + struct mm_struct *mm = current->active_mm; + short nasids[MAX_NUMNODES], nix; + nodemask_t nodes_flushed; + + nodes_clear(nodes_flushed); + i = 0; + + for_each_cpu_mask(cpu, mm->cpu_vm_mask) { + cnode = cpu_to_node(cpu); + node_set(cnode, nodes_flushed); + lcpu = cpu; + i++; + } + + preempt_disable(); + + if (likely(i == 1 && lcpu == smp_processor_id())) { + do { + ia64_ptcl(start, nbits << 2); + start += (1UL << nbits); + } while (start < end); + ia64_srlz_i(); + preempt_enable(); + return; + } + + if (atomic_read(&mm->mm_users) == 1) { + flush_tlb_mm(mm); + preempt_enable(); + return; + } + + nix = 0; + for_each_node_mask(cnode, nodes_flushed) + nasids[nix++] = cnodeid_to_nasid(cnode); + + shub1 = is_shub1(); + if (shub1) { + data0 = (1UL << SH1_PTC_0_A_SHFT) | + (nbits << SH1_PTC_0_PS_SHFT) | + ((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) | + (1UL << SH1_PTC_0_START_SHFT); + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); + ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); + } else { + data0 = (1UL << SH2_PTC_A_SHFT) | + (nbits << SH2_PTC_PS_SHFT) | + (1UL << SH2_PTC_START_SHFT); + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + + ((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) ); + ptc1 = NULL; + } + + + mynasid = get_nasid(); + + spin_lock_irqsave(&sn2_global_ptc_lock, flags); + + do { + if (shub1) + data1 = start | (1UL << SH1_PTC_1_START_SHFT); + else + data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); + for (i = 0; i < nix; i++) { + nasid = nasids[i]; + if (unlikely(nasid == mynasid)) { + ia64_ptcga(start, nbits << 2); + ia64_srlz_i(); + } else { + ptc0 = CHANGE_NASID(nasid, ptc0); + if (ptc1) + ptc1 = CHANGE_NASID(nasid, ptc1); + pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, + data1); + flushed = 1; + } + } + + if (flushed + && (wait_piowc() & + SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK)) { + sn2_ptc_deadlock_recovery(ptc0, data0, ptc1, data1); + } + + start += (1UL << nbits); + + } while (start < end); + + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + + preempt_enable(); +} + +/* + * sn2_ptc_deadlock_recovery + * + * Recover from PTC deadlocks conditions. Recovery requires stepping thru each + * TLB flush transaction. The recovery sequence is somewhat tricky & is + * coded in assembly language. + */ +void sn2_ptc_deadlock_recovery(volatile unsigned long *ptc0, unsigned long data0, + volatile unsigned long *ptc1, unsigned long data1) +{ + extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long); + int cnode, mycnode, nasid; + volatile unsigned long *piows; + volatile unsigned long zeroval; + + sn2_ptc_deadlock_count++; + + piows = pda->pio_write_status_addr; + zeroval = pda->pio_write_status_val; + + mycnode = numa_node_id(); + + for_each_online_node(cnode) { + if (is_headless_node(cnode) || cnode == mycnode) + continue; + nasid = cnodeid_to_nasid(cnode); + ptc0 = CHANGE_NASID(nasid, ptc0); + if (ptc1) + ptc1 = CHANGE_NASID(nasid, ptc1); + sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); + } +} + +/** + * sn_send_IPI_phys - send an IPI to a Nasid and slice + * @nasid: nasid to receive the interrupt (may be outside partition) + * @physid: physical cpuid to receive the interrupt. + * @vector: command to send + * @delivery_mode: delivery mechanism + * + * Sends an IPI (interprocessor interrupt) to the processor specified by + * @physid + * + * @delivery_mode can be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode) +{ + long val; + unsigned long flags = 0; + volatile long *p; + + p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT); + val = (1UL << SH_IPI_INT_SEND_SHFT) | + (physid << SH_IPI_INT_PID_SHFT) | + ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) | + ((long)vector << SH_IPI_INT_IDX_SHFT) | + (0x000feeUL << SH_IPI_INT_BASE_SHFT); + + mb(); + if (enable_shub_wars_1_1()) { + spin_lock_irqsave(&sn2_global_ptc_lock, flags); + } + pio_phys_write_mmr(p, val); + if (enable_shub_wars_1_1()) { + wait_piowc(); + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + } + +} + +EXPORT_SYMBOL(sn_send_IPI_phys); + +/** + * sn2_send_IPI - send an IPI to a processor + * @cpuid: target of the IPI + * @vector: command to send + * @delivery_mode: delivery mechanism + * @redirect: redirect the IPI? + * + * Sends an IPI (InterProcessor Interrupt) to the processor specified by + * @cpuid. @vector specifies the command to send, while @delivery_mode can + * be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) +{ + long physid; + int nasid; + + physid = cpu_physical_id(cpuid); + nasid = cpuid_to_nasid(cpuid); + + /* the following is used only when starting cpus at boot time */ + if (unlikely(nasid == -1)) + ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL); + + sn_send_IPI_phys(nasid, physid, vector, delivery_mode); +} diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c new file mode 100644 index 000000000000..197356460ee1 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -0,0 +1,690 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2005 Silicon Graphics, Inc. All rights reserved. + * + * SGI Altix topology and hardware performance monitoring API. + * Mark Goodwin <markgw@sgi.com>. + * + * Creates /proc/sgi_sn/sn_topology (read-only) to export + * info about Altix nodes, routers, CPUs and NumaLink + * interconnection/topology. + * + * Also creates a dynamic misc device named "sn_hwperf" + * that supports an ioctl interface to call down into SAL + * to discover hw objects, topology and to read/write + * memory mapped registers, e.g. for performance monitoring. + * The "sn_hwperf" device is registered only after the procfs + * file is first opened, i.e. only if/when it's needed. + * + * This API is used by SGI Performance Co-Pilot and other + * tools, see http://oss.sgi.com/projects/pcp + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/seq_file.h> +#include <linux/miscdevice.h> +#include <linux/cpumask.h> +#include <linux/smp_lock.h> +#include <linux/nodemask.h> +#include <asm/processor.h> +#include <asm/topology.h> +#include <asm/smp.h> +#include <asm/semaphore.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <asm/sal.h> +#include <asm/sn/io.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/module.h> +#include <asm/sn/geo.h> +#include <asm/sn/sn2/sn_hwperf.h> + +static void *sn_hwperf_salheap = NULL; +static int sn_hwperf_obj_cnt = 0; +static nasid_t sn_hwperf_master_nasid = INVALID_NASID; +static int sn_hwperf_init(void); +static DECLARE_MUTEX(sn_hwperf_init_mutex); + +static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) +{ + int e; + u64 sz; + struct sn_hwperf_object_info *objbuf = NULL; + + if ((e = sn_hwperf_init()) < 0) { + printk("sn_hwperf_init failed: err %d\n", e); + goto out; + } + + sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info); + if ((objbuf = (struct sn_hwperf_object_info *) vmalloc(sz)) == NULL) { + printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz); + e = -ENOMEM; + goto out; + } + + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS, + 0, sz, (u64) objbuf, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + vfree(objbuf); + } + +out: + *nobj = sn_hwperf_obj_cnt; + *ret = objbuf; + return e; +} + +static int sn_hwperf_geoid_to_cnode(char *location) +{ + int cnode; + geoid_t geoid; + moduleid_t module_id; + char type; + int rack, slot, slab; + int this_rack, this_slot, this_slab; + + if (sscanf(location, "%03d%c%02d#%d", &rack, &type, &slot, &slab) != 4) + return -1; + + for (cnode = 0; cnode < numionodes; cnode++) { + geoid = cnodeid_get_geoid(cnode); + module_id = geo_module(geoid); + this_rack = MODULE_GET_RACK(module_id); + this_slot = MODULE_GET_BPOS(module_id); + this_slab = geo_slab(geoid); + if (rack == this_rack && slot == this_slot && slab == this_slab) + break; + } + + return cnode < numionodes ? cnode : -1; +} + +static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) +{ + if (!obj->sn_hwp_this_part) + return -1; + return sn_hwperf_geoid_to_cnode(obj->location); +} + +static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj, + struct sn_hwperf_object_info *objs) +{ + int ordinal; + struct sn_hwperf_object_info *p; + + for (ordinal=0, p=objs; p != obj; p++) { + if (SN_HWPERF_FOREIGN(p)) + continue; + if (SN_HWPERF_SAME_OBJTYPE(p, obj)) + ordinal++; + } + + return ordinal; +} + +static const char *slabname_node = "node"; /* SHub asic */ +static const char *slabname_ionode = "ionode"; /* TIO asic */ +static const char *slabname_router = "router"; /* NL3R or NL4R */ +static const char *slabname_other = "other"; /* unknown asic */ + +static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj, + struct sn_hwperf_object_info *objs, int *ordinal) +{ + int isnode; + const char *slabname = slabname_other; + + if ((isnode = SN_HWPERF_IS_NODE(obj)) || SN_HWPERF_IS_IONODE(obj)) { + slabname = isnode ? slabname_node : slabname_ionode; + *ordinal = sn_hwperf_obj_to_cnode(obj); + } + else { + *ordinal = sn_hwperf_generic_ordinal(obj, objs); + if (SN_HWPERF_IS_ROUTER(obj)) + slabname = slabname_router; + } + + return slabname; +} + +static int sn_topology_show(struct seq_file *s, void *d) +{ + int sz; + int pt; + int e; + int i; + int j; + const char *slabname; + int ordinal; + cpumask_t cpumask; + char slice; + struct cpuinfo_ia64 *c; + struct sn_hwperf_port_info *ptdata; + struct sn_hwperf_object_info *p; + struct sn_hwperf_object_info *obj = d; /* this object */ + struct sn_hwperf_object_info *objs = s->private; /* all objects */ + + if (obj == objs) { + seq_printf(s, "# sn_topology version 1\n"); + seq_printf(s, "# objtype ordinal location partition" + " [attribute value [, ...]]\n"); + } + + if (SN_HWPERF_FOREIGN(obj)) { + /* private in another partition: not interesting */ + return 0; + } + + for (i = 0; obj->name[i]; i++) { + if (obj->name[i] == ' ') + obj->name[i] = '_'; + } + + slabname = sn_hwperf_get_slabname(obj, objs, &ordinal); + seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location, + obj->sn_hwp_this_part ? "local" : "shared", obj->name); + + if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + seq_putc(s, '\n'); + else { + seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal)); + for (i=0; i < numionodes; i++) { + seq_printf(s, i ? ":%d" : ", dist %d", + node_distance(ordinal, i)); + } + seq_putc(s, '\n'); + + /* + * CPUs on this node, if any + */ + cpumask = node_to_cpumask(ordinal); + for_each_online_cpu(i) { + if (cpu_isset(i, cpumask)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? ":%d" : ", dist %d", + node_distance( + cpuid_to_cnodeid(i), + cpuid_to_cnodeid(j))); + } + seq_putc(s, '\n'); + } + } + } + + if (obj->ports) { + /* + * numalink ports + */ + sz = obj->ports * sizeof(struct sn_hwperf_port_info); + if ((ptdata = vmalloc(sz)) == NULL) + return -ENOMEM; + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, obj->id, sz, + (u64) ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) + return -EINVAL; + for (ordinal=0, p=objs; p != obj; p++) { + if (!SN_HWPERF_FOREIGN(p)) + ordinal += p->ports; + } + for (pt = 0; pt < obj->ports; pt++) { + for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) { + if (ptdata[pt].conn_id == p->id) { + break; + } + } + seq_printf(s, "numalink %d %s-%d", + ordinal+pt, obj->location, ptdata[pt].port); + + if (i >= sn_hwperf_obj_cnt) { + /* no connection */ + seq_puts(s, " local endpoint disconnected" + ", protocol unknown\n"); + continue; + } + + if (obj->sn_hwp_this_part && p->sn_hwp_this_part) + /* both ends local to this partition */ + seq_puts(s, " local"); + else if (!obj->sn_hwp_this_part && !p->sn_hwp_this_part) + /* both ends of the link in foreign partiton */ + seq_puts(s, " foreign"); + else + /* link straddles a partition */ + seq_puts(s, " shared"); + + /* + * Unlikely, but strictly should query the LLP config + * registers because an NL4R can be configured to run + * NL3 protocol, even when not talking to an NL3 router. + * Ditto for node-node. + */ + seq_printf(s, " endpoint %s-%d, protocol %s\n", + p->location, ptdata[pt].conn_port, + (SN_HWPERF_IS_NL3ROUTER(obj) || + SN_HWPERF_IS_NL3ROUTER(p)) ? "LLP3" : "LLP4"); + } + vfree(ptdata); + } + + return 0; +} + +static void *sn_topology_start(struct seq_file *s, loff_t * pos) +{ + struct sn_hwperf_object_info *objs = s->private; + + if (*pos < sn_hwperf_obj_cnt) + return (void *)(objs + *pos); + + return NULL; +} + +static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos) +{ + ++*pos; + return sn_topology_start(s, pos); +} + +static void sn_topology_stop(struct seq_file *m, void *v) +{ + return; +} + +/* + * /proc/sgi_sn/sn_topology, read-only using seq_file + */ +static struct seq_operations sn_topology_seq_ops = { + .start = sn_topology_start, + .next = sn_topology_next, + .stop = sn_topology_stop, + .show = sn_topology_show +}; + +struct sn_hwperf_op_info { + u64 op; + struct sn_hwperf_ioctl_args *a; + void *p; + int *v0; + int ret; +}; + +static void sn_hwperf_call_sal(void *info) +{ + struct sn_hwperf_op_info *op_info = info; + int r; + + r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op_info->op, + op_info->a->arg, op_info->a->sz, + (u64) op_info->p, 0, 0, op_info->v0); + op_info->ret = r; +} + +static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) +{ + u32 cpu; + u32 use_ipi; + int r = 0; + cpumask_t save_allowed; + + cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32; + use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK; + op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK; + + if (cpu != SN_HWPERF_ARG_ANY_CPU) { + if (cpu >= num_online_cpus() || !cpu_online(cpu)) { + r = -EINVAL; + goto out; + } + } + + if (cpu == SN_HWPERF_ARG_ANY_CPU || cpu == get_cpu()) { + /* don't care, or already on correct cpu */ + sn_hwperf_call_sal(op_info); + } + else { + if (use_ipi) { + /* use an interprocessor interrupt to call SAL */ + smp_call_function_single(cpu, sn_hwperf_call_sal, + op_info, 1, 1); + } + else { + /* migrate the task before calling SAL */ + save_allowed = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + sn_hwperf_call_sal(op_info); + set_cpus_allowed(current, save_allowed); + } + } + r = op_info->ret; + +out: + return r; +} + +/* map SAL hwperf error code to system error code */ +static int sn_hwperf_map_err(int hwperf_err) +{ + int e; + + switch(hwperf_err) { + case SN_HWPERF_OP_OK: + e = 0; + break; + + case SN_HWPERF_OP_NOMEM: + e = -ENOMEM; + break; + + case SN_HWPERF_OP_NO_PERM: + e = -EPERM; + break; + + case SN_HWPERF_OP_IO_ERROR: + e = -EIO; + break; + + case SN_HWPERF_OP_BUSY: + case SN_HWPERF_OP_RECONFIGURE: + e = -EAGAIN; + break; + + case SN_HWPERF_OP_INVAL: + default: + e = -EINVAL; + break; + } + + return e; +} + +/* + * ioctl for "sn_hwperf" misc device + */ +static int +sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) +{ + struct sn_hwperf_ioctl_args a; + struct cpuinfo_ia64 *cdata; + struct sn_hwperf_object_info *objs; + struct sn_hwperf_object_info *cpuobj; + struct sn_hwperf_op_info op_info; + void *p = NULL; + int nobj; + char slice; + int node; + int r; + int v0; + int i; + int j; + + unlock_kernel(); + + /* only user requests are allowed here */ + if ((op & SN_HWPERF_OP_MASK) < 10) { + r = -EINVAL; + goto error; + } + r = copy_from_user(&a, (const void __user *)arg, + sizeof(struct sn_hwperf_ioctl_args)); + if (r != 0) { + r = -EFAULT; + goto error; + } + + /* + * Allocate memory to hold a kernel copy of the user buffer. The + * buffer contents are either copied in or out (or both) of user + * space depending on the flags encoded in the requested operation. + */ + if (a.ptr) { + p = vmalloc(a.sz); + if (!p) { + r = -ENOMEM; + goto error; + } + } + + if (op & SN_HWPERF_OP_MEM_COPYIN) { + r = copy_from_user(p, (const void __user *)a.ptr, a.sz); + if (r != 0) { + r = -EFAULT; + goto error; + } + } + + switch (op) { + case SN_HWPERF_GET_CPU_INFO: + if (a.sz == sizeof(u64)) { + /* special case to get size needed */ + *(u64 *) p = (u64) num_online_cpus() * + sizeof(struct sn_hwperf_object_info); + } else + if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) { + r = -ENOMEM; + goto error; + } else + if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { + memset(p, 0, a.sz); + for (i = 0; i < nobj; i++) { + node = sn_hwperf_obj_to_cnode(objs + i); + for_each_online_cpu(j) { + if (node != cpu_to_node(j)) + continue; + cpuobj = (struct sn_hwperf_object_info *) p + j; + slice = 'a' + cpuid_to_slice(j); + cdata = cpu_data(j); + cpuobj->id = j; + snprintf(cpuobj->name, + sizeof(cpuobj->name), + "CPU %luMHz %s", + cdata->proc_freq / 1000000, + cdata->vendor); + snprintf(cpuobj->location, + sizeof(cpuobj->location), + "%s%c", objs[i].location, + slice); + } + } + + vfree(objs); + } + break; + + case SN_HWPERF_GET_NODE_NASID: + if (a.sz != sizeof(u64) || + (node = a.arg) < 0 || node >= numionodes) { + r = -EINVAL; + goto error; + } + *(u64 *)p = (u64)cnodeid_to_nasid(node); + break; + + case SN_HWPERF_GET_OBJ_NODE: + if (a.sz != sizeof(u64) || a.arg < 0) { + r = -EINVAL; + goto error; + } + if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { + if (a.arg >= nobj) { + r = -EINVAL; + vfree(objs); + goto error; + } + if (objs[(i = a.arg)].id != a.arg) { + for (i = 0; i < nobj; i++) { + if (objs[i].id == a.arg) + break; + } + } + if (i == nobj) { + r = -EINVAL; + vfree(objs); + goto error; + } + *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i); + vfree(objs); + } + break; + + case SN_HWPERF_GET_MMRS: + case SN_HWPERF_SET_MMRS: + case SN_HWPERF_OBJECT_DISTANCE: + op_info.p = p; + op_info.a = &a; + op_info.v0 = &v0; + op_info.op = op; + r = sn_hwperf_op_cpu(&op_info); + if (r) { + r = sn_hwperf_map_err(r); + goto error; + } + break; + + default: + /* all other ops are a direct SAL call */ + r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op, + a.arg, a.sz, (u64) p, 0, 0, &v0); + if (r) { + r = sn_hwperf_map_err(r); + goto error; + } + a.v0 = v0; + break; + } + + if (op & SN_HWPERF_OP_MEM_COPYOUT) { + r = copy_to_user((void __user *)a.ptr, p, a.sz); + if (r != 0) { + r = -EFAULT; + goto error; + } + } + +error: + vfree(p); + + lock_kernel(); + return r; +} + +static struct file_operations sn_hwperf_fops = { + .ioctl = sn_hwperf_ioctl, +}; + +static struct miscdevice sn_hwperf_dev = { + MISC_DYNAMIC_MINOR, + "sn_hwperf", + &sn_hwperf_fops +}; + +static int sn_hwperf_init(void) +{ + u64 v; + int salr; + int e = 0; + + /* single threaded, once-only initialization */ + down(&sn_hwperf_init_mutex); + if (sn_hwperf_salheap) { + up(&sn_hwperf_init_mutex); + return e; + } + + /* + * The PROM code needs a fixed reference node. For convenience the + * same node as the console I/O is used. + */ + sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid(); + + /* + * Request the needed size and install the PROM scratch area. + * The PROM keeps various tracking bits in this memory area. + */ + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + (u64) SN_HWPERF_GET_HEAPSIZE, 0, + (u64) sizeof(u64), (u64) &v, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + + if ((sn_hwperf_salheap = vmalloc(v)) == NULL) { + e = -ENOMEM; + goto out; + } + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_INSTALL_HEAP, 0, v, + (u64) sn_hwperf_salheap, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_OBJECT_COUNT, 0, + sizeof(u64), (u64) &v, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + sn_hwperf_obj_cnt = (int)v; + +out: + if (e < 0 && sn_hwperf_salheap) { + vfree(sn_hwperf_salheap); + sn_hwperf_salheap = NULL; + sn_hwperf_obj_cnt = 0; + } + + if (!e) { + /* + * Register a dynamic misc device for ioctl. Platforms + * supporting hotplug will create /dev/sn_hwperf, else + * user can to look up the minor number in /proc/misc. + */ + if ((e = misc_register(&sn_hwperf_dev)) != 0) { + printk(KERN_ERR "sn_hwperf_init: misc register " + "for \"sn_hwperf\" failed, err %d\n", e); + } + } + + up(&sn_hwperf_init_mutex); + return e; +} + +int sn_topology_open(struct inode *inode, struct file *file) +{ + int e; + struct seq_file *seq; + struct sn_hwperf_object_info *objbuf; + int nobj; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = seq_open(file, &sn_topology_seq_ops); + seq = file->private_data; + seq->private = objbuf; + } + + return e; +} + +int sn_topology_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + + vfree(seq->private); + return seq_release(inode, file); +} diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c new file mode 100644 index 000000000000..6a80fca807b9 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -0,0 +1,149 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ +#include <linux/config.h> +#include <asm/uaccess.h> + +#ifdef CONFIG_PROC_FS +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <asm/sn/sn_sal.h> + +static int partition_id_show(struct seq_file *s, void *p) +{ + seq_printf(s, "%d\n", sn_local_partid()); + return 0; +} + +static int partition_id_open(struct inode *inode, struct file *file) +{ + return single_open(file, partition_id_show, NULL); +} + +static int system_serial_number_show(struct seq_file *s, void *p) +{ + seq_printf(s, "%s\n", sn_system_serial_number()); + return 0; +} + +static int system_serial_number_open(struct inode *inode, struct file *file) +{ + return single_open(file, system_serial_number_show, NULL); +} + +static int licenseID_show(struct seq_file *s, void *p) +{ + seq_printf(s, "0x%lx\n", sn_partition_serial_number_val()); + return 0; +} + +static int licenseID_open(struct inode *inode, struct file *file) +{ + return single_open(file, licenseID_show, NULL); +} + +/* + * Enable forced interrupt by default. + * When set, the sn interrupt handler writes the force interrupt register on + * the bridge chip. The hardware will then send an interrupt message if the + * interrupt line is active. This mimics a level sensitive interrupt. + */ +int sn_force_interrupt_flag = 1; + +static int sn_force_interrupt_show(struct seq_file *s, void *p) +{ + seq_printf(s, "Force interrupt is %s\n", + sn_force_interrupt_flag ? "enabled" : "disabled"); + return 0; +} + +static ssize_t sn_force_interrupt_write_proc(struct file *file, + const char __user *buffer, size_t count, loff_t *data) +{ + char val; + + if (copy_from_user(&val, buffer, 1)) + return -EFAULT; + + sn_force_interrupt_flag = (val == '0') ? 0 : 1; + return count; +} + +static int sn_force_interrupt_open(struct inode *inode, struct file *file) +{ + return single_open(file, sn_force_interrupt_show, NULL); +} + +static int coherence_id_show(struct seq_file *s, void *p) +{ + seq_printf(s, "%d\n", partition_coherence_id()); + + return 0; +} + +static int coherence_id_open(struct inode *inode, struct file *file) +{ + return single_open(file, coherence_id_show, NULL); +} + +static struct proc_dir_entry *sn_procfs_create_entry( + const char *name, struct proc_dir_entry *parent, + int (*openfunc)(struct inode *, struct file *), + int (*releasefunc)(struct inode *, struct file *)) +{ + struct proc_dir_entry *e = create_proc_entry(name, 0444, parent); + + if (e) { + e->proc_fops = (struct file_operations *)kmalloc( + sizeof(struct file_operations), GFP_KERNEL); + if (e->proc_fops) { + memset(e->proc_fops, 0, sizeof(struct file_operations)); + e->proc_fops->open = openfunc; + e->proc_fops->read = seq_read; + e->proc_fops->llseek = seq_lseek; + e->proc_fops->release = releasefunc; + } + } + + return e; +} + +/* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */ +extern int sn_topology_open(struct inode *, struct file *); +extern int sn_topology_release(struct inode *, struct file *); + +void register_sn_procfs(void) +{ + static struct proc_dir_entry *sgi_proc_dir = NULL; + struct proc_dir_entry *e; + + BUG_ON(sgi_proc_dir != NULL); + if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL))) + return; + + sn_procfs_create_entry("partition_id", sgi_proc_dir, + partition_id_open, single_release); + + sn_procfs_create_entry("system_serial_number", sgi_proc_dir, + system_serial_number_open, single_release); + + sn_procfs_create_entry("licenseID", sgi_proc_dir, + licenseID_open, single_release); + + e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir, + sn_force_interrupt_open, single_release); + if (e) + e->proc_fops->write = sn_force_interrupt_write_proc; + + sn_procfs_create_entry("coherence_id", sgi_proc_dir, + coherence_id_open, single_release); + + sn_procfs_create_entry("sn_topology", sgi_proc_dir, + sn_topology_open, sn_topology_release); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c new file mode 100644 index 000000000000..deb9baf4d473 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -0,0 +1,36 @@ +/* + * linux/arch/ia64/sn/kernel/sn2/timer.c + * + * Copyright (C) 2003 Silicon Graphics, Inc. + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger <davidm@hpl.hp.com>: updated for new timer-interpolation infrastructure + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/interrupt.h> + +#include <asm/hw_irq.h> +#include <asm/system.h> + +#include <asm/sn/leds.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/clksupport.h> + +extern unsigned long sn_rtc_cycles_per_second; + +static struct time_interpolator sn2_interpolator = { + .drift = -1, + .shift = 10, + .mask = (1LL << 55) - 1, + .source = TIME_SOURCE_MMIO64 +}; + +void __init sn_timer_init(void) +{ + sn2_interpolator.frequency = sn_rtc_cycles_per_second; + sn2_interpolator.addr = RTC_COUNTER_ADDR; + register_time_interpolator(&sn2_interpolator); +} diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c new file mode 100644 index 000000000000..cde7375390b0 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -0,0 +1,63 @@ +/* + * + * + * Copyright (c) 2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/NoticeExplan + */ + +#include <linux/interrupt.h> +#include <asm/sn/pda.h> +#include <asm/sn/leds.h> + +extern void sn_lb_int_war_check(void); +extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs); + +#define SN_LB_INT_WAR_INTERVAL 100 + +void sn_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + /* LED blinking */ + if (!pda->hb_count--) { + pda->hb_count = HZ / 2; + set_led_bits(pda->hb_state ^= + LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT); + } + + if (enable_shub_wars_1_1()) { + /* Bugfix code for SHUB 1.1 */ + if (pda->pio_shub_war_cam_addr) + *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + } + if (pda->sn_lb_int_war_ticks == 0) + sn_lb_int_war_check(); + pda->sn_lb_int_war_ticks++; + if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) + pda->sn_lb_int_war_ticks = 0; +} diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile new file mode 100644 index 000000000000..b5dca0097a8e --- /dev/null +++ b/arch/ia64/sn/pci/Makefile @@ -0,0 +1,10 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn pci general routines. + +obj-y := pci_dma.o pcibr/ diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c new file mode 100644 index 000000000000..f680824f819d --- /dev/null +++ b/arch/ia64/sn/pci/pci_dma.c @@ -0,0 +1,363 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000,2002-2005 Silicon Graphics, Inc. All rights reserved. + * + * Routines for PCI DMA mapping. See Documentation/DMA-API.txt for + * a description of how these routines should be used. + */ + +#include <linux/module.h> +#include <asm/dma.h> +#include <asm/sn/sn_sal.h> +#include "pci/pcibus_provider_defs.h" +#include "pci/pcidev.h" +#include "pci/pcibr_provider.h" + +#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) +#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) + +/** + * sn_dma_supported - test a DMA mask + * @dev: device to test + * @mask: DMA mask to test + * + * Return whether the given PCI device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during PCI bus mastering, then you would pass 0x00ffffff as the mask to + * this function. Of course, SN only supports devices that have 32 or more + * address bits when using the PMU. + */ +int sn_dma_supported(struct device *dev, u64 mask) +{ + BUG_ON(dev->bus != &pci_bus_type); + + if (mask < 0x7fffffff) + return 0; + return 1; +} +EXPORT_SYMBOL(sn_dma_supported); + +/** + * sn_dma_set_mask - set the DMA mask + * @dev: device to set + * @dma_mask: new mask + * + * Set @dev's DMA mask if the hw supports it. + */ +int sn_dma_set_mask(struct device *dev, u64 dma_mask) +{ + BUG_ON(dev->bus != &pci_bus_type); + + if (!sn_dma_supported(dev, dma_mask)) + return 0; + + *dev->dma_mask = dma_mask; + return 1; +} +EXPORT_SYMBOL(sn_dma_set_mask); + +/** + * sn_dma_alloc_coherent - allocate memory for coherent DMA + * @dev: device to allocate for + * @size: size of the region + * @dma_handle: DMA (bus) address + * @flags: memory allocation flags + * + * dma_alloc_coherent() returns a pointer to a memory region suitable for + * coherent DMA traffic to/from a PCI device. On SN platforms, this means + * that @dma_handle will have the %PCIIO_DMA_CMD flag set. + * + * This interface is usually used for "command" streams (e.g. the command + * queue for a SCSI controller). See Documentation/DMA-API.txt for + * more information. + */ +void *sn_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, int flags) +{ + void *cpuaddr; + unsigned long phys_addr; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + + BUG_ON(dev->bus != &pci_bus_type); + + /* + * Allocate the memory. + * FIXME: We should be doing alloc_pages_node for the node closest + * to the PCI device. + */ + if (!(cpuaddr = (void *)__get_free_pages(GFP_ATOMIC, get_order(size)))) + return NULL; + + memset(cpuaddr, 0x0, size); + + /* physical addr. of the memory we just got */ + phys_addr = __pa(cpuaddr); + + /* + * 64 bit address translations should never fail. + * 32 bit translations can fail if there are insufficient mapping + * resources. + */ + + *dma_handle = pcibr_dma_map(pcidev_info, phys_addr, size, + SN_PCIDMA_CONSISTENT); + if (!*dma_handle) { + printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); + free_pages((unsigned long)cpuaddr, get_order(size)); + return NULL; + } + + return cpuaddr; +} +EXPORT_SYMBOL(sn_dma_alloc_coherent); + +/** + * sn_pci_free_coherent - free memory associated with coherent DMAable region + * @dev: device to free for + * @size: size to free + * @cpu_addr: kernel virtual address to free + * @dma_handle: DMA address associated with this region + * + * Frees the memory allocated by dma_alloc_coherent(), potentially unmapping + * any associated IOMMU mappings. + */ +void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle) +{ + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + + BUG_ON(dev->bus != &pci_bus_type); + + pcibr_dma_unmap(pcidev_info, dma_handle, 0); + free_pages((unsigned long)cpu_addr, get_order(size)); +} +EXPORT_SYMBOL(sn_dma_free_coherent); + +/** + * sn_dma_map_single - map a single page for DMA + * @dev: device to map for + * @cpu_addr: kernel virtual address of the region to map + * @size: size of the region + * @direction: DMA direction + * + * Map the region pointed to by @cpu_addr for DMA and return the + * DMA address. + * + * We map this to the one step pcibr_dmamap_trans interface rather than + * the two step pcibr_dmamap_alloc/pcibr_dmamap_addr because we have + * no way of saving the dmamap handle from the alloc to later free + * (which is pretty much unacceptable). + * + * TODO: simplify our interface; + * figure out how to save dmamap handle so can use two step. + */ +dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size, + int direction) +{ + dma_addr_t dma_addr; + unsigned long phys_addr; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + + BUG_ON(dev->bus != &pci_bus_type); + + phys_addr = __pa(cpu_addr); + dma_addr = pcibr_dma_map(pcidev_info, phys_addr, size, 0); + if (!dma_addr) { + printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); + return 0; + } + return dma_addr; +} +EXPORT_SYMBOL(sn_dma_map_single); + +/** + * sn_dma_unmap_single - unamp a DMA mapped page + * @dev: device to sync + * @dma_addr: DMA address to sync + * @size: size of region + * @direction: DMA direction + * + * This routine is supposed to sync the DMA region specified + * by @dma_handle into the coherence domain. On SN, we're always cache + * coherent, so we just need to free any ATEs associated with this mapping. + */ +void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + int direction) +{ + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + + BUG_ON(dev->bus != &pci_bus_type); + pcibr_dma_unmap(pcidev_info, dma_addr, direction); +} +EXPORT_SYMBOL(sn_dma_unmap_single); + +/** + * sn_dma_unmap_sg - unmap a DMA scatterlist + * @dev: device to unmap + * @sg: scatterlist to unmap + * @nhwentries: number of scatterlist entries + * @direction: DMA direction + * + * Unmap a set of streaming mode DMA translations. + */ +void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nhwentries, int direction) +{ + int i; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + + BUG_ON(dev->bus != &pci_bus_type); + + for (i = 0; i < nhwentries; i++, sg++) { + pcibr_dma_unmap(pcidev_info, sg->dma_address, direction); + sg->dma_address = (dma_addr_t) NULL; + sg->dma_length = 0; + } +} +EXPORT_SYMBOL(sn_dma_unmap_sg); + +/** + * sn_dma_map_sg - map a scatterlist for DMA + * @dev: device to map for + * @sg: scatterlist to map + * @nhwentries: number of entries + * @direction: direction of the DMA transaction + * + * Maps each entry of @sg for DMA. + */ +int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + int direction) +{ + unsigned long phys_addr; + struct scatterlist *saved_sg = sg; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + int i; + + BUG_ON(dev->bus != &pci_bus_type); + + /* + * Setup a DMA address for each entry in the scatterlist. + */ + for (i = 0; i < nhwentries; i++, sg++) { + phys_addr = SG_ENT_PHYS_ADDRESS(sg); + sg->dma_address = pcibr_dma_map(pcidev_info, phys_addr, + sg->length, 0); + + if (!sg->dma_address) { + printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); + + /* + * Free any successfully allocated entries. + */ + if (i > 0) + sn_dma_unmap_sg(dev, saved_sg, i, direction); + return 0; + } + + sg->dma_length = sg->length; + } + + return nhwentries; +} +EXPORT_SYMBOL(sn_dma_map_sg); + +void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, int direction) +{ + BUG_ON(dev->bus != &pci_bus_type); +} +EXPORT_SYMBOL(sn_dma_sync_single_for_cpu); + +void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, int direction) +{ + BUG_ON(dev->bus != &pci_bus_type); +} +EXPORT_SYMBOL(sn_dma_sync_single_for_device); + +void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + BUG_ON(dev->bus != &pci_bus_type); +} +EXPORT_SYMBOL(sn_dma_sync_sg_for_cpu); + +void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + BUG_ON(dev->bus != &pci_bus_type); +} +EXPORT_SYMBOL(sn_dma_sync_sg_for_device); + +int sn_dma_mapping_error(dma_addr_t dma_addr) +{ + return 0; +} +EXPORT_SYMBOL(sn_dma_mapping_error); + +char *sn_pci_get_legacy_mem(struct pci_bus *bus) +{ + if (!SN_PCIBUS_BUSSOFT(bus)) + return ERR_PTR(-ENODEV); + + return (char *)(SN_PCIBUS_BUSSOFT(bus)->bs_legacy_mem | __IA64_UNCACHED_OFFSET); +} + +int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) +{ + unsigned long addr; + int ret; + + if (!SN_PCIBUS_BUSSOFT(bus)) + return -ENODEV; + + addr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET; + addr += port; + + ret = ia64_sn_probe_mem(addr, (long)size, (void *)val); + + if (ret == 2) + return -EINVAL; + + if (ret == 1) + *val = -1; + + return size; +} + +int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) +{ + int ret = size; + unsigned long paddr; + unsigned long *addr; + + if (!SN_PCIBUS_BUSSOFT(bus)) { + ret = -ENODEV; + goto out; + } + + /* Put the phys addr in uncached space */ + paddr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET; + paddr += port; + addr = (unsigned long *)paddr; + + switch (size) { + case 1: + *(volatile u8 *)(addr) = (u8)(val); + break; + case 2: + *(volatile u16 *)(addr) = (u16)(val); + break; + case 4: + *(volatile u32 *)(addr) = (u32)(val); + break; + default: + ret = -EINVAL; + break; + } + out: + return ret; +} diff --git a/arch/ia64/sn/pci/pcibr/Makefile b/arch/ia64/sn/pci/pcibr/Makefile new file mode 100644 index 000000000000..1850c4a94c41 --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/Makefile @@ -0,0 +1,11 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2002-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn2 io routines. + +obj-y += pcibr_dma.o pcibr_reg.o \ + pcibr_ate.o pcibr_provider.o diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c new file mode 100644 index 000000000000..9d6854666f9b --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -0,0 +1,188 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <asm/sn/sn_sal.h> +#include "pci/pcibus_provider_defs.h" +#include "pci/pcidev.h" +#include "pci/pcibr_provider.h" + +int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ + +/* + * mark_ate: Mark the ate as either free or inuse. + */ +static void mark_ate(struct ate_resource *ate_resource, int start, int number, + uint64_t value) +{ + + uint64_t *ate = ate_resource->ate; + int index; + int length = 0; + + for (index = start; length < number; index++, length++) + ate[index] = value; + +} + +/* + * find_free_ate: Find the first free ate index starting from the given + * index for the desired consequtive count. + */ +static int find_free_ate(struct ate_resource *ate_resource, int start, + int count) +{ + + uint64_t *ate = ate_resource->ate; + int index; + int start_free; + + for (index = start; index < ate_resource->num_ate;) { + if (!ate[index]) { + int i; + int free; + free = 0; + start_free = index; /* Found start free ate */ + for (i = start_free; i < ate_resource->num_ate; i++) { + if (!ate[i]) { /* This is free */ + if (++free == count) + return start_free; + } else { + index = i + 1; + break; + } + } + } else + index++; /* Try next ate */ + } + + return -1; +} + +/* + * free_ate_resource: Free the requested number of ATEs. + */ +static inline void free_ate_resource(struct ate_resource *ate_resource, + int start) +{ + + mark_ate(ate_resource, start, ate_resource->ate[start], 0); + if ((ate_resource->lowest_free_index > start) || + (ate_resource->lowest_free_index < 0)) + ate_resource->lowest_free_index = start; + +} + +/* + * alloc_ate_resource: Allocate the requested number of ATEs. + */ +static inline int alloc_ate_resource(struct ate_resource *ate_resource, + int ate_needed) +{ + + int start_index; + + /* + * Check for ate exhaustion. + */ + if (ate_resource->lowest_free_index < 0) + return -1; + + /* + * Find the required number of free consequtive ates. + */ + start_index = + find_free_ate(ate_resource, ate_resource->lowest_free_index, + ate_needed); + if (start_index >= 0) + mark_ate(ate_resource, start_index, ate_needed, ate_needed); + + ate_resource->lowest_free_index = + find_free_ate(ate_resource, ate_resource->lowest_free_index, 1); + + return start_index; +} + +/* + * Allocate "count" contiguous Bridge Address Translation Entries + * on the specified bridge to be used for PCI to XTALK mappings. + * Indices in rm map range from 1..num_entries. Indicies returned + * to caller range from 0..num_entries-1. + * + * Return the start index on success, -1 on failure. + */ +int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count) +{ + int status = 0; + uint64_t flag; + + flag = pcibr_lock(pcibus_info); + status = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count); + + if (status < 0) { + /* Failed to allocate */ + pcibr_unlock(pcibus_info, flag); + return -1; + } + + pcibr_unlock(pcibus_info, flag); + + return status; +} + +/* + * Setup an Address Translation Entry as specified. Use either the Bridge + * internal maps or the external map RAM, as appropriate. + */ +static inline uint64_t *pcibr_ate_addr(struct pcibus_info *pcibus_info, + int ate_index) +{ + if (ate_index < pcibus_info->pbi_int_ate_size) { + return pcireg_int_ate_addr(pcibus_info, ate_index); + } + panic("pcibr_ate_addr: invalid ate_index 0x%x", ate_index); +} + +/* + * Update the ate. + */ +void inline +ate_write(struct pcibus_info *pcibus_info, int ate_index, int count, + volatile uint64_t ate) +{ + while (count-- > 0) { + if (ate_index < pcibus_info->pbi_int_ate_size) { + pcireg_int_ate_set(pcibus_info, ate_index, ate); + } else { + panic("ate_write: invalid ate_index 0x%x", ate_index); + } + ate_index++; + ate += IOPGSIZE; + } + + pcireg_tflush_get(pcibus_info); /* wait until Bridge PIO complete */ +} + +void pcibr_ate_free(struct pcibus_info *pcibus_info, int index) +{ + + volatile uint64_t ate; + int count; + uint64_t flags; + + if (pcibr_invalidate_ate) { + /* For debugging purposes, clear the valid bit in the ATE */ + ate = *pcibr_ate_addr(pcibus_info, index); + count = pcibus_info->pbi_int_ate_resource.ate[index]; + ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V)); + } + + flags = pcibr_lock(pcibus_info); + free_ate_resource(&pcibus_info->pbi_int_ate_resource, index); + pcibr_unlock(pcibus_info, flags); +} diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c new file mode 100644 index 000000000000..b1d66ac065c8 --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -0,0 +1,379 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/pci.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/geo.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include "pci/pcibus_provider_defs.h" +#include "pci/pcidev.h" +#include "pci/tiocp.h" +#include "pci/pic.h" +#include "pci/pcibr_provider.h" +#include "pci/tiocp.h" +#include "tio.h" +#include <asm/sn/addrs.h> + +extern int sn_ioif_inited; + +/* ===================================================================== + * DMA MANAGEMENT + * + * The Bridge ASIC provides three methods of doing DMA: via a "direct map" + * register available in 32-bit PCI space (which selects a contiguous 2G + * address space on some other widget), via "direct" addressing via 64-bit + * PCI space (all destination information comes from the PCI address, + * including transfer attributes), and via a "mapped" region that allows + * a bunch of different small mappings to be established with the PMU. + * + * For efficiency, we most prefer to use the 32bit direct mapping facility, + * since it requires no resource allocations. The advantage of using the + * PMU over the 64-bit direct is that single-cycle PCI addressing can be + * used; the advantage of using 64-bit direct over PMU addressing is that + * we do not have to allocate entries in the PMU. + */ + +static uint64_t +pcibr_dmamap_ate32(struct pcidev_info *info, + uint64_t paddr, size_t req_size, uint64_t flags) +{ + + struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; + struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> + pdi_pcibus_info; + uint8_t internal_device = (PCI_SLOT(pcidev_info->pdi_host_pcidev_info-> + pdi_linux_pcidev->devfn)) - 1; + int ate_count; + int ate_index; + uint64_t ate_flags = flags | PCI32_ATE_V; + uint64_t ate; + uint64_t pci_addr; + uint64_t xio_addr; + uint64_t offset; + + /* PIC in PCI-X mode does not supports 32bit PageMap mode */ + if (IS_PIC_SOFT(pcibus_info) && IS_PCIX(pcibus_info)) { + return 0; + } + + /* Calculate the number of ATEs needed. */ + if (!(MINIMAL_ATE_FLAG(paddr, req_size))) { + ate_count = IOPG((IOPGSIZE - 1) /* worst case start offset */ + +req_size /* max mapping bytes */ + - 1) + 1; /* round UP */ + } else { /* assume requested target is page aligned */ + ate_count = IOPG(req_size /* max mapping bytes */ + - 1) + 1; /* round UP */ + } + + /* Get the number of ATEs required. */ + ate_index = pcibr_ate_alloc(pcibus_info, ate_count); + if (ate_index < 0) + return 0; + + /* In PCI-X mode, Prefetch not supported */ + if (IS_PCIX(pcibus_info)) + ate_flags &= ~(PCI32_ATE_PREF); + + xio_addr = + IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + offset = IOPGOFF(xio_addr); + ate = ate_flags | (xio_addr - offset); + + /* If PIC, put the targetid in the ATE */ + if (IS_PIC_SOFT(pcibus_info)) { + ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT); + } + ate_write(pcibus_info, ate_index, ate_count, ate); + + /* + * Set up the DMA mapped Address. + */ + pci_addr = PCI32_MAPPED_BASE + offset + IOPGSIZE * ate_index; + + /* + * If swap was set in device in pcibr_endian_set() + * we need to turn swapping on. + */ + if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR) + ATE_SWAP_ON(pci_addr); + + return pci_addr; +} + +static uint64_t +pcibr_dmatrans_direct64(struct pcidev_info * info, uint64_t paddr, + uint64_t dma_attributes) +{ + struct pcibus_info *pcibus_info = (struct pcibus_info *) + ((info->pdi_host_pcidev_info)->pdi_pcibus_info); + uint64_t pci_addr; + + /* Translate to Crosstalk View of Physical Address */ + pci_addr = (IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr)) | dma_attributes; + + /* Handle Bus mode */ + if (IS_PCIX(pcibus_info)) + pci_addr &= ~PCI64_ATTR_PREF; + + /* Handle Bridge Chipset differences */ + if (IS_PIC_SOFT(pcibus_info)) { + pci_addr |= + ((uint64_t) pcibus_info-> + pbi_hub_xid << PIC_PCI64_ATTR_TARG_SHFT); + } else + pci_addr |= TIOCP_PCI64_CMDTYPE_MEM; + + /* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */ + if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn)) + pci_addr |= PCI64_ATTR_VIRTUAL; + + return pci_addr; + +} + +static uint64_t +pcibr_dmatrans_direct32(struct pcidev_info * info, + uint64_t paddr, size_t req_size, uint64_t flags) +{ + + struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; + struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> + pdi_pcibus_info; + uint64_t xio_addr; + + uint64_t xio_base; + uint64_t offset; + uint64_t endoff; + + if (IS_PCIX(pcibus_info)) { + return 0; + } + + xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + + xio_base = pcibus_info->pbi_dir_xbase; + offset = xio_addr - xio_base; + endoff = req_size + offset; + if ((req_size > (1ULL << 31)) || /* Too Big */ + (xio_addr < xio_base) || /* Out of range for mappings */ + (endoff > (1ULL << 31))) { /* Too Big */ + return 0; + } + + return PCI32_DIRECT_BASE | offset; + +} + +/* + * Wrapper routine for free'ing DMA maps + * DMA mappings for Direct 64 and 32 do not have any DMA maps. + */ +void +pcibr_dma_unmap(struct pcidev_info *pcidev_info, dma_addr_t dma_handle, + int direction) +{ + struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> + pdi_pcibus_info; + + if (IS_PCI32_MAPPED(dma_handle)) { + int ate_index; + + ate_index = + IOPG((ATE_SWAP_OFF(dma_handle) - PCI32_MAPPED_BASE)); + pcibr_ate_free(pcibus_info, ate_index); + } +} + +/* + * On SN systems there is a race condition between a PIO read response and + * DMA's. In rare cases, the read response may beat the DMA, causing the + * driver to think that data in memory is complete and meaningful. This code + * eliminates that race. This routine is called by the PIO read routines + * after doing the read. For PIC this routine then forces a fake interrupt + * on another line, which is logically associated with the slot that the PIO + * is addressed to. It then spins while watching the memory location that + * the interrupt is targetted to. When the interrupt response arrives, we + * are sure that the DMA has landed in memory and it is safe for the driver + * to proceed. For TIOCP use the Device(x) Write Request Buffer Flush + * Bridge register since it ensures the data has entered the coherence domain, + * unlike the PIC Device(x) Write Request Buffer Flush register. + */ + +void sn_dma_flush(uint64_t addr) +{ + nasid_t nasid; + int is_tio; + int wid_num; + int i, j; + int bwin; + uint64_t flags; + struct hubdev_info *hubinfo; + volatile struct sn_flush_device_list *p; + struct sn_flush_nasid_entry *flush_nasid_list; + + if (!sn_ioif_inited) + return; + + nasid = NASID_GET(addr); + if (-1 == nasid_to_cnodeid(nasid)) + return; + + hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo; + + if (!hubinfo) { + BUG(); + } + is_tio = (nasid & 1); + if (is_tio) { + wid_num = TIO_SWIN_WIDGETNUM(addr); + bwin = TIO_BWIN_WINDOWNUM(addr); + } else { + wid_num = SWIN_WIDGETNUM(addr); + bwin = BWIN_WINDOWNUM(addr); + } + + flush_nasid_list = &hubinfo->hdi_flush_nasid_list; + if (flush_nasid_list->widget_p == NULL) + return; + if (bwin > 0) { + uint64_t itte = flush_nasid_list->iio_itte[bwin]; + + if (is_tio) { + wid_num = (itte >> TIO_ITTE_WIDGET_SHIFT) & + TIO_ITTE_WIDGET_MASK; + } else { + wid_num = (itte >> IIO_ITTE_WIDGET_SHIFT) & + IIO_ITTE_WIDGET_MASK; + } + } + if (flush_nasid_list->widget_p == NULL) + return; + if (flush_nasid_list->widget_p[wid_num] == NULL) + return; + p = &flush_nasid_list->widget_p[wid_num][0]; + + /* find a matching BAR */ + for (i = 0; i < DEV_PER_WIDGET; i++) { + for (j = 0; j < PCI_ROM_RESOURCE; j++) { + if (p->sfdl_bar_list[j].start == 0) + break; + if (addr >= p->sfdl_bar_list[j].start + && addr <= p->sfdl_bar_list[j].end) + break; + } + if (j < PCI_ROM_RESOURCE && p->sfdl_bar_list[j].start != 0) + break; + p++; + } + + /* if no matching BAR, return without doing anything. */ + if (i == DEV_PER_WIDGET) + return; + + /* + * For TIOCP use the Device(x) Write Request Buffer Flush Bridge + * register since it ensures the data has entered the coherence + * domain, unlike PIC + */ + if (is_tio) { + uint32_t tio_id = REMOTE_HUB_L(nasid, TIO_NODE_ID); + uint32_t revnum = XWIDGET_PART_REV_NUM(tio_id); + + /* TIOCP BRINGUP WAR (PV907516): Don't write buffer flush reg */ + if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516) { + return; + } else { + pcireg_wrb_flush_get(p->sfdl_pcibus_info, + (p->sfdl_slot - 1)); + } + } else { + spin_lock_irqsave(&((struct sn_flush_device_list *)p)-> + sfdl_flush_lock, flags); + + p->sfdl_flush_value = 0; + + /* force an interrupt. */ + *(volatile uint32_t *)(p->sfdl_force_int_addr) = 1; + + /* wait for the interrupt to come back. */ + while (*(p->sfdl_flush_addr) != 0x10f) ; + + /* okay, everything is synched up. */ + spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, flags); + } + return; +} + +/* + * Wrapper DMA interface. Called from pci_dma.c routines. + */ + +uint64_t +pcibr_dma_map(struct pcidev_info * pcidev_info, unsigned long phys_addr, + size_t size, unsigned int flags) +{ + dma_addr_t dma_handle; + struct pci_dev *pcidev = pcidev_info->pdi_linux_pcidev; + + if (flags & SN_PCIDMA_CONSISTENT) { + /* sn_pci_alloc_consistent interfaces */ + if (pcidev->dev.coherent_dma_mask == ~0UL) { + dma_handle = + pcibr_dmatrans_direct64(pcidev_info, phys_addr, + PCI64_ATTR_BAR); + } else { + dma_handle = + (dma_addr_t) pcibr_dmamap_ate32(pcidev_info, + phys_addr, size, + PCI32_ATE_BAR); + } + } else { + /* map_sg/map_single interfaces */ + + /* SN cannot support DMA addresses smaller than 32 bits. */ + if (pcidev->dma_mask < 0x7fffffff) { + return 0; + } + + if (pcidev->dma_mask == ~0UL) { + /* + * Handle the most common case: 64 bit cards. This + * call should always succeed. + */ + + dma_handle = + pcibr_dmatrans_direct64(pcidev_info, phys_addr, + PCI64_ATTR_PREF); + } else { + /* Handle 32-63 bit cards via direct mapping */ + dma_handle = + pcibr_dmatrans_direct32(pcidev_info, phys_addr, + size, 0); + if (!dma_handle) { + /* + * It is a 32 bit card and we cannot do direct mapping, + * so we use an ATE. + */ + + dma_handle = + pcibr_dmamap_ate32(pcidev_info, phys_addr, + size, PCI32_ATE_PREF); + } + } + } + + return dma_handle; +} + +EXPORT_SYMBOL(sn_dma_flush); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c new file mode 100644 index 000000000000..92bd278cf7ff --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -0,0 +1,170 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <asm/sn/sn_sal.h> +#include "xtalk/xwidgetdev.h" +#include <asm/sn/geo.h> +#include "xtalk/hubdev.h" +#include "pci/pcibus_provider_defs.h" +#include "pci/pcidev.h" +#include "pci/pcibr_provider.h" +#include <asm/sn/addrs.h> + + +static int sal_pcibr_error_interrupt(struct pcibus_info *soft) +{ + struct ia64_sal_retval ret_stuff; + uint64_t busnum; + int segment; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + segment = 0; + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_ERROR_INTERRUPT, + (u64) segment, (u64) busnum, 0, 0, 0, 0, 0); + + return (int)ret_stuff.v0; +} + +/* + * PCI Bridge Error interrupt handler. Gets invoked whenever a PCI + * bridge sends an error interrupt. + */ +static irqreturn_t +pcibr_error_intr_handler(int irq, void *arg, struct pt_regs *regs) +{ + struct pcibus_info *soft = (struct pcibus_info *)arg; + + if (sal_pcibr_error_interrupt(soft) < 0) { + panic("pcibr_error_intr_handler(): Fatal Bridge Error"); + } + return IRQ_HANDLED; +} + +void * +pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft) +{ + int nasid, cnode, j; + struct hubdev_info *hubdev_info; + struct pcibus_info *soft; + struct sn_flush_device_list *sn_flush_device_list; + + if (! IS_PCI_BRIDGE_ASIC(prom_bussoft->bs_asic_type)) { + return NULL; + } + + /* + * Allocate kernel bus soft and copy from prom. + */ + + soft = kmalloc(sizeof(struct pcibus_info), GFP_KERNEL); + if (!soft) { + return NULL; + } + + memcpy(soft, prom_bussoft, sizeof(struct pcibus_info)); + soft->pbi_buscommon.bs_base = + (((u64) soft->pbi_buscommon. + bs_base << 4) >> 4) | __IA64_UNCACHED_OFFSET; + + spin_lock_init(&soft->pbi_lock); + + /* + * register the bridge's error interrupt handler + */ + if (request_irq(SGI_PCIBR_ERROR, (void *)pcibr_error_intr_handler, + SA_SHIRQ, "PCIBR error", (void *)(soft))) { + printk(KERN_WARNING + "pcibr cannot allocate interrupt for error handler\n"); + } + + /* + * Update the Bridge with the "kernel" pagesize + */ + if (PAGE_SIZE < 16384) { + pcireg_control_bit_clr(soft, PCIBR_CTRL_PAGE_SIZE); + } else { + pcireg_control_bit_set(soft, PCIBR_CTRL_PAGE_SIZE); + } + + nasid = NASID_GET(soft->pbi_buscommon.bs_base); + cnode = nasid_to_cnodeid(nasid); + hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + + if (hubdev_info->hdi_flush_nasid_list.widget_p) { + sn_flush_device_list = hubdev_info->hdi_flush_nasid_list. + widget_p[(int)soft->pbi_buscommon.bs_xid]; + if (sn_flush_device_list) { + for (j = 0; j < DEV_PER_WIDGET; + j++, sn_flush_device_list++) { + if (sn_flush_device_list->sfdl_slot == -1) + continue; + if (sn_flush_device_list-> + sfdl_persistent_busnum == + soft->pbi_buscommon.bs_persist_busnum) + sn_flush_device_list->sfdl_pcibus_info = + soft; + } + } + } + + /* Setup the PMU ATE map */ + soft->pbi_int_ate_resource.lowest_free_index = 0; + soft->pbi_int_ate_resource.ate = + kmalloc(soft->pbi_int_ate_size * sizeof(uint64_t), GFP_KERNEL); + memset(soft->pbi_int_ate_resource.ate, 0, + (soft->pbi_int_ate_size * sizeof(uint64_t))); + + return soft; +} + +void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct pcibus_info *pcibus_info; + int bit = sn_irq_info->irq_int_bit; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (pcidev_info) { + pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info-> + pdi_pcibus_info; + pcireg_force_intr_set(pcibus_info, bit); + } +} + +void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct pcibus_info *pcibus_info; + int bit = sn_irq_info->irq_int_bit; + uint64_t xtalk_addr = sn_irq_info->irq_xtalkaddr; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (pcidev_info) { + pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info-> + pdi_pcibus_info; + + /* Disable the device's IRQ */ + pcireg_intr_enable_bit_clr(pcibus_info, bit); + + /* Change the device's IRQ */ + pcireg_intr_addr_addr_set(pcibus_info, bit, xtalk_addr); + + /* Re-enable the device's IRQ */ + pcireg_intr_enable_bit_set(pcibus_info, bit); + + pcibr_force_interrupt(sn_irq_info); + } +} diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c new file mode 100644 index 000000000000..74a74a7d2a13 --- /dev/null +++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c @@ -0,0 +1,282 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include "pci/pcibus_provider_defs.h" +#include "pci/pcidev.h" +#include "pci/tiocp.h" +#include "pci/pic.h" +#include "pci/pcibr_provider.h" + +union br_ptr { + struct tiocp tio; + struct pic pic; +}; + +/* + * Control Register Access -- Read/Write 0000_0020 + */ +void pcireg_control_bit_clr(struct pcibus_info *pcibus_info, uint64_t bits) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ptr->tio.cp_control &= ~bits; + break; + case PCIBR_BRIDGETYPE_PIC: + ptr->pic.p_wid_control &= ~bits; + break; + default: + panic + ("pcireg_control_bit_clr: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } +} + +void pcireg_control_bit_set(struct pcibus_info *pcibus_info, uint64_t bits) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ptr->tio.cp_control |= bits; + break; + case PCIBR_BRIDGETYPE_PIC: + ptr->pic.p_wid_control |= bits; + break; + default: + panic + ("pcireg_control_bit_set: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } +} + +/* + * PCI/PCIX Target Flush Register Access -- Read Only 0000_0050 + */ +uint64_t pcireg_tflush_get(struct pcibus_info *pcibus_info) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + uint64_t ret = 0; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ret = ptr->tio.cp_tflush; + break; + case PCIBR_BRIDGETYPE_PIC: + ret = ptr->pic.p_wid_tflush; + break; + default: + panic + ("pcireg_tflush_get: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } + + /* Read of the Target Flush should always return zero */ + if (ret != 0) + panic("pcireg_tflush_get:Target Flush failed\n"); + + return ret; +} + +/* + * Interrupt Status Register Access -- Read Only 0000_0100 + */ +uint64_t pcireg_intr_status_get(struct pcibus_info * pcibus_info) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + uint64_t ret = 0; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ret = ptr->tio.cp_int_status; + break; + case PCIBR_BRIDGETYPE_PIC: + ret = ptr->pic.p_int_status; + break; + default: + panic + ("pcireg_intr_status_get: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } + return ret; +} + +/* + * Interrupt Enable Register Access -- Read/Write 0000_0108 + */ +void pcireg_intr_enable_bit_clr(struct pcibus_info *pcibus_info, uint64_t bits) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ptr->tio.cp_int_enable &= ~bits; + break; + case PCIBR_BRIDGETYPE_PIC: + ptr->pic.p_int_enable &= ~bits; + break; + default: + panic + ("pcireg_intr_enable_bit_clr: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } +} + +void pcireg_intr_enable_bit_set(struct pcibus_info *pcibus_info, uint64_t bits) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ptr->tio.cp_int_enable |= bits; + break; + case PCIBR_BRIDGETYPE_PIC: + ptr->pic.p_int_enable |= bits; + break; + default: + panic + ("pcireg_intr_enable_bit_set: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } +} + +/* + * Intr Host Address Register (int_addr) -- Read/Write 0000_0130 - 0000_0168 + */ +void pcireg_intr_addr_addr_set(struct pcibus_info *pcibus_info, int int_n, + uint64_t addr) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ptr->tio.cp_int_addr[int_n] &= ~TIOCP_HOST_INTR_ADDR; + ptr->tio.cp_int_addr[int_n] |= + (addr & TIOCP_HOST_INTR_ADDR); + break; + case PCIBR_BRIDGETYPE_PIC: + ptr->pic.p_int_addr[int_n] &= ~PIC_HOST_INTR_ADDR; + ptr->pic.p_int_addr[int_n] |= + (addr & PIC_HOST_INTR_ADDR); + break; + default: + panic + ("pcireg_intr_addr_addr_get: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } +} + +/* + * Force Interrupt Register Access -- Write Only 0000_01C0 - 0000_01F8 + */ +void pcireg_force_intr_set(struct pcibus_info *pcibus_info, int int_n) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ptr->tio.cp_force_pin[int_n] = 1; + break; + case PCIBR_BRIDGETYPE_PIC: + ptr->pic.p_force_pin[int_n] = 1; + break; + default: + panic + ("pcireg_force_intr_set: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } +} + +/* + * Device(x) Write Buffer Flush Reg Access -- Read Only 0000_0240 - 0000_0258 + */ +uint64_t pcireg_wrb_flush_get(struct pcibus_info *pcibus_info, int device) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + uint64_t ret = 0; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ret = ptr->tio.cp_wr_req_buf[device]; + break; + case PCIBR_BRIDGETYPE_PIC: + ret = ptr->pic.p_wr_req_buf[device]; + break; + default: + panic("pcireg_wrb_flush_get: unknown bridgetype bridge 0x%p", (void *)ptr); + } + + } + /* Read of the Write Buffer Flush should always return zero */ + return ret; +} + +void pcireg_int_ate_set(struct pcibus_info *pcibus_info, int ate_index, + uint64_t val) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ptr->tio.cp_int_ate_ram[ate_index] = (uint64_t) val; + break; + case PCIBR_BRIDGETYPE_PIC: + ptr->pic.p_int_ate_ram[ate_index] = (uint64_t) val; + break; + default: + panic + ("pcireg_int_ate_set: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } +} + +uint64_t *pcireg_int_ate_addr(struct pcibus_info *pcibus_info, int ate_index) +{ + union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base; + uint64_t *ret = (uint64_t *) 0; + + if (pcibus_info) { + switch (pcibus_info->pbi_bridge_type) { + case PCIBR_BRIDGETYPE_TIOCP: + ret = + (uint64_t *) & (ptr->tio.cp_int_ate_ram[ate_index]); + break; + case PCIBR_BRIDGETYPE_PIC: + ret = + (uint64_t *) & (ptr->pic.p_int_ate_ram[ate_index]); + break; + default: + panic + ("pcireg_int_ate_addr: unknown bridgetype bridge 0x%p", + (void *)ptr); + } + } + return ret; +} |