summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig16
-rw-r--r--arch/powerpc/platforms/pseries/Makefile2
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c441
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c38
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c68
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c10
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c244
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c36
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c12
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c66
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c243
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c18
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c9
-rw-r--r--arch/powerpc/platforms/pseries/of_helpers.c8
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c106
-rw-r--r--arch/powerpc/platforms/pseries/pci.c7
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c18
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h1
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c23
-rw-r--r--arch/powerpc/platforms/pseries/ras.c462
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c8
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.c550
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.h114
-rw-r--r--arch/powerpc/platforms/pseries/scanlog.c15
-rw-r--r--arch/powerpc/platforms/pseries/setup.c40
-rw-r--r--arch/powerpc/platforms/pseries/smp.c3
-rw-r--r--arch/powerpc/platforms/pseries/svm.c85
-rw-r--r--arch/powerpc/platforms/pseries/vio.c6
28 files changed, 1797 insertions, 852 deletions
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index f7b484f55553..24c18362e5ea 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -21,7 +21,6 @@ config PPC_PSERIES
select PPC_DOORBELL
select HOTPLUG_CPU
select ARCH_RANDOM
- select PPC_DOORBELL
select FORCE_SMP
select SWIOTLB
default y
@@ -108,6 +107,7 @@ config PPC_SMLPAR
config CMM
tristate "Collaborative memory management"
depends on PPC_SMLPAR
+ select MEMORY_BALLOON
default y
help
Select this option, if you want to enable the kernel interface
@@ -145,3 +145,17 @@ config PAPR_SCM
tristate "Support for the PAPR Storage Class Memory interface"
help
Enable access to hypervisor provided storage class memory.
+
+config PPC_SVM
+ bool "Secure virtual machine (SVM) support for POWER"
+ depends on PPC_PSERIES
+ select SWIOTLB
+ select ARCH_HAS_MEM_ENCRYPT
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+ help
+ There are certain POWER platforms which support secure guests using
+ the Protected Execution Facility, with the help of an Ultravisor
+ executing below the hypervisor layer. This enables support for
+ those guests.
+
+ If unsure, say "N".
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index ab3d59aeacca..a3c74a5cf20d 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
obj-$(CONFIG_PAPR_SCM) += papr_scm.o
obj-$(CONFIG_PPC_SPLPAR) += vphn.o
+obj-$(CONFIG_PPC_SVM) += svm.o
+obj-$(CONFIG_FA_DUMP) += rtas-fadump.o
ifdef CONFIG_PPC_PSERIES
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index b33251d75927..9dba7e880885 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -19,6 +19,10 @@
#include <linux/stringify.h>
#include <linux/swap.h>
#include <linux/device.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
+#include <linux/magic.h>
+#include <linux/balloon_compaction.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/mmu.h>
@@ -38,12 +42,8 @@
#define CMM_MIN_MEM_MB 256
#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
-/*
- * The priority level tries to ensure that this notifier is called as
- * late as possible to reduce thrashing in the shared memory pool.
- */
+
#define CMM_MEM_HOTPLUG_PRI 1
-#define CMM_MEM_ISOLATE_PRI 15
static unsigned int delay = CMM_DEFAULT_DELAY;
static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
@@ -51,6 +51,8 @@ static unsigned int oom_kb = CMM_OOM_KB;
static unsigned int cmm_debug = CMM_DEBUG;
static unsigned int cmm_disabled = CMM_DISABLE;
static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static bool __read_mostly simulate;
+static unsigned long simulate_loan_target_kb;
static struct device cmm_dev;
MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
@@ -74,35 +76,31 @@ MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
module_param_named(debug, cmm_debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
"[Default=" __stringify(CMM_DEBUG) "]");
-
-#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
+module_param_named(simulate, simulate, bool, 0444);
+MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
-struct cmm_page_array {
- struct cmm_page_array *next;
- unsigned long index;
- unsigned long page[CMM_NR_PAGES];
-};
-
-static unsigned long loaned_pages;
+static atomic_long_t loaned_pages;
static unsigned long loaned_pages_target;
static unsigned long oom_freed_pages;
-static struct cmm_page_array *cmm_page_list;
-static DEFINE_SPINLOCK(cmm_lock);
-
static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */
static struct task_struct *cmm_thread_ptr;
+static struct balloon_dev_info b_dev_info;
-static long plpar_page_set_loaned(unsigned long vpa)
+static long plpar_page_set_loaned(struct page *page)
{
+ const unsigned long vpa = page_to_phys(page);
unsigned long cmo_page_sz = cmo_get_page_size();
long rc = 0;
int i;
+ if (unlikely(simulate))
+ return 0;
+
for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
@@ -113,12 +111,16 @@ static long plpar_page_set_loaned(unsigned long vpa)
return rc;
}
-static long plpar_page_set_active(unsigned long vpa)
+static long plpar_page_set_active(struct page *page)
{
+ const unsigned long vpa = page_to_phys(page);
unsigned long cmo_page_sz = cmo_get_page_size();
long rc = 0;
int i;
+ if (unlikely(simulate))
+ return 0;
+
for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
@@ -138,8 +140,7 @@ static long plpar_page_set_active(unsigned long vpa)
**/
static long cmm_alloc_pages(long nr)
{
- struct cmm_page_array *pa, *npa;
- unsigned long addr;
+ struct page *page;
long rc;
cmm_dbg("Begin request for %ld pages\n", nr);
@@ -156,46 +157,19 @@ static long cmm_alloc_pages(long nr)
break;
}
- addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_NOMEMALLOC);
- if (!addr)
+ page = balloon_page_alloc();
+ if (!page)
break;
- spin_lock(&cmm_lock);
- pa = cmm_page_list;
- if (!pa || pa->index >= CMM_NR_PAGES) {
- /* Need a new page for the page list. */
- spin_unlock(&cmm_lock);
- npa = (struct cmm_page_array *)__get_free_page(
- GFP_NOIO | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_NOMEMALLOC);
- if (!npa) {
- pr_info("%s: Can not allocate new page list\n", __func__);
- free_page(addr);
- break;
- }
- spin_lock(&cmm_lock);
- pa = cmm_page_list;
-
- if (!pa || pa->index >= CMM_NR_PAGES) {
- npa->next = pa;
- npa->index = 0;
- pa = npa;
- cmm_page_list = pa;
- } else
- free_page((unsigned long) npa);
- }
-
- if ((rc = plpar_page_set_loaned(__pa(addr)))) {
+ rc = plpar_page_set_loaned(page);
+ if (rc) {
pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
- spin_unlock(&cmm_lock);
- free_page(addr);
+ __free_page(page);
break;
}
- pa->page[pa->index++] = addr;
- loaned_pages++;
- totalram_pages_dec();
- spin_unlock(&cmm_lock);
+ balloon_page_enqueue(&b_dev_info, page);
+ atomic_long_inc(&loaned_pages);
+ adjust_managed_page_count(page, -1);
nr--;
}
@@ -212,30 +186,19 @@ static long cmm_alloc_pages(long nr)
**/
static long cmm_free_pages(long nr)
{
- struct cmm_page_array *pa;
- unsigned long addr;
+ struct page *page;
cmm_dbg("Begin free of %ld pages.\n", nr);
- spin_lock(&cmm_lock);
- pa = cmm_page_list;
while (nr) {
- if (!pa || pa->index <= 0)
+ page = balloon_page_dequeue(&b_dev_info);
+ if (!page)
break;
- addr = pa->page[--pa->index];
-
- if (pa->index == 0) {
- pa = pa->next;
- free_page((unsigned long) cmm_page_list);
- cmm_page_list = pa;
- }
-
- plpar_page_set_active(__pa(addr));
- free_page(addr);
- loaned_pages--;
+ plpar_page_set_active(page);
+ adjust_managed_page_count(page, 1);
+ __free_page(page);
+ atomic_long_dec(&loaned_pages);
nr--;
- totalram_pages_inc();
}
- spin_unlock(&cmm_lock);
cmm_dbg("End request with %ld pages unfulfilled\n", nr);
return nr;
}
@@ -257,7 +220,7 @@ static int cmm_oom_notify(struct notifier_block *self,
cmm_dbg("OOM processing started\n");
nr = cmm_free_pages(nr);
- loaned_pages_target = loaned_pages;
+ loaned_pages_target = atomic_long_read(&loaned_pages);
*freed += KB2PAGES(oom_kb) - nr;
oom_freed_pages += KB2PAGES(oom_kb) - nr;
cmm_dbg("OOM processing complete\n");
@@ -274,19 +237,24 @@ static int cmm_oom_notify(struct notifier_block *self,
**/
static void cmm_get_mpp(void)
{
+ const long __loaned_pages = atomic_long_read(&loaned_pages);
+ const long total_pages = totalram_pages() + __loaned_pages;
int rc;
struct hvcall_mpp_data mpp_data;
signed long active_pages_target, page_loan_request, target;
- signed long total_pages = totalram_pages() + loaned_pages;
signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
- rc = h_get_mpp(&mpp_data);
-
- if (rc != H_SUCCESS)
- return;
-
- page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
- target = page_loan_request + (signed long)loaned_pages;
+ if (likely(!simulate)) {
+ rc = h_get_mpp(&mpp_data);
+ if (rc != H_SUCCESS)
+ return;
+ page_loan_request = div_s64((s64)mpp_data.loan_request,
+ PAGE_SIZE);
+ target = page_loan_request + __loaned_pages;
+ } else {
+ target = KB2PAGES(simulate_loan_target_kb);
+ page_loan_request = target - __loaned_pages;
+ }
if (target < 0 || total_pages < min_mem_pages)
target = 0;
@@ -307,7 +275,7 @@ static void cmm_get_mpp(void)
loaned_pages_target = target;
cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
- page_loan_request, loaned_pages, loaned_pages_target,
+ page_loan_request, __loaned_pages, loaned_pages_target,
oom_freed_pages, totalram_pages());
}
@@ -325,6 +293,7 @@ static struct notifier_block cmm_oom_nb = {
static int cmm_thread(void *dummy)
{
unsigned long timeleft;
+ long __loaned_pages;
while (1) {
timeleft = msleep_interruptible(delay * 1000);
@@ -355,11 +324,12 @@ static int cmm_thread(void *dummy)
cmm_get_mpp();
- if (loaned_pages_target > loaned_pages) {
- if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
- loaned_pages_target = loaned_pages;
- } else if (loaned_pages_target < loaned_pages)
- cmm_free_pages(loaned_pages - loaned_pages_target);
+ __loaned_pages = atomic_long_read(&loaned_pages);
+ if (loaned_pages_target > __loaned_pages) {
+ if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
+ loaned_pages_target = __loaned_pages;
+ } else if (loaned_pages_target < __loaned_pages)
+ cmm_free_pages(__loaned_pages - loaned_pages_target);
}
return 0;
}
@@ -373,7 +343,7 @@ static int cmm_thread(void *dummy)
} \
static DEVICE_ATTR(name, 0444, show_##name, NULL)
-CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
static ssize_t show_oom_pages(struct device *dev,
@@ -406,11 +376,18 @@ static struct device_attribute *cmm_attrs[] = {
&dev_attr_oom_freed_kb,
};
+static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
+ simulate_loan_target_kb);
+
static struct bus_type cmm_subsys = {
.name = "cmm",
.dev_name = "cmm",
};
+static void cmm_release_device(struct device *dev)
+{
+}
+
/**
* cmm_sysfs_register - Register with sysfs
*
@@ -426,6 +403,7 @@ static int cmm_sysfs_register(struct device *dev)
dev->id = 0;
dev->bus = &cmm_subsys;
+ dev->release = cmm_release_device;
if ((rc = device_register(dev)))
goto subsys_unregister;
@@ -435,6 +413,11 @@ static int cmm_sysfs_register(struct device *dev)
goto fail;
}
+ if (!simulate)
+ return 0;
+ rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
+ if (rc)
+ goto fail;
return 0;
fail:
@@ -471,7 +454,7 @@ static int cmm_reboot_notifier(struct notifier_block *nb,
if (cmm_thread_ptr)
kthread_stop(cmm_thread_ptr);
cmm_thread_ptr = NULL;
- cmm_free_pages(loaned_pages);
+ cmm_free_pages(atomic_long_read(&loaned_pages));
}
return NOTIFY_DONE;
}
@@ -481,142 +464,6 @@ static struct notifier_block cmm_reboot_nb = {
};
/**
- * cmm_count_pages - Count the number of pages loaned in a particular range.
- *
- * @arg: memory_isolate_notify structure with address range and count
- *
- * Return value:
- * 0 on success
- **/
-static unsigned long cmm_count_pages(void *arg)
-{
- struct memory_isolate_notify *marg = arg;
- struct cmm_page_array *pa;
- unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
- unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
- unsigned long idx;
-
- spin_lock(&cmm_lock);
- pa = cmm_page_list;
- while (pa) {
- if ((unsigned long)pa >= start && (unsigned long)pa < end)
- marg->pages_found++;
- for (idx = 0; idx < pa->index; idx++)
- if (pa->page[idx] >= start && pa->page[idx] < end)
- marg->pages_found++;
- pa = pa->next;
- }
- spin_unlock(&cmm_lock);
- return 0;
-}
-
-/**
- * cmm_memory_isolate_cb - Handle memory isolation notifier calls
- * @self: notifier block struct
- * @action: action to take
- * @arg: struct memory_isolate_notify data for handler
- *
- * Return value:
- * NOTIFY_OK or notifier error based on subfunction return value
- **/
-static int cmm_memory_isolate_cb(struct notifier_block *self,
- unsigned long action, void *arg)
-{
- int ret = 0;
-
- if (action == MEM_ISOLATE_COUNT)
- ret = cmm_count_pages(arg);
-
- return notifier_from_errno(ret);
-}
-
-static struct notifier_block cmm_mem_isolate_nb = {
- .notifier_call = cmm_memory_isolate_cb,
- .priority = CMM_MEM_ISOLATE_PRI
-};
-
-/**
- * cmm_mem_going_offline - Unloan pages where memory is to be removed
- * @arg: memory_notify structure with page range to be offlined
- *
- * Return value:
- * 0 on success
- **/
-static int cmm_mem_going_offline(void *arg)
-{
- struct memory_notify *marg = arg;
- unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
- unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
- struct cmm_page_array *pa_curr, *pa_last, *npa;
- unsigned long idx;
- unsigned long freed = 0;
-
- cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
- start_page, marg->nr_pages);
- spin_lock(&cmm_lock);
-
- /* Search the page list for pages in the range to be offlined */
- pa_last = pa_curr = cmm_page_list;
- while (pa_curr) {
- for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
- if ((pa_curr->page[idx] < start_page) ||
- (pa_curr->page[idx] >= end_page))
- continue;
-
- plpar_page_set_active(__pa(pa_curr->page[idx]));
- free_page(pa_curr->page[idx]);
- freed++;
- loaned_pages--;
- totalram_pages_inc();
- pa_curr->page[idx] = pa_last->page[--pa_last->index];
- if (pa_last->index == 0) {
- if (pa_curr == pa_last)
- pa_curr = pa_last->next;
- pa_last = pa_last->next;
- free_page((unsigned long)cmm_page_list);
- cmm_page_list = pa_last;
- }
- }
- pa_curr = pa_curr->next;
- }
-
- /* Search for page list structures in the range to be offlined */
- pa_last = NULL;
- pa_curr = cmm_page_list;
- while (pa_curr) {
- if (((unsigned long)pa_curr >= start_page) &&
- ((unsigned long)pa_curr < end_page)) {
- npa = (struct cmm_page_array *)__get_free_page(
- GFP_NOIO | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_NOMEMALLOC);
- if (!npa) {
- spin_unlock(&cmm_lock);
- cmm_dbg("Failed to allocate memory for list "
- "management. Memory hotplug "
- "failed.\n");
- return -ENOMEM;
- }
- memcpy(npa, pa_curr, PAGE_SIZE);
- if (pa_curr == cmm_page_list)
- cmm_page_list = npa;
- if (pa_last)
- pa_last->next = npa;
- free_page((unsigned long) pa_curr);
- freed++;
- pa_curr = npa;
- }
-
- pa_last = pa_curr;
- pa_curr = pa_curr->next;
- }
-
- spin_unlock(&cmm_lock);
- cmm_dbg("Released %ld pages in the search range.\n", freed);
-
- return 0;
-}
-
-/**
* cmm_memory_cb - Handle memory hotplug notifier calls
* @self: notifier block struct
* @action: action to take
@@ -635,7 +482,6 @@ static int cmm_memory_cb(struct notifier_block *self,
case MEM_GOING_OFFLINE:
mutex_lock(&hotplug_mutex);
hotplug_occurred = 1;
- ret = cmm_mem_going_offline(arg);
break;
case MEM_OFFLINE:
case MEM_CANCEL_OFFLINE:
@@ -656,6 +502,116 @@ static struct notifier_block cmm_mem_nb = {
.priority = CMM_MEM_HOTPLUG_PRI
};
+#ifdef CONFIG_BALLOON_COMPACTION
+static struct vfsmount *balloon_mnt;
+
+static int cmm_init_fs_context(struct fs_context *fc)
+{
+ return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type balloon_fs = {
+ .name = "ppc-cmm",
+ .init_fs_context = cmm_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
+ struct page *newpage, struct page *page,
+ enum migrate_mode mode)
+{
+ unsigned long flags;
+
+ /*
+ * loan/"inflate" the newpage first.
+ *
+ * We might race against the cmm_thread who might discover after our
+ * loan request that another page is to be unloaned. However, once
+ * the cmm_thread runs again later, this error will automatically
+ * be corrected.
+ */
+ if (plpar_page_set_loaned(newpage)) {
+ /* Unlikely, but possible. Tell the caller not to retry now. */
+ pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
+ return -EBUSY;
+ }
+
+ /* balloon page list reference */
+ get_page(newpage);
+
+ /*
+ * When we migrate a page to a different zone, we have to fixup the
+ * count of both involved zones as we adjusted the managed page count
+ * when inflating.
+ */
+ if (page_zone(page) != page_zone(newpage)) {
+ adjust_managed_page_count(page, 1);
+ adjust_managed_page_count(newpage, -1);
+ }
+
+ spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+ balloon_page_insert(b_dev_info, newpage);
+ balloon_page_delete(page);
+ b_dev_info->isolated_pages--;
+ spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+
+ /*
+ * activate/"deflate" the old page. We ignore any errors just like the
+ * other callers.
+ */
+ plpar_page_set_active(page);
+
+ /* balloon page list reference */
+ put_page(page);
+
+ return MIGRATEPAGE_SUCCESS;
+}
+
+static int cmm_balloon_compaction_init(void)
+{
+ int rc;
+
+ balloon_devinfo_init(&b_dev_info);
+ b_dev_info.migratepage = cmm_migratepage;
+
+ balloon_mnt = kern_mount(&balloon_fs);
+ if (IS_ERR(balloon_mnt)) {
+ rc = PTR_ERR(balloon_mnt);
+ balloon_mnt = NULL;
+ return rc;
+ }
+
+ b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+ if (IS_ERR(b_dev_info.inode)) {
+ rc = PTR_ERR(b_dev_info.inode);
+ b_dev_info.inode = NULL;
+ kern_unmount(balloon_mnt);
+ balloon_mnt = NULL;
+ return rc;
+ }
+
+ b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
+ return 0;
+}
+static void cmm_balloon_compaction_deinit(void)
+{
+ if (b_dev_info.inode)
+ iput(b_dev_info.inode);
+ b_dev_info.inode = NULL;
+ kern_unmount(balloon_mnt);
+ balloon_mnt = NULL;
+}
+#else /* CONFIG_BALLOON_COMPACTION */
+static int cmm_balloon_compaction_init(void)
+{
+ return 0;
+}
+
+static void cmm_balloon_compaction_deinit(void)
+{
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+
/**
* cmm_init - Module initialization
*
@@ -664,26 +620,31 @@ static struct notifier_block cmm_mem_nb = {
**/
static int cmm_init(void)
{
- int rc = -ENOMEM;
+ int rc;
- if (!firmware_has_feature(FW_FEATURE_CMO))
+ if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
return -EOPNOTSUPP;
- if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
+ rc = cmm_balloon_compaction_init();
+ if (rc)
return rc;
+ rc = register_oom_notifier(&cmm_oom_nb);
+ if (rc < 0)
+ goto out_balloon_compaction;
+
if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
goto out_oom_notifier;
if ((rc = cmm_sysfs_register(&cmm_dev)))
goto out_reboot_notifier;
- if (register_memory_notifier(&cmm_mem_nb) ||
- register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+ rc = register_memory_notifier(&cmm_mem_nb);
+ if (rc)
goto out_unregister_notifier;
if (cmm_disabled)
- return rc;
+ return 0;
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (IS_ERR(cmm_thread_ptr)) {
@@ -691,16 +652,16 @@ static int cmm_init(void)
goto out_unregister_notifier;
}
- return rc;
-
+ return 0;
out_unregister_notifier:
unregister_memory_notifier(&cmm_mem_nb);
- unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
cmm_unregister_sysfs(&cmm_dev);
out_reboot_notifier:
unregister_reboot_notifier(&cmm_reboot_nb);
out_oom_notifier:
unregister_oom_notifier(&cmm_oom_nb);
+out_balloon_compaction:
+ cmm_balloon_compaction_deinit();
return rc;
}
@@ -717,9 +678,9 @@ static void cmm_exit(void)
unregister_oom_notifier(&cmm_oom_nb);
unregister_reboot_notifier(&cmm_reboot_nb);
unregister_memory_notifier(&cmm_mem_nb);
- unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
- cmm_free_pages(loaned_pages);
+ cmm_free_pages(atomic_long_read(&loaned_pages));
cmm_unregister_sysfs(&cmm_dev);
+ cmm_balloon_compaction_deinit();
}
/**
@@ -739,7 +700,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp)
if (cmm_thread_ptr)
kthread_stop(cmm_thread_ptr);
cmm_thread_ptr = NULL;
- cmm_free_pages(loaned_pages);
+ cmm_free_pages(atomic_long_read(&loaned_pages));
} else if (!disable && cmm_disabled) {
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (IS_ERR(cmm_thread_ptr))
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 2b87480f2837..eab8aa293743 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -19,7 +19,6 @@
struct dtl {
struct dtl_entry *buf;
- struct dentry *file;
int cpu;
int buf_entries;
u64 last_idx;
@@ -320,46 +319,28 @@ static const struct file_operations dtl_fops = {
static struct dentry *dtl_dir;
-static int dtl_setup_file(struct dtl *dtl)
+static void dtl_setup_file(struct dtl *dtl)
{
char name[10];
sprintf(name, "cpu-%d", dtl->cpu);
- dtl->file = debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
- if (!dtl->file)
- return -ENOMEM;
-
- return 0;
+ debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
}
static int dtl_init(void)
{
- struct dentry *event_mask_file, *buf_entries_file;
- int rc, i;
+ int i;
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return -ENODEV;
/* set up common debugfs structure */
- rc = -ENOMEM;
dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root);
- if (!dtl_dir) {
- printk(KERN_WARNING "%s: can't create dtl root dir\n",
- __func__);
- goto err;
- }
- event_mask_file = debugfs_create_x8("dtl_event_mask", 0600,
- dtl_dir, &dtl_event_mask);
- buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400,
- dtl_dir, &dtl_buf_entries);
-
- if (!event_mask_file || !buf_entries_file) {
- printk(KERN_WARNING "%s: can't create dtl files\n", __func__);
- goto err_remove_dir;
- }
+ debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
+ debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
/* set up the per-cpu log structures */
for_each_possible_cpu(i) {
@@ -367,16 +348,9 @@ static int dtl_init(void)
spin_lock_init(&dtl->lock);
dtl->cpu = i;
- rc = dtl_setup_file(dtl);
- if (rc)
- goto err_remove_dir;
+ dtl_setup_file(dtl);
}
return 0;
-
-err_remove_dir:
- debugfs_remove_recursive(dtl_dir);
-err:
- return rc;
}
machine_arch_initcall(pseries, dtl_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 9edae1863e2f..893ba3f562c4 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -42,42 +42,44 @@ static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_pe;
-#ifdef CONFIG_PCI_IOV
void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
{
struct pci_dn *pdn = pci_get_pdn(pdev);
- struct pci_dn *physfn_pdn;
- struct eeh_dev *edev;
- if (!pdev->is_virtfn)
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
return;
- pdn->device_id = pdev->device;
- pdn->vendor_id = pdev->vendor;
- pdn->class_code = pdev->class;
- /*
- * Last allow unfreeze return code used for retrieval
- * by user space in eeh-sysfs to show the last command
- * completion from platform.
- */
- pdn->last_allow_rc = 0;
- physfn_pdn = pci_get_pdn(pdev->physfn);
- pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index];
- edev = pdn_to_eeh_dev(pdn);
+ dev_dbg(&pdev->dev, "EEH: Setting up device\n");
+#ifdef CONFIG_PCI_IOV
+ if (pdev->is_virtfn) {
+ struct pci_dn *physfn_pdn;
- /*
- * The following operations will fail if VF's sysfs files
- * aren't created or its resources aren't finalized.
- */
+ pdn->device_id = pdev->device;
+ pdn->vendor_id = pdev->vendor;
+ pdn->class_code = pdev->class;
+ /*
+ * Last allow unfreeze return code used for retrieval
+ * by user space in eeh-sysfs to show the last command
+ * completion from platform.
+ */
+ pdn->last_allow_rc = 0;
+ physfn_pdn = pci_get_pdn(pdev->physfn);
+ pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index];
+ }
+#endif
eeh_add_device_early(pdn);
eeh_add_device_late(pdev);
- edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
- eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
- eeh_add_to_parent_pe(edev); /* Add as VF PE type */
- eeh_sysfs_add_device(pdev);
+#ifdef CONFIG_PCI_IOV
+ if (pdev->is_virtfn) {
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-}
+ edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
+ eeh_add_to_parent_pe(edev); /* Add as VF PE type */
+ }
#endif
+ eeh_sysfs_add_device(pdev);
+}
/*
* Buffer for reporting slot-error-detail rtas calls. Its here
@@ -144,10 +146,8 @@ static int pseries_eeh_init(void)
/* Set EEH probe mode */
eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
-#ifdef CONFIG_PCI_IOV
/* Set EEH machine dependent code */
ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
-#endif
return 0;
}
@@ -251,6 +251,8 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
return NULL;
+ eeh_edev_dbg(edev, "Probing device\n");
+
/*
* Update class code and mode of eeh device. We need
* correctly reflects that current device is root port
@@ -280,8 +282,11 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
/* Enable EEH on the device */
+ eeh_edev_dbg(edev, "Enabling EEH on device\n");
ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
- if (!ret) {
+ if (ret) {
+ eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
+ } else {
/* Retrieve PE address */
edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
pe.addr = edev->pe_config_addr;
@@ -297,11 +302,6 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
if (enable) {
eeh_add_flag(EEH_ENABLED);
eeh_add_to_parent_pe(edev);
-
- pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n",
- __func__, pdn->busno, PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn), pe.phb->global_number,
- pe.addr);
} else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
(pdn_to_eeh_dev(pdn->parent))->pe) {
/* This device doesn't support EEH, but it may have an
@@ -310,6 +310,8 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
eeh_add_to_parent_pe(edev);
}
+ eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
+ (enable ? "enabled" : "unsupported"), ret);
}
/* Save memory bars */
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index d4a8f1702417..3e49cc23a97a 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -22,6 +22,7 @@
#include <asm/firmware.h>
#include <asm/prom.h>
#include <asm/udbg.h>
+#include <asm/svm.h>
#include "pseries.h"
@@ -55,7 +56,8 @@ hypertas_fw_features_table[] = {
{FW_FEATURE_LLAN, "hcall-lLAN"},
{FW_FEATURE_BULK_REMOVE, "hcall-bulk"},
{FW_FEATURE_XDABR, "hcall-xdabr"},
- {FW_FEATURE_MULTITCE, "hcall-multi-tce"},
+ {FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE,
+ "hcall-multi-tce"},
{FW_FEATURE_SPLPAR, "hcall-splpar"},
{FW_FEATURE_VPHN, "hcall-vphn"},
{FW_FEATURE_SET_MODE, "hcall-set-mode"},
@@ -100,6 +102,12 @@ static void __init fw_hypertas_feature_init(const char *hypertas,
}
}
+ if (is_secure_guest() &&
+ (powerpc_firmware_features & FW_FEATURE_PUT_TCE_IND)) {
+ powerpc_firmware_features &= ~FW_FEATURE_PUT_TCE_IND;
+ pr_debug("SVM: disabling PUT_TCE_IND firmware feature\n");
+ }
+
pr_debug(" <- fw_hypertas_feature_init()\n");
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index bbda646b63b5..3e8cbfe7a80f 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -338,6 +338,62 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
}
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+ int rc = 0;
+ unsigned int cpu;
+ int len, nthreads, i;
+ const __be32 *intserv;
+ u32 thread;
+
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+ if (!intserv)
+ return -EINVAL;
+
+ nthreads = len / sizeof(u32);
+
+ cpu_maps_update_begin();
+ for (i = 0; i < nthreads; i++) {
+ thread = be32_to_cpu(intserv[i]);
+ for_each_present_cpu(cpu) {
+ if (get_hard_smp_processor_id(cpu) != thread)
+ continue;
+
+ if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+ break;
+
+ if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+ set_preferred_offline_state(cpu,
+ CPU_STATE_OFFLINE);
+ cpu_maps_update_done();
+ timed_topology_update(1);
+ rc = device_offline(get_cpu_device(cpu));
+ if (rc)
+ goto out;
+ cpu_maps_update_begin();
+ break;
+ }
+
+ /*
+ * The cpu is in CPU_STATE_INACTIVE.
+ * Upgrade it's state to CPU_STATE_OFFLINE.
+ */
+ set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+ WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS);
+ __cpu_die(cpu);
+ break;
+ }
+ if (cpu == num_possible_cpus()) {
+ pr_warn("Could not find cpu to offline with physical id 0x%x\n",
+ thread);
+ }
+ }
+ cpu_maps_update_done();
+
+out:
+ return rc;
+}
+
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;
@@ -364,8 +420,10 @@ static int dlpar_online_cpu(struct device_node *dn)
timed_topology_update(1);
find_and_online_cpu_nid(cpu);
rc = device_online(get_cpu_device(cpu));
- if (rc)
+ if (rc) {
+ dlpar_offline_cpu(dn);
goto out;
+ }
cpu_maps_update_begin();
break;
@@ -407,17 +465,67 @@ static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
return found;
}
+static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
+{
+ struct property *info;
+ struct of_drc_info drc;
+ const __be32 *value;
+ u32 index;
+ int count, i, j;
+
+ info = of_find_property(parent, "ibm,drc-info", NULL);
+ if (!info)
+ return false;
+
+ value = of_prop_next_u32(info, NULL, &count);
+
+ /* First value of ibm,drc-info is number of drc-info records */
+ if (value)
+ value++;
+ else
+ return false;
+
+ for (i = 0; i < count; i++) {
+ if (of_read_drc_info_cell(&info, &value, &drc))
+ return false;
+
+ if (strncmp(drc.drc_type, "CPU", 3))
+ break;
+
+ if (drc_index > drc.last_drc_index)
+ continue;
+
+ index = drc.drc_index_start;
+ for (j = 0; j < drc.num_sequential_elems; j++) {
+ if (drc_index == index)
+ return true;
+
+ index += drc.sequential_inc;
+ }
+ }
+
+ return false;
+}
+
static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
{
bool found = false;
int rc, index;
- index = 0;
+ if (of_find_property(parent, "ibm,drc-info", NULL))
+ return drc_info_valid_index(parent, drc_index);
+
+ /* Note that the format of the ibm,drc-indexes array is
+ * the number of entries in the array followed by the array
+ * of drc values so we start looking at index = 1.
+ */
+ index = 1;
while (!found) {
u32 drc;
rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
index++, &drc);
+
if (rc)
break;
@@ -505,63 +613,6 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
return rc;
}
-static int dlpar_offline_cpu(struct device_node *dn)
-{
- int rc = 0;
- unsigned int cpu;
- int len, nthreads, i;
- const __be32 *intserv;
- u32 thread;
-
- intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
- if (!intserv)
- return -EINVAL;
-
- nthreads = len / sizeof(u32);
-
- cpu_maps_update_begin();
- for (i = 0; i < nthreads; i++) {
- thread = be32_to_cpu(intserv[i]);
- for_each_present_cpu(cpu) {
- if (get_hard_smp_processor_id(cpu) != thread)
- continue;
-
- if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
- break;
-
- if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
- set_preferred_offline_state(cpu,
- CPU_STATE_OFFLINE);
- cpu_maps_update_done();
- timed_topology_update(1);
- rc = device_offline(get_cpu_device(cpu));
- if (rc)
- goto out;
- cpu_maps_update_begin();
- break;
-
- }
-
- /*
- * The cpu is in CPU_STATE_INACTIVE.
- * Upgrade it's state to CPU_STATE_OFFLINE.
- */
- set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
- BUG_ON(plpar_hcall_norets(H_PROD, thread)
- != H_SUCCESS);
- __cpu_die(cpu);
- break;
- }
- if (cpu == num_possible_cpus())
- printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
- }
- cpu_maps_update_done();
-
-out:
- return rc;
-
-}
-
static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
int rc;
@@ -717,19 +768,52 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
return rc;
}
-static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
+static int find_drc_info_cpus_to_add(struct device_node *cpus,
+ struct property *info,
+ u32 *cpu_drcs, u32 cpus_to_add)
{
- struct device_node *parent;
+ struct of_drc_info drc;
+ const __be32 *value;
+ u32 count, drc_index;
int cpus_found = 0;
- int index, rc;
+ int i, j;
- parent = of_find_node_by_path("/cpus");
- if (!parent) {
- pr_warn("Could not find CPU root node in device tree\n");
- kfree(cpu_drcs);
+ if (!info)
return -1;
+
+ value = of_prop_next_u32(info, NULL, &count);
+ if (value)
+ value++;
+
+ for (i = 0; i < count; i++) {
+ of_read_drc_info_cell(&info, &value, &drc);
+ if (strncmp(drc.drc_type, "CPU", 3))
+ break;
+
+ drc_index = drc.drc_index_start;
+ for (j = 0; j < drc.num_sequential_elems; j++) {
+ if (dlpar_cpu_exists(cpus, drc_index))
+ continue;
+
+ cpu_drcs[cpus_found++] = drc_index;
+
+ if (cpus_found == cpus_to_add)
+ return cpus_found;
+
+ drc_index += drc.sequential_inc;
+ }
}
+ return cpus_found;
+}
+
+static int find_drc_index_cpus_to_add(struct device_node *cpus,
+ u32 *cpu_drcs, u32 cpus_to_add)
+{
+ int cpus_found = 0;
+ int index, rc;
+ u32 drc_index;
+
/* Search the ibm,drc-indexes array for possible CPU drcs to
* add. Note that the format of the ibm,drc-indexes array is
* the number of entries in the array followed by the array
@@ -737,25 +821,25 @@ static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
*/
index = 1;
while (cpus_found < cpus_to_add) {
- u32 drc;
+ rc = of_property_read_u32_index(cpus, "ibm,drc-indexes",
+ index++, &drc_index);
- rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
- index++, &drc);
if (rc)
break;
- if (dlpar_cpu_exists(parent, drc))
+ if (dlpar_cpu_exists(cpus, drc_index))
continue;
- cpu_drcs[cpus_found++] = drc;
+ cpu_drcs[cpus_found++] = drc_index;
}
- of_node_put(parent);
return cpus_found;
}
static int dlpar_cpu_add_by_count(u32 cpus_to_add)
{
+ struct device_node *parent;
+ struct property *info;
u32 *cpu_drcs;
int cpus_added = 0;
int cpus_found;
@@ -767,7 +851,21 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
if (!cpu_drcs)
return -EINVAL;
- cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
+ parent = of_find_node_by_path("/cpus");
+ if (!parent) {
+ pr_warn("Could not find CPU root node in device tree\n");
+ kfree(cpu_drcs);
+ return -1;
+ }
+
+ info = of_find_property(parent, "ibm,drc-info", NULL);
+ if (info)
+ cpus_found = find_drc_info_cpus_to_add(parent, info, cpu_drcs, cpus_to_add);
+ else
+ cpus_found = find_drc_index_cpus_to_add(parent, cpu_drcs, cpus_to_add);
+
+ of_node_put(parent);
+
if (cpus_found < cpus_to_add) {
pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
cpus_found, cpus_to_add);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 46d0d35b9ca4..a4d40a3ceea3 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -338,7 +338,7 @@ static int pseries_remove_mem_node(struct device_node *np)
static bool lmb_is_removable(struct drmem_lmb *lmb)
{
int i, scns_per_block;
- int rc = 1;
+ bool rc = true;
unsigned long pfn, block_sz;
u64 phys_addr;
@@ -360,14 +360,16 @@ static bool lmb_is_removable(struct drmem_lmb *lmb)
for (i = 0; i < scns_per_block; i++) {
pfn = PFN_DOWN(phys_addr);
- if (!pfn_present(pfn))
+ if (!pfn_present(pfn)) {
+ phys_addr += MIN_MEMORY_BLOCK_SIZE;
continue;
+ }
- rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+ rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION);
phys_addr += MIN_MEMORY_BLOCK_SIZE;
}
- return rc ? true : false;
+ return rc;
}
static int dlpar_add_lmb(struct drmem_lmb *);
@@ -880,34 +882,44 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
switch (hp_elog->action) {
case PSERIES_HP_ELOG_ACTION_ADD:
- if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
+ switch (hp_elog->id_type) {
+ case PSERIES_HP_ELOG_ID_DRC_COUNT:
count = hp_elog->_drc_u.drc_count;
rc = dlpar_memory_add_by_count(count);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_INDEX:
drc_index = hp_elog->_drc_u.drc_index;
rc = dlpar_memory_add_by_index(drc_index);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_IC:
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
rc = dlpar_memory_add_by_ic(count, drc_index);
- } else {
+ break;
+ default:
rc = -EINVAL;
+ break;
}
break;
case PSERIES_HP_ELOG_ACTION_REMOVE:
- if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
+ switch (hp_elog->id_type) {
+ case PSERIES_HP_ELOG_ID_DRC_COUNT:
count = hp_elog->_drc_u.drc_count;
rc = dlpar_memory_remove_by_count(count);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_INDEX:
drc_index = hp_elog->_drc_u.drc_index;
rc = dlpar_memory_remove_by_index(drc_index);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_IC:
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
rc = dlpar_memory_remove_by_ic(count, drc_index);
- } else {
+ break;
+ default:
rc = -EINVAL;
+ break;
}
break;
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index bcc1b67417a8..c40c62ec432e 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -129,7 +129,6 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, long retval,
static int __init hcall_inst_init(void)
{
struct dentry *hcall_root;
- struct dentry *hcall_file;
char cpu_name_buf[CPU_NAME_BUF_SIZE];
int cpu;
@@ -145,17 +144,12 @@ static int __init hcall_inst_init(void)
}
hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
- if (!hcall_root)
- return -ENOMEM;
for_each_possible_cpu(cpu) {
snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
- hcall_file = debugfs_create_file(cpu_name_buf, 0444,
- hcall_root,
- per_cpu(hcall_stats, cpu),
- &hcall_inst_seq_fops);
- if (!hcall_file)
- return -ENOMEM;
+ debugfs_create_file(cpu_name_buf, 0444, hcall_root,
+ per_cpu(hcall_stats, cpu),
+ &hcall_inst_seq_fops);
}
return 0;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 889dc2e44b89..2e0a8eab5588 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -132,10 +132,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
return be64_to_cpu(*tcep);
}
-static void tce_free_pSeriesLP(struct iommu_table*, long, long);
+static void tce_free_pSeriesLP(unsigned long liobn, long, long);
static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
-static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
unsigned long attrs)
@@ -146,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
int ret = 0;
long tcenum_start = tcenum, npages_start = npages;
- rpn = __pa(uaddr) >> TCE_SHIFT;
+ rpn = __pa(uaddr) >> tceshift;
proto_tce = TCE_PCI_READ;
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
while (npages--) {
- tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
- rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
+ tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
+ rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
- tce_free_pSeriesLP(tbl, tcenum_start,
+ tce_free_pSeriesLP(liobn, tcenum_start,
(npages_start - (npages + 1)));
break;
}
if (rc && printk_ratelimit()) {
printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
- printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
+ printk("\tindex = 0x%llx\n", (u64)liobn);
printk("\ttcenum = 0x%llx\n", (u64)tcenum);
printk("\ttce val = 0x%llx\n", tce );
dump_stack();
@@ -192,8 +192,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
int ret = 0;
unsigned long flags;
- if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
- return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+ if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
+ return tce_build_pSeriesLP(tbl->it_index, tcenum,
+ tbl->it_page_shift, npages, uaddr,
direction, attrs);
}
@@ -209,8 +210,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
local_irq_restore(flags);
- return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
- direction, attrs);
+ return tce_build_pSeriesLP(tbl->it_index, tcenum,
+ tbl->it_page_shift,
+ npages, uaddr, direction, attrs);
}
__this_cpu_write(tce_page, tcep);
}
@@ -261,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
return ret;
}
-static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
+static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
{
u64 rc;
while (npages--) {
- rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
+ rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);
if (rc && printk_ratelimit()) {
printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
- printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
+ printk("\tindex = 0x%llx\n", (u64)liobn);
printk("\ttcenum = 0x%llx\n", (u64)tcenum);
dump_stack();
}
@@ -284,8 +286,8 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
{
u64 rc;
- if (!firmware_has_feature(FW_FEATURE_MULTITCE))
- return tce_free_pSeriesLP(tbl, tcenum, npages);
+ if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
+ return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
@@ -400,6 +402,19 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
u64 rc = 0;
long l, limit;
+ if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
+ unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
+ unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
+ be64_to_cpu(maprange->dma_base);
+ unsigned long tcenum = dmastart >> tceshift;
+ unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift;
+ void *uaddr = __va(start_pfn << PAGE_SHIFT);
+
+ return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
+ tcenum, tceshift, npages, (unsigned long) uaddr,
+ DMA_BIDIRECTIONAL, 0);
+ }
+
local_irq_disable(); /* to protect tcep and the page behind it */
tcep = __this_cpu_read(tce_page);
@@ -609,7 +624,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
iommu_table_setparms(pci->phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, pci->phb->node);
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
/* Divide the rest (1.75GB) among the children */
pci->phb->dma_window_size = 0x80000000ul;
@@ -621,7 +636,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
#ifdef CONFIG_IOMMU_API
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
- long *tce, enum dma_data_direction *direction)
+ long *tce, enum dma_data_direction *direction,
+ bool realmode)
{
long rc;
unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
@@ -649,7 +665,7 @@ static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
struct iommu_table_ops iommu_table_lpar_multi_ops = {
.set = tce_buildmulti_pSeriesLP,
#ifdef CONFIG_IOMMU_API
- .exchange = tce_exchange_pseries,
+ .xchg_no_kill = tce_exchange_pseries,
#endif
.clear = tce_freemulti_pSeriesLP,
.get = tce_get_pSeriesLP
@@ -690,7 +706,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
ppci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
- iommu_init_table(tbl, ppci->phb->node);
+ iommu_init_table(tbl, ppci->phb->node, 0, 0);
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -719,7 +735,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
tbl = PCI_DN(dn)->table_group->tables[0];
iommu_table_setparms(phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, phb->node);
+ iommu_init_table(tbl, phb->node, 0, 0);
set_iommu_table_base(&dev->dev, tbl);
return;
}
@@ -1169,7 +1185,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
iommu_table_setparms_lpar(pci->phb, pdn, tbl,
pci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
- iommu_init_table(tbl, pci->phb->node);
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
iommu_register_group(pci->table_group,
pci_domain_nr(pci->phb->bus), 0);
pr_debug(" created table: %p\n", pci->table_group);
@@ -1325,9 +1341,11 @@ static int __init disable_multitce(char *str)
{
if (strcmp(str, "off") == 0 &&
firmware_has_feature(FW_FEATURE_LPAR) &&
- firmware_has_feature(FW_FEATURE_MULTITCE)) {
+ (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) ||
+ firmware_has_feature(FW_FEATURE_STUFF_TCE))) {
printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
- powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
+ powerpc_firmware_features &=
+ ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE);
}
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 09bb878c21e0..3c3da25b445c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -56,6 +56,22 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
+/*
+ * H_BLOCK_REMOVE supported block size for this page size in segment who's base
+ * page size is that page size.
+ *
+ * The first index is the segment base page size, the second one is the actual
+ * page size.
+ */
+static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+
+/*
+ * Due to the involved complexity, and that the current hypervisor is only
+ * returning this value or 0, we are limiting the support of the H_BLOCK_REMOVE
+ * buffer size to 8 size block.
+ */
+#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static u8 dtl_mask = DTL_LOG_PREEMPT;
#else
@@ -566,12 +582,12 @@ static int vcpudispatch_stats_open(struct inode *inode, struct file *file)
return single_open(file, vcpudispatch_stats_display, NULL);
}
-static const struct file_operations vcpudispatch_stats_proc_ops = {
- .open = vcpudispatch_stats_open,
- .read = seq_read,
- .write = vcpudispatch_stats_write,
- .llseek = seq_lseek,
- .release = single_release,
+static const struct proc_ops vcpudispatch_stats_proc_ops = {
+ .proc_open = vcpudispatch_stats_open,
+ .proc_read = seq_read,
+ .proc_write = vcpudispatch_stats_write,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
};
static ssize_t vcpudispatch_stats_freq_write(struct file *file,
@@ -610,12 +626,12 @@ static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file)
return single_open(file, vcpudispatch_stats_freq_display, NULL);
}
-static const struct file_operations vcpudispatch_stats_freq_proc_ops = {
- .open = vcpudispatch_stats_freq_open,
- .read = seq_read,
- .write = vcpudispatch_stats_freq_write,
- .llseek = seq_lseek,
- .release = single_release,
+static const struct proc_ops vcpudispatch_stats_freq_proc_ops = {
+ .proc_open = vcpudispatch_stats_freq_open,
+ .proc_read = seq_read,
+ .proc_write = vcpudispatch_stats_freq_write,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
};
static int __init vcpudispatch_stats_procfs_init(void)
@@ -758,7 +774,7 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
/* don't remove a bolted entry */
lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
- (0x1UL << 4), &dummy1, &dummy2);
+ HPTE_V_BOLTED, &dummy1, &dummy2);
if (lpar_rc == H_SUCCESS)
return i;
@@ -922,11 +938,19 @@ static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
want_v = hpte_encode_avpn(vpn, psize, ssize);
- /* Bolted entries are always in the primary group */
+ /*
+ * We try to keep bolted entries always in primary hash
+ * But in some case we can find them in secondary too.
+ */
hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
- if (slot < 0)
- return -1;
+ if (slot < 0) {
+ /* Try in secondary */
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
+ if (slot < 0)
+ return -1;
+ }
return hpte_group + slot;
}
@@ -984,6 +1008,17 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
+/*
+ * Returned true if we are supporting this block size for the specified segment
+ * base page size and actual page size.
+ *
+ * Currently, we only support 8 size block.
+ */
+static inline bool is_supported_hlbkrm(int bpsize, int psize)
+{
+ return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
+}
+
/**
* H_BLOCK_REMOVE caller.
* @idx should point to the latest @param entry set with a PTEX.
@@ -1143,7 +1178,8 @@ static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
if (lock_tlbie)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
- if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+ /* Assuming THP size is 16M */
+ if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
hugepage_block_invalidate(slot, vpn, count, psize, ssize);
else
hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
@@ -1312,6 +1348,140 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
}
/*
+ * TLB Block Invalidate Characteristics
+ *
+ * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
+ * is able to process for each couple segment base page size, actual page size.
+ *
+ * The ibm,get-system-parameter properties is returning a buffer with the
+ * following layout:
+ *
+ * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
+ * -----------------
+ * TLB Block Invalidate Specifiers:
+ * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
+ * [ 1 byte Number of page sizes (N) that are supported for the specified
+ * TLB invalidate block size ]
+ * [ 1 byte Encoded segment base page size and actual page size
+ * MSB=0 means 4k segment base page size and actual page size
+ * MSB=1 the penc value in mmu_psize_def ]
+ * ...
+ * -----------------
+ * Next TLB Block Invalidate Specifiers...
+ * -----------------
+ * [ 0 ]
+ */
+static inline void set_hblkrm_bloc_size(int bpsize, int psize,
+ unsigned int block_size)
+{
+ if (block_size > hblkrm_size[bpsize][psize])
+ hblkrm_size[bpsize][psize] = block_size;
+}
+
+/*
+ * Decode the Encoded segment base page size and actual page size.
+ * PAPR specifies:
+ * - bit 7 is the L bit
+ * - bits 0-5 are the penc value
+ * If the L bit is 0, this means 4K segment base page size and actual page size
+ * otherwise the penc value should be read.
+ */
+#define HBLKRM_L_MASK 0x80
+#define HBLKRM_PENC_MASK 0x3f
+static inline void __init check_lp_set_hblkrm(unsigned int lp,
+ unsigned int block_size)
+{
+ unsigned int bpsize, psize;
+
+ /* First, check the L bit, if not set, this means 4K */
+ if ((lp & HBLKRM_L_MASK) == 0) {
+ set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
+ return;
+ }
+
+ lp &= HBLKRM_PENC_MASK;
+ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
+ struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ if (def->penc[psize] == lp) {
+ set_hblkrm_bloc_size(bpsize, psize, block_size);
+ return;
+ }
+ }
+ }
+}
+
+#define SPLPAR_TLB_BIC_TOKEN 50
+
+/*
+ * The size of the TLB Block Invalidate Characteristics is variable. But at the
+ * maximum it will be the number of possible page sizes *2 + 10 bytes.
+ * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
+ * (128 bytes) for the buffer to get plenty of space.
+ */
+#define SPLPAR_TLB_BIC_MAXLENGTH 128
+
+void __init pseries_lpar_read_hblkrm_characteristics(void)
+{
+ unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
+ int call_status, len, idx, bpsize;
+
+ if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+ return;
+
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ SPLPAR_TLB_BIC_TOKEN,
+ __pa(rtas_data_buf),
+ RTAS_DATA_BUF_SIZE);
+ memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
+ local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (call_status != 0) {
+ pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
+ __FILE__, __func__, call_status);
+ return;
+ }
+
+ /*
+ * The first two (2) bytes of the data in the buffer are the length of
+ * the returned data, not counting these first two (2) bytes.
+ */
+ len = be16_to_cpu(*((u16 *)local_buffer)) + 2;
+ if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
+ pr_warn("%s too large returned buffer %d", __func__, len);
+ return;
+ }
+
+ idx = 2;
+ while (idx < len) {
+ u8 block_shift = local_buffer[idx++];
+ u32 block_size;
+ unsigned int npsize;
+
+ if (!block_shift)
+ break;
+
+ block_size = 1 << block_shift;
+
+ for (npsize = local_buffer[idx++];
+ npsize > 0 && idx < len; npsize--)
+ check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
+ block_size);
+ }
+
+ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
+ for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
+ if (hblkrm_size[bpsize][idx])
+ pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
+ bpsize, idx, hblkrm_size[bpsize][idx]);
+}
+
+/*
* Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
* lock.
*/
@@ -1330,7 +1500,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
if (lock_tlbie)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
- if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
+ if (is_supported_hlbkrm(batch->psize, batch->psize)) {
do_block_remove(number, batch, param);
goto out;
}
@@ -1413,7 +1583,10 @@ static int pseries_lpar_resize_hpt_commit(void *data)
return 0;
}
-/* Must be called in user context */
+/*
+ * Must be called in process context. The caller must hold the
+ * cpus_lock.
+ */
static int pseries_lpar_resize_hpt(unsigned long shift)
{
struct hpt_resize_state state = {
@@ -1467,7 +1640,8 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
t1 = ktime_get();
- rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
+ rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
+ &state, NULL);
t2 = ktime_get();
@@ -1527,16 +1701,24 @@ void __init hpte_init_pseries(void)
mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
- register_process_table = pseries_lpar_register_process_table;
if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
+
+ /*
+ * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
+ * to inform the hypervisor that we wish to use the HPT.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pseries_lpar_register_process_table(0, 0, 0);
}
void radix_init_pseries(void)
{
pr_info("Using radix MMU under hypervisor\n");
- register_process_table = pseries_lpar_register_process_table;
+
+ pseries_lpar_register_process_table(__pa(process_tb),
+ 0, PRTB_SIZE_SHIFT - 12);
}
#ifdef CONFIG_PPC_SMLPAR
@@ -1818,30 +2000,17 @@ static int __init vpa_debugfs_init(void)
{
char name[16];
long i;
- static struct dentry *vpa_dir;
+ struct dentry *vpa_dir;
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return 0;
vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
- if (!vpa_dir) {
- pr_warn("%s: can't create vpa root dir\n", __func__);
- return -ENOMEM;
- }
/* set up the per-cpu vpa file*/
for_each_possible_cpu(i) {
- struct dentry *d;
-
sprintf(name, "cpu-%ld", i);
-
- d = debugfs_create_file(name, 0400, vpa_dir, (void *)i,
- &vpa_fops);
- if (!d) {
- pr_warn("%s: can't create per-cpu vpa file\n",
- __func__);
- return -ENOMEM;
- }
+ debugfs_create_file(name, 0400, vpa_dir, (void *)i, &vpa_fops);
}
return 0;
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index e33e8bc4b69b..b8d28ab88178 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -435,10 +435,10 @@ static void maxmem_data(struct seq_file *m)
{
unsigned long maxmem = 0;
- maxmem += drmem_info->n_lmbs * drmem_info->lmb_size;
+ maxmem += (unsigned long)drmem_info->n_lmbs * drmem_info->lmb_size;
maxmem += hugetlb_total_pages() * PAGE_SIZE;
- seq_printf(m, "MaxMem=%ld\n", maxmem);
+ seq_printf(m, "MaxMem=%lu\n", maxmem);
}
static int pseries_lparcfg_data(struct seq_file *m, void *v)
@@ -698,12 +698,12 @@ static int lparcfg_open(struct inode *inode, struct file *file)
return single_open(file, lparcfg_data, NULL);
}
-static const struct file_operations lparcfg_fops = {
- .read = seq_read,
- .write = lparcfg_write,
- .open = lparcfg_open,
- .release = single_release,
- .llseek = seq_lseek,
+static const struct proc_ops lparcfg_proc_ops = {
+ .proc_read = seq_read,
+ .proc_write = lparcfg_write,
+ .proc_open = lparcfg_open,
+ .proc_release = single_release,
+ .proc_lseek = seq_lseek,
};
static int __init lparcfg_init(void)
@@ -714,7 +714,7 @@ static int __init lparcfg_init(void)
if (firmware_has_feature(FW_FEATURE_SPLPAR))
mode |= 0200;
- if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) {
+ if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_proc_ops)) {
printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
return -EIO;
}
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index fe812bebdf5e..b571285f6c14 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -9,6 +9,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/stat.h>
#include <linux/completion.h>
@@ -207,7 +208,11 @@ static int update_dt_node(__be32 phandle, s32 scope)
prop_data += vd;
}
+
+ cond_resched();
}
+
+ cond_resched();
} while (rtas_rc == 1);
of_node_put(dn);
@@ -310,8 +315,12 @@ int pseries_devicetree_update(s32 scope)
add_dt_node(phandle, drc_index);
break;
}
+
+ cond_resched();
}
}
+
+ cond_resched();
} while (rc == 1);
kfree(rtas_buf);
diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
index 6df192f38f80..66dfd8256712 100644
--- a/arch/powerpc/platforms/pseries/of_helpers.c
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -45,14 +45,14 @@ struct device_node *pseries_of_derive_parent(const char *path)
int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
struct of_drc_info *data)
{
- const char *p;
+ const char *p = (char *)(*curval);
const __be32 *p2;
if (!data)
return -EINVAL;
/* Get drc-type:encode-string */
- p = data->drc_type = (char*) (*curval);
+ data->drc_type = (char *)p;
p = of_prop_next_string(*prop, p);
if (!p)
return -EINVAL;
@@ -65,9 +65,7 @@ int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
/* Get drc-index-start:encode-int */
p2 = (const __be32 *)p;
- p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
- if (!p2)
- return -EINVAL;
+ data->drc_index_start = be32_to_cpu(*p2);
/* Get drc-name-suffix-start:encode-int */
p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..0b4467e378e5 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -65,29 +65,22 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
cond_resched();
} while (rc == H_BUSY);
- if (rc) {
- /* H_OVERLAP needs a separate error path */
- if (rc == H_OVERLAP)
- return -EBUSY;
-
- dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
- return -ENXIO;
- }
+ if (rc)
+ return rc;
p->bound_addr = saved;
-
- dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
-
- return 0;
+ dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n",
+ p->drc_index, (unsigned long)saved);
+ return rc;
}
-static int drc_pmem_unbind(struct papr_scm_priv *p)
+static void drc_pmem_unbind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
uint64_t token = 0;
int64_t rc;
- dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
+ dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);
/* NB: unbind has the same retry requirements as drc_pmem_bind() */
do {
@@ -110,12 +103,48 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
if (rc)
dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
else
- dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
+ dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
p->drc_index);
- return rc == H_SUCCESS ? 0 : -ENXIO;
+ return;
+}
+
+static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
+{
+ unsigned long start_addr;
+ unsigned long end_addr;
+ unsigned long ret[PLPAR_HCALL_BUFSIZE];
+ int64_t rc;
+
+
+ rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+ p->drc_index, 0);
+ if (rc)
+ goto err_out;
+ start_addr = ret[0];
+
+ /* Make sure the full region is bound. */
+ rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+ p->drc_index, p->blocks - 1);
+ if (rc)
+ goto err_out;
+ end_addr = ret[0];
+
+ if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
+ goto err_out;
+
+ p->bound_addr = start_addr;
+ dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index, start_addr);
+ return rc;
+
+err_out:
+ dev_info(&p->pdev->dev,
+ "Failed to query, trying an unbind followed by bind");
+ drc_pmem_unbind(p);
+ return drc_pmem_bind(p);
}
+
static int papr_scm_meta_get(struct papr_scm_priv *p,
struct nd_cmd_get_config_data_hdr *hdr)
{
@@ -124,7 +153,7 @@ static int papr_scm_meta_get(struct papr_scm_priv *p,
int len, read;
int64_t ret;
- if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
+ if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
return -EINVAL;
for (len = hdr->in_length; len; len -= read) {
@@ -178,7 +207,7 @@ static int papr_scm_meta_set(struct papr_scm_priv *p,
__be64 data_be;
int64_t ret;
- if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
+ if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
return -EINVAL;
for (len = hdr->in_length; len; len -= wrote) {
@@ -256,25 +285,6 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
return 0;
}
-static const struct attribute_group *region_attr_groups[] = {
- &nd_region_attribute_group,
- &nd_device_attribute_group,
- &nd_mapping_attribute_group,
- &nd_numa_attribute_group,
- NULL,
-};
-
-static const struct attribute_group *bus_attr_groups[] = {
- &nvdimm_bus_attribute_group,
- NULL,
-};
-
-static const struct attribute_group *papr_scm_dimm_groups[] = {
- &nvdimm_attribute_group,
- &nd_device_attribute_group,
- NULL,
-};
-
static inline int papr_scm_node(int node)
{
int min_dist = INT_MAX, dist;
@@ -305,7 +315,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
p->bus_desc.ndctl = papr_scm_ndctl;
p->bus_desc.module = THIS_MODULE;
p->bus_desc.of_node = p->pdev->dev.of_node;
- p->bus_desc.attr_groups = bus_attr_groups;
p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
if (!p->bus_desc.provider_name)
@@ -314,14 +323,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
if (!p->bus) {
dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
+ kfree(p->bus_desc.provider_name);
return -ENXIO;
}
dimm_flags = 0;
set_bit(NDD_ALIASING, &dimm_flags);
- p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
- dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+ p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
+ PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
if (!p->nvdimm) {
dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
goto err;
@@ -338,7 +348,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
memset(&ndr_desc, 0, sizeof(ndr_desc));
- ndr_desc.attr_groups = region_attr_groups;
target_nid = dev_to_node(&p->pdev->dev);
online_nid = papr_scm_node(target_nid);
ndr_desc.numa_node = online_nid;
@@ -349,7 +358,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
ndr_desc.mapping = &mapping;
ndr_desc.num_mappings = 1;
ndr_desc.nd_set = &p->nd_set;
- set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
if (p->is_volatile)
p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
@@ -436,14 +444,14 @@ static int papr_scm_probe(struct platform_device *pdev)
rc = drc_pmem_bind(p);
/* If phyp says drc memory still bound then force unbound and retry */
- if (rc == -EBUSY) {
- dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
- drc_pmem_unbind(p);
- rc = drc_pmem_bind(p);
- }
+ if (rc == H_OVERLAP)
+ rc = drc_pmem_query_n_bind(p);
- if (rc)
+ if (rc != H_SUCCESS) {
+ dev_err(&p->pdev->dev, "bind err: %d\n", rc);
+ rc = -ENXIO;
goto err;
+ }
/* setup the resource for the newly bound range */
p->res.start = p->bound_addr;
@@ -470,6 +478,7 @@ static int papr_scm_remove(struct platform_device *pdev)
nvdimm_bus_unregister(p->bus);
drc_pmem_unbind(p);
+ kfree(p->bus_desc.provider_name);
kfree(p);
return 0;
@@ -485,7 +494,6 @@ static struct platform_driver papr_scm_driver = {
.remove = papr_scm_remove,
.driver = {
.name = "papr_scm",
- .owner = THIS_MODULE,
.of_match_table = papr_scm_match,
},
};
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 1eae1d09980c..911534b89c85 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -192,7 +192,7 @@ int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
/* Allocate PCI data */
- add_dev_pci_data(pdev);
+ add_sriov_vf_pdns(pdev);
return pseries_pci_sriov_enable(pdev, num_vfs);
}
@@ -204,7 +204,7 @@ int pseries_pcibios_sriov_disable(struct pci_dev *pdev)
/* Releasing pe_num_map */
kfree(pdn->pe_num_map);
/* Release PCI data */
- remove_dev_pci_data(pdev);
+ remove_sriov_vf_pdns(pdev);
pci_vf_drivers_autoprobe(pdev, true);
return 0;
}
@@ -229,8 +229,7 @@ void __init pSeries_final_fixup(void)
pSeries_request_regions();
- eeh_probe_devices();
- eeh_addr_cache_build();
+ eeh_show_enabled();
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 561917fa54a8..361986e4354e 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code
* for RPA-compliant PPC64 platform.
@@ -6,23 +7,6 @@
*
* Updates, 2005, John Rose <johnrose@austin.ibm.com>
* Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/pci.h>
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index a6624d4bd9d0..13fa370a87e4 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -112,5 +112,6 @@ static inline unsigned long cmo_get_page_size(void)
int dlpar_workqueue_init(void);
void pseries_setup_rfi_flush(void);
+void pseries_lpar_read_hblkrm_characteristics(void);
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index a96874f9492f..09e98d301db0 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -36,6 +36,7 @@ static int sysfs_entries;
static u32 cpu_to_drc_index(int cpu)
{
struct device_node *dn = NULL;
+ struct property *info;
int thread_index;
int rc = 1;
u32 ret = 0;
@@ -47,20 +48,18 @@ static u32 cpu_to_drc_index(int cpu)
/* Convert logical cpu number to core number */
thread_index = cpu_core_index_of_thread(cpu);
- if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
- struct property *info = NULL;
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info) {
struct of_drc_info drc;
int j;
u32 num_set_entries;
const __be32 *value;
- info = of_find_property(dn, "ibm,drc-info", NULL);
- if (info == NULL)
- goto err_of_node_put;
-
value = of_prop_next_u32(info, NULL, &num_set_entries);
if (!value)
goto err_of_node_put;
+ else
+ value++;
for (j = 0; j < num_set_entries; j++) {
@@ -110,6 +109,7 @@ err:
static int drc_index_to_cpu(u32 drc_index)
{
struct device_node *dn = NULL;
+ struct property *info;
const int *indexes;
int thread_index = 0, cpu = 0;
int rc = 1;
@@ -117,21 +117,18 @@ static int drc_index_to_cpu(u32 drc_index)
dn = of_find_node_by_path("/cpus");
if (dn == NULL)
goto err;
-
- if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
- struct property *info = NULL;
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info) {
struct of_drc_info drc;
int j;
u32 num_set_entries;
const __be32 *value;
- info = of_find_property(dn, "ibm,drc-info", NULL);
- if (info == NULL)
- goto err_of_node_put;
-
value = of_prop_next_u32(info, NULL, &num_set_entries);
if (!value)
goto err_of_node_put;
+ else
+ value++;
for (j = 0; j < num_set_entries; j++) {
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index f16fdd0f71f7..1d7f973c647b 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -76,6 +76,7 @@ struct pseries_mc_errorlog {
#define MC_ERROR_TYPE_UE 0x00
#define MC_ERROR_TYPE_SLB 0x01
#define MC_ERROR_TYPE_ERAT 0x02
+#define MC_ERROR_TYPE_UNKNOWN 0x03
#define MC_ERROR_TYPE_TLB 0x04
#define MC_ERROR_TYPE_D_CACHE 0x05
#define MC_ERROR_TYPE_I_CACHE 0x07
@@ -87,6 +88,9 @@ struct pseries_mc_errorlog {
#define MC_ERROR_UE_LOAD_STORE 3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
+#define UE_LOGICAL_ADDR_PROVIDED 0x20
+
#define MC_ERROR_SLB_PARITY 0
#define MC_ERROR_SLB_MULTIHIT 1
#define MC_ERROR_SLB_INDETERMINATE 2
@@ -113,27 +117,6 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
}
}
-static
-inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
-{
- __be64 addr = 0;
-
- switch (mlog->error_type) {
- case MC_ERROR_TYPE_UE:
- if (mlog->sub_err_type & 0x40)
- addr = mlog->effective_address;
- break;
- case MC_ERROR_TYPE_SLB:
- case MC_ERROR_TYPE_ERAT:
- case MC_ERROR_TYPE_TLB:
- if (mlog->sub_err_type & 0x80)
- addr = mlog->effective_address;
- default:
- break;
- }
- return be64_to_cpu(addr);
-}
-
/*
* Enable the hotplug interrupt late because processing them may touch other
* devices or systems (e.g. hugepages) that have not been initialized at the
@@ -272,7 +255,7 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
break;
case EPOW_SYSTEM_SHUTDOWN:
- handle_system_shutdown(epow_log->event_modifier);
+ handle_system_shutdown(modifier);
break;
case EPOW_SYSTEM_HALT:
@@ -511,160 +494,165 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
return 0; /* need to perform reset */
}
-#define VAL_TO_STRING(ar, val) \
- (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")
-static void pseries_print_mce_info(struct pt_regs *regs,
- struct rtas_error_log *errp)
+static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
{
- const char *level, *sevstr;
+ struct mce_error_info mce_err = { 0 };
+ unsigned long eaddr = 0, paddr = 0;
struct pseries_errorlog *pseries_log;
struct pseries_mc_errorlog *mce_log;
- u8 error_type, err_sub_type;
- u64 addr;
- u8 initiator = rtas_error_initiator(errp);
int disposition = rtas_error_disposition(errp);
+ int initiator = rtas_error_initiator(errp);
+ int severity = rtas_error_severity(errp);
+ u8 error_type, err_sub_type;
- static const char * const initiators[] = {
- [0] = "Unknown",
- [1] = "CPU",
- [2] = "PCI",
- [3] = "ISA",
- [4] = "Memory",
- [5] = "Power Mgmt",
- };
- static const char * const mc_err_types[] = {
- [0] = "UE",
- [1] = "SLB",
- [2] = "ERAT",
- [3] = "Unknown",
- [4] = "TLB",
- [5] = "D-Cache",
- [6] = "Unknown",
- [7] = "I-Cache",
- };
- static const char * const mc_ue_types[] = {
- [0] = "Indeterminate",
- [1] = "Instruction fetch",
- [2] = "Page table walk ifetch",
- [3] = "Load/Store",
- [4] = "Page table walk Load/Store",
- };
-
- /* SLB sub errors valid values are 0x0, 0x1, 0x2 */
- static const char * const mc_slb_types[] = {
- [0] = "Parity",
- [1] = "Multihit",
- [2] = "Indeterminate",
- };
-
- /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
- static const char * const mc_soft_types[] = {
- [0] = "Unknown",
- [1] = "Parity",
- [2] = "Multihit",
- [3] = "Indeterminate",
- };
-
- if (!rtas_error_extended(errp)) {
- pr_err("Machine check interrupt: Missing extended error log\n");
- return;
- }
+ if (initiator == RTAS_INITIATOR_UNKNOWN)
+ mce_err.initiator = MCE_INITIATOR_UNKNOWN;
+ else if (initiator == RTAS_INITIATOR_CPU)
+ mce_err.initiator = MCE_INITIATOR_CPU;
+ else if (initiator == RTAS_INITIATOR_PCI)
+ mce_err.initiator = MCE_INITIATOR_PCI;
+ else if (initiator == RTAS_INITIATOR_ISA)
+ mce_err.initiator = MCE_INITIATOR_ISA;
+ else if (initiator == RTAS_INITIATOR_MEMORY)
+ mce_err.initiator = MCE_INITIATOR_MEMORY;
+ else if (initiator == RTAS_INITIATOR_POWERMGM)
+ mce_err.initiator = MCE_INITIATOR_POWERMGM;
+ else
+ mce_err.initiator = MCE_INITIATOR_UNKNOWN;
+
+ if (severity == RTAS_SEVERITY_NO_ERROR)
+ mce_err.severity = MCE_SEV_NO_ERROR;
+ else if (severity == RTAS_SEVERITY_EVENT)
+ mce_err.severity = MCE_SEV_WARNING;
+ else if (severity == RTAS_SEVERITY_WARNING)
+ mce_err.severity = MCE_SEV_WARNING;
+ else if (severity == RTAS_SEVERITY_ERROR_SYNC)
+ mce_err.severity = MCE_SEV_SEVERE;
+ else if (severity == RTAS_SEVERITY_ERROR)
+ mce_err.severity = MCE_SEV_SEVERE;
+ else if (severity == RTAS_SEVERITY_FATAL)
+ mce_err.severity = MCE_SEV_FATAL;
+ else
+ mce_err.severity = MCE_SEV_FATAL;
+
+ if (severity <= RTAS_SEVERITY_ERROR_SYNC)
+ mce_err.sync_error = true;
+ else
+ mce_err.sync_error = false;
+
+ mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err.error_class = MCE_ECLASS_UNKNOWN;
+
+ if (!rtas_error_extended(errp))
+ goto out;
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
if (pseries_log == NULL)
- return;
+ goto out;
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
-
error_type = mce_log->error_type;
err_sub_type = rtas_mc_error_sub_type(mce_log);
- switch (rtas_error_severity(errp)) {
- case RTAS_SEVERITY_NO_ERROR:
- level = KERN_INFO;
- sevstr = "Harmless";
- break;
- case RTAS_SEVERITY_WARNING:
- level = KERN_WARNING;
- sevstr = "";
- break;
- case RTAS_SEVERITY_ERROR:
- case RTAS_SEVERITY_ERROR_SYNC:
- level = KERN_ERR;
- sevstr = "Severe";
- break;
- case RTAS_SEVERITY_FATAL:
- default:
- level = KERN_ERR;
- sevstr = "Fatal";
- break;
- }
+ switch (mce_log->error_type) {
+ case MC_ERROR_TYPE_UE:
+ mce_err.error_type = MCE_ERROR_TYPE_UE;
+ switch (err_sub_type) {
+ case MC_ERROR_UE_IFETCH:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
+ break;
+ case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ case MC_ERROR_UE_LOAD_STORE:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+ break;
+ case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ break;
+ case MC_ERROR_UE_INDETERMINATE:
+ default:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
+ eaddr = be64_to_cpu(mce_log->effective_address);
-#ifdef CONFIG_PPC_BOOK3S_64
- /* Display faulty slb contents for SLB errors. */
- if (error_type == MC_ERROR_TYPE_SLB)
- slb_dump_contents(local_paca->mce_faulty_slbs);
-#endif
+ if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
+ paddr = be64_to_cpu(mce_log->logical_address);
+ } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
+ unsigned long pfn;
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- disposition == RTAS_DISP_FULLY_RECOVERED ?
- "Recovered" : "Not recovered");
- if (user_mode(regs)) {
- printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level,
- regs->nip, current->pid, current->comm);
- } else {
- printk("%s NIP [%016lx]: %pS\n", level, regs->nip,
- (void *)regs->nip);
- }
- printk("%s Initiator: %s\n", level,
- VAL_TO_STRING(initiators, initiator));
+ pfn = addr_to_pfn(regs, eaddr);
+ if (pfn != ULONG_MAX)
+ paddr = pfn << PAGE_SHIFT;
+ }
- switch (error_type) {
- case MC_ERROR_TYPE_UE:
- printk("%s Error type: %s [%s]\n", level,
- VAL_TO_STRING(mc_err_types, error_type),
- VAL_TO_STRING(mc_ue_types, err_sub_type));
break;
case MC_ERROR_TYPE_SLB:
- printk("%s Error type: %s [%s]\n", level,
- VAL_TO_STRING(mc_err_types, error_type),
- VAL_TO_STRING(mc_slb_types, err_sub_type));
+ mce_err.error_type = MCE_ERROR_TYPE_SLB;
+ switch (err_sub_type) {
+ case MC_ERROR_SLB_PARITY:
+ mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ break;
+ case MC_ERROR_SLB_MULTIHIT:
+ mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ break;
+ case MC_ERROR_SLB_INDETERMINATE:
+ default:
+ mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & 0x80)
+ eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_ERAT:
+ mce_err.error_type = MCE_ERROR_TYPE_ERAT;
+ switch (err_sub_type) {
+ case MC_ERROR_ERAT_PARITY:
+ mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
+ break;
+ case MC_ERROR_ERAT_MULTIHIT:
+ mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ break;
+ case MC_ERROR_ERAT_INDETERMINATE:
+ default:
+ mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & 0x80)
+ eaddr = be64_to_cpu(mce_log->effective_address);
+ break;
case MC_ERROR_TYPE_TLB:
- printk("%s Error type: %s [%s]\n", level,
- VAL_TO_STRING(mc_err_types, error_type),
- VAL_TO_STRING(mc_soft_types, err_sub_type));
+ mce_err.error_type = MCE_ERROR_TYPE_TLB;
+ switch (err_sub_type) {
+ case MC_ERROR_TLB_PARITY:
+ mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
+ break;
+ case MC_ERROR_TLB_MULTIHIT:
+ mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ break;
+ case MC_ERROR_TLB_INDETERMINATE:
+ default:
+ mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & 0x80)
+ eaddr = be64_to_cpu(mce_log->effective_address);
+ break;
+ case MC_ERROR_TYPE_D_CACHE:
+ mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
break;
+ case MC_ERROR_TYPE_I_CACHE:
+ mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
+ break;
+ case MC_ERROR_TYPE_UNKNOWN:
default:
- printk("%s Error type: %s\n", level,
- VAL_TO_STRING(mc_err_types, error_type));
+ mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
break;
}
- addr = rtas_mc_get_effective_addr(mce_log);
- if (addr)
- printk("%s Effective address: %016llx\n", level, addr);
-}
-
-static int mce_handle_error(struct rtas_error_log *errp)
-{
- struct pseries_errorlog *pseries_log;
- struct pseries_mc_errorlog *mce_log;
- int disposition = rtas_error_disposition(errp);
- u8 error_type;
-
- if (!rtas_error_extended(errp))
- goto out;
-
- pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
- if (pseries_log == NULL)
- goto out;
-
- mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
- error_type = mce_log->error_type;
-
#ifdef CONFIG_PPC_BOOK3S_64
if (disposition == RTAS_DISP_NOT_RECOVERED) {
switch (error_type) {
@@ -682,98 +670,24 @@ static int mce_handle_error(struct rtas_error_log *errp)
slb_save_contents(local_paca->mce_faulty_slbs);
flush_and_reload_slb();
disposition = RTAS_DISP_FULLY_RECOVERED;
- rtas_set_disposition_recovered(errp);
break;
default:
break;
}
+ } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
+ /* Platform corrected itself but could be degraded */
+ printk(KERN_ERR "MCE: limited recovery, system may "
+ "be degraded\n");
+ disposition = RTAS_DISP_FULLY_RECOVERED;
}
#endif
out:
- return disposition;
-}
-
-#ifdef CONFIG_MEMORY_FAILURE
-
-static DEFINE_PER_CPU(int, rtas_ue_count);
-static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
+ save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
+ &mce_err, regs->nip, eaddr, paddr);
-#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
-#define UE_LOGICAL_ADDR_PROVIDED 0x20
-
-
-static void pseries_hwpoison_work_fn(struct work_struct *work)
-{
- unsigned long paddr;
- int index;
-
- while (__this_cpu_read(rtas_ue_count) > 0) {
- index = __this_cpu_read(rtas_ue_count) - 1;
- paddr = __this_cpu_read(rtas_ue_paddr[index]);
- memory_failure(paddr >> PAGE_SHIFT, 0);
- __this_cpu_dec(rtas_ue_count);
- }
-}
-
-static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
-
-static void queue_ue_paddr(unsigned long paddr)
-{
- int index;
-
- index = __this_cpu_inc_return(rtas_ue_count) - 1;
- if (index >= MAX_MC_EVT) {
- __this_cpu_dec(rtas_ue_count);
- return;
- }
- this_cpu_write(rtas_ue_paddr[index], paddr);
- schedule_work(&hwpoison_work);
-}
-
-static void pseries_do_memory_failure(struct pt_regs *regs,
- struct pseries_mc_errorlog *mce_log)
-{
- unsigned long paddr;
-
- if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
- paddr = be64_to_cpu(mce_log->logical_address);
- } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
- unsigned long pfn;
-
- pfn = addr_to_pfn(regs,
- be64_to_cpu(mce_log->effective_address));
- if (pfn == ULONG_MAX)
- return;
- paddr = pfn << PAGE_SHIFT;
- } else {
- return;
- }
- queue_ue_paddr(paddr);
-}
-
-static void pseries_process_ue(struct pt_regs *regs,
- struct rtas_error_log *errp)
-{
- struct pseries_errorlog *pseries_log;
- struct pseries_mc_errorlog *mce_log;
-
- if (!rtas_error_extended(errp))
- return;
-
- pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
- if (!pseries_log)
- return;
-
- mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
-
- if (mce_log->error_type == MC_ERROR_TYPE_UE)
- pseries_do_memory_failure(regs, mce_log);
+ return disposition;
}
-#else
-static inline void pseries_process_ue(struct pt_regs *regs,
- struct rtas_error_log *errp) { }
-#endif /*CONFIG_MEMORY_FAILURE */
/*
* Process MCE rtas errlog event.
@@ -795,49 +709,51 @@ static void mce_process_errlog_event(struct irq_work *work)
* Return 1 if corrected (or delivered a signal).
* Return 0 if there is nothing we can do.
*/
-static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
+static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
{
int recovered = 0;
- int disposition = rtas_error_disposition(err);
-
- pseries_print_mce_info(regs, err);
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
-
- } else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
+ } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */
recovered = 1;
+ } else if (evt->severity == MCE_SEV_FATAL) {
+ /* Fatal machine check */
+ pr_err("Machine check interrupt is fatal\n");
+ recovered = 0;
+ }
- } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
- /* Platform corrected itself but could be degraded */
- printk(KERN_ERR "MCE: limited recovery, system may "
- "be degraded\n");
- recovered = 1;
-
- } else if (user_mode(regs) && !is_global_init(current) &&
- rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {
-
+ if (!recovered && evt->sync_error) {
/*
- * If we received a synchronous error when in userspace
- * kill the task. Firmware may report details of the fail
- * asynchronously, so we can't rely on the target and type
- * fields being valid here.
+ * Try to kill processes if we get a synchronous machine check
+ * (e.g., one caused by execution of this instruction). This
+ * will devolve into a panic if we try to kill init or are in
+ * an interrupt etc.
+ *
+ * TODO: Queue up this address for hwpoisioning later.
+ * TODO: This is not quite right for d-side machine
+ * checks ->nip is not necessarily the important
+ * address.
*/
- printk(KERN_ERR "MCE: uncorrectable error, killing task "
- "%s:%d\n", current->comm, current->pid);
-
- _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
- recovered = 1;
+ if ((user_mode(regs))) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else if (die_will_crash()) {
+ /*
+ * die() would kill the kernel, so better to go via
+ * the platform reboot code that will log the
+ * machine check.
+ */
+ recovered = 0;
+ } else {
+ die("Machine check", regs, SIGBUS);
+ recovered = 1;
+ }
}
- pseries_process_ue(regs, err);
-
- /* Queue irq work to log this rtas event later. */
- irq_work_queue(&mce_errlog_process_work);
-
return recovered;
}
@@ -853,14 +769,21 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
*/
int pSeries_machine_check_exception(struct pt_regs *regs)
{
- struct rtas_error_log *errp;
+ struct machine_check_event evt;
- if (fwnmi_active) {
- fwnmi_release_errinfo();
- errp = fwnmi_get_errlog();
- if (errp && recover_mce(regs, errp))
- return 1;
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return 0;
+
+ /* Print things out */
+ if (evt.version != MCE_V1) {
+ pr_err("Machine Check Exception, Unknown event version %d !\n",
+ evt.version);
+ return 0;
}
+ machine_check_print_event_info(&evt, user_mode(regs), false);
+
+ if (recover_mce(regs, &evt))
+ return 1;
return 0;
}
@@ -877,7 +800,12 @@ long pseries_machine_check_realmode(struct pt_regs *regs)
* to panic. Hence we will call it as soon as we go into
* virtual mode.
*/
- disposition = mce_handle_error(errp);
+ disposition = mce_handle_error(regs, errp);
+ fwnmi_release_errinfo();
+
+ /* Queue irq work to log this rtas event later. */
+ irq_work_queue(&mce_errlog_process_work);
+
if (disposition == RTAS_DISP_FULLY_RECOVERED)
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 8a9c4fb95b8b..7f7369fec46b 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -391,9 +391,9 @@ out:
return rv ? rv : count;
}
-static const struct file_operations ofdt_fops = {
- .write = ofdt_write,
- .llseek = noop_llseek,
+static const struct proc_ops ofdt_proc_ops = {
+ .proc_write = ofdt_write,
+ .proc_lseek = noop_llseek,
};
/* create /proc/powerpc/ofdt write-only by root */
@@ -401,7 +401,7 @@ static int proc_ppc64_create_ofdt(void)
{
struct proc_dir_entry *ent;
- ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_fops);
+ ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_proc_ops);
if (ent)
proc_set_size(ent, 0);
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
new file mode 100644
index 000000000000..70c3013fdd07
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "rtas fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
+
+#include "rtas-fadump.h"
+
+static struct rtas_fadump_mem_struct fdm;
+static const struct rtas_fadump_mem_struct *fdm_active;
+
+static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
+ const struct rtas_fadump_mem_struct *fdm)
+{
+ fadump_conf->boot_mem_dest_addr =
+ be64_to_cpu(fdm->rmr_region.destination_address);
+
+ fadump_conf->fadumphdr_addr = (fadump_conf->boot_mem_dest_addr +
+ fadump_conf->boot_memory_size);
+}
+
+/*
+ * This function is called in the capture kernel to get configuration details
+ * setup in the first kernel and passed to the f/w.
+ */
+static void rtas_fadump_get_config(struct fw_dump *fadump_conf,
+ const struct rtas_fadump_mem_struct *fdm)
+{
+ fadump_conf->boot_mem_addr[0] =
+ be64_to_cpu(fdm->rmr_region.source_address);
+ fadump_conf->boot_mem_sz[0] = be64_to_cpu(fdm->rmr_region.source_len);
+ fadump_conf->boot_memory_size = fadump_conf->boot_mem_sz[0];
+
+ fadump_conf->boot_mem_top = fadump_conf->boot_memory_size;
+ fadump_conf->boot_mem_regs_cnt = 1;
+
+ /*
+ * Start address of reserve dump area (permanent reservation) for
+ * re-registering FADump after dump capture.
+ */
+ fadump_conf->reserve_dump_area_start =
+ be64_to_cpu(fdm->cpu_state_data.destination_address);
+
+ rtas_fadump_update_config(fadump_conf, fdm);
+}
+
+static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+{
+ u64 addr = fadump_conf->reserve_dump_area_start;
+
+ memset(&fdm, 0, sizeof(struct rtas_fadump_mem_struct));
+ addr = addr & PAGE_MASK;
+
+ fdm.header.dump_format_version = cpu_to_be32(0x00000001);
+ fdm.header.dump_num_sections = cpu_to_be16(3);
+ fdm.header.dump_status_flag = 0;
+ fdm.header.offset_first_dump_section =
+ cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct,
+ cpu_state_data));
+
+ /*
+ * Fields for disk dump option.
+ * We are not using disk dump option, hence set these fields to 0.
+ */
+ fdm.header.dd_block_size = 0;
+ fdm.header.dd_block_offset = 0;
+ fdm.header.dd_num_blocks = 0;
+ fdm.header.dd_offset_disk_path = 0;
+
+ /* set 0 to disable an automatic dump-reboot. */
+ fdm.header.max_time_auto = 0;
+
+ /* Kernel dump sections */
+ /* cpu state data section. */
+ fdm.cpu_state_data.request_flag =
+ cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.cpu_state_data.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA);
+ fdm.cpu_state_data.source_address = 0;
+ fdm.cpu_state_data.source_len =
+ cpu_to_be64(fadump_conf->cpu_state_data_size);
+ fdm.cpu_state_data.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->cpu_state_data_size;
+
+ /* hpte region section */
+ fdm.hpte_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.hpte_region.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_HPTE_REGION);
+ fdm.hpte_region.source_address = 0;
+ fdm.hpte_region.source_len =
+ cpu_to_be64(fadump_conf->hpte_region_size);
+ fdm.hpte_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->hpte_region_size;
+
+ /* RMA region section */
+ fdm.rmr_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.rmr_region.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION);
+ fdm.rmr_region.source_address = cpu_to_be64(0);
+ fdm.rmr_region.source_len = cpu_to_be64(fadump_conf->boot_memory_size);
+ fdm.rmr_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->boot_memory_size;
+
+ rtas_fadump_update_config(fadump_conf, &fdm);
+
+ return addr;
+}
+
+static u64 rtas_fadump_get_bootmem_min(void)
+{
+ return RTAS_FADUMP_MIN_BOOT_MEM;
+}
+
+static int rtas_fadump_register(struct fw_dump *fadump_conf)
+{
+ unsigned int wait_time;
+ int rc, err = -EIO;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_REGISTER, &fdm,
+ sizeof(struct rtas_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+
+ } while (wait_time);
+
+ switch (rc) {
+ case 0:
+ pr_info("Registration is successful!\n");
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+ case -1:
+ pr_err("Failed to register. Hardware Error(%d).\n", rc);
+ break;
+ case -3:
+ if (!is_fadump_boot_mem_contiguous())
+ pr_err("Can't have holes in boot memory area.\n");
+ else if (!is_fadump_reserved_mem_contiguous())
+ pr_err("Can't have holes in reserved memory area.\n");
+
+ pr_err("Failed to register. Parameter Error(%d).\n", rc);
+ err = -EINVAL;
+ break;
+ case -9:
+ pr_err("Already registered!\n");
+ fadump_conf->dump_registered = 1;
+ err = -EEXIST;
+ break;
+ default:
+ pr_err("Failed to register. Unknown Error(%d).\n", rc);
+ break;
+ }
+
+ return err;
+}
+
+static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
+{
+ unsigned int wait_time;
+ int rc;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_UNREGISTER, &fdm,
+ sizeof(struct rtas_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ pr_err("Failed to un-register - unexpected error(%d).\n", rc);
+ return -EIO;
+ }
+
+ fadump_conf->dump_registered = 0;
+ return 0;
+}
+
+static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
+{
+ unsigned int wait_time;
+ int rc;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_INVALIDATE, fdm_active,
+ sizeof(struct rtas_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
+ return -EIO;
+ }
+
+ fadump_conf->dump_active = 0;
+ fdm_active = NULL;
+ return 0;
+}
+
+#define RTAS_FADUMP_GPR_MASK 0xffffff0000000000
+static inline int rtas_fadump_gpr_index(u64 id)
+{
+ char str[3];
+ int i = -1;
+
+ if ((id & RTAS_FADUMP_GPR_MASK) == fadump_str_to_u64("GPR")) {
+ /* get the digits at the end */
+ id &= ~RTAS_FADUMP_GPR_MASK;
+ id >>= 24;
+ str[2] = '\0';
+ str[1] = id & 0xff;
+ str[0] = (id >> 8) & 0xff;
+ if (kstrtoint(str, 10, &i))
+ i = -EINVAL;
+ if (i > 31)
+ i = -1;
+ }
+ return i;
+}
+
+void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+{
+ int i;
+
+ i = rtas_fadump_gpr_index(reg_id);
+ if (i >= 0)
+ regs->gpr[i] = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("NIA"))
+ regs->nip = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("MSR"))
+ regs->msr = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("CTR"))
+ regs->ctr = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("LR"))
+ regs->link = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("XER"))
+ regs->xer = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("CR"))
+ regs->ccr = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("DAR"))
+ regs->dar = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("DSISR"))
+ regs->dsisr = (unsigned long)reg_val;
+}
+
+static struct rtas_fadump_reg_entry*
+rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry,
+ struct pt_regs *regs)
+{
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ while (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND")) {
+ rtas_fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
+ be64_to_cpu(reg_entry->reg_value));
+ reg_entry++;
+ }
+ reg_entry++;
+ return reg_entry;
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ */
+static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+ struct rtas_fadump_reg_save_area_header *reg_header;
+ struct fadump_crash_info_header *fdh = NULL;
+ struct rtas_fadump_reg_entry *reg_entry;
+ u32 num_cpus, *note_buf;
+ int i, rc = 0, cpu = 0;
+ struct pt_regs regs;
+ unsigned long addr;
+ void *vaddr;
+
+ addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
+ vaddr = __va(addr);
+
+ reg_header = vaddr;
+ if (be64_to_cpu(reg_header->magic_number) !=
+ fadump_str_to_u64("REGSAVE")) {
+ pr_err("Unable to read register save area.\n");
+ return -ENOENT;
+ }
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
+ pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
+
+ vaddr += be32_to_cpu(reg_header->num_cpu_offset);
+ num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
+ pr_debug("NumCpus : %u\n", num_cpus);
+ vaddr += sizeof(u32);
+ reg_entry = (struct rtas_fadump_reg_entry *)vaddr;
+
+ rc = fadump_setup_cpu_notes_buf(num_cpus);
+ if (rc != 0)
+ return rc;
+
+ note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
+
+ if (fadump_conf->fadumphdr_addr)
+ fdh = __va(fadump_conf->fadumphdr_addr);
+
+ for (i = 0; i < num_cpus; i++) {
+ if (be64_to_cpu(reg_entry->reg_id) !=
+ fadump_str_to_u64("CPUSTRT")) {
+ pr_err("Unable to read CPU state data\n");
+ rc = -ENOENT;
+ goto error_out;
+ }
+ /* Lower 4 bytes of reg_value contains logical cpu id */
+ cpu = (be64_to_cpu(reg_entry->reg_value) &
+ RTAS_FADUMP_CPU_ID_MASK);
+ if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
+ RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+ continue;
+ }
+ pr_debug("Reading register data for cpu %d...\n", cpu);
+ if (fdh && fdh->crashing_cpu == cpu) {
+ regs = fdh->regs;
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+ } else {
+ reg_entry++;
+ reg_entry = rtas_fadump_read_regs(reg_entry, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ }
+ }
+ final_note(note_buf);
+
+ if (fdh) {
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+ }
+ return 0;
+
+error_out:
+ fadump_free_cpu_notes_buf();
+ return rc;
+
+}
+
+/*
+ * Validate and process the dump data stored by firmware before exporting
+ * it through '/proc/vmcore'.
+ */
+static int __init rtas_fadump_process(struct fw_dump *fadump_conf)
+{
+ struct fadump_crash_info_header *fdh;
+ int rc = 0;
+
+ if (!fdm_active || !fadump_conf->fadumphdr_addr)
+ return -EINVAL;
+
+ /* Check if the dump data is valid. */
+ if ((be16_to_cpu(fdm_active->header.dump_status_flag) ==
+ RTAS_FADUMP_ERROR_FLAG) ||
+ (fdm_active->cpu_state_data.error_flags != 0) ||
+ (fdm_active->rmr_region.error_flags != 0)) {
+ pr_err("Dump taken by platform is not valid\n");
+ return -EINVAL;
+ }
+ if ((fdm_active->rmr_region.bytes_dumped !=
+ fdm_active->rmr_region.source_len) ||
+ !fdm_active->cpu_state_data.bytes_dumped) {
+ pr_err("Dump taken by platform is incomplete\n");
+ return -EINVAL;
+ }
+
+ /* Validate the fadump crash info header */
+ fdh = __va(fadump_conf->fadumphdr_addr);
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ pr_err("Crash info header is not valid.\n");
+ return -EINVAL;
+ }
+
+ rc = rtas_fadump_build_cpu_notes(fadump_conf);
+ if (rc)
+ return rc;
+
+ /*
+ * We are done validating dump info and elfcore header is now ready
+ * to be exported. set elfcorehdr_addr so that vmcore module will
+ * export the elfcore header through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+ return 0;
+}
+
+static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
+ struct seq_file *m)
+{
+ const struct rtas_fadump_section *cpu_data_section;
+ const struct rtas_fadump_mem_struct *fdm_ptr;
+
+ if (fdm_active)
+ fdm_ptr = fdm_active;
+ else
+ fdm_ptr = &fdm;
+
+ cpu_data_section = &(fdm_ptr->cpu_state_data);
+ seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(cpu_data_section->destination_address),
+ be64_to_cpu(cpu_data_section->destination_address) +
+ be64_to_cpu(cpu_data_section->source_len) - 1,
+ be64_to_cpu(cpu_data_section->source_len),
+ be64_to_cpu(cpu_data_section->bytes_dumped));
+
+ seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(fdm_ptr->hpte_region.destination_address),
+ be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
+ be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
+ be64_to_cpu(fdm_ptr->hpte_region.source_len),
+ be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
+
+ seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+ be64_to_cpu(fdm_ptr->rmr_region.source_address),
+ be64_to_cpu(fdm_ptr->rmr_region.destination_address));
+ seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+ be64_to_cpu(fdm_ptr->rmr_region.source_len),
+ be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
+
+ /* Dump is active. Show reserved area start address. */
+ if (fdm_active) {
+ seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n",
+ fadump_conf->reserve_dump_area_start);
+ }
+}
+
+static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh,
+ const char *msg)
+{
+ /* Call ibm,os-term rtas call to trigger firmware assisted dump */
+ rtas_os_term((char *)msg);
+}
+
+static struct fadump_ops rtas_fadump_ops = {
+ .fadump_init_mem_struct = rtas_fadump_init_mem_struct,
+ .fadump_get_bootmem_min = rtas_fadump_get_bootmem_min,
+ .fadump_register = rtas_fadump_register,
+ .fadump_unregister = rtas_fadump_unregister,
+ .fadump_invalidate = rtas_fadump_invalidate,
+ .fadump_process = rtas_fadump_process,
+ .fadump_region_show = rtas_fadump_region_show,
+ .fadump_trigger = rtas_fadump_trigger,
+};
+
+void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+ int i, size, num_sections;
+ const __be32 *sections;
+ const __be32 *token;
+
+ /*
+ * Check if Firmware Assisted dump is supported. if yes, check
+ * if dump has been initiated on last reboot.
+ */
+ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+ if (!token)
+ return;
+
+ fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token);
+ fadump_conf->ops = &rtas_fadump_ops;
+ fadump_conf->fadump_supported = 1;
+
+ /* Firmware supports 64-bit value for size, align it to pagesize. */
+ fadump_conf->max_copy_size = _ALIGN_DOWN(U64_MAX, PAGE_SIZE);
+
+ /*
+ * The 'ibm,kernel-dump' rtas node is present only if there is
+ * dump data waiting for us.
+ */
+ fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+ if (fdm_active) {
+ pr_info("Firmware-assisted dump is active.\n");
+ fadump_conf->dump_active = 1;
+ rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active));
+ }
+
+ /* Get the sizes required to store dump data for the firmware provided
+ * dump sections.
+ * For each dump section type supported, a 32bit cell which defines
+ * the ID of a supported section followed by two 32 bit cells which
+ * gives the size of the section in bytes.
+ */
+ sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+ &size);
+
+ if (!sections)
+ return;
+
+ num_sections = size / (3 * sizeof(u32));
+
+ for (i = 0; i < num_sections; i++, sections += 3) {
+ u32 type = (u32)of_read_number(sections, 1);
+
+ switch (type) {
+ case RTAS_FADUMP_CPU_STATE_DATA:
+ fadump_conf->cpu_state_data_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ case RTAS_FADUMP_HPTE_REGION:
+ fadump_conf->hpte_region_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ }
+ }
+}
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
new file mode 100644
index 000000000000..fd59bd7ca9c3
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _PSERIES_RTAS_FADUMP_H
+#define _PSERIES_RTAS_FADUMP_H
+
+/*
+ * On some Power systems where RMO is 128MB, it still requires minimum of
+ * 256MB for kernel to boot successfully. When kdump infrastructure is
+ * configured to save vmcore over network, we run into OOM issue while
+ * loading modules related to network setup. Hence we need additional 64M
+ * of memory to avoid OOM issue.
+ */
+#define RTAS_FADUMP_MIN_BOOT_MEM ((0x1UL << 28) + (0x1UL << 26))
+
+/* Firmware provided dump sections */
+#define RTAS_FADUMP_CPU_STATE_DATA 0x0001
+#define RTAS_FADUMP_HPTE_REGION 0x0002
+#define RTAS_FADUMP_REAL_MODE_REGION 0x0011
+
+/* Dump request flag */
+#define RTAS_FADUMP_REQUEST_FLAG 0x00000001
+
+/* Dump status flag */
+#define RTAS_FADUMP_ERROR_FLAG 0x2000
+
+/* Kernel Dump section info */
+struct rtas_fadump_section {
+ __be32 request_flag;
+ __be16 source_data_type;
+ __be16 error_flags;
+ __be64 source_address;
+ __be64 source_len;
+ __be64 bytes_dumped;
+ __be64 destination_address;
+};
+
+/* ibm,configure-kernel-dump header. */
+struct rtas_fadump_section_header {
+ __be32 dump_format_version;
+ __be16 dump_num_sections;
+ __be16 dump_status_flag;
+ __be32 offset_first_dump_section;
+
+ /* Fields for disk dump option. */
+ __be32 dd_block_size;
+ __be64 dd_block_offset;
+ __be64 dd_num_blocks;
+ __be32 dd_offset_disk_path;
+
+ /* Maximum time allowed to prevent an automatic dump-reboot. */
+ __be32 max_time_auto;
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through rtas call.
+ *
+ * No disk dump option. Hence disk dump path string section is not included.
+ */
+struct rtas_fadump_mem_struct {
+ struct rtas_fadump_section_header header;
+
+ /* Kernel dump sections */
+ struct rtas_fadump_section cpu_state_data;
+ struct rtas_fadump_section hpte_region;
+
+ /*
+ * TODO: Extend multiple boot memory regions support in the kernel
+ * for this platform.
+ */
+ struct rtas_fadump_section rmr_region;
+};
+
+/*
+ * The firmware-assisted dump format.
+ *
+ * The register save area is an area in the partition's memory used to preserve
+ * the register contents (CPU state data) for the active CPUs during a firmware
+ * assisted dump. The dump format contains register save area header followed
+ * by register entries. Each list of registers for a CPU starts with "CPUSTRT"
+ * and ends with "CPUEND".
+ */
+
+/* Register save area header. */
+struct rtas_fadump_reg_save_area_header {
+ __be64 magic_number;
+ __be32 version;
+ __be32 num_cpu_offset;
+};
+
+/* Register entry. */
+struct rtas_fadump_reg_entry {
+ __be64 reg_id;
+ __be64 reg_value;
+};
+
+/* Utility macros */
+#define RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry) \
+({ \
+ while (be64_to_cpu(reg_entry->reg_id) != \
+ fadump_str_to_u64("CPUEND")) \
+ reg_entry++; \
+ reg_entry++; \
+})
+
+#define RTAS_FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
+
+#endif /* _PSERIES_RTAS_FADUMP_H */
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index a0001280503c..2879c4f0ceb7 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -152,13 +152,12 @@ static int scanlog_release(struct inode * inode, struct file * file)
return 0;
}
-static const struct file_operations scanlog_fops = {
- .owner = THIS_MODULE,
- .read = scanlog_read,
- .write = scanlog_write,
- .open = scanlog_open,
- .release = scanlog_release,
- .llseek = noop_llseek,
+static const struct proc_ops scanlog_proc_ops = {
+ .proc_read = scanlog_read,
+ .proc_write = scanlog_write,
+ .proc_open = scanlog_open,
+ .proc_release = scanlog_release,
+ .proc_lseek = noop_llseek,
};
static int __init scanlog_init(void)
@@ -176,7 +175,7 @@ static int __init scanlog_init(void)
goto err;
ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL,
- &scanlog_fops);
+ &scanlog_proc_ops);
if (!ent)
goto err;
return 0;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f5940cc71c37..0c8421dd01ab 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -69,10 +69,14 @@
#include <asm/security_features.h>
#include <asm/asm-const.h>
#include <asm/swiotlb.h>
+#include <asm/svm.h>
#include "pseries.h"
#include "../../../../drivers/pci/pci.h"
+DEFINE_STATIC_KEY_FALSE(shared_processor);
+EXPORT_SYMBOL_GPL(shared_processor);
+
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@@ -141,17 +145,19 @@ static void __init fwnmi_init(void)
}
#ifdef CONFIG_PPC_BOOK3S_64
- /* Allocate per cpu slb area to save old slb contents during MCE */
- size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
- slb_ptr = memblock_alloc_try_nid_raw(size, sizeof(struct slb_entry),
- MEMBLOCK_LOW_LIMIT, ppc64_rma_size,
- NUMA_NO_NODE);
- if (!slb_ptr)
- panic("Failed to allocate %zu bytes below %pa for slb area\n",
- size, &ppc64_rma_size);
-
- for_each_possible_cpu(i)
- paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+ if (!radix_enabled()) {
+ /* Allocate per cpu area to save old slb contents during MCE */
+ size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
+ slb_ptr = memblock_alloc_try_nid_raw(size,
+ sizeof(struct slb_entry), MEMBLOCK_LOW_LIMIT,
+ ppc64_rma_size, NUMA_NO_NODE);
+ if (!slb_ptr)
+ panic("Failed to allocate %zu bytes below %pa for slb area\n",
+ size, &ppc64_rma_size);
+
+ for_each_possible_cpu(i)
+ paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+ }
#endif
}
@@ -297,8 +303,10 @@ static inline int alloc_dispatch_logs(void)
static int alloc_dispatch_log_kmem_cache(void)
{
+ void (*ctor)(void *) = get_dtl_cache_ctor();
+
dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
- DISPATCH_LOG_BYTES, 0, NULL);
+ DISPATCH_LOG_BYTES, 0, ctor);
if (!dtl_cache) {
pr_warn("Failed to create dispatch trace log buffer cache\n");
pr_warn("Stolen time statistics will be unreliable\n");
@@ -316,6 +324,9 @@ static void pseries_lpar_idle(void)
* low power mode by ceding processor to hypervisor
*/
+ if (!prep_irq_for_idle())
+ return;
+
/* Indicate to hypervisor that we are idle. */
get_lppaca()->idle = 1;
@@ -736,6 +747,7 @@ static void __init pSeries_setup_arch(void)
pseries_setup_rfi_flush();
setup_stf_barrier();
+ pseries_lpar_read_hblkrm_characteristics();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
@@ -749,6 +761,10 @@ static void __init pSeries_setup_arch(void)
if (firmware_has_feature(FW_FEATURE_LPAR)) {
vpa_init(boot_cpuid);
+
+ if (lppaca_shared_proc(get_lppaca()))
+ static_branch_enable(&shared_processor);
+
ppc_md.power_save = pseries_lpar_idle;
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
#ifdef CONFIG_PCI_IOV
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 4b3ef8d9c63f..ad61e90032da 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -41,6 +41,7 @@
#include <asm/dbell.h>
#include <asm/plpar_wrappers.h>
#include <asm/code-patching.h>
+#include <asm/svm.h>
#include "pseries.h"
#include "offline_states.h"
@@ -221,7 +222,7 @@ static __init void pSeries_smp_probe_xics(void)
{
xics_smp_probe();
- if (cpu_has_feature(CPU_FTR_DBELL))
+ if (cpu_has_feature(CPU_FTR_DBELL) && !is_secure_guest())
smp_ops->cause_ipi = smp_pseries_cause_ipi;
else
smp_ops->cause_ipi = icp_ops->cause_ipi;
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
new file mode 100644
index 000000000000..40c0637203d5
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Secure VM platform
+ *
+ * Copyright 2018 IBM Corporation
+ * Author: Anshuman Khandual <khandual@linux.vnet.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <asm/machdep.h>
+#include <asm/svm.h>
+#include <asm/swiotlb.h>
+#include <asm/ultravisor.h>
+
+static int __init init_svm(void)
+{
+ if (!is_secure_guest())
+ return 0;
+
+ /* Don't release the SWIOTLB buffer. */
+ ppc_swiotlb_enable = 1;
+
+ /*
+ * Since the guest memory is inaccessible to the host, devices always
+ * need to use the SWIOTLB buffer for DMA even if dma_capable() says
+ * otherwise.
+ */
+ swiotlb_force = SWIOTLB_FORCE;
+
+ /* Share the SWIOTLB buffer with the host. */
+ swiotlb_update_mem_attributes();
+
+ return 0;
+}
+machine_early_initcall(pseries, init_svm);
+
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+ if (!PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ uv_unshare_page(PHYS_PFN(__pa(addr)), numpages);
+
+ return 0;
+}
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+ if (!PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ uv_share_page(PHYS_PFN(__pa(addr)), numpages);
+
+ return 0;
+}
+
+/* There's one dispatch log per CPU. */
+#define NR_DTL_PAGE (DISPATCH_LOG_BYTES * CONFIG_NR_CPUS / PAGE_SIZE)
+
+static struct page *dtl_page_store[NR_DTL_PAGE];
+static long dtl_nr_pages;
+
+static bool is_dtl_page_shared(struct page *page)
+{
+ long i;
+
+ for (i = 0; i < dtl_nr_pages; i++)
+ if (dtl_page_store[i] == page)
+ return true;
+
+ return false;
+}
+
+void dtl_cache_ctor(void *addr)
+{
+ unsigned long pfn = PHYS_PFN(__pa(addr));
+ struct page *page = pfn_to_page(pfn);
+
+ if (!is_dtl_page_shared(page)) {
+ dtl_page_store[dtl_nr_pages] = page;
+ dtl_nr_pages++;
+ WARN_ON(dtl_nr_pages >= NR_DTL_PAGE);
+ uv_share_page(pfn, 1);
+ }
+}
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 6601b9d404dc..f682b7babc09 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -605,6 +605,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
.unmap_page = vio_dma_iommu_unmap_page,
.dma_supported = dma_iommu_dma_supported,
.get_required_mask = dma_iommu_get_required_mask,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
/**
@@ -1174,6 +1176,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
if (tbl == NULL)
return NULL;
+ kref_init(&tbl->it_kref);
+
of_parse_dma_window(dev->dev.of_node, dma_window,
&tbl->it_index, &offset, &size);
@@ -1191,7 +1195,7 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
else
tbl->it_ops = &iommu_table_pseries_ops;
- return iommu_init_table(tbl, -1);
+ return iommu_init_table(tbl, -1, 0, 0);
}
/**
OpenPOWER on IntegriCloud