/* Copyright 2013-2014 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#define pr_fmt(fmt) "FSPMEMERR: " fmt
#include <skiboot.h>
#include <opal.h>
#include <opal-msg.h>
#include <lock.h>
#include <fsp.h>
#include <errorlog.h>

/* FSP sends real address of 4K memory page. */
#define MEM_ERR_PAGE_SIZE_4K	(1UL << 12)

/* Maximum number of error events to hold until Linux consumes them. */
#define MERR_MAX_RECORD		1024

struct fsp_mem_err_node {
	struct list_node list;
	struct OpalMemoryErrorData data;
};

static LIST_HEAD(merr_free_list);
static LIST_HEAD(mem_error_list);
/*
 * Lock protects updates to the merr_free_list and mem_error_list lists.
 */
static struct lock mem_err_lock = LOCK_UNLOCKED;

DEFINE_LOG_ENTRY(OPAL_RC_MEM_ERR_RES, OPAL_PLATFORM_ERR_EVT, OPAL_MEM_ERR,
		 OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL, OPAL_NA);

DEFINE_LOG_ENTRY(OPAL_RC_MEM_ERR_DEALLOC, OPAL_PLATFORM_ERR_EVT, OPAL_MEM_ERR,
		 OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL, OPAL_NA);

static bool send_response_to_fsp(u32 cmd_sub_mod)
{
	struct fsp_msg *rsp;
	int rc = -ENOMEM;

	rsp = fsp_mkmsg(cmd_sub_mod, 0);
	if (rsp)
		rc = fsp_queue_msg(rsp, fsp_freemsg);
	if (rc) {
		fsp_freemsg(rsp);
		/* XXX Generate error logs */
		prerror("Error %d queueing FSP memory error reply\n", rc);
		return false;
	}
	return true;
}

/*
 * Queue up the memory error message for delivery.
 *
 * queue_event_for_delivery gets called from two places:
 * 1) from queue_mem_err_node when a new FSP memory error is available and
 * 2) from the completion callback indicating that Linux has consumed a
 *    message.
 *
 * TODO:
 * There is a chance that we may not get a free slot to queue our event
 * for delivery to Linux during either of the above invocations. In that
 * case we end up holding events until the next FSP memory error comes in.
 * We need to address this case either here OR fix up the messaging
 * infrastructure to make sure at least one slot will always be available
 * per message type.
 *
 * XXX: BenH: I changed the msg infrastructure to attempt an allocation
 *	      in that case, at least until we clarify a bit better how
 *	      we want to handle things.
 */
static void queue_event_for_delivery(void *data __unused)
{
	struct fsp_mem_err_node *entry;
	uint64_t *merr_data;
	int rc;

	lock(&mem_err_lock);
	entry = list_pop(&mem_error_list, struct fsp_mem_err_node, list);
	unlock(&mem_err_lock);

	if (!entry)
		return;

	/*
	 * struct OpalMemoryErrorData is of (4 * 64 bits) size and is a well
	 * packed structure. Hence use a uint64_t pointer to pass the entire
	 * structure using 4 params in the generic message format.
	 */
	merr_data = (uint64_t *)&entry->data;

	/* Queue up for delivery */
	rc = opal_queue_msg(OPAL_MSG_MEM_ERR, NULL,
			    queue_event_for_delivery,
			    merr_data[0], merr_data[1],
			    merr_data[2], merr_data[3]);
	lock(&mem_err_lock);
	if (rc) {
		/*
		 * Failed to queue up the event for delivery. No free slot
		 * available. There is a chance that we are trying to queue
		 * up multiple events at the same time. We may already have
		 * at least one event queued up, in which case we will be
		 * called again through the completion callback and we should
		 * be able to grab an empty slot then.
		 *
		 * For now, put this node back on mem_error_list.
		 */
		list_add(&mem_error_list, &entry->list);
	} else
		list_add(&merr_free_list, &entry->list);
	unlock(&mem_err_lock);
}

static int queue_mem_err_node(struct OpalMemoryErrorData *merr_evt)
{
	struct fsp_mem_err_node *entry;

	lock(&mem_err_lock);
	entry = list_pop(&merr_free_list, struct fsp_mem_err_node, list);
	if (!entry) {
		printf("Failed to queue up memory error event.\n");
		unlock(&mem_err_lock);
		return -ENOMEM;
	}

	entry->data = *merr_evt;
	list_add(&mem_error_list, &entry->list);
	unlock(&mem_err_lock);

	/* Queue up the event for delivery to the OS. */
	queue_event_for_delivery(NULL);
	return 0;
}

/* Check if a memory resilience event for the same address already exists. */
static bool is_resilience_event_exist(u64 paddr)
{
	struct fsp_mem_err_node *entry;
	struct OpalMemoryErrorData *merr_evt;
	int found = 0;

	lock(&mem_err_lock);
	list_for_each(&mem_error_list, entry, list) {
		merr_evt = &entry->data;
		if ((merr_evt->type == OPAL_MEM_ERR_TYPE_RESILIENCE) &&
		    (be64_to_cpu(merr_evt->u.resilience.physical_address_start)
								== paddr)) {
			found = 1;
			break;
		}
	}
	unlock(&mem_err_lock);
	return !!found;
}

/*
 * Handle a Memory Resilience error message.
 * Section 28.2 of Hypervisor to FSP Mailbox Interface Specification.
 *
 * The flow for a Memory Resilience Event is:
 * 1. The PRD component in the FSP gets a recoverable attention from hardware
 *    when there is a correctable/uncorrectable memory error and a page needs
 *    to be freed.
 * 2. PRD sends a Memory Resilience command to the hypervisor with the real
 *    address of the 4K memory page in which the error occurred.
 * 3. The hypervisor acknowledges with a status immediately. Immediate
 *    acknowledgment doesn't require the freeing of the page to be completed.
 */
static bool handle_memory_resilience(u32 cmd_sub_mod, u64 paddr)
{
	int rc = 0;
	struct OpalMemoryErrorData mem_err_evt;
	struct errorlog *buf;

	memset(&mem_err_evt, 0, sizeof(struct OpalMemoryErrorData));

	/* Check arguments */
	if (paddr == 0) {
		prerror("memory resilience: Invalid real address.\n");
		return send_response_to_fsp(FSP_RSP_MEM_RES |
					    FSP_STATUS_GENERIC_ERROR);
	}

	/* Check if an event already exists for the same address. */
	if (is_resilience_event_exist(paddr))
		goto send_response;

	/* Populate an event. */
	mem_err_evt.version = OpalMemErr_V1;
	mem_err_evt.type = OPAL_MEM_ERR_TYPE_RESILIENCE;

	switch (cmd_sub_mod) {
	case FSP_CMD_MEM_RES_CE:
		/*
		 * Should we keep a counter for corrected errors in
		 * sapphire OR let Linux (PowerNV) handle it?
		 *
		 * For now, send corrected errors to Linux and let
		 * Linux handle corrected error thresholding.
		 */
		mem_err_evt.flags |= cpu_to_be16(OPAL_MEM_CORRECTED_ERROR);
		mem_err_evt.u.resilience.resil_err_type =
						OPAL_MEM_RESILIENCE_CE;
		break;
	case FSP_CMD_MEM_RES_UE:
		mem_err_evt.u.resilience.resil_err_type =
						OPAL_MEM_RESILIENCE_UE;
		break;
	case FSP_CMD_MEM_RES_UE_SCRB:
		mem_err_evt.u.resilience.resil_err_type =
						OPAL_MEM_RESILIENCE_UE_SCRUB;
		break;
	}
	mem_err_evt.u.resilience.physical_address_start = cpu_to_be64(paddr);
	mem_err_evt.u.resilience.physical_address_end =
				cpu_to_be64(paddr + MEM_ERR_PAGE_SIZE_4K);

	/*
	 * Queue up the event and inform the OS about it.
	 */
	rc = queue_mem_err_node(&mem_err_evt);

send_response:
	/* Queue up an OK response to the resilience message itself */
	if (!rc)
		return send_response_to_fsp(FSP_RSP_MEM_RES);
	else {
		buf = opal_elog_create(&e_info(OPAL_RC_MEM_ERR_RES), 0);
		log_append_msg(buf,
			"OPAL_MEM_ERR: Cannot queue up memory "
			"resilience error event to the OS");
		log_add_section(buf, 0x44455350);
		log_append_data(buf, (char *) &mem_err_evt,
				sizeof(struct OpalMemoryErrorData));
		log_commit(buf);
		return false;
	}
}

/* Update an existing event entry if a match is found. */
static bool update_memory_deallocation_event(u64 paddr_start, u64 paddr_end)
{
	struct fsp_mem_err_node *entry;
	struct OpalMemoryErrorData *merr_evt;
	int found = 0;

	lock(&mem_err_lock);
	list_for_each(&mem_error_list, entry, list) {
		merr_evt = &entry->data;
		if ((merr_evt->type == OPAL_MEM_ERR_TYPE_DYN_DALLOC) &&
		    (be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start)
							== paddr_start)) {
			found = 1;
			if (be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end)
							< paddr_end)
				merr_evt->u.dyn_dealloc.physical_address_end =
						cpu_to_be64(paddr_end);
			break;
		}
	}
	unlock(&mem_err_lock);
	return !!found;
}

/*
 * Handle a dynamic memory deallocation message.
 *
 * When a condition occurs in which we need to do a large scale memory
 * deallocation, PRD will send the starting and ending addresses of an area
 * of memory to the hypervisor. The hypervisor then needs to use this to
 * deallocate all pages between and including the addresses.
 */
static bool handle_memory_deallocation(u64 paddr_start, u64 paddr_end)
{
	int rc = 0;
	u8 err = 0;
	struct OpalMemoryErrorData mem_err_evt;
	struct errorlog *buf;

	memset(&mem_err_evt, 0, sizeof(struct OpalMemoryErrorData));

	/* Check arguments */
	if ((paddr_start == 0) || (paddr_end == 0)) {
		prerror("memory deallocation: Invalid "
			"starting/ending real address.\n");
		err = FSP_STATUS_GENERIC_ERROR;
	}

	/* If we had an error, send a response to the FSP and return */
	if (err)
		return send_response_to_fsp(FSP_RSP_MEM_DYN_DEALLOC | err);

	/*
	 * The FSP can send dynamic memory deallocation multiple times for the
	 * same address/address range. Hence check and update if we already
	 * have the same event queued.
	 */
	if (update_memory_deallocation_event(paddr_start, paddr_end))
		goto send_response;

	/* Populate a new event. */
	mem_err_evt.version = OpalMemErr_V1;
	mem_err_evt.type = OPAL_MEM_ERR_TYPE_DYN_DALLOC;
	mem_err_evt.u.dyn_dealloc.dyn_err_type = OPAL_MEM_DYNAMIC_DEALLOC;
	mem_err_evt.u.dyn_dealloc.physical_address_start =
						cpu_to_be64(paddr_start);
	mem_err_evt.u.dyn_dealloc.physical_address_end =
						cpu_to_be64(paddr_end);

	/* Queue up the event and inform the OS about it. */
	rc = queue_mem_err_node(&mem_err_evt);

send_response:
	/* Queue up an OK response to the memory deallocation message itself */
	if (!rc)
		return send_response_to_fsp(FSP_RSP_MEM_DYN_DEALLOC);
	else {
		buf = opal_elog_create(&e_info(OPAL_RC_MEM_ERR_DEALLOC), 0);
		log_append_msg(buf,
			"OPAL_MEM_ERR: Cannot queue up memory "
			"deallocation error event to the OS");
		log_add_section(buf, 0x44455350);
		log_append_data(buf, (char *)&mem_err_evt,
				sizeof(struct OpalMemoryErrorData));
		log_commit(buf);
		return false;
	}
}

/*
 * Receive a memory error message and handle it.
 */
static bool fsp_mem_err_msg(u32 cmd_sub_mod, struct fsp_msg *msg)
{
	u64 paddr_start, paddr_end;

	printf("Received 0x%08x command\n", cmd_sub_mod);
	switch (cmd_sub_mod) {
	case FSP_CMD_MEM_RES_CE:
	case FSP_CMD_MEM_RES_UE:
	case FSP_CMD_MEM_RES_UE_SCRB:
		/*
		 * We get the memory resilience command from the FSP for
		 * correctable/uncorrectable/scrub UE errors with the real
		 * address of the 4K memory page in which the error occurred.
		 */
		paddr_start = be64_to_cpu(*((__be64 *)&msg->data.words[0]));
		printf("Got memory resilience error message for "
		       "paddr=0x%016llx\n", paddr_start);
		return handle_memory_resilience(cmd_sub_mod, paddr_start);
	case FSP_CMD_MEM_DYN_DEALLOC:
		paddr_start = be64_to_cpu(*((__be64 *)&msg->data.words[0]));
		paddr_end = be64_to_cpu(*((__be64 *)&msg->data.words[2]));
		printf("Got dynamic memory deallocation message: "
		       "paddr_start=0x%016llx, paddr_end=0x%016llx\n",
		       paddr_start, paddr_end);
		return handle_memory_deallocation(paddr_start, paddr_end);
	}
	return false;
}

/*
 * Pre-allocate memory to hold up to MERR_MAX_RECORD memory error events
 * until Linux consumes them.
 */
static int init_merr_free_list(uint32_t num_entries)
{
	struct fsp_mem_err_node *entry;
	int i;

	entry = zalloc(sizeof(struct fsp_mem_err_node) * num_entries);
	if (!entry)
		return -ENOMEM;

	for (i = 0; i < num_entries; ++i, entry++)
		list_add_tail(&merr_free_list, &entry->list);

	return 0;
}

static struct fsp_client fsp_mem_err_client = {
	.message = fsp_mem_err_msg,
};

void fsp_memory_err_init(void)
{
	int rc;

	printf("Initializing fsp memory error handling.\n");
	/* If we have an FSP, register for notifications */
	if (!fsp_present())
		return;

	/* Pre-allocate memory for MERR_MAX_RECORD records */
	rc = init_merr_free_list(MERR_MAX_RECORD);
	if (rc < 0)
		return;

	fsp_register_client(&fsp_mem_err_client, FSP_MCLASS_MEMORY_ERR);
}
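
/*
 * Illustrative sketch only (never compiled): a minimal example of the
 * consumer side of the packing done in queue_event_for_delivery() above.
 * Because struct OpalMemoryErrorData is exactly four 64-bit words, the four
 * opal_queue_msg() parameters can be copied straight back into the
 * structure. The function name and the params[] argument below are
 * assumptions made up for this sketch; they are not an existing API.
 */
#if 0
static void example_unpack_mem_err(const uint64_t params[4])
{
	struct OpalMemoryErrorData evt;

	/* Assumes sizeof(evt) == 4 * sizeof(uint64_t), as noted above. */
	memcpy(&evt, params, sizeof(evt));

	if (evt.type == OPAL_MEM_ERR_TYPE_RESILIENCE)
		/* Addresses were stored big-endian by handle_memory_resilience() */
		printf("resilience event: paddr=0x%016llx\n",
		       (unsigned long long)be64_to_cpu(
				evt.u.resilience.physical_address_start));
}
#endif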