/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* $Source: src/usr/hwpf/hwp/build_winkle_images/p8_slw_build/pore_inline_assembler.c $ */
/*                                                                        */
/* OpenPOWER HostBoot Project                                             */
/*                                                                        */
/* COPYRIGHT International Business Machines Corp. 2012,2014              */
/*                                                                        */
/* Licensed under the Apache License, Version 2.0 (the "License");        */
/* you may not use this file except in compliance with the License.       */
/* You may obtain a copy of the License at                                */
/*                                                                        */
/*     http://www.apache.org/licenses/LICENSE-2.0                         */
/*                                                                        */
/* Unless required by applicable law or agreed to in writing, software    */
/* distributed under the License is distributed on an "AS IS" BASIS,      */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        */
/* implied. See the License for the specific language governing           */
/* permissions and limitations under the License.                         */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */
// $Id: pore_inline_assembler.c,v 1.22 2013/12/11 00:11:14 bcbrock Exp $
// $Source: /afs/awd/projects/eclipz/KnowledgeBase/.cvsroot/eclipz/chips/p8/working/procedures/pore_inline_assembler.c,v $
//-----------------------------------------------------------------------------
// *! (C) Copyright International Business Machines Corp. 2013
// *! All Rights Reserved -- Property of IBM
// *! *** IBM Confidential ***
//-----------------------------------------------------------------------------

// ** WARNING : This file is maintained as part of the OCC firmware.  Do **
// ** not edit this file in the PMX area or the hardware procedure area  **
// ** as any changes will be lost.                                       **

/// \file pore_inline_assembler.c
/// \brief Inline PGAS assembler for PgP/Stage1 PORE
///
/// \page pore_inline_assembler PORE Inline Assembler and Disassembler
///
/// Several procedures targeting the PORE engine require inline assembly and
/// disassembly of PORE code, that is, they require that PORE instructions be
/// assembled/disassembled directly into/from a host memory buffer. This page
/// describes these facilities.  The APIs described here are implemented in
/// the files pore_inline.h, pore_inline_assembler.c and
/// pore_inline_disassembler.c.  Both the inline assembler and disassembler
/// conform to the PGAS assembly format for PORE.
///
/// Both inline assembly and disassembly make use of a PoreInlineContext
/// structure. This structure represents the state of a memory area being
/// targeted for inline assembly and disassembly.  The context is initialized
/// with the pore_inline_context_create() API, and a pointer to an instance of
/// this structure appears as the first argument of all assembler/disassembler
/// APIs. As assembly/disassembly progresses the PoreInlineContext keeps
/// track of how much host memory area has been filled by assembled code or
/// scanned by the disassembler.
///
/// Assembler/disassembler APIs are predicates that return 0 for success and a
/// non-zero error code for failure.  In the event of failure, the error code
/// (a small integer) is also stored in the \a error field of the context
/// structure.  String forms of the error codes are also available in the
/// global array pore_inline_error_strings[].
///
/// The assembler always produces PORE code in the PORE-native big-endian
/// format.  Likewise, the disassembler assumes the host memory to be
/// disassembled contains PORE code in big-endian format.
///
/// \section Initialization
///
/// Before invoking inline assembly/disassembly APIs, an instance of a
/// PoreInlineContext structure must be initialized using the
/// pore_inline_context_create() API.  For assembly, the context describes the
/// host memory buffer that will contain the assembled code.  For disassembly,
/// the context describes the host memory area that contains the code to be
/// disassembled. Full documentation is available for
/// pore_inline_context_create(), including documentation for options that
/// control assembly and disassembly.  The implementation also provides a
/// 'copy operator' for the context, pore_inline_context_copy().
///
/// An example of initializing a context for inline assembly with parity
/// checking appears below.
///
/// \code 
///
/// PoreInlineContext ctx;
/// uint32_t buf[BUFSIZE];
///
/// rc = pore_inline_context_create(&ctx, buf, BUFSIZE * 4, 0,
///                                 PORE_INLINE_CHECK_PARITY);
/// if (rc) . . . Handle Error
///
/// \endcode
///
/// Applications that reuse the same memory buffer for assembling and
/// processing multiple PORE programs can 'reset' the context between uses by
/// using the pore_inline_context_reset() API.  pore_inline_context_reset()
/// resets the location counter and memory extent to their initial (creation)
/// values, and the context error code is cleared.  Any options specified at
/// creation remain as they were.
///
/// \section Assembler
///
/// The inline assembler implements each PORE/PGAS instruction as individual
/// function calls.  The APIs are consistently named \c pore_\<OPCODE\>, where
/// \c \<OPCODE\> is a PGAS mnemonic in upper case.  The arguments to each
/// opcode appear in the same order that they appear in the source-level
/// assembler, with appropriate C-language types. The supported opcode APIs
/// are defined in pore_inline.h
/// 
/// Since the PORE instruction APIs are effectively predicates, linear code
/// sequences are easily assembled using the C-language logical OR construct.
/// Any non-0 return code will immediately break the sequence and set the
/// expression value to 1.  The failure code can then be recovered from the \a
/// error field of the context.  This coding technique is illustrated in the
/// following example of assembling a memory-memory copy sequence.
///
/// \code 
///
/// PoreInlineContext ctx;
/// int error;
///
/// . . . // Initialize context
///
/// error =
///     pore_LD(&ctx, D0, 0, A0) ||
///     pore_STD(&ctx, D0, 0, A1);
///
/// if (error) <. . . Handle error based on ctx.error>
///
/// \endcode
///
/// The above example generates code equivalent to
///
/// \code
///
///         ld      D0, 0, A0
///         std     D0, 0, A1
///
/// \endcode
///
/// Again, if an error were to occur during assembly, inline assembly would
/// stop (and the logical OR would terminate) at the point of failure. In
/// particular, the inline assembler will never allow assembled code to exceed
/// the bounds of the memory area defined by the initial call of
/// pore_inline_context_create() that defines the assembler memory space.
///
///
/// \subsection Register Names and Other Mnemonics
///
/// The header file pore_inline.h defines macros for the register mnemonics.
///
/// - D0, D1 : 64-bit data registers
/// - A0, A1 : 32-bit address registers
/// - P0, P1 : 7-bit Pervasive chiplet id registers
/// - CTR : 24-bit counter register
/// - PC : 48-bit Program Counter
/// - ETR : 64-bit EXE-Trigger Register (Low-order 32 bits are writable)
/// - EMR : The Error Mask Register
/// - IFR : ID/Flags Register
/// - SPRG0 : 32-bit Special-Purpose General Register 0
///
/// Mnemonics for the condition code bits are also defined by pore_inline.h
/// using the PGAS mnemonics.
///
///
/// \subsection Assembling Branches
///
/// Opcodes that implement relative branches require that the branch target be
/// specified as a <em> location counter </em>. Once initialized, the current
/// location counter is available as the \a lc field of the PoreInlineContext
/// object controlling the assembly.  The \a lc field is the only field
/// (besides the error code held in the \a error field) that application code
/// should ever reference. The inline assembler also provides a typedef
/// PoreInlineLocation to use for location counters, as well as the macro
/// PORE_LOCATION() to define a location variable inline with the code flow.
/// 
/// \subsubsection Backward Branches
///
/// Backward branches are straightforward.  For example, the memory-memory
/// copy example from earlier can be converted into a loop as shown below.  The
/// \a loop_target variable is initialized with the location counter of the
/// first instruction of the loop.  The final instruction of the loop then
/// branches back to the \a loop_target.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineLocation loop_target = 0; // See ** below the example
/// int error;
///
/// . . . // Initialize context
///
/// error =
///     PORE_LOCATION(&ctx, loop_target) ||
///     pore_LD(&ctx, D0, 0, A0)         ||
///     pore_STD(&ctx, D0, 0, A1)      	 ||
///     pore_ADDS(&ctx, A0, A0, 8)     	 ||
///     pore_ADDS(&ctx, A1, A1, 8)     	 ||
///     pore_LOOP(&ctx, loop_target);
///
/// if (error) <. . . Handle error based on ctx.error>
///
/// \endcode
///
/// The above inline assembler sequence is equivalent to the PGAS code
/// sequence:
///
/// \code
///
/// loop_target:
///        ld      D0, 0, A0
///        std     D0, 0, A1
///        adds    A0, A0, 8
///        adds    A1, A1, 8
///        loop    loop_target
///
/// \endcode
///
/// ** Location counters used as loop targets may need to be initialized,
/// otherwise the compiler may issue a warning that the variable "may be used
/// uninitialized", although in well-written code this would never happen.
///
///
/// \subsubsection Forward Branches
///
/// Forward branches are more complex.  Since the target location counter is
/// not known until the target has been assembled, the inline assembler
/// provides the API pore_inline_branch_fixup() to fix up forward branches
/// once the actual target is known.  This is illustrated in the simple code
/// sequence below, where an instruction is conditionally skipped.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineLocation source = 0, target = 0;
/// int error, rc;
///
/// . . . // Initialize context
///
/// error = 
///     PORE_LOCATION(&ctx, source)  ||
///     pore_BRANZ(&ctx, D0, source) ||
///     pore_ADDS(&ctx, D1, D1, 1)   ||
///     PORE_LOCATION(&ctx, target)  ||
///     pore_LD(&ctx, D0, 0, A0);
///
/// if (error) <. . . Handle assembly error based on ctx.error>
/// rc = pore_inline_branch_fixup(&ctx, source, target);
/// if (rc) <. . . Handle branch fixup error>
///
/// \endcode
///
/// In the above code, the branch instruction is initially assembled as a
/// branch-to-self - the recommended idiom for forward branch source
/// instructions.  Once the entire sequence has been assembled,
/// pore_inline_branch_fixup() reassembles the \c source instruction as a
/// branch to the \c target instruction. The above instruction sequence is
/// equivalent to the PGAS code below:
///
/// \code
///
/// source:
///         branz   D0, target
///         adds    D1, D1, 1
/// target:
///         ld      D0, 0, A0
///
/// \endcode
///
///
/// \subsubsection Absolute Branches
///
/// It is unlikely that a typical application of the PORE inline assembler
/// would ever need to include an absolute branch, since the branch target in
/// this case is a fixed absolute address that must be known at assembly
/// time. However the inline assembler does provide the pore_BRAIA() API for
/// this purpose.  This opcode requires a 16-bit address space constant and a
/// 32-bit absolute address (offset) within the memory space to specify the
/// branch. 
///
///
/// \section Disassembly
///
/// Inline disassembly is implemented by a single API,
/// pore_inline_disassemble(). The idea is similar to assembly: A host memory
/// context containing PORE code (or data) is described by a PoreInlineContext
/// structure.  Each call of pore_inline_disassemble() disassembles the next
/// instruction (or datum) in the context into a PoreInlineDisassembly
/// structure provided by the caller.  The disassembly object contains both
/// binary and string forms of the disassembled instruction (or data). The
/// next call of pore_inline_disassemble() processes the next instruction (or
/// datum) and so on.
///
/// \subsection Text (Code) Disassembly
///
/// In the example below the inline disassembler is used to completely
/// disassemble a memory area containing text (code) to \a stdout until an
/// error occurs, assumed to be either due to disassembling the entire memory
/// area or finding an illegal instruction.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineDisassembly dis;
///
/// . . . // Initialize context
///
/// while (pore_inline_disassemble(&ctx, &dis) == 0) {
///     printf("%s\n", dis.s);
/// }
///
/// \endcode
///
/// To illustrate binary disassembly, the following example uses the
/// disassembler to search for a RET statement in a block of PORE code, in
/// order to extend an inline subroutine with more code.  Note that the field
/// \a dis->ctx contains the context that existed at the time the instruction
/// was assembled.  By copying this context back into the global context,
/// inline assembly will continue by overwriting the RET with new
/// instructions. If the copy had \e not been done, then newly assembled code
/// would have \e followed the RET.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineDisassembly dis;
///
/// . . . // Initialize context
///
/// while ((pore_inline_disassemble(&ctx, &dis) == 0) &&
///        (dis.opcode != PORE_OPCODE_RET));
/// if (ctx.error != 0) {
///     . . . // Handle error
/// } else {
///     pore_inline_context_copy(&ctx, &dis.ctx);
///     . . . // Continue assembly by overwriting the RET
/// }
///
/// \endcode
///
/// A special type of context reset is available to simplify applications that
/// need to disassemble a just-assembled code sequence, e.g. for debugging.
/// pore_inline_context_reset_excursion() resets the context such that the
/// effective size of the context only covers the just-assembled code,
/// allowing a disassembly loop to cleanly stop once all code has been
/// disassembled. The use is illustrated below - note that the disassembly
/// stops on the expected error code PORE_INLINE_NO_MEMORY once the
/// (effective) end of the buffer is reached.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineDisassembly dis;
///
/// . . . // Initialize context
/// . . . // Assemble code into context
///
/// pore_inline_context_reset_excursion(&ctx);
///
/// while (pore_inline_disassemble(&ctx, &dis) == 0) {
///     printf("%s\n", dis.s);
/// }
/// if (ctx.error != PORE_INLINE_NO_MEMORY) {
///     . . . // Handle error
/// }
///
/// \endcode
///
/// \subsection Data Disassembly
///
/// If the PoreInlineContext is created with the flag
/// PORE_INLINE_DISASSEMBLE_DATA, then the context is disassembled as data. If
/// the PoreInlineContext is created with the flag
/// PORE_INLINE_DISASSEMBLE_UNKNOWN then putative data embedded in a text
/// section will be disassembled as data.  For complete information see the
/// documentation for pore_inline_disassemble().


#define __PORE_INLINE_ASSEMBLER_C__
#include "pore_inline.h"
#undef __PORE_INLINE_ASSEMBLER_C__

// Definitions of PORE register classes.  These are predicates that return
// 1 if the register is a member of the class, else 0.

// Register class: the data registers D0 and D1.  Returns 1 for a member of
// the class, 0 for anything else.
PORE_STATIC int
pore_data(int reg)
{
    if ((reg != D0) && (reg != D1)) {
        return 0;
    }
    return 1;
}


// Register class: the address registers A0 and A1.  Returns 1 for a member
// of the class, 0 for anything else.
PORE_STATIC int
pore_address(int reg)
{
    if (reg == A0) {
        return 1;
    }
    return (reg == A1) ? 1 : 0;
}


// Register class: the pervasive chiplet id registers P0 and P1.  Returns 1
// for a member of the class, 0 for anything else.
PORE_STATIC int
pore_pervasive_chiplet_id(int reg)
{
    const int is_p0 = (reg == P0);
    const int is_p1 = (reg == P1);

    return is_p0 || is_p1;
}


// Register class: registers that a compare-and-branch may test, namely D0,
// D1 and CTR.  Used by pore_inline_brac() to validate its register operand.
// Returns 1 for a member of the class, 0 for anything else.
PORE_STATIC int
pore_branch_compare_data(int reg)
{
    if (reg == CTR) {
        return 1;
    }
    if (reg == D0) {
        return 1;
    }
    if (reg == D1) {
        return 1;
    }
    return 0;
}


// Register class: D0, D1, A0, A1, P0, P1 and CTR.  Judging by the name this
// is presumably the set of legal load/store destinations - confirm against
// the callers.  Returns 1 for a member of the class, 0 for anything else.
PORE_STATIC int
pore_ls_destination(int reg)
{
    // The class is the union of the data, address and chiplet-id classes,
    // plus CTR.
    return pore_data(reg)                 ||
           pore_address(reg)              ||
           pore_pervasive_chiplet_id(reg) ||
           (reg == CTR);
}


// Register class: D0, D1, A0, A1, P0, P1 and CTR.  Judging by the name this
// is presumably the set of legal load-immediate destinations - confirm
// against the callers.  Returns 1 for a member of the class, 0 otherwise.
PORE_STATIC int
pore_li_destination(int reg)
{
    if ((reg == D0) || (reg == D1)) {
        return 1;
    }
    if ((reg == A0) || (reg == A1)) {
        return 1;
    }
    if ((reg == P0) || (reg == P1)) {
        return 1;
    }
    return (reg == CTR);
}


// Register class: D0, D1, A0, A1, P0, P1, CTR, PC, ETR, SPRG0, IFR and EMR.
// Judging by the name this is presumably the set of legal sources of a
// register move - confirm against the callers.  Returns 1 for a member of
// the class, 0 for anything else.
PORE_STATIC int
pore_mr_source(int reg)
{
    const int members[] = {
        D0, D1, A0, A1, P0, P1, CTR, PC, ETR, SPRG0, IFR, EMR
    };
    size_t i;

    for (i = 0; i < (sizeof(members) / sizeof(members[0])); i++) {
        if (reg == members[i]) {
            return 1;
        }
    }
    return 0;
}

// Register class: D0, D1, A0, A1, P0, P1, CTR, PC, SPRG0 and EMR.  Judging
// by the name this is presumably the set of legal destinations of a register
// move - confirm against the callers.  Compared with pore_mr_source(), ETR
// and IFR are not members.  Returns 1 for a member of the class, 0 otherwise.
PORE_STATIC int
pore_mr_destination(int reg)
{
    if ((reg == D0) || (reg == D1)) {
        return 1;
    }
    if ((reg == A0) || (reg == A1)) {
        return 1;
    }
    if ((reg == P0) || (reg == P1)) {
        return 1;
    }
    if ((reg == CTR) || (reg == PC)) {
        return 1;
    }
    return (reg == SPRG0) || (reg == EMR);
}


/// Portable store of a 32-bit integer in big-endian format
///
/// \param p The address to receive the data, in the form of an unsigned
/// long.  The address does not need to be aligned.
///
/// \param x The value to store.  The most significant byte of \a x is
/// written to address \a p, the least significant byte to \a p + 3.
///
/// The value is stored one byte at a time using shifts, so the result is the
/// same on big- and little-endian hosts and no word-sized (alignment- and
/// aliasing-sensitive) access is ever made to the target memory.

void
pore_inline_be32(unsigned long p, uint32_t x)
{
    uint8_t *out = (uint8_t *)p;

    out[0] = (uint8_t)(x >> 24);
    out[1] = (uint8_t)(x >> 16);
    out[2] = (uint8_t)(x >> 8);
    out[3] = (uint8_t)x;
}
	
	
/// Portable store of a 64-bit integer in big-endian format
///
/// \param p The address to receive the data, in the form of an unsigned
/// long.  The address does not need to be aligned; in particular the
/// immediate field of a 3-word instruction is stored at a 4-byte aligned
/// (not necessarily 8-byte aligned) address.
///
/// \param x The value to store.  The most significant byte of \a x is
/// written to address \a p, the least significant byte to \a p + 7.
///
/// The value is stored one byte at a time using shifts, so the result is the
/// same on big- and little-endian hosts and no 64-bit (alignment- and
/// aliasing-sensitive) access is ever made to the target memory.

void
pore_inline_be64(unsigned long p, uint64_t x)
{
    uint8_t *out = (uint8_t *)p;
    int i;

    // Walk from the last (least significant) byte back to the first.
    for (i = 7; i >= 0; i--) {
        out[i] = (uint8_t)x;
        x >>= 8;
    }
}


// Portable load of a 32-bit integer in big-endian format
//
// The address p of the data is in the form of an unsigned long and does not
// need to be aligned.  The byte at p becomes the most significant byte of
// the result.  The value is assembled from individual bytes, so the result
// is the same on big- and little-endian hosts and no word-sized access is
// made to the source memory.

uint32_t
pore_inline_host32(unsigned long p)
{
    const uint8_t *in = (const uint8_t *)p;

    return ((uint32_t)in[0] << 24) |
           ((uint32_t)in[1] << 16) |
           ((uint32_t)in[2] << 8)  |
           (uint32_t)in[3];
}
	
	
// Portable load of a 64-bit integer in big-endian format
//
// The address p of the data is in the form of an unsigned long and does not
// need to be aligned.  The byte at p becomes the most significant byte of
// the result.  The value is assembled from individual bytes, so the result
// is the same on big- and little-endian hosts and no 64-bit access is made
// to the source memory.

uint64_t
pore_inline_host64(unsigned long p)
{
    const uint8_t *in = (const uint8_t *)p;
    uint64_t value = 0;
    int i;

    for (i = 0; i < 8; i++) {
        value = (value << 8) | in[i];
    }
    return value;
}


// 32-bit population count
//
// Divide-and-conquer bit counting: adjacent 1-bit, then 2-bit, then 4-bit
// fields are summed in parallel, in place, leaving one count (0..8) in each
// byte.  The four byte counts are then added together with a single
// multiply, which accumulates their sum (at most 32) in the top byte.

PORE_STATIC int
pore_popcount32(uint32_t x)
{
    const uint32_t pairs   = 0x55555555;
    const uint32_t nibbles = 0x33333333;
    const uint32_t bytes   = 0x0f0f0f0f;

    x = (x & pairs) + ((x >> 1) & pairs);       /* 2-bit sums */
    x = (x & nibbles) + ((x >> 2) & nibbles);   /* 4-bit sums */
    x = (x + (x >> 4)) & bytes;                 /* 8-bit sums */

    /* Top byte of the product is the sum of all four byte counts */
    return (int)((uint32_t)(x * 0x01010101u) >> 24);
}


// 64-bit population count
//
// The count is the sum of the 32-bit population counts of the two halves.

PORE_STATIC int
pore_popcount64(uint64_t x)
{
    const uint32_t low_word = (uint32_t)(x & 0xffffffff);
    const uint32_t high_word = (uint32_t)(x >> 32);

    return pore_popcount32(low_word) + pore_popcount32(high_word);
}


// Compute the parity of a PORE instruction as 0 or 1
//
// The result is 1 if the total number of 1 bits in the instruction word and
// the 64-bit immediate taken together is odd, otherwise 0.  Pass an
// immediate of 0 for instructions that have none.

int
pore_inline_parity(uint32_t instruction, uint64_t imd64)
{
    const int ones = pore_popcount32(instruction) + pore_popcount64(imd64);

    return ones & 1;
}
	
	
/// Reset a PORE inline assembler context to its creation state
///
/// \param ctx A pointer to an initialized (and likely 'used')
/// PoreInlineContext object.
///
/// This API resets a PoreInlineContext object to its \e creation state, that
/// is, the state it was in after the call of pore_inline_context_create().
/// This API is designed for applications that reuse a memory buffer to
/// assemble multiple PORE code sequences.  After each sequence has been fully
/// assembled and processed, calling pore_inline_context_reset() sets the
/// context back as it was when the context was initially created so that the
/// memory area can be reused.  In particular, this API resets the location
/// counter and memory extent to their initial values, and the error code is
/// cleared.  Any options specified at creation remain as they were.
///
/// For a slightly different type of reset, see
/// pore_inline_context_reset_excursion().  

void
pore_inline_context_reset(PoreInlineContext *ctx)
{
    // Forget any previous failure.
    ctx->error = 0;

    // Rewind both the virtual location counter and the host address it
    // corresponds to back to their creation values.
    ctx->lc = ctx->original_lc;
    ctx->lc_address = ctx->memory;

    // The entire buffer is available again.
    ctx->remaining = ctx->size;
}


/// Reset a PORE inline assembler context to a special state for disassembly
///
/// \param ctx A pointer to an initialized (and almost certainly 'used')
/// PoreInlineContext object.
///
/// This API resets a PoreInlineContext object to its \e creation state, that
/// is, the state it was in after the call of pore_inline_context_create(), \e
/// except that the effective size of the memory area has been reduced to the
/// size that was actually used during assembly.  This API is designed for
/// applications that assemble into a memory buffer and then want to easily
/// disassemble the code (e.g., for debugging).  After a code sequence has
/// been assembled, calling pore_inline_context_reset_excursion() sets the
/// context back as it was when the context was initially created, but with a
/// (typically) shorter effective length, so that the disassembly will cleanly
/// stop once the entire sequence has been disassembled. Once disassembled,
/// the buffer can be fully reused after a subsequent call of
/// pore_inline_context_reset().  In particular, this API resets the location
/// counter to its initial value, clears the error code, and sets the
/// effective size of the context to the amount of memory currently used.  Any
/// options specified at creation remain as they were.
///
/// For a full context reset see pore_inline_context_reset(). For an example
/// see the \b Disassembly section of \ref pore_inline_assembler.

void
pore_inline_context_reset_excursion(PoreInlineContext *ctx)
{
    // The new effective extent is the number of bytes consumed so far: the
    // full size less whatever was still unused.
    ctx->remaining = ctx->size - ctx->remaining;

    // Rewind the virtual location counter and its host address to the start
    // of the buffer.
    ctx->lc = ctx->original_lc;
    ctx->lc_address = ctx->memory;

    // Forget any previous failure.
    ctx->error = 0;
}


/// Create a PORE inline assembler context
///
/// \param ctx A pointer to a PoreInlineContext object to be initialized
/// and used for inline assembly or disassembly.
///
/// \param memory A pointer to the host memory area to receive the assembled
/// code, or contain the code to disassemble. In general the inline assembler
/// will expect this memory area to be 4-byte aligned. This pointer may be
/// NULL (0) only if the associated \a size is also 0.
///
/// \param size The size (in bytes) of the host memory area. The inline
/// assembler will generate the PORE_INLINE_NO_MEMORY error if an attempt is
/// made to assemble an instruction that would overflow the buffer, or
/// disassemble past the end of the buffer. A 0 size is valid.
///
/// \param lc The initial, bytewise, target location counter for the assembled
/// or disassembled code. This parameter will normally be initialized to 0 for
/// assembling relocatable programs. The parameter would only need to be
/// specified as non-0 for special cases, such as creating a context for
/// disassembly.
///
/// \param options Option flags.  Option flags are OR-ed together to create
/// the final set of options. Valid options are
///
/// - PORE_INLINE_GENERATE_PARITY : Generate the proper parity bit for each
/// instruction during assembly.
///
/// - PORE_INLINE_CHECK_PARITY : Check for correct instruction parity during
/// disassembly.
///
/// - PORE_INLINE_LISTING_MODE : Generate disassembly strings in the form of a
/// listing that contains location counters and encoded instructions as well
/// as their disassembly.  By default the disassembly strings do not contain
/// this information and can be fed back in as source code to a PORE
/// assembler.
///
/// - PORE_INLINE_DISASSEMBLE_DATA : generate disassembly assuming that the
/// context contains data rather than text. Normally data is disassembled as
/// .long directives, however if the context is unaligned or of an odd length
/// then .byte directives may be used as well.  This option can be used in
/// conjunction with PORE_INLINE_LISTING_MODE.
///
/// - PORE_INLINE_8_BYTE_DATA : generate data disassembly using 8-byte values
/// rather than the default 4-byte values.  Normally data is disassembled as
/// .quad directives under this option, however if the context is unaligned or
/// of an odd length then .long and .byte directives may be used as well.
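/// This option can be used in conjunction with PORE_INLINE_LISTING_MODE.
///
/// - PORE_INLINE_DISASSEMBLE_UNKNOWN : putative data embedded in a text
/// section is disassembled as data.  See pore_inline_disassemble().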
///
/// A PoreInlineContext describes a memory area and assembler context for
/// inline assembly and disassembly.  Assembly/disassembly begins at the host
/// memory location and virtual location counter described in the parameters.
/// As instructions are assembled/disassembled the PoreInlineContext keeps
/// track of where in the host memory and virtual PORE memory areas to place
/// new instructions during assembly, or from where to fetch the next
/// instruction to disassemble.
///
/// \retval 0 Success
///
/// \retval PORE_INLINE_INVALID_PARAMETER Either the \a context pointer is
/// NULL (0), the \a memory pointer is NULL (0) with a non-0 size, or the \a
/// options include invalid options.  The error code is also stored as the
/// value of ctx->error, and in the event of an error the ctx->size field is
/// set to 0, effectively preventing the context from being used.

int
pore_inline_context_create(PoreInlineContext *ctx,
                           void *memory, size_t size,
                           PoreInlineLocation lc, int options)
{
    // Every option bit the assembler/disassembler understands; any other
    // bit set in 'options' makes the call fail.
    const int valid_options =
        PORE_INLINE_GENERATE_PARITY  |
        PORE_INLINE_CHECK_PARITY     |
        PORE_INLINE_LISTING_MODE     |
        PORE_INLINE_DISASSEMBLE_DATA |
        PORE_INLINE_8_BYTE_DATA      |
        PORE_INLINE_DISASSEMBLE_UNKNOWN;

    // Without a context there is nowhere to record the error.
    if (ctx == NULL) {
        return PORE_INLINE_INVALID_PARAMETER;
    }

    if (((memory == NULL) && (size != 0)) ||
        ((options & ~valid_options) != 0)) {

        // Effectively prevent the context from being used.  The allocator
        // tests ctx->remaining rather than ctx->size, and a context that was
        // never successfully created may hold garbage there, so both fields
        // are cleared.
        ctx->size = 0;
        ctx->remaining = 0;
        ctx->error = PORE_INLINE_INVALID_PARAMETER;
        return PORE_INLINE_INVALID_PARAMETER;
    }

    ctx->memory = (unsigned long)memory;
    ctx->size = size;
    ctx->original_lc = lc;
    ctx->options = options;

    // Derives lc, lc_address and remaining from the fields set above and
    // clears ctx->error.
    pore_inline_context_reset(ctx);

    return 0;
}
	
	
/// Copy a PORE inline assembler context
///
/// \param dest A pointer to a PoreInlineContext object to be initialized
/// as a copy of the \a src context.
///
/// \param src A pointer to a PoreInlineContext object to be used as the
/// source of the copy.
///
/// This API copies one PoreInlineContext structure to another.  An example
/// use appears in \ref pore_inline_assembler in the section discussing
/// disassembly.

void
pore_inline_context_copy(PoreInlineContext *dest, PoreInlineContext *src)
{
    // Plain structure assignment: every field of *src is copied to *dest.
    // Neither pointer is checked for NULL here.
    *dest = *src;
}


// 'Bump' a context forward by a given number of bytes.  This is an internal
// API and the bump is always known to be legal, so no bounds or alignment
// checks are made here.  The host address and the virtual location counter
// advance together, and the space remaining shrinks by the same amount.

void
pore_inline_context_bump(PoreInlineContext *ctx, size_t bytes)
{
    ctx->lc_address += bytes;
    ctx->lc += bytes;
    ctx->remaining -= bytes;
}


// Allocate space in the inline assembler context
//
// Allocation is specified and implemented in bytes.  Both the virtual LC and
// the physical (host) address must be 4-byte aligned at the time of the
// call.  On success the context is bumped past the allocation and the host
// address of the allocated area is returned.  On failure 0 is returned, the
// context is not advanced, and the context error code is set to
// PORE_INLINE_ALIGNMENT_ERROR (misaligned LC or host address) or
// PORE_INLINE_NO_MEMORY (fewer than 'bytes' bytes remain).  The alignment
// check is made first.

PORE_STATIC unsigned long
pore_inline_allocate(PoreInlineContext *ctx, size_t bytes)
{
    unsigned long allocation;

    if (((ctx->lc % 4) != 0) || ((ctx->lc_address % 4) != 0)) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
        return 0;
    }

    if (bytes > ctx->remaining) {
        ctx->error = PORE_INLINE_NO_MEMORY;
        return 0;
    }

    allocation = ctx->lc_address;
    pore_inline_context_bump(ctx, bytes);
    return allocation;
}


// Assemble a 1-word instruction
//
// The opcode and operand are assumed to be legal, having come from
// abstractions that check their arguments.  The instruction word is formed
// by placing the opcode in the 7 bits above bit 24 and OR-ing in the
// operand.  If the context was created with PORE_INLINE_GENERATE_PARITY, bit
// 24 is set whenever the word would otherwise contain an even number of 1
// bits.  The word is stored big-endian.
//
// Returns 0 on success, in which case ctx->error is cleared.  Returns 1 if
// space could not be allocated; ctx->error then holds the reason
// (PORE_INLINE_NO_MEMORY or PORE_INLINE_ALIGNMENT_ERROR).

int
pore_inline_instruction1(PoreInlineContext *ctx, int opcode, uint32_t operand)
{
    uint32_t instruction;
    unsigned long p;

    p = pore_inline_allocate(ctx, 4);
    if (p == 0) {
        return 1;
    }

    // Shift as an unsigned value: shifting an int opcode of 0x40 or above
    // left by 25 overflows a 32-bit signed int, which is undefined.
    instruction = ((uint32_t)opcode << 25) | operand;

    if (ctx->options & PORE_INLINE_GENERATE_PARITY) {
        instruction |= (uint32_t)(1 - pore_inline_parity(instruction, 0)) << 24;
    }

    pore_inline_be32(p, instruction);
    ctx->error = 0;
    return 0;
}
	    

// Assemble a 3-word instruction
//
// The opcode and operand are assumed to be legal, having come from
// abstractions that check their arguments.  The first word is formed by
// placing the opcode in the 7 bits above bit 24 and OR-ing in the operand;
// it is followed by the 64-bit immediate.  If the context was created with
// PORE_INLINE_GENERATE_PARITY, bit 24 of the first word is set whenever the
// instruction word and immediate together would otherwise contain an even
// number of 1 bits.  All 12 bytes are stored big-endian.
//
// Returns 0 on success, in which case ctx->error is cleared.  Returns 1 if
// space could not be allocated; ctx->error then holds the reason
// (PORE_INLINE_NO_MEMORY or PORE_INLINE_ALIGNMENT_ERROR).

int
pore_inline_instruction3(PoreInlineContext *ctx, int opcode, uint32_t operand,
                         uint64_t immediate)
{
    uint32_t instruction;
    unsigned long p;

    p = pore_inline_allocate(ctx, 12);
    if (p == 0) {
        return 1;
    }

    // Shift as an unsigned value: shifting an int opcode of 0x40 or above
    // left by 25 overflows a 32-bit signed int, which is undefined.
    instruction = ((uint32_t)opcode << 25) | operand;

    if (ctx->options & PORE_INLINE_GENERATE_PARITY) {
        instruction |=
            (uint32_t)(1 - pore_inline_parity(instruction, immediate)) << 24;
    }

    pore_inline_be32(p, instruction);
    pore_inline_be64(p + 4, immediate);
    ctx->error = 0;
    return 0;
}


// Assemble WAITS
//
// A cycle count of 0 is rejected with PORE_INLINE_USE_HALT: PGAS requires
// that HALT be used if the intention is to halt.  Otherwise the cycle count
// must be an unsigned 24-bit immediate, or the error
// PORE_INLINE_UINT24_REQUIRED is signaled.  The return value is the context
// error code, which is 0 on success.

int
pore_WAITS(PoreInlineContext *ctx, uint32_t cycles)
{
    if (cycles == 0) {
        ctx->error = PORE_INLINE_USE_HALT;
        return ctx->error;
    }

    if (cycles > 0xffffff) {
        ctx->error = PORE_INLINE_UINT24_REQUIRED;
        return ctx->error;
    }

    // The cycle count is the entire operand.
    pore_inline_instruction1(ctx, PGAS_OPCODE_WAITS, cycles);
    return ctx->error;
}


// Assemble HOOKI
//
// The hook index must be an unsigned 24-bit immediate, otherwise the error
// PORE_INLINE_UINT24_REQUIRED is signaled.  The index becomes the operand of
// a 3-word instruction whose 64-bit immediate is 'imm'.  The return value is
// the context error code, which is 0 on success.

int
pore_HOOKI(PoreInlineContext *ctx, uint32_t index, uint64_t imm)
{
    if (index > 0xffffff) {
        ctx->error = PORE_INLINE_UINT24_REQUIRED;
        return ctx->error;
    }

    pore_inline_instruction3(ctx, PGAS_OPCODE_HOOKI, index, imm);
    return ctx->error;
}


// Assemble BRA, BSR and LOOP
//
// The branch target here is a bytewise location counter.  The target must be
// 4-byte aligned, otherwise PORE_INLINE_ALIGNMENT_ERROR is signaled.  The
// branch offset is the distance in words from the current LC (the location
// of the branch itself) to the target, and must fit in a signed 24-bit
// field, otherwise PORE_INLINE_UNREACHABLE_TARGET is signaled.  The return
// value is the context error code, which is 0 on success.

int
pore_inline_bra(PoreInlineContext *ctx, int opcode, PoreInlineLocation target)
{
    int32_t words;

    if ((target % 4) != 0) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
        return ctx->error;
    }

    words = (int32_t)(target - ctx->lc) / 4;
    if ((words < -(1 << 23)) || (words > ((1 << 23) - 1))) {
        ctx->error = PORE_INLINE_UNREACHABLE_TARGET;
        return ctx->error;
    }

    // The operand is the two's-complement offset truncated to 24 bits.
    pore_inline_instruction1(ctx, opcode, (uint32_t)words & 0xffffff);
    return ctx->error;
}
	    

// Assemble BRAZ and BRANZ
//
// The branch target here is a bytewise location counter.  The target must be
// 4-byte aligned and must be within the legal signed 20-bit word offset of
// the current LC. Unaligned targets cause PORE_INLINE_ALIGNMENT_ERROR.
// Unreachable targets cause PORE_INLINE_UNREACHABLE_TARGET.  Illegal
// operands cause PORE_INLINE_ILLEGAL_REGISTER.

int
pore_inline_brac(PoreInlineContext *ctx, int opcode, int reg,
                 PoreInlineLocation target)
{
    // ctx    : assembly context; ctx->lc is the location counter of the
    //          branch being assembled and ctx->error receives any failure.
    // opcode : opcode of the conditional branch to assemble.
    // reg    : register tested by the branch; must satisfy
    //          pore_branch_compare_data().
    // target : bytewise location counter of the branch target.
    // Returns ctx->error (non-zero on failure).

    int32_t offset;
    uint32_t operand;

    if (target % 4) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
    } else if (!pore_branch_compare_data(reg)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    } else {
        // Word displacement relative to the current location counter.
        offset = (int32_t)(target - ctx->lc) / 4;

        // The displacement field is only 20 bits wide (the register is
        // placed at bit 20), so a signed displacement must lie in
        // [-2^19, 2^19).  Anything larger would be truncated by the mask
        // below and silently assemble a branch with the wrong sign.
        if ((offset >= (1 << 19)) ||
            (offset < -(1 << 19))) {
            ctx->error = PORE_INLINE_UNREACHABLE_TARGET;
        } else {
            operand = (offset & 0xfffff) | (reg << 20);
            pore_inline_instruction1(ctx, opcode, operand);
        }
    }
    return ctx->error;
}


// Assemble CMPIBRAEQ, CMPIBRANE, CMPIBSREQ
//
// The branch target here is a bytewise location counter.  The target must be
// 4-byte aligned and must be within the legal signed 24-bit word offset of
// the current LC. Unaligned targets cause PORE_INLINE_ALIGNMENT_ERROR.
// Unreachable targets cause PORE_INLINE_UNREACHABLE_TARGET. Illegal
// operands cause PORE_INLINE_ILLEGAL_REGISTER.

int
pore_inline_cmpibra(PoreInlineContext *ctx, int opcode, int reg,
                    PoreInlineLocation target, uint64_t imm)
{
    int32_t words;
    uint32_t field;

    // Targets are byte location counters and must sit on a word boundary.
    if (target % 4) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
        return ctx->error;
    }

    // D0 is the only register accepted for the comparison.
    if (reg != D0) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    // Distance from the current location counter, in 4-byte words.
    words = (int32_t)(target - ctx->lc) / 4;

    // The displacement must be representable as a signed 24-bit value.
    if ((words < -(1 << 23)) || (words >= (1 << 23))) {
        ctx->error = PORE_INLINE_UNREACHABLE_TARGET;
        return ctx->error;
    }

    // Low 24 bits of the displacement, followed by the compare immediate.
    field = words & 0xffffff;
    pore_inline_instruction3(ctx, opcode, field, imm);
    return ctx->error;
}


// Assemble BRAD and BSRD
//
// Illegal operands cause PORE_INLINE_ILLEGAL_REGISTER.

int
pore_inline_brad(PoreInlineContext *ctx, int opcode, int reg)
{
    if (pore_data(reg)) {
        // The register number is placed at bit 20 of the operand.
        uint32_t field = reg << 20;

        pore_inline_instruction1(ctx, opcode, field);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}


// Assemble ANDI, ORI, XORI
//
// Source and destination must be of class 'data' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.

int
pore_inline_ilogic(PoreInlineContext *ctx, int opcode,
                   int dest, int src, uint64_t imm)
{
    if (pore_data(dest) && pore_data(src)) {
        // Destination at bit 20, source at bit 16; the immediate follows.
        uint32_t field = (dest << 20) | (src << 16);

        pore_inline_instruction3(ctx, opcode, field, imm);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}


// Assemble AND, OR, XOR, ADD, SUB
//
// Destination must be of class 'data' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.  src1 and src2 must be D0,
// D1 respectively otherwise the PORE_INLINE_ILLEGAL_REGISTER error is
// generated.

int
pore_inline_alurr(PoreInlineContext *ctx,
                  int opcode, int dest, int src1, int src2)
{
    // The sources are fixed (D0, D1); only the destination is encoded.
    if (pore_data(dest) && (src1 == D0) && (src2 == D1)) {
        uint32_t field = (dest << 20);

        pore_inline_instruction1(ctx, opcode, field);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}


// Assemble ADDS and SUBS
//
// Destination must be of class 'ls_destination' and must be equal to source,
// otherwise the PORE_INLINE_ILLEGAL_REGISTER error is generated.  If the
// immediate is not a signed 16-bit immediate then the
// PORE_INLINE_INT16_REQUIRED error is generated.

int
pore_inline_adds(PoreInlineContext *ctx,
                 int opcode, int dest, int src, int imm)
{
    uint32_t field;

    // The operation is in-place: source and destination must be the same
    // 'ls_destination' register.
    if (!pore_ls_destination(dest) || (dest != src)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    // The immediate must be representable as a signed 16-bit value.
    if ((imm < -(1 << 15)) || (imm >= (1 << 15))) {
        ctx->error = PORE_INLINE_INT16_REQUIRED;
        return ctx->error;
    }

    // Register at bit 20, two's-complement immediate in the low 16 bits.
    field = (dest << 20) | (imm & 0xffff);
    pore_inline_instruction1(ctx, opcode, field);
    return ctx->error;
}


// Assemble NEG
//
// Source and destination must be of class 'data' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.

int
pore_NEG(PoreInlineContext *ctx, int dest, int src)
{
    if (pore_data(dest) && pore_data(src)) {
        // Destination at bit 20, source at bit 16.
        uint32_t field = (dest << 20) | (src << 16);

        pore_inline_instruction1(ctx, PGAS_OPCODE_NEG, field);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}


// Assemble MR
//
// The source must be an 'mr_source' and the destination must be an
// 'mr_destination' otherwise the PORE_INLINE_ILLEGAL_REGISTER error is
// generated.

int
pore_MR(PoreInlineContext *ctx, int dest, int src)
{
    if (pore_mr_destination(dest) && pore_mr_source(src)) {
        // Destination at bit 20, source at bit 16.
        uint32_t field = (dest << 20) | (src << 16);

        pore_inline_instruction1(ctx, PGAS_OPCODE_MR, field);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}

	    
// Assemble ROLS
//
// Source and destination must be of class 'data' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.  Illegal shifts yield the
// PORE_INLINE_ILLEGAL_ROTATE error.

int
pore_ROLS(PoreInlineContext *ctx, int dest, int src, int imm)
{
    uint32_t field;

    if (!pore_data(dest) || !pore_data(src)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    switch (imm) {
    // Only these rotate amounts are accepted.
    case 1:
    case 4:
    case 8:
    case 16:
    case 32:
        // Destination at bit 20, source at bit 16, rotate count in the
        // low bits.
        field = (dest << 20) | (src << 16) | imm;
        pore_inline_instruction1(ctx, PGAS_OPCODE_ROLS, field);
        break;
    default:
        ctx->error = PORE_INLINE_ILLEGAL_ROTATE;
        break;
    }
    return ctx->error;
}


// Assemble LS
//
// The destination must be an 'ls_destination' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.  If the immediate is not
// a signed 20-bit immediate then the PORE_INLINE_INT20_REQUIRED error is
// generated. 

int
pore_LS(PoreInlineContext *ctx, int dest, int imm)
{
    uint32_t field;

    if (!pore_ls_destination(dest)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    // The immediate must be representable as a signed 20-bit value.
    if ((imm < -(1 << 19)) || (imm >= (1 << 19))) {
        ctx->error = PORE_INLINE_INT20_REQUIRED;
        return ctx->error;
    }

    // Register at bit 20, two's-complement immediate in the low 20 bits.
    field = (dest << 20) | (imm & 0xfffff);
    pore_inline_instruction1(ctx, PGAS_OPCODE_LS, field);
    return ctx->error;
}


// Assemble LI
//
// The destination must be an 'li_destination' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.

int
pore_LI(PoreInlineContext *ctx, int dest, uint64_t imm)
{
    if (pore_li_destination(dest)) {
        // Only the destination is encoded in the operand; the 64-bit
        // value travels as the immediate.
        uint32_t field = dest << 20;

        pore_inline_instruction3(ctx, PGAS_OPCODE_LI, field, imm);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}


// BSI and BCI are normally redacted as instructions due to HW274735

// LD, LDANDI, STD, STI, BSI, BCI

PORE_STATIC void
pervasive_ima24(PoreInlineContext *ctx,
                int opcode, uint32_t offset, int base, uint64_t imm)
{
    uint32_t field;

    // Offsets with bit 31 or any of bits 20-23 set are rejected.
    if ((offset & 0x80f00000) != 0) {
        ctx->error = PORE_INLINE_ILLEGAL_SCOM_ADDRESS;
        return;
    }

    // Low bit of the base register number at bit 22, low 20 offset bits
    // below it.
    field = ((base % 2) << 22) | (offset & 0xfffff);

    // Plain loads and stores are assembled without the immediate; every
    // other opcode carries it.
    if ((opcode == PGAS_OPCODE_LD0) ||
        (opcode == PGAS_OPCODE_LD1) ||
        (opcode == PGAS_OPCODE_STD0) ||
        (opcode == PGAS_OPCODE_STD1)) {
        pore_inline_instruction1(ctx, opcode, field);
    } else {
        pore_inline_instruction3(ctx, opcode, field, imm);
    }
}
        

PORE_STATIC void
memory_ima24(PoreInlineContext *ctx,
             int opcode, uint32_t offset, int base, uint64_t imm)
{
    uint32_t field;

    // The offset must fit in 22 bits ...
    if ((offset >> 22) != 0) {
        ctx->error = PORE_INLINE_UINT22_REQUIRED;
        return;
    }

    // ... and be a multiple of 8 bytes.
    if ((offset & 7) != 0) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
        return;
    }

    // Bit 23 is always set for this form; the low bit of the base register
    // number goes at bit 22 and the offset fills the low 22 bits.
    field = 0x800000 | ((base % 2) << 22) | (offset & 0x3fffff);

    // Plain loads and stores are assembled without the immediate; every
    // other opcode carries it.
    if ((opcode == PGAS_OPCODE_LD0) ||
        (opcode == PGAS_OPCODE_LD1) ||
        (opcode == PGAS_OPCODE_STD0) ||
        (opcode == PGAS_OPCODE_STD1)) {
        pore_inline_instruction1(ctx, opcode, field);
    } else {
        pore_inline_instruction3(ctx, opcode, field, imm);
    }
}


PORE_STATIC void
ima24(PoreInlineContext *ctx,
      int opcode, uint32_t offset, int base, uint64_t imm)
{
    // Dispatch on the class of the base register: pervasive chiplet id
    // registers take the pervasive encoding, address registers take the
    // memory encoding, and anything else is an error.
    if (pore_pervasive_chiplet_id(base)) {
        pervasive_ima24(ctx, opcode, offset, base, imm);
        return;
    }

    if (pore_address(base)) {
        memory_ima24(ctx, opcode, offset, base, imm);
        return;
    }

    ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
}


int
pore_inline_load_store(PoreInlineContext *ctx,
                       int opcode, int src_dest, int32_t offset, int base,
                       uint64_t imm)
{
    // Pseudo-opcodes resolve to the D0 or the D1 flavour of the real opcode.
    int use_d0 = (src_dest == D0);

    switch (opcode) {

    case PORE_INLINE_PSEUDO_LD:
    case PORE_INLINE_PSEUDO_LDANDI:
    case PORE_INLINE_PSEUDO_STD:

        // The data register selects which real opcode is assembled, so it
        // has to be a data register in the first place.

        if (!pore_data(src_dest)) {
            ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        } else if (opcode == PORE_INLINE_PSEUDO_LD) {
            opcode = use_d0 ? PGAS_OPCODE_LD0 : PGAS_OPCODE_LD1;
        } else if (opcode == PORE_INLINE_PSEUDO_LDANDI) {
            opcode = use_d0 ? PGAS_OPCODE_LD0ANDI : PGAS_OPCODE_LD1ANDI;
        } else {
            opcode = use_d0 ? PGAS_OPCODE_STD0 : PGAS_OPCODE_STD1;
        }
        break;

#ifdef IGNORE_HW274735

    // BSI and BCI are normally redacted as instructions due to HW274735

    case PGAS_OPCODE_BSI:
    case PGAS_OPCODE_BCI:

        if (src_dest != D0) {
            ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        }
        break;

#endif // IGNORE_HW274735

    case PGAS_OPCODE_STI:
        // STI is assembled as given; src_dest is not checked.
        break;

    default:
        // Any other opcode reaching this routine is an internal error.
        ctx->error = PORE_INLINE_BUG;
        break;
    }

    // Emit only when the context holds no error (including one that was
    // already pending on entry).
    if (ctx->error == 0) {
        ima24(ctx, opcode, offset, base, imm);
    }

    return ctx->error;
}


// Assemble BRAIA

int
pore_BRAIA(PoreInlineContext *ctx,
           uint16_t address_space, uint32_t offset)
{
    // The 64-bit immediate holds the address space in the upper word
    // and the offset in the lower word; the operand field is unused (0).
    uint64_t destination = (uint64_t)address_space;
    uint32_t field = 0;

    destination = (destination << 32) | offset;

    pore_inline_instruction3(ctx, PGAS_OPCODE_BRAI, field, destination);

    return ctx->error;
}


// Assemble SCAND

int
pore_SCAND(PoreInlineContext *ctx,
           int update, int capture, uint16_t length,
           uint32_t select, uint32_t offset)
{
    uint32_t field;
    // Immediate: select in the upper word, offset in the lower word.
    uint64_t scan_imm = ((uint64_t)select << 32) | offset;

    // 'update' and 'capture' are single-bit flags and must be 0 or 1.
    if (((update != 0) && (update != 1)) ||
        ((capture != 0) && (capture != 1))) {
        ctx->error = PORE_INLINE_INVALID_PARAMETER;
        return ctx->error;
    }

    // Update flag at bit 23, capture flag at bit 22, length in the low bits.
    field = (update << 23) | (capture << 22) | length;
    pore_inline_instruction3(ctx, PGAS_OPCODE_SCAND, field, scan_imm);
    return ctx->error;
}
        

/// Fix up a PORE inline assembler forward branch instruction
///
/// \param ctx A pointer to the initialized PoreInlineContext object
/// controlling inline assembly.
///
/// \param source The PORE inline location counter associated with the source
/// instruction of the forward branch.
///
/// \param target The PORE inline location counter associated with the target
/// instruction of the forward branch.
///
/// For usage examples, see the documentation \ref pore_inline_assembler.
/// Although intended for forward branches, this API could be used to create
/// backward branches as well.  Note however the limitation that the \a source
/// must be in the current context, since the source instruction needs to be
/// reassembled with the branch target. In theory the \a target could be
/// anywhere, as long as the location counter of the target is known.
///
/// \retval 0 Success
///
/// \retval code Failure.  Any non-zero return is the PORE inline assembler
/// error code. The failure code is also stored in the PoreInlineContext
/// object \a error field.  The most likely causes of failure include a source
/// location that is not in the current context or not associated with a
/// branch instruction.

int
pore_inline_branch_fixup(PoreInlineContext *ctx,
                         PoreInlineLocation source,
                         PoreInlineLocation target)
{
    uint32_t word;
    int32_t rewind;
    uint64_t compare_imm;
    int opcode, reg;
    PoreInlineContext patch_ctx;

    // The source must lie between the context's original and current
    // location counters.
    // NOTE(review): source == ctx->lc passes this check; confirm whether a
    // source equal to the current location counter is meant to be legal.
    if ((source < ctx->original_lc) ||
        (source > ctx->lc)) {
        ctx->error = PORE_INLINE_ILLEGAL_SOURCE_LC;
        return ctx->error;
    }

    // Rebuild the context as it was when the source instruction was first
    // assembled: rewind the location counter and the host address by the
    // number of bytes emitted since, give those bytes back to 'remaining',
    // and start with a clean error state.

    rewind = ctx->lc - source;

    patch_ctx = *ctx;
    patch_ctx.lc = source;
    patch_ctx.remaining += rewind;
    patch_ctx.lc_address -= rewind;
    patch_ctx.error = 0;

    // Decode just enough of the existing instruction word to know which
    // branch to reassemble: opcode in the top 7 bits, register in bits
    // 20-23.

    word = pore_inline_host32(patch_ctx.lc_address);
    opcode = (word >> 25);
    reg = (word >> 20) & 0xf;

    // Reassemble the same instruction in place, now with the real target.
    // The compare-and-branch forms also re-read their 64-bit immediate,
    // which is stored right after the 4-byte instruction word.

    switch (opcode) {
    case PGAS_OPCODE_BRA:
        pore_BRA(&patch_ctx, target);
        break;
    case PGAS_OPCODE_BSR:
        pore_BSR(&patch_ctx, target);
        break;
    case PGAS_OPCODE_LOOP:
        pore_LOOP(&patch_ctx, target);
        break;
    case PGAS_OPCODE_BRAZ:
        pore_BRAZ(&patch_ctx, reg, target);
        break;
    case PGAS_OPCODE_BRANZ:
        pore_BRANZ(&patch_ctx, reg, target);
        break;
    case PGAS_OPCODE_CMPIBRAEQ:
        compare_imm = pore_inline_host64(patch_ctx.lc_address + 4);
        pore_CMPIBRAEQ(&patch_ctx, D0, target, compare_imm);
        break;
    case PGAS_OPCODE_CMPIBRANE:
        compare_imm = pore_inline_host64(patch_ctx.lc_address + 4);
        pore_CMPIBRANE(&patch_ctx, D0, target, compare_imm);
        break;
    case PGAS_OPCODE_CMPIBSREQ:
        compare_imm = pore_inline_host64(patch_ctx.lc_address + 4);
        pore_CMPIBSREQ(&patch_ctx, D0, target, compare_imm);
        break;
    default:
        patch_ctx.error = PORE_INLINE_NOT_A_BRANCH;
        break;
    }

    // The outcome of the reassembly replaces the caller's error state
    // (including a result of 0).
    ctx->error = patch_ctx.error;
    return ctx->error;
}