/* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ /* $Source: src/usr/hwpf/hwp/build_winkle_images/p8_slw_build/pore_inline_assembler.c $ */ /* */ /* OpenPOWER HostBoot Project */ /* */ /* COPYRIGHT International Business Machines Corp. 2012,2014 */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ /* You may obtain a copy of the License at */ /* */ /* http://www.apache.org/licenses/LICENSE-2.0 */ /* */ /* Unless required by applicable law or agreed to in writing, software */ /* distributed under the License is distributed on an "AS IS" BASIS, */ /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ /* implied. See the License for the specific language governing */ /* permissions and limitations under the License. */ /* */ /* IBM_PROLOG_END_TAG */ // $Id: pore_inline_assembler.c,v 1.22 2013/12/11 00:11:14 bcbrock Exp $ // $Source: /afs/awd/projects/eclipz/KnowledgeBase/.cvsroot/eclipz/chips/p8/working/procedures/pore_inline_assembler.c,v $ //----------------------------------------------------------------------------- // *! (C) Copyright International Business Machines Corp. 2013 // *! All Rights Reserved -- Property of IBM // *! *** IBM Confidential *** //----------------------------------------------------------------------------- // ** WARNING : This file is maintained as part of the OCC firmware. Do ** // ** not edit this file in the PMX area or the hardware procedure area ** // ** as any changes will be lost. ** /// \file pore_inline_assembler.c /// \brief Inline PGAS assembler for PgP/Stage1 PORE /// /// \page pore_inline_assembler PORE Inline Assembler and Disassembler /// /// Several procedures targeting the PORE engine require inline assembly and /// disassembly of PORE code, that is, they require that PORE instructions be /// assembled/disassembled directly into/from a host memory buffer. 
/// This page
/// describes these facilities. The APIs described here are implemented in
/// the files pore_inline.h, pore_inline_assembler.c and
/// pore_inline_disassembler.c. Both the inline assembler and disassembler
/// conform to the PGAS assembly format for PORE.
///
/// Both inline assembly and disassembly make use of a PoreInlineContext
/// structure. This structure represents the state of a memory area being
/// targeted for inline assembly and disassembly. The context is initialized
/// with the pore_inline_context_create() API, and a pointer to an instance of
/// this structure appears as the first argument of all assembler/disassembler
/// APIs. As assembly/disassembly progresses the PoreInlineContext keeps
/// track of how much host memory area has been filled by assembled code or
/// scanned by the disassembler.
///
/// Assembler/disassembler APIs are predicates that return 0 for success and a
/// non-zero error code for failure. In the event of failure, the error code
/// (a small integer) is also stored in the \a error field of the context
/// structure. String forms of the error codes are also available in the
/// global array pore_inline_error_strings[].
///
/// The assembler always produces PORE code in the PORE-native big-endian
/// format. Likewise, the disassembler assumes the host memory to be
/// disassembled contains PORE code in big-endian format.
///
/// \section Initialization
///
/// Before invoking inline assembly/disassembly APIs, an instance of a
/// PoreInlineContext structure must be initialized using the
/// pore_inline_context_create() API. For assembly, the context describes the
/// host memory buffer that will contain the assembled code. For disassembly,
/// the context describes the host memory area that contains the code to be
/// disassembled. Full documentation is available for
/// pore_inline_context_create(), including documentation for options that
/// control assembly and disassembly.
/// The implementation also provides a
/// 'copy operator' for the context, pore_inline_context_copy().
///
/// An example of initializing a context for inline assembly with parity
/// checking appears below.
///
/// \code
///
/// PoreInlineContext ctx;
/// uint32_t buf[BUFSIZE];
///
/// rc = pore_inline_context_create(&ctx, buf, BUFSIZE * 4, 0,
///                                 PORE_INLINE_CHECK_PARITY);
/// if (rc) . . . Handle Error
///
/// \endcode
///
/// Applications that reuse the same memory buffer for assembling and
/// processing multiple PORE programs can 'reset' the context between uses by
/// using the pore_inline_context_reset() API. pore_inline_context_reset()
/// resets the location counter and memory extent to their initial (creation)
/// values, and the context error code is cleared. Any options specified at
/// creation remain as they were.
///
/// \section Assembler
///
/// The inline assembler implements each PORE/PGAS instruction as individual
/// function calls. The APIs are consistently named \c pore_\<OPCODE\>, where
/// \c \<OPCODE\> is a PGAS mnemonic in upper case. The arguments to each
/// opcode appear in the same order that they appear in the source-level
/// assembler, with appropriate C-language types. The supported opcode APIs
/// are defined in pore_inline.h
///
/// Since the PORE instruction APIs are effectively predicates, linear code
/// sequences are easily assembled using the C-language logical OR construct.
/// Any non-0 return code will immediately break the sequence and set the
/// expression value to 1. The failure code can then be recovered from the \a
/// error field of the context. This coding technique is illustrated in the
/// following example of assembling a memory-memory copy sequence.
///
/// \code
///
/// PoreInlineContext ctx;
/// int error;
///
/// . . . // Initialize context
///
/// error =
///     pore_LD(&ctx, D0, 0, A0) ||
///     pore_STD(&ctx, D0, 0, A1);
///
/// if (error) <. . .
/// Handle error based on ctx.error>
///
/// \endcode
///
/// The above example generates code equivalent to
///
/// \code
///
/// ld D0, 0, A0
/// std D0, 0, A1
///
/// \endcode
///
/// Again, if an error were to occur during assembly, inline assembly would
/// stop (and the logical OR would terminate) at the point of failure. In
/// particular, the inline assembler will never allow assembled code to exceed
/// the bounds of the memory area defined by the initial call of
/// pore_inline_context_create() that defines the assembler memory space.
///
///
/// \subsection Register Names and Other Mnemonics
///
/// The header file pore_inline.h defines macros for the register mnemonics.
///
/// - D0, D1 : 64-bit data registers
/// - A0, A1 : 32-bit address registers
/// - P0, P1 : 7-bit Pervasive chiplet id registers
/// - CTR : 24-bit counter register
/// - PC : 48-bit Program Counter
/// - ETR : 64-bit EXE-Trigger Register (Low-order 32 bits are writable)
/// - EMR : The Error Mask Register
/// - IFR : ID/Flags Register
/// - SPRG0 : 32-bit Special-Purpose General Register 0
///
/// Mnemonics for the condition code bits are also defined by pore_inline.h
/// using the PGAS mnemonics.
///
///
/// \subsection Assembling Branches
///
/// Opcodes that implement relative branches require that the branch target be
/// specified as a location counter. Once initialized, the current
/// location counter is available as the \a lc field of the PoreInlineContext
/// object controlling the assembly. The \a lc field is the only field
/// (besides the error code held in the \a error field) that application code
/// should ever reference. The inline assembler also provides a typedef
/// PoreInlineLocation to use for location counters, as well as the macro
/// PORE_LOCATION() to define a location variable inline with the code flow.
///
/// \subsubsection Backward Branches
///
/// Backward branches are straightforward.
For example, the memory-memory /// copy example from earlier can be converted into a loop as shown below. The /// \a loop_target variable is initialized with the location counter of the /// first instruction of the loop. The final instruction of the loop then /// branches back to the \a loop_target. /// /// \code /// /// PoreInlineContext ctx; /// PoreInlineLocation loop_target = 0; // See ** below the example /// int error; /// /// . . . // Initialize context /// /// error = /// PORE_LOCATION(&ctx, loop_target) || /// pore_LD(&ctx, D0, 0, A0) || /// pore_STD(&ctx, D0, 0, A1) || /// pore_ADDS(&ctx, A0, A0, 8) || /// pore_ADDS(&ctx, A1, A1, 8) || /// pore_LOOP(&ctx, loop_target); /// /// if (error) <. . . Handle error based on ctx.error> /// /// \endcode /// /// The above inline assembler sequence is equivalent to the PGAS code /// sequence: /// /// \code /// /// loop_target: /// ld D0, 0, A0 /// std D0, 0, A1 /// adds A0, A0, 8 /// adds A1, A1, 8 /// loop loop_target /// /// \endcode /// /// ** Location counters used as loop targets may need to be initialized, /// otherwise the compiler may issue a warning that the variable "may be used /// uninitialized", although in well-written code this would never happen. /// /// /// \subsubsection Forward Branches /// /// Forward branches are more complex. Since the target location counter is /// not known until the target has been assembled, the inline assembler /// provides the API pore_inline_branch_fixup() to fix up forward branches /// once the actual target is known. This is illustrated in the simple code /// sequence below, where an instruction is conditionally skipped. /// /// \code /// /// PoreInlineContext ctx; /// PoreInlineLocation source = 0, target = 0; /// int error, rc; /// /// . . . // Initialize context /// /// error = /// PORE_LOCATION(&ctx, source) || /// pore_BRANZ(&ctx, D0, source) || /// pore_ADDS(&ctx, D1, D1, 1) || /// PORE_LOCATION(&ctx, target) || /// pore_LD(&ctx, D0, 0, A0); /// /// if (error) <. 
/// . . Handle assembly error based on ctx.error>
/// rc = pore_inline_branch_fixup(&ctx, source, target);
/// if (rc) <. . . Handle branch fixup error>
///
/// \endcode
///
/// In the above code, the branch instruction is initially assembled as a
/// branch-to-self - the recommended idiom for forward branch source
/// instructions. Once the entire sequence has been assembled,
/// pore_inline_branch_fixup() reassembles the \c source instruction as a
/// branch to the \c target instruction. The above instruction sequence is
/// equivalent to the PGAS code below:
///
/// \code
///
/// source:
///     branz D0, target
///     adds D1, D1, 1
/// target:
///     ld D0, 0, A0
///
/// \endcode
///
///
/// \subsubsection Absolute Branches
///
/// It is unlikely that a typical application of the PORE inline assembler
/// would ever need to include an absolute branch, since the branch target in
/// this case is a fixed absolute address that must be known at assembly
/// time. However the inline assembler does provide the pore_BRAIA() API for
/// this purpose. This opcode requires a 16-bit address space constant and a
/// 32-bit absolute address (offset) within the memory space to specify the
/// branch.
///
///
/// \section Disassembly
///
/// Inline disassembly is implemented by a single API,
/// pore_inline_disassemble(). The idea is similar to assembly: A host memory
/// context containing PORE code (or data) is described by a PoreInlineContext
/// structure. Each call of pore_inline_disassemble() disassembles the next
/// instruction (or datum) in the context into a PoreInlineDisassembly
/// structure provided by the caller. The disassembly object contains both
/// binary and string forms of the disassembled instruction (or data). The
/// next call of pore_inline_disassemble() processes the next instruction (or
/// datum) and so on.
///
/// \subsection Text (Code) Disassembly
///
/// In the example below the inline disassembler is used to completely
/// disassemble a memory area containing text (code) to \a stdout until an
/// error occurs, assumed to be either due to disassembling the entire memory
/// area or finding an illegal instruction.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineDisassembly dis;
///
/// . . . // Initialize context
///
/// while (pore_inline_disassemble(&ctx, &dis) == 0) {
///     printf("%s\n", dis.s);
/// }
///
/// \endcode
///
/// To illustrate binary disassembly, the following example uses the
/// disassembler to search for a RET statement in a block of PORE code, in
/// order to extend an inline subroutine with more code. Note that the field
/// \a dis->ctx contains the context that existed at the time the instruction
/// was assembled. By copying this context back into the global context,
/// inline assembly will continue by overwriting the RET with new
/// instructions. If the copy had \e not been done, then newly assembled code
/// would have \e followed the RET.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineDisassembly dis;
///
/// . . . // Initialize context
///
/// while ((pore_inline_disassemble(&ctx, &dis) == 0) &&
///        (dis.opcode != PORE_OPCODE_RET));
/// if (ctx.error != 0) {
///     . . . // Handle error
/// } else {
///     pore_inline_context_copy(&ctx, &dis.ctx);
///     . . . // Continue assembly by overwriting the RET
/// }
///
/// \endcode
///
/// A special type of context reset is available to simplify applications that
/// need to disassemble a just-assembled code sequence, e.g. for debugging.
/// pore_inline_context_reset_excursion() resets the context such that the
/// effective size of the context only covers the just-assembled code,
/// allowing a disassembly loop to cleanly stop once all code has been
/// disassembled.
The use is illustrated below - note that the disassembly /// stops on the expected error code PORE_INLINE_NO_MEMORY once the /// (effective) end of the buffer is reached. /// /// \code /// /// PoreInlineContext ctx; /// PoreInlineDisassembly dis; /// /// . . . // Initialize context /// . . . // Assemble code into context /// /// pore_inline_context_reset_excursion(&ctx); /// /// while (pore_inline_disassemble(&ctx, &dis) == 0) { /// printf("%s\n", dis.s); /// } /// if (ctx.error != PORE_INLINE_NO_MEMORY) { /// . . . // Handle error /// } /// /// \endcode /// /// \subsection Data Disassembly /// /// If the PoreInlineContext is created with the flag /// PORE_INLINE_DISASSEMBLE_DATA, then the context is disassembled as data. If /// the PoreInlineContext is created with the flag /// PORE_INLINE_DISASSEMBLE_UNKNOWN then putative data embedded in a text /// section will be disassembled as data. For complete information see the /// documentation for pore_inline_disassemble(). #define __PORE_INLINE_ASSEMBLER_C__ #include "pore_inline.h" #undef __PORE_INLINE_ASSEMBLER_C__ // Definitions of PORE register classes. These are predicates that return // 1 if the register is a member of the class, else 0. 
PORE_STATIC int pore_data(int reg) { return (reg == D0) || (reg == D1); } PORE_STATIC int pore_address(int reg) { return (reg == A0) || (reg == A1); } PORE_STATIC int pore_pervasive_chiplet_id(int reg) { return (reg == P0) || (reg == P1); } PORE_STATIC int pore_branch_compare_data(int reg) { return (reg == D0) || (reg == D1) || (reg == CTR); } PORE_STATIC int pore_ls_destination(int reg) { return (reg == D0) || (reg == D1) || (reg == A0) || (reg == A1) || (reg == P0) || (reg == P1) || (reg == CTR); } PORE_STATIC int pore_li_destination(int reg) { return (reg == D0) || (reg == D1) || (reg == A0) || (reg == A1) || (reg == P0) || (reg == P1) || (reg == CTR); } PORE_STATIC int pore_mr_source(int reg) { return (reg == D0) || (reg == D1) || (reg == A0) || (reg == A1) || (reg == P0) || (reg == P1) || (reg == CTR) || (reg == PC) || (reg == ETR) || (reg == SPRG0) || (reg == IFR) || (reg == EMR); } PORE_STATIC int pore_mr_destination(int reg) { return (reg == D0) || (reg == D1) || (reg == A0) || (reg == A1) || (reg == P0) || (reg == P1) || (reg == CTR) || (reg == PC) || (reg == SPRG0)|| (reg == EMR); } /// Portable store of a 32-bit integer in big-endian format /// /// The address \a p to receive the data is in the form of an unsigned long. void pore_inline_be32(unsigned long p, uint32_t x) { uint8_t *p8 = (uint8_t *)p; uint8_t *px = (uint8_t *)(&x); int i, j; if (!PORE_BIG_ENDIAN) { for (i = 0, j = 3; i < 4; i++, j--) { p8[i] = px[j]; } } else { *((uint32_t *)p) = x; } } /// Portable store of a 64-bit integer in big-endian format /// /// The address \a p to receive the data is in the form of an unsigned long. 
void pore_inline_be64(unsigned long p, uint64_t x) { uint8_t *p8 = (uint8_t *)p; uint8_t *px = (uint8_t *)(&x); int i, j; if (!PORE_BIG_ENDIAN) { for (i = 0, j = 7; i < 8; i++, j--) { p8[i] = px[j]; } } else { *((uint64_t *)p) = x; } } // Portable load of a 32-bit integer in big-endian format uint32_t pore_inline_host32(unsigned long p) { uint32_t x; uint8_t *p8 = (uint8_t *)p; uint8_t *px = (uint8_t *)(&x); int i, j; if (!PORE_BIG_ENDIAN) { for (i = 0, j = 3; i < 4; i++, j--) { px[j] = p8[i]; } } else { x = *((uint32_t *)p); } return x; } // Portable load of a 64-bit integer in big-endian format uint64_t pore_inline_host64(unsigned long p) { uint64_t x; uint8_t *p8 = (uint8_t *)p; uint8_t *px = (uint8_t *)(&x); int i, j; if (!PORE_BIG_ENDIAN) { for (i = 0, j = 7; i < 8; i++, j--) { px[j] = p8[i]; } } else { x = *((uint64_t *)p); } return x; } // 32-bit population count // // This is a well-known divide-and-conquer algorithm. The idea is to compute // sums of adjacent bit segments in parallel, in place. PORE_STATIC int pore_popcount32(uint32_t x) { uint32_t m1 = 0x55555555; uint32_t m2 = 0x33333333; uint32_t m4 = 0x0f0f0f0f; x -= (x >> 1) & m1; /* Sum pairs of bits */ x = (x & m2) + ((x >> 2) & m2);/* Sum 4-bit segments */ x = (x + (x >> 4)) & m4; /* Sum 8-bit segments */ x += x >> 8; /* Sum 16-bit segments */ return (x + (x >> 16)) & 0x3f; /* Final sum */ } // 64-bit population count PORE_STATIC int pore_popcount64(uint64_t x) { return pore_popcount32(x & 0xffffffff) + pore_popcount32(x >> 32); } // Compute the parity of a PORE instruction as 0 or 1 int pore_inline_parity(uint32_t instruction, uint64_t imd64) { return (pore_popcount32(instruction) + pore_popcount64(imd64)) % 2; } /// Reset a PORE inline assembler context to its creation state /// /// \param ctx A pointer to an initialized (and likely 'used') /// PoreInlineContext object. 
/// /// This API resets a PoreInlineContext object to it's \e creation state, that /// is, the state it was in after the call of pore_inline_context_create(). /// This API is designed for applications that reuse a memory buffer to /// assemble multiple PORE code sequences. After each sequence has been fully /// assembled and processed, calling pore_inline_context_reset() sets the /// context back as it was when the context was initially created so that the /// memory area can be reused. In particular, this API resets the location /// counter and memory extent to their initial values, and the error code is /// cleared. Any options specified at creation remain as they were. /// /// For a slightly different type of reset, see /// pore_inline_context_reset_excursion(). void pore_inline_context_reset(PoreInlineContext *ctx) { ctx->lc_address = ctx->memory; ctx->remaining = ctx->size; ctx->lc = ctx->original_lc; ctx->error = 0; } /// Reset a PORE inline assembler context to a special state for disassembly /// /// \param ctx A pointer to an initialized (and almost certainly 'used') /// PoreInlineContext object. /// /// This API resets a PoreInlineContext object to it's \e creation state, that /// is, the state it was in after the call of pore_inline_context_create(), \e /// except that the effective size of the memory area has been reduced to the /// size that was actually used during assembly. This API is designed for /// applications that assemble into a memory buffer and then want to easily /// disassemble the code (e.g., for debugging). After a code sequence has /// been assembled, calling pore_inline_context_reset_excursion() sets the /// context back as it was when the context was initially created, but with a /// (typically) shorter effective length, so that the disassembly will cleanly /// stop once the entire sequence has been disassembled. Once disassembled, /// the buffer can be fully resued after a subsequent call of /// pore_inline_context_reset(). 
In particular, this API resets the location /// counter to its initial value, clears the error code, and sets the /// effective size of the context to the amount of memory currently used. Any /// options specified at creation remain as they were. /// /// For a full context reset see pore_inline_context_reset(). For an example /// see the \b Disassembly section of \ref pore_inline_assembler. void pore_inline_context_reset_excursion(PoreInlineContext *ctx) { ctx->lc_address = ctx->memory; ctx->remaining = ctx->size - ctx->remaining; ctx->lc = ctx->original_lc; ctx->error = 0; } /// Create a PORE inline assembler context /// /// \param ctx A pointer to a PoreInlineContext object to be initialized /// and used for inline assembly. or disassembly. /// /// \param memory A pointer to the host memory area to receive the assembled /// code, or contain the code to disassemble. In general the inline assembler /// will expect this memory area to be 4-byte aligned. This pointer may be /// NULL (0) only if the associated \a size is also 0. /// /// \param size The size (in bytes) of the host memory area. The inline /// assembler will generate the PORE_INLINE_NO_MEMORY error if an attempt is /// made to assemble an instruction that would overflow the buffer, or /// disassemble past the end of the buffer. A 0 size is valid. /// /// \param lc The initial, bytewise, target location counter for the assembled /// or disassembled code. This paramater will normally be initialized to 0 for /// assembling relocatable programs. The parameter would only need to be /// specified as non-0 for special cases, such as creating a context for /// disassembly. /// /// \param options Option flags. Option flags are OR-ed together to create /// the final set of options. Valid options are /// /// - PORE_INLINE_GENERATE_PARITY : Generate the proper parity bit for each /// instruction during assembly. /// /// - PORE_INLINE_CHECK_PARITY : Check for correct instruction parity during /// disassembly. 
///
/// - PORE_INLINE_LISTING_MODE : Generate disassembly strings in the form of a
/// listing that contains location counters and encoded instructions as well
/// as their disassembly. By default the disassembly strings do not contain
/// this information and can be fed back in as source code to a PORE
/// assembler.
///
/// - PORE_INLINE_DISASSEMBLE_DATA : generate disassembly assuming that the
/// context contains data rather than text. Normally data is disassembled as
/// .long directives, however if the context is unaligned or of an odd length
/// then .byte directives may be used as well. This option can be used in
/// conjunction with PORE_INLINE_LISTING_MODE.
///
/// - PORE_INLINE_8_BYTE_DATA : generate data disassembly using 8-byte values
/// rather than the default 4-byte values. Normally data is disassembled as
/// .quad directives under this option, however if the context is unaligned or
/// of an odd length then .long and .byte directives may be used as well.
/// This option can be used in conjunction with PORE_INLINE_LISTING_MODE.
///
/// A PoreInlineContext describes a memory area and assembler context for
/// inline assembly and disassembly. Assembly/disassembly begins at the host
/// memory location and virtual location counter described in the parameters.
/// As instructions are assembled/disassembled the PoreInlineContext keeps
/// track of where in the host memory and virtual PORE memory areas to place
/// new instructions during assembly, or from where to fetch the next
/// instruction to disassemble.
///
/// \retval 0 Success
///
/// \retval PORE_INLINE_INVALID_PARAMETER Either the \a ctx pointer is
/// NULL (0), the \a memory pointer is NULL (0) with a non-0 size, or the \a
/// options include invalid options. The error code is also stored as the
/// value of ctx->error, and in the event of an error the ctx->size field is
/// set to 0, effectively preventing the context from being used.
int
pore_inline_context_create(PoreInlineContext *ctx,
                           void *memory,
                           size_t size,
                           PoreInlineLocation lc,
                           int options)
{
    int rc;
    int valid_options =
        PORE_INLINE_GENERATE_PARITY |
        PORE_INLINE_CHECK_PARITY |
        PORE_INLINE_LISTING_MODE |
        PORE_INLINE_DISASSEMBLE_DATA |
        PORE_INLINE_8_BYTE_DATA |
        PORE_INLINE_DISASSEMBLE_UNKNOWN;

    if ((ctx == NULL) ||
        ((memory == NULL) && (size != 0)) ||
        ((options & ~valid_options) != 0)) {

        rc = PORE_INLINE_INVALID_PARAMETER;

    } else {

        rc = 0;
        ctx->memory = (unsigned long)memory;
        ctx->size = size;
        ctx->original_lc = lc;
        ctx->options = options;

        // Derive lc, lc_address and remaining from the fields just stored
        pore_inline_context_reset(ctx);
    }

    if (ctx != NULL) {
        ctx->error = rc;
        if (rc) {
            // Effectively prevents using the ctx.  The allocator tests
            // 'remaining', not 'size', so both must be zeroed; otherwise a
            // stale or uninitialized 'remaining' would still let assembly
            // proceed into an invalid context.
            ctx->size = 0;
            ctx->remaining = 0;
        }
    }
    return rc;
}


/// Copy a PORE inline assembler context
///
/// \param dest A pointer to a PoreInlineContext object to be initialized
/// as a copy of the \a src context.
///
/// \param src A pointer to a PoreInlineContext object to be used as the
/// source of the copy.
///
/// This API copies one PoreInlineContext structure to another. An example
/// use appears in \ref pore_inline_assembler in the section discussing
/// disassembly.

void
pore_inline_context_copy(PoreInlineContext *dest, PoreInlineContext *src)
{
    // Plain structure assignment; every field is copied by value
    *dest = *src;
}


// 'Bump' a context forward by a given number of bytes. This is an internal
// API and the bump is always known to be legal.
//
// The free-space count shrinks by \a bytes while the virtual location
// counter and the host address both advance by \a bytes.  No bounds check is
// made here.

void
pore_inline_context_bump(PoreInlineContext *ctx, size_t bytes)
{
    ctx->remaining -= bytes;
    ctx->lc += bytes;
    ctx->lc_address += bytes;
}


// Allocate space in the inline assembler context
//
// Allocation is specified and implemented in bytes. Both the physical
// memory and the virtual LC are required to be 4-byte aligned. The allocator
// returns a pointer to the memory area, or 0 if allocation fails.
// Allocation failure sets the context error code to either
// PORE_INLINE_NO_MEMORY or PORE_INLINE_ALIGNMENT_ERROR.
PORE_STATIC unsigned long
pore_inline_allocate(PoreInlineContext *ctx, size_t bytes)
{
    unsigned long p = 0;

    if (((ctx->lc % 4) != 0) ||
        ((ctx->lc_address % 4) != 0)) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;

    } else if (bytes > ctx->remaining) {
        ctx->error = PORE_INLINE_NO_MEMORY;

    } else {
        // Hand out the current host address, then advance past it
        p = ctx->lc_address;
        pore_inline_context_bump(ctx, bytes);
    }
    return p;
}


// Assemble a 1-word instruction
//
// The opcode and operand are assumed to be legal, having come from
// abstractions that check their arguments.  This call may fail with
// PORE_INLINE_NO_MEMORY if there is no more room in the memory buffer, or
// with PORE_INLINE_ALIGNMENT_ERROR if the context is not 4-byte aligned.  On
// success the context error code is cleared.  A non-zero return indicates
// failure.

int
pore_inline_instruction1(PoreInlineContext *ctx, int opcode, uint32_t operand)
{
    uint32_t instruction;
    unsigned long p;

    p = pore_inline_allocate(ctx, 4);
    if (p != 0) {

        // The opcode occupies the top 7 bits of the word.  Shift it as an
        // unsigned value: shifting a signed int left by 25 overflows (and is
        // undefined) for any opcode that has bit 6 set.
        instruction = ((uint32_t)opcode << 25) | operand;

        if (ctx->options & PORE_INLINE_GENERATE_PARITY) {
            // Bit 24 is set when the rest of the word has even parity
            instruction |=
                (uint32_t)(1 - pore_inline_parity(instruction, 0)) << 24;
        }

        pore_inline_be32(p, instruction);
        ctx->error = 0;
    }
    return p == 0;
}


// Assemble a 3-word instruction
//
// The opcode and operand are assumed to be legal, having come from
// abstractions that check their arguments.  This call may fail with
// PORE_INLINE_NO_MEMORY if there is no more room in the memory buffer, or
// with PORE_INLINE_ALIGNMENT_ERROR if the context is not 4-byte aligned.  On
// success the context error code is cleared.  A non-zero return indicates
// failure.

int
pore_inline_instruction3(PoreInlineContext *ctx, int opcode, uint32_t operand,
                         uint64_t immediate)
{
    uint32_t instruction;
    unsigned long p;

    p = pore_inline_allocate(ctx, 12);
    if (p != 0) {

        // See pore_inline_instruction1() for why the shift is unsigned
        instruction = ((uint32_t)opcode << 25) | operand;

        if (ctx->options & PORE_INLINE_GENERATE_PARITY) {
            // Parity here covers the instruction word and the immediate
            instruction |=
                (uint32_t)(1 - pore_inline_parity(instruction, immediate)) << 24;
        }

        // Word 0 is the instruction; words 1-2 hold the 64-bit immediate
        pore_inline_be32(p, instruction);
        pore_inline_be64(p + 4, immediate);
        ctx->error = 0;
    }
    return p == 0;
}


// Assemble WAITS
//
// The cycle count must be an unsigned 24-bit immediate otherwise the error
// PORE_INLINE_UINT24_REQUIRED is signaled.
PGAS requires that HALT be used // if the intention is to halt int pore_WAITS(PoreInlineContext *ctx, uint32_t cycles) { uint32_t operand; int opcode = PGAS_OPCODE_WAITS; if (cycles == 0) { ctx->error = PORE_INLINE_USE_HALT; } else if ((cycles & 0xffffff) != cycles) { ctx->error = PORE_INLINE_UINT24_REQUIRED; } else { operand = cycles; pore_inline_instruction1(ctx, opcode, operand); } return ctx->error; } // Assemble HOOKI // // The hook index must be an unsigned 24-bit immediate otherwise the error // PORE_INLINE_UINT24_REQUIRED is signaled. int pore_HOOKI(PoreInlineContext *ctx, uint32_t index, uint64_t imm) { uint32_t operand; int opcode = PGAS_OPCODE_HOOKI; if ((index & 0xffffff) != index) { ctx->error = PORE_INLINE_UINT24_REQUIRED; } else { operand = index; pore_inline_instruction3(ctx, opcode, operand, imm); } return ctx->error; } // Assemble BRA, BSR and LOOP // // The branch target here is a bytewise location counter. The target must be // 4-byte aligned and must be within the legal signed 24-bit word offset of // the current LC. Unaligned targets cause PORE_INLINE_ALIGNMENT_ERROR. // Unreachable targets cause PORE_INLINE_UNREACHABLE_TARGET. int pore_inline_bra(PoreInlineContext *ctx, int opcode, PoreInlineLocation target) { int32_t offset; uint32_t operand; if (target % 4) { ctx->error = PORE_INLINE_ALIGNMENT_ERROR; } else { offset = (int32_t)(target - ctx->lc) / 4; if ((offset >= (1 << 23)) || (offset < -(1 << 23))) { ctx->error = PORE_INLINE_UNREACHABLE_TARGET; } else { operand = offset & 0xffffff; pore_inline_instruction1(ctx, opcode, operand); } } return ctx->error; } // Assemble BRAZ and BRANZ // // The branch target here is a bytewise location counter. The target must be // 4-byte aligned and must be within the legal signed 20-bit word offset of // the current LC. Unaligned targets cause PORE_INLINE_ALIGNMENT_ERROR. // Unreachable targets cause PORE_INLINE_UNREACHABLE_TARGET. Illegal // operands cause PORE_INLINE_ILLEGAL_REGISTER. 
int
pore_inline_brac(PoreInlineContext *ctx, int opcode, int reg,
                 PoreInlineLocation target)
{
    int32_t offset;
    uint32_t operand;

    if (target % 4) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
    } else if (!pore_branch_compare_data(reg)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    } else {
        // Signed distance from the current LC to the target, in words
        offset = (int32_t)(target - ctx->lc) / 4;

        // The offset is encoded in a signed 20-bit field (mask 0xfffff), so
        // the reachable range is [-2^19, 2^19).  Anything wider would be
        // truncated by the mask and assemble a branch to the wrong place.
        if ((offset >= (1 << 19)) ||
            (offset < -(1 << 19))) {
            ctx->error = PORE_INLINE_UNREACHABLE_TARGET;
        } else {
            operand = (offset & 0xfffff) | (reg << 20);
            pore_inline_instruction1(ctx, opcode, operand);
        }
    }
    return ctx->error;
}


// Assemble CMPIBRAEQ, CMPIBRANE, CMPIBSREQ
//
// The branch target here is a bytewise location counter.  The target must be
// 4-byte aligned and must be within the legal signed 24-bit word offset of
// the current LC.  Unaligned targets cause PORE_INLINE_ALIGNMENT_ERROR.
// Unreachable targets cause PORE_INLINE_UNREACHABLE_TARGET.  Illegal
// operands cause PORE_INLINE_ILLEGAL_REGISTER; the only register accepted is
// D0.  The return value is the context error code.

int
pore_inline_cmpibra(PoreInlineContext *ctx, int opcode, int reg,
                    PoreInlineLocation target, uint64_t imm)
{
    int32_t offset;
    uint32_t operand;

    if (target % 4) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
    } else if (reg != D0) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    } else {
        offset = (int32_t)(target - ctx->lc) / 4;
        if ((offset >= (1 << 23)) ||
            (offset < -(1 << 23))) {
            ctx->error = PORE_INLINE_UNREACHABLE_TARGET;
        } else {
            // The register is not encoded; only the 24-bit offset is
            operand = offset & 0xffffff;
            pore_inline_instruction3(ctx, opcode, operand, imm);
        }
    }
    return ctx->error;
}


// Assemble BRAD and BSRD
//
// Illegal operands cause PORE_INLINE_ILLEGAL_REGISTER.  The register must be
// of class 'data' (D0 or D1).  The return value is the context error code.

int
pore_inline_brad(PoreInlineContext *ctx, int opcode, int reg)
{
    uint32_t operand;

    if (!pore_data(reg)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    } else {
        operand = reg << 20;
        pore_inline_instruction1(ctx, opcode, operand);
    }
    return ctx->error;
}


// Assemble ANDI, ORI, XORI
//
// Source and destination must be of class 'data' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.
int
pore_inline_ilogic(PoreInlineContext *ctx, int opcode, int dest, int src,
                   uint64_t imm)
{
    if (pore_data(dest) && pore_data(src)) {
        // Destination register at bit 20, source register at bit 16
        uint32_t fields = (dest << 20) | (src << 16);

        pore_inline_instruction3(ctx, opcode, fields, imm);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}


// Assemble AND, OR, XOR, ADD, SUB
//
// Destination must be of class 'data' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.  src1 and src2 must be D0,
// D1 respectively otherwise the PORE_INLINE_ILLEGAL_REGISTER error is
// generated.  Only the destination is encoded in the instruction.  The
// return value is the context error code.

int
pore_inline_alurr(PoreInlineContext *ctx, int opcode, int dest, int src1,
                  int src2)
{
    if (pore_data(dest) && (src1 == D0) && (src2 == D1)) {
        uint32_t fields = (dest << 20);

        pore_inline_instruction1(ctx, opcode, fields);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}


// Assemble ADDS and SUBS
//
// Destination must be of class 'ls_destination' and must be equal to source,
// otherwise the PORE_INLINE_ILLEGAL_REGISTER error is generated.  If the
// immediate is not a signed 16-bit immediate then the
// PORE_INLINE_INT16_REQUIRED error is generated.  The return value is the
// context error code.

int
pore_inline_adds(PoreInlineContext *ctx, int opcode, int dest, int src,
                 int imm)
{
    const int limit = 1 << 15;

    if (!pore_ls_destination(dest) || (dest != src)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    if ((imm < -limit) || (imm >= limit)) {
        ctx->error = PORE_INLINE_INT16_REQUIRED;
        return ctx->error;
    }

    // Register at bit 20; the immediate occupies the low 16 bits
    pore_inline_instruction1(ctx, opcode, (dest << 20) | (imm & 0xffff));
    return ctx->error;
}


// Assemble NEG
//
// Source and destination must be of class 'data' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.
int
pore_NEG(PoreInlineContext *ctx, int dest, int src)
{
    uint32_t encoded;

    // Both operands have to be 'data' registers.
    if (!pore_data(dest) || !pore_data(src)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    // Destination at bit 20, source at bit 16.
    encoded = (dest << 20) | (src << 16);
    pore_inline_instruction1(ctx, PGAS_OPCODE_NEG, encoded);
    return ctx->error;
}


// Assemble MR
//
// The destination has to be an 'mr_destination' and the source an
// 'mr_source'; any other pairing yields PORE_INLINE_ILLEGAL_REGISTER.
// Returns the context error code (0 on success).

int
pore_MR(PoreInlineContext *ctx, int dest, int src)
{
    uint32_t encoded;

    if (!pore_mr_destination(dest) || !pore_mr_source(src)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    // Destination at bit 20, source at bit 16.
    encoded = (dest << 20) | (src << 16);
    pore_inline_instruction1(ctx, PGAS_OPCODE_MR, encoded);
    return ctx->error;
}


// Assemble ROLS
//
// Source and destination have to be 'data' registers, otherwise
// PORE_INLINE_ILLEGAL_REGISTER is generated.  The rotate amount has to be one
// of 1, 4, 8, 16 or 32, otherwise PORE_INLINE_ILLEGAL_ROTATE is generated.
// The register check is made first.  Returns the context error code (0 on
// success).

int
pore_ROLS(PoreInlineContext *ctx, int dest, int src, int imm)
{
    uint32_t encoded;

    if (!pore_data(dest) || !pore_data(src)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    switch (imm) {
    case 1:
    case 4:
    case 8:
    case 16:
    case 32:
        // Legal rotate: the amount is OR-ed in below the register fields.
        encoded = (dest << 20) | (src << 16) | imm;
        pore_inline_instruction1(ctx, PGAS_OPCODE_ROLS, encoded);
        break;
    default:
        ctx->error = PORE_INLINE_ILLEGAL_ROTATE;
        break;
    }
    return ctx->error;
}


// Assemble LS
//
// The destination must be an 'ls_destination' otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.  If the immediate is not
// a signed 20-bit immediate then the PORE_INLINE_INT20_REQUIRED error is
// generated.

int pore_LS(PoreInlineContext *ctx, int dest, int imm) { uint32_t operand; int opcode = PGAS_OPCODE_LS; if (!pore_ls_destination(dest)) { ctx->error = PORE_INLINE_ILLEGAL_REGISTER; } else if ((imm >= (1 << 19)) || (imm < -(1 << 19))) { ctx->error = PORE_INLINE_INT20_REQUIRED; } else { operand = (dest << 20) | (imm & 0xfffff); pore_inline_instruction1(ctx, opcode, operand); } return ctx->error; } // Assemble LI // // The destination must be an 'li destination' otherwise the // PORE_INLINE_ILLEGAL_REGISTER error is generated. int pore_LI(PoreInlineContext *ctx, int dest, uint64_t imm) { uint32_t operand; int opcode = PGAS_OPCODE_LI; if (!pore_li_destination(dest)) { ctx->error = PORE_INLINE_ILLEGAL_REGISTER; } else { operand = dest << 20; pore_inline_instruction3(ctx, opcode, operand, imm); } return ctx->error; } // BSI and BCI are normally redacted as instructions due to HW274735 // LD, LDANDI, STD, STI, BSI, BCI PORE_STATIC void pervasive_ima24(PoreInlineContext *ctx, int opcode, uint32_t offset, int base, uint64_t imm) { uint32_t operand; if ((offset & 0x80f00000) != 0) { ctx->error = PORE_INLINE_ILLEGAL_SCOM_ADDRESS; } else { operand = ((base % 2) << 22) | (offset & 0xfffff); switch (opcode) { case PGAS_OPCODE_LD0: case PGAS_OPCODE_LD1: case PGAS_OPCODE_STD0: case PGAS_OPCODE_STD1: pore_inline_instruction1(ctx, opcode, operand); break; default: pore_inline_instruction3(ctx, opcode, operand, imm); break; } } } PORE_STATIC void memory_ima24(PoreInlineContext *ctx, int opcode, uint32_t offset, int base, uint64_t imm) { uint32_t operand; if ((offset & 0x3fffff) != offset) { ctx->error = PORE_INLINE_UINT22_REQUIRED; } else if ((offset % 8) != 0) { ctx->error = PORE_INLINE_ALIGNMENT_ERROR; } else { operand = 0x800000 | ((base % 2) << 22) | (offset & 0x3fffff); switch (opcode) { case PGAS_OPCODE_LD0: case PGAS_OPCODE_LD1: case PGAS_OPCODE_STD0: case PGAS_OPCODE_STD1: pore_inline_instruction1(ctx, opcode, operand); break; default: pore_inline_instruction3(ctx, opcode, 
operand, imm); break; } } } PORE_STATIC void ima24(PoreInlineContext *ctx, int opcode, uint32_t offset, int base, uint64_t imm) { if (pore_pervasive_chiplet_id(base)) { pervasive_ima24(ctx, opcode, offset, base, imm); } else if (pore_address(base)) { memory_ima24(ctx, opcode, offset, base, imm); } else { ctx->error = PORE_INLINE_ILLEGAL_REGISTER; } } int pore_inline_load_store(PoreInlineContext *ctx, int opcode, int src_dest, int32_t offset, int base, uint64_t imm) { switch (opcode) { case PORE_INLINE_PSEUDO_LD: case PORE_INLINE_PSEUDO_LDANDI: case PORE_INLINE_PSEUDO_STD: // These three pick the real opcode based on the dest. register if (!pore_data(src_dest)) { ctx->error = PORE_INLINE_ILLEGAL_REGISTER; } else { switch (opcode) { case PORE_INLINE_PSEUDO_LD: opcode = (src_dest == D0) ? PGAS_OPCODE_LD0 : PGAS_OPCODE_LD1; break; case PORE_INLINE_PSEUDO_LDANDI: opcode = (src_dest == D0) ? PGAS_OPCODE_LD0ANDI : PGAS_OPCODE_LD1ANDI; break; case PORE_INLINE_PSEUDO_STD: opcode = (src_dest == D0) ? 
PGAS_OPCODE_STD0 : PGAS_OPCODE_STD1; break; } } break; #ifdef IGNORE_HW274735 // BSI and BCI are normally redacted as instructions due to HW274735 case PGAS_OPCODE_BSI: case PGAS_OPCODE_BCI: if (src_dest != D0) { ctx->error = PORE_INLINE_ILLEGAL_REGISTER; } break; #endif // IGNORE_HW274735 case PGAS_OPCODE_STI: break; default: ctx->error = PORE_INLINE_BUG; } if (ctx->error == 0) { ima24(ctx, opcode, offset, base, imm); } return ctx->error; } // Assemble BRAIA int pore_BRAIA(PoreInlineContext *ctx, uint16_t address_space, uint32_t offset) { int opcode = PGAS_OPCODE_BRAI; uint32_t operand = 0; uint64_t imm = ((uint64_t)address_space << 32) | offset; pore_inline_instruction3(ctx, opcode, operand, imm); return ctx->error; } // Assemble SCAND int pore_SCAND(PoreInlineContext *ctx, int update, int capture, uint16_t length, uint32_t select, uint32_t offset) { int opcode = PGAS_OPCODE_SCAND; uint32_t operand; uint64_t imm = ((uint64_t)select << 32) | offset; if ((update < 0) || (update > 1) || (capture < 0) || (capture > 1)) { ctx->error = PORE_INLINE_INVALID_PARAMETER; } else { opcode = PGAS_OPCODE_SCAND; operand = (update << 23) | (capture << 22) | length; pore_inline_instruction3(ctx, opcode, operand, imm); } return ctx->error; } /// Fix up a PORE inline assembler forward branch instruction /// /// \param ctx A pointer to the initialized PoreInlineContext object /// controlling inline assembly. /// /// \param source The PORE inline location counter associated with the source /// instruction of the forward branch. /// /// \param target The PORE inline location counter associated with the target /// instruction of the forward branch. /// /// For usage examples, see the documentation \ref pore_inline_assembler. /// Although intended for forward branches, this API could be used to create /// backward branches as well. 
Note however the limitation that the \a source /// must be in the current context, since the source instruction needs to be /// reassembled with the branch target. In theory the \a target could be /// anywhere, as long as the location counter of the target is known. /// /// \retval 0 Success /// /// \retval code Failure. Any non-zero return is the PORE inline assmebler /// error code. The failure code is also stored in the PoreInlineContext /// object \a error field. The most likely causes of failure include a source /// location that is not in the current context or not associated with a /// branch instruction. int pore_inline_branch_fixup(PoreInlineContext *ctx, PoreInlineLocation source, PoreInlineLocation target) { uint32_t instruction; int32_t distance; uint64_t imm; int opcode, reg; PoreInlineContext source_ctx; if ((source < ctx->original_lc) || (source > ctx->lc)) { ctx->error = PORE_INLINE_ILLEGAL_SOURCE_LC; } else { // Create a context as it existed when the source instruction was // initially assembled, and then reassemble the instruction in that // context with the actual target. 
distance = ctx->lc - source; source_ctx = *ctx; source_ctx.lc = source; source_ctx.remaining += distance; source_ctx.lc_address -= distance; source_ctx.error = 0; instruction = pore_inline_host32(source_ctx.lc_address); opcode = (instruction >> 25); reg = (instruction >> 20) & 0xf; switch (opcode) { case PGAS_OPCODE_BRA: pore_BRA(&source_ctx, target); break; case PGAS_OPCODE_BSR: pore_BSR(&source_ctx, target); break; case PGAS_OPCODE_LOOP: pore_LOOP(&source_ctx, target); break; case PGAS_OPCODE_BRAZ: pore_BRAZ(&source_ctx, reg, target); break; case PGAS_OPCODE_BRANZ: pore_BRANZ(&source_ctx, reg, target); break; case PGAS_OPCODE_CMPIBRAEQ: imm = pore_inline_host64(source_ctx.lc_address + 4); pore_CMPIBRAEQ(&source_ctx, D0, target, imm); break; case PGAS_OPCODE_CMPIBRANE: imm = pore_inline_host64(source_ctx.lc_address + 4); pore_CMPIBRANE(&source_ctx, D0, target, imm); break; case PGAS_OPCODE_CMPIBSREQ: imm = pore_inline_host64(source_ctx.lc_address + 4); pore_CMPIBSREQ(&source_ctx, D0, target, imm); break; default: source_ctx.error = PORE_INLINE_NOT_A_BRANCH; break; } ctx->error = source_ctx.error; } return ctx->error; }