#ifndef __PPE42_ASM_H__
#define __PPE42_ASM_H__
//-----------------------------------------------------------------------------
// *! (C) Copyright International Business Machines Corp. 2014
// *! All Rights Reserved -- Property of IBM
// *! *** IBM Confidential ***
//-----------------------------------------------------------------------------

/// \file ppe42_asm.h
/// \brief Generic assembler macros for 32-bit PPE42

// Doxygen is confused by assembler; the best I know how to make it
// work is to put all of the documentation at the beginning like below
// and effectively comment out the code using Doxygen cond/endcond.

/// \page ppe42_asm Generic assembler macros for 32-bit PPE42
///
///
/// \section _lxzi _l<w,h,b>zi - Load register and Zero from Immediate address
///
/// These macros encapsulate the 2-instruction sequence required to
/// load from a 32-bit immediate address.
///
/// \arg \c dreg A register to receive the load data.
/// \arg \c areg A register to hold the immediate address.  This can \e
///              not be register 0. Note that if \a areg != \a dreg
///              then \a areg will contain the address at the end of
///              the macro sequence.
/// \arg \c addr A 32-bit immediate address, which may be either an
///         absolute or relocatable expression.
///
/// Forms:
/// 
/// \b _lbzi \a dreg, \a areg, \a addr - Load Byte and Zero from Immediate address \n
/// \b _lhzi \a dreg, \a areg, \a addr - Load Halfword and Zero from Immediate address \n
/// \b _lwzi \a dreg, \a areg, \a addr - Load Word and Zero from Immediate address \n
/// 
///
/// \section _stxi _st<w,h,b>i - STore register to Immediate address
///
/// These macros encapsulate the 2-instruction sequence required to
/// store to a 32-bit immediate address.
///
/// \arg \c dreg The register to store.
/// \arg \c areg A register to hold the immediate address.  This can \e
///              not be register 0, and can not be the same as \a dreg.
///              Note that \a areg will contain the address at the end of
///              the macro sequence.
/// \arg \c addr A 32-bit immediate address, which may be either an
///         absolute or relocatable expression.
///
/// Forms:
/// 
/// \b _stbi \a dreg, \a areg, \a addr - STore Byte to Immediate address \n
/// \b _sthi \a dreg, \a areg, \a addr - STore Halfword to Immediate address \n
/// \b _stwi \a dreg, \a areg, \a addr - STore Word to Immediate address \n
///
///
/// \section _lstzsd _<l,st><w,h,b><z>sd - Load/STore register from/to Small Data area
///
/// These macros encapsulate the small data area relocations for access
/// to storage in the small data sections .sbss, .sdata, .sbss2 and
/// .sdata2.  Use of these macros implies small data area support in
/// the compile environment (for variables shared between compiled and
/// assembled code) and initialization code that sets up the small data
/// area registers R13 (and optionally R2).
///
/// The relocations generated by this macro will work for both SVR4 ABI
/// and EABI environments.  In particular, for EABI environments
/// the link editor will insert offsets to either R13 or R2 depending
/// on the section of the symbol.
///
/// \arg \c dreg The register to load or store.
/// \arg \c addr A 32-bit immediate address, assumed to be a
///              relocatable address in one of the small data sections.
///
/// Forms:      
/// 
/// \b _lbzsd \a dreg, \a addr  - Load Byte and Zero from Small Data area \n
/// \b _lhzsd \a dreg, \a addr  - Load Halfword and Zero from Small Data area \n
/// \b _lwzsd \a dreg, \a addr  - Load Word and Zero from Small Data area \n
/// \b _stbsd \a dreg, \a addr  - STore Byte to Small Data area \n
/// \b _sthsd \a dreg, \a addr  - STore Halfword to Small Data area \n
/// \b _stwsd \a dreg, \a addr  - STore Word to Small Data area \n
///
///
/// \section _liw _liw<a> - Load Immediate Word (Absolute)
///
/// These macros encapsulate the two instructions required to load a
/// 32-bit immediate value into a register.  If the immediate is an
/// absolute expression, then the \c 'a' form may be able to optimize
/// to a single instruction depending on whether only the high- or
/// low-order bits of the immediate are non-zero.
///
/// Forms:
///
/// \b _liw  \a rd, \a imm - Load register \a rd with the 32-bit immediate \a imm \n
/// \b _liwa \a rd, \a imm - Load register \a rd with the 32-bit absolute immediate \a imm \n
///
///
/// \section _oriwa _oriwa - OR Immediate Word Absolute
///
/// This macro encapsulates the logical OR of a 32-bit immediate with a
/// register. The immediate value must be an absolute expression.
///
/// The PowerPC has instructions for OR-ing 16-bit immediates into the
/// upper (\c oris) and lower (\c ori) portions of a register.  This
/// macro optimizes the generated code based on which bits (if any) of
/// the absolute immediate are non-zero.
///
/// This special macro is only provided for the OR function. For other
/// logical operations and recording forms it is necessary in general
/// to first load the 32-bit immediate into a register (e.g., with \c
/// _liwa) then perform the logical operation.
///
/// \arg \c rd The destination register; at the end will contain \c rs
///            OR \a imm
/// \arg \c rs The source register.
/// \arg \c imm 32-bit absolute expression.
///
/// Forms:
///
/// \b _oriwa \a rd, \a rs, \a imm - \a rd gets \a rs OR \a imm \n
///
///
/// \section _incr64_fast _incr64_fast - 64-bit increment for fast interrupt handlers
///
/// This macro implements a 64-bit counter update for fast interrupt handlers,
/// which are forbidden from using the carry bit in the XER (without
/// saving/restoring it).
///
/// \arg \c rs Scratch register
/// \arg \c ra Register containing the counter address at entry
///
/// \a rs and \a ra must be unique.  At the end of the macro the count
/// is updated to memory and \a ra is unmodified. 
///        
///
/// \section _setclear_bits Set/Clear/Copy Bits from Immediate Positions
///
///  There are situations where it is easier/faster to clear individual bits
///  and bit fields, set bits or copy fields, based on immediate bit numbers
///  and locations, rather than loading masks, since setting up a mask
///  requires 2 instructions in general, whereas these macros generate a single
///  instruction.
///
/// \arg \c rd - The destination register
/// \arg \c rs - The source register
/// \arg \c n - An immediate size of a bit field, in the range 0 to 32
/// \arg \c b - An immediate big-endian bit number in the range 0 to 31
///
/// Forms:
///
/// \b _clrfield \a rd, \a rs, \a n, \a b - Clear an \a n bit field from \a rs
/// to \a rd starting from bit \a b \n
/// \b _clrbit \a rd, \a rs, \a b - Clear bit \a b \n
/// \b _setbit \a rd, \a rs, \a b - Set bit \a b \n
/// \b _copyfield \a rd, \a rs, \a n, \a b - Copy an n-bit field from \a rs to
/// \a rd starting from bit \a b \n
///
///     
/// \section pseudo_ops Assembler Pseudo-Ops Macros
///
/// Several macros define new 'pseudo-ops'.
///
/// \subsection cache_align .cache_align
///
/// The \c .cache_align pseudo-op is used to force alignment on a
/// cache-line boundary.  It requires a preprocessor symbol definition for
/// \c LOG_CACHE_LINE_SIZE
///
/// Forms:
///
/// \b .cache_align \n
///
///
/// \subsection global_function Local and Global Functions
///
/// The \c .function and \c .global_function pseudo-ops define function
/// symbols in the \c .text section.
///
/// Forms:
///
/// \b .function \a symbol - Define a local function \a symbol \n
/// \b .global_function \a symbol - Define a global function \a symbol \n
///
/// 
/// \subsection epilogue .epilogue
///
/// The \c .epilogue pseudo-op adds size and type information for
/// functions defined in assembler.
///
/// \arg \c symbol - Assembler epilogue for the function \a symbol.
///
/// Forms:
///
/// \b .epilogue \a symbol \n
///
///
/// \cond

#ifdef __ASSEMBLER__


### ****************************************************************************
### _l<b,h,w>zi
### _st<b,h,w>i
### ****************************************************************************

        ### Load Byte and Zero from a 32-bit immediate address.
        ###   dreg - register that receives the zero-extended byte
        ###   areg - address scratch register; must not be register 0
        ###   addr - absolute or relocatable 32-bit address
        ### The upper half is formed with @ha so that the sign-extended @l
        ### displacement of the load lands on the exact address.
        .macro  _lbzi dreg, areg, addr
        lis     \areg, \addr@ha
        .ifnc   \areg, \dreg
        ### Distinct registers: the update form leaves the address in areg.
        lbzu    \dreg, \addr@l(\areg)
        .else
        ### Same register: plain load, the data replaces the address.
        lbz     \dreg, \addr@l(\areg)
        .endif
        .endm
        
        ### Load Halfword and Zero from a 32-bit immediate address.
        ###   dreg - register that receives the zero-extended halfword
        ###   areg - address scratch register; must not be register 0
        ###   addr - absolute or relocatable 32-bit address
        ### The upper half is formed with @ha so that the sign-extended @l
        ### displacement of the load lands on the exact address.
        .macro  _lhzi dreg, areg, addr
        lis     \areg, \addr@ha
        .ifnc   \areg, \dreg
        ### Distinct registers: the update form leaves the address in areg.
        lhzu    \dreg, \addr@l(\areg)
        .else
        ### Same register: plain load, the data replaces the address.
        lhz     \dreg, \addr@l(\areg)
        .endif
        .endm
        
        ### Load Word and Zero from a 32-bit immediate address.
        ###   dreg - register that receives the word
        ###   areg - address scratch register; must not be register 0
        ###   addr - absolute or relocatable 32-bit address
        ### The upper half is formed with @ha so that the sign-extended @l
        ### displacement of the load lands on the exact address.
        .macro  _lwzi dreg, areg, addr
        lis     \areg, \addr@ha
        .ifnc   \areg, \dreg
        ### Distinct registers: the update form leaves the address in areg.
        lwzu    \dreg, \addr@l(\areg)
        .else
        ### Same register: plain load, the data replaces the address.
        lwz     \dreg, \addr@l(\areg)
        .endif
        .endm

        ### STore Byte to a 32-bit immediate address.
        ###   dreg - register holding the byte to store
        ###   areg - address scratch register; must differ from dreg and
        ###          must not be register 0
        ###   addr - absolute or relocatable 32-bit address
        ### On exit areg holds addr, because the update-form store is used.
        ### The register check is a textual comparison of the two arguments.
        .macro  _stbi dreg, areg, addr
        .ifc    \areg, \dreg
        .error  "_stbi: areg and dreg must be different registers"
        .endif
        lis     \areg, \addr@ha
        stbu    \dreg, \addr@l(\areg)
        .endm
        
        ### STore Halfword to a 32-bit immediate address.
        ###   dreg - register holding the halfword to store
        ###   areg - address scratch register; must differ from dreg and
        ###          must not be register 0
        ###   addr - absolute or relocatable 32-bit address
        ### On exit areg holds addr, because the update-form store is used.
        ### The register check is a textual comparison of the two arguments.
        .macro  _sthi dreg, areg, addr
        .ifc    \areg, \dreg
        .error  "_sthi: areg and dreg must be different registers"
        .endif
        lis     \areg, \addr@ha
        sthu    \dreg, \addr@l(\areg)
        .endm
        
        ### STore Word to a 32-bit immediate address.
        ###   dreg - register holding the word to store
        ###   areg - address scratch register; must differ from dreg and
        ###          must not be register 0
        ###   addr - absolute or relocatable 32-bit address
        ### On exit areg holds addr, because the update-form store is used.
        ### The register check is a textual comparison of the two arguments.
        .macro  _stwi dreg, areg, addr
        .ifc    \areg, \dreg
        .error  "_stwi: areg and dreg must be different registers"
        .endif
        lis     \areg, \addr@ha
        stwu    \dreg, \addr@l(\areg)
        .endm


### ****************************************************************************
### _l<b,h,w>zsd
### _st<b,h,w>sd
### ****************************************************************************

        ### Load Byte and Zero from the small data area.
        ###   dreg - register that receives the zero-extended byte
        ###   addr - relocatable address in one of the small data sections
        ### The base register is written as 0; the @sda21 relocation leaves
        ### the choice of base register and offset to the link editor.
        .macro  _lbzsd  dreg, addr
        lbz     \dreg, \addr@sda21(0)
        .endm

        ### Load Halfword and Zero from the small data area.
        ###   dreg - register that receives the zero-extended halfword
        ###   addr - relocatable address in one of the small data sections
        ### The base register is written as 0; the @sda21 relocation leaves
        ### the choice of base register and offset to the link editor.
        .macro  _lhzsd  dreg, addr
        lhz     \dreg, \addr@sda21(0)
        .endm

        ### Load Word and Zero from the small data area.
        ###   dreg - register that receives the word
        ###   addr - relocatable address in one of the small data sections
        ### The base register is written as 0; the @sda21 relocation leaves
        ### the choice of base register and offset to the link editor.
        .macro  _lwzsd  dreg, addr
        lwz     \dreg, \addr@sda21(0)
        .endm

        ### STore Byte to the small data area.
        ###   dreg - register holding the byte to store
        ###   addr - relocatable address in one of the small data sections
        ### The base register is written as 0; the @sda21 relocation leaves
        ### the choice of base register and offset to the link editor.
        .macro  _stbsd  dreg, addr
        stb     \dreg, \addr@sda21(0)
        .endm

        ### STore Halfword to the small data area.
        ###   dreg - register holding the halfword to store
        ###   addr - relocatable address in one of the small data sections
        ### The base register is written as 0; the @sda21 relocation leaves
        ### the choice of base register and offset to the link editor.
        .macro  _sthsd  dreg, addr
        sth     \dreg, \addr@sda21(0)
        .endm

        ### STore Word to the small data area.
        ###   dreg - register holding the word to store
        ###   addr - relocatable address in one of the small data sections
        ### The base register is written as 0; the @sda21 relocation leaves
        ### the choice of base register and offset to the link editor.
        .macro  _stwsd  dreg, addr
        stw     \dreg, \addr@sda21(0)
        .endm


### ****************************************************************************
### _liw<a>
### _oriwa
### ****************************************************************************

        ### Load Immediate Word: set rd to the 32-bit value imm, which may be
        ### an absolute or a relocatable expression.  Always two instructions.
        ###   rd  - destination register
        ###   imm - 32-bit immediate
        ### The upper half uses @h (not @ha) because the lower half is merged
        ### with ori, which does not sign-extend its immediate.
        .macro  _liw    rd, imm
        lis     \rd, \imm@h
        ori     \rd, \rd, \imm@l
        .endm

        ### Load Immediate Word Absolute: set rd to the absolute 32-bit value
        ### imm using the shortest sequence available.
        ###   rd  - destination register
        ###   imm - absolute expression
        ### Cases:
        ###   upper half non-zero, lower half zero     : lis
        ###   upper half non-zero, lower half non-zero : lis + ori
        ###   upper half zero, bit 15 clear            : li
        ###   upper half zero, bit 15 set              : li 0 + ori
        ### The last case exists because li sign-extends its 16-bit operand,
        ### so a lone li of 0x8000..0xffff would load 0xffff8000..0xffffffff.
        ### imm is parenthesised in the tests so that an expression argument
        ### is masked as a whole.
        .macro  _liwa   rd, imm
        .if     ((\imm) & 0xffff0000)
        lis     \rd, \imm@h
                .if     ((\imm) & 0xffff)
                ori     \rd, \rd, \imm@l
                .endif
        .else
                .if     ((\imm) & 0x8000)
                li      \rd, 0
                ori     \rd, \rd, \imm@l
                .else
                li      \rd, \imm@l
                .endif
        .endif
        .endm

        ### OR Immediate Word Absolute: rd = rs OR imm for an absolute 32-bit
        ### imm, emitting only the halves that are needed.
        ###   rd  - destination register
        ###   rs  - source register
        ###   imm - absolute expression
        ### When the upper half is zero a single ori is emitted (this also
        ### covers imm == 0, where it acts as a register copy).  Otherwise
        ### oris handles the upper half and ori follows only if the lower
        ### half has bits set.
        .macro  _oriwa  rd, rs, imm
        .if     (\imm & 0xffff0000) == 0
        ori     \rd, \rs, \imm@l
        .else
        oris    \rd, \rs, \imm@h
                .if     (\imm & 0xffff)
                ori     \rd, \rd, \imm@l
                .endif
        .endif
        .endm

### ****************************************************************************
### _incr64_fast
### ****************************************************************************

        ### Increment the 64-bit counter stored at ra (high word at 0(ra),
        ### low word at 4(ra)) without using the carry bit in the XER.
        ###   rs - scratch register, overwritten
        ###   ra - register holding the counter address, left unmodified
        ### The low word is incremented and stored first; the high word is
        ### incremented only when the low word wrapped to zero.
        ### NOTE(review): cmpwi is written without a CR field, so the result
        ### of the compare lands in the default field (CR0).  addi treats a
        ### source of register 0 as the literal zero, so rs presumably must
        ### not be r0 - confirm against callers.
        .macro  _incr64_fast, rs:req, ra:req

        lwz     \rs, 4(\ra)
        addi    \rs, \rs, 1
        cmpwi   \rs, 0
        stw     \rs, 4(\ra)
        ### Low word did not wrap: the high word is unchanged, skip it.
        bne     233643278f

        lwz     \rs, 0(\ra)
        addi    \rs, \rs, 1
        stw     \rs, 0(\ra)
233643278:              

        .endm        

### ****************************************************************************
### _clrfield
### _clrbit
### _setbit
### _copyfield
### ****************************************************************************

        ### Clear an n-bit field of rs starting at big-endian bit b and put
        ### the result in rd.
        ###   rd - destination register
        ###   rs - source register
        ###   n  - immediate field width, 0 to 32
        ###   b  - immediate big-endian starting bit number, 0 to 31
        ### The rlwinm mask keeps bits (b + n) through (b - 1), modulo 32, so
        ### a field that runs past bit 31 wraps around to bit 0.  For n == 0
        ### the mask is all ones and rd is a copy of rs.  For n == 32 the same
        ### wrapped mask would also be all ones and clear nothing, so that
        ### width is handled separately by loading zero.
        .macro  _clrfield, rd, rs, n, b
        .if     (\n) == 32
        li      \rd, 0
        .else
        rlwinm   \rd, \rs, 0, (\b + \n) & 0x1f, (\b - 1) & 0x1f
        .endif
        .endm           

        ### Clear the single big-endian bit b of rs and put the result in rd.
        ###   rd - destination register
        ###   rs - source register
        ###   b  - immediate big-endian bit number, 0 to 31
        ### This is the one-bit field clear written out directly: the mask
        ### keeps every bit from (b + 1) around to (b - 1), modulo 32.
        .macro  _clrbit, rd, rs, b
        rlwinm   \rd, \rs, 0, (\b + 1) & 0x1f, (\b - 1) & 0x1f
        .endm

        ### Set the single big-endian bit b of rs and put the result in rd.
        ###   rd - destination register
        ###   rs - source register
        ###   b  - immediate big-endian bit number, 0 to 31
        ### Bits 16..31 live in the lower halfword and are set with ori;
        ### bits 0..15 live in the upper halfword and are set with oris.
        .macro  _setbit, rd, rs, b
        .ifgt   \b - 15
        ori     \rd, \rs, 1 << (31 - \b)
        .else
        oris    \rd, \rs, 1 << (15 - \b)
        .endif
        .endm

        ### Copy an n-bit field starting at big-endian bit b from rs into the
        ### same bit positions of rd; the other bits of rd are preserved.
        ###   rd - destination register, partially overwritten
        ###   rs - source register
        ###   n  - immediate field width
        ###   b  - immediate big-endian starting bit number
        ### The mask end (b + n - 1) is not reduced modulo 32 here, so the
        ### field is expected to end at or before bit 31.
        .macro  _copyfield, rd, rs, n, b
        rlwimi  \rd, \rs, 0, \b , (\b + \n - 1)
        .endm

### ****************************************************************************
### .cache_align
### .<global_>function
### .epilogue
### ****************************************************************************

        ### Capture the preprocessor symbol LOG_CACHE_LINE_SIZE as an
        ### assembler symbol.  The including file must define it before this
        ### header is processed.
        .set    _log_cache_line_size, LOG_CACHE_LINE_SIZE

        ### Pseudo-op: align the location counter using the captured
        ### log2 cache line size as the .align operand.
        .macro  .cache_align
        .align  _log_cache_line_size
        .endm

        ### Pseudo-op: begin a local function.  Switches to the .text section
        ### and applies .align 2.
        ###   symbol - name of the function; not referenced by the macro body,
        ###            so the caller still writes the label after this
        ###            pseudo-op.
        .macro  .function symbol
        .text
        .align  2
        .endm

        ### Pseudo-op: begin a global function.  Switches to the .text
        ### section, applies .align 2 and marks symbol as global.
        ###   symbol - name of the function; the label itself is not emitted
        ###            here, so the caller still writes it after this
        ###            pseudo-op.
        .macro  .global_function symbol
        .text
        .align  2
        .global \symbol
        .endm

        ### Pseudo-op: close a function written in assembler by recording its
        ### symbol type and size.  The size is measured from symbol to the
        ### current location, so this belongs directly after the last
        ### instruction of the function.
        ###   symbol - name of the function being closed
        .macro .epilogue symbol
        .type   \symbol, @function
        .size   \symbol, . - \symbol
        .endm           

### ***************************************************************************
###                                 64-bit macros
### ***************************************************************************

### ***************************************************************************
### Using symbols for register names makes the code more readable and allows
### us to do register arithmetic within macros.
### ***************************************************************************

### General purpose register names as plain numbers.  Only the registers
### listed here get a symbolic name from this header.
.equiv  r0, 0
.equiv  r1, 1
### sp is an alias for r1.
.equiv  sp, 1
.equiv  r3, 3
.equiv  r4, 4
.equiv  r5, 5
.equiv  r6, 6
.equiv  r7, 7
.equiv  r8, 8
.equiv  r9, 9
.equiv  r10, 10

.equiv  r28, 28
.equiv  r29, 29
.equiv  r30, 30
.equiv  r31, 31

### Virtual doubleword register names.  dN has the same number as rN; the
### 64-bit macros below use register N for the high word and register
### (N + 1) modulo 32 for the low word.
.equiv  d3, 3
.equiv  d4, 4
.equiv  d5, 5
.equiv  d6, 6
.equiv  d7, 7
.equiv  d8, 8
.equiv  d9, 9
.equiv  d10, 10
.equiv  d28, 28
.equiv  d29, 29
.equiv  d30, 30
.equiv  d31, 31

### ***************************************************************************
### Load virtual doubleword generic. Load a virtual doubleword from a relocatable
### address expression. If the optional RA is specified, the address remains in
### RA.
### ***************************************************************************
###   DT   - virtual doubleword register that receives the data
###   addr - relocatable address expression
###   RA   - optional address register; the default of -1 means "not given"
### With RA the update-form load is used, which leaves the address in RA.
### Without RA the target register itself carries the upper address half
### and is then overwritten by the loaded data.
.macro _lvdg DT:req addr:req RA=-1
    .if \RA != -1
        lis     \RA, (\addr)@ha
        lvdu    \DT, (\addr)@l(\RA)
    .else
        lis     \DT, (\addr)@ha
        lvd     \DT, (\addr)@l(\DT)
    .endif
.endm

### ***************************************************************************
### Load virtual doubleword from a relocatable small data area address
### ***************************************************************************
###   DT   - virtual doubleword register that receives the data
###   addr - relocatable address in one of the small data sections
### The base register is written as 0 and resolved through the @sda21
### relocation, as in the word-sized small data macros.
.macro _lvdsd DT:req addr:req
    lvd     \DT, (\addr)@sda21(0)
.endm

### ***************************************************************************
### Store virtual doubleword generic. Store a virtual doubleword based on a
### relocatable address expression. The address remains in RA.
### ***************************************************************************
###   DS   - virtual doubleword register to store
###   addr - relocatable address expression
###   RA   - address register (required); holds addr on exit because the
###          update-form store is used
### NOTE(review): RA is overwritten before the store, so it presumably must
### not be one of the two registers that make up DS - nothing here checks it.
.macro _stvdg DS:req addr:req RA:req
    lis     \RA, (\addr)@ha
    stvdu   \DS, (\addr)@l(\RA)
.endm

### ***************************************************************************
### Store virtual doubleword to a relocatable small data address expression
### ***************************************************************************
###   DS   - virtual doubleword register to store
###   addr - relocatable address in one of the small data sections
### The base register is written as 0 and resolved through the @sda21
### relocation, as in the word-sized small data macros.
.macro _stvdsd DS:req addr:req
    stvd    \DS, (\addr)@sda21(0)
.endm

### ***************************************************************************
###  Load virtual doubleword absolute. Set DT to an absolute 64-bit constant
### ***************************************************************************
###   DT     - virtual doubleword register that receives the constant
###   cvalue - absolute 64-bit expression
### The low 32 bits go to register (DT + 1) modulo 32 and the high 32 bits
### to register DT, each loaded with the two-instruction _liw sequence.
### The previous body used the mnemonic lwa, which this header never
### defines and which as a PowerPC instruction is a memory load rather
### than a load-immediate.  The arguments passed to _liw are written
### without blanks and fully parenthesised so that each one is taken as a
### single macro argument and the @h and @l operators apply to the whole
### value.
.macro _lvda DT, cvalue
    _liw    (\DT+1)%32, ((\cvalue)&0x00000000ffffffff)
    _liw    \DT, ((\cvalue)>>32)
.endm

### ***************************************************************************
###
###                         64-bit arithmetic macros
###
### ***************************************************************************

### Assemble-time check that two virtual doubleword registers are either
### the same register pair or share no register at all.
###   DA, DB - virtual doubleword register numbers, 0 to 31
### A doubleword N occupies registers N and (N + 1) modulo 32, so two
### doublewords partially overlap exactly when their numbers differ by one
### modulo 32.  The difference is biased by 32 before the modulo so that it
### is never negative; this makes the wrap-around pair 31 and 0 (difference
### of +31 or -31) fail the check just like any other neighbouring pair.
.macro check_overlap2 DA, DB
    .if ((((\DA) - (\DB)) + 32) % 32) == 1 || ((((\DA) - (\DB)) + 32) % 32) == 31
        .error "virtual doubleword registers must be identical or non-overlapping"
    .endif
.endm

### Assemble-time overlap check for three virtual doubleword registers:
### every pair among DA, DB and DC is passed through check_overlap2.
.macro check_overlap3 DA, DB, DC
    check_overlap2 \DA, \DB
    check_overlap2 \DA, \DC
    check_overlap2 \DB, \DC
.endm

### ***************************************************************************
###  Add virtual doubleword carrying
### ***************************************************************************
###   DT - target virtual doubleword, DT = DA + DB
###   DA, DB - source virtual doublewords
### The low words (register number + 1, modulo 32) are added with addc and
### the high words with adde, which adds in the carry from the low words.
### Operands must be identical or non-overlapping (checked at assembly).
.macro _addvdc   DT, DA, DB
    check_overlap3 \DT, \DA, \DB
    addc    (\DT+1)%32, (\DA+1)%32, (\DB+1)%32
    adde    \DT, \DA, \DB
.endm

### ***************************************************************************
###  Add virtual doubleword to signed 16-bit immediate carrying
### ***************************************************************************
###   DT - target virtual doubleword, DT = DA + sign-extended SI
###   DA - source virtual doubleword; d31 is rejected
###   SI - signed 16-bit absolute immediate
### The low word is added with addic so that the carry out is recorded;
### plain addi does not record a carry, which would leave the following
### high-word instruction working from a stale carry bit.
### The high word of the sign-extended immediate is all zeros for SI >= 0
### and all ones for SI < 0, so the high word is completed with addze in
### the first case and addme in the second.
### DT and DA must be identical or non-overlapping (checked at assembly).
.macro _addvdic  DT, DA, SI
    .if \DA == 31
        .error "d31 for addend register is not supported"
    .endif
    check_overlap2 \DT, \DA
    addic   (\DT+1)%32, \DA+1, \SI
    .if (\SI) < 0
        addme   \DT, \DA
    .else
        addze   \DT, \DA
    .endif
.endm

### ***************************************************************************
###  Add virtual doubleword to unsigned word carrying
### ***************************************************************************
###   DT - target virtual doubleword, DT = DA + zero-extended RB
###   DA - source virtual doubleword
###   RB - register holding the unsigned 32-bit addend
### The low word of DA is added to RB with addc; addze then adds only the
### resulting carry into the high word.
### DT and DA must be identical or non-overlapping (checked at assembly).
### NOTE(review): RB is not part of the overlap check.
.macro _addvdwuc DT, DA, RB
    check_overlap2 \DT, \DA
    addc    (\DT+1)%32, (\DA+1)%32, \RB
    addze   \DT, \DA
.endm

### ***************************************************************************
###  Subtract virtual doubleword carrying
### ***************************************************************************
###   DT - target virtual doubleword
###   DA, DB - source virtual doublewords
### The operands are handed to subfc and subfe in the order DT, DA, DB.
### The subtract-from instructions compute their third operand minus their
### second, so as written this yields DT = DB - DA.
### NOTE(review): confirm that this operand order is what callers expect.
### Operands must be identical or non-overlapping (checked at assembly).
.macro _subvdc   DT, DA, DB
    check_overlap3 \DT, \DA, \DB
    subfc   (\DT+1)%32, (\DA+1)%32, (\DB+1)%32
    subfe   \DT, \DA, \DB
.endm

### ***************************************************************************
### 
###                             64-bit logic macros
### 
### ***************************************************************************

### ***************************************************************************
###  AND virtual doubleword
### ***************************************************************************
###   DT = DA AND DB, done as two 32-bit operations: first the low words
###   (register number + 1, modulo 32), then the high words.
### Operands must be identical or non-overlapping (checked at assembly).
.macro _andvd DT, DA, DB
    check_overlap3 \DT, \DA, \DB
    and (\DT+1)%32, (\DA+1)%32, (\DB+1)%32
    and \DT, \DA, \DB
.endm

### ***************************************************************************
###  ANDC virtual doubleword
### ***************************************************************************
###   DT = DA AND (NOT DB), done as two 32-bit andc operations: first the
###   low words (register number + 1, modulo 32), then the high words.
### Operands must be identical or non-overlapping (checked at assembly).
.macro _andcvd DT, DA, DB
    check_overlap3 \DT, \DA, \DB
    andc    (\DT+1)%32, (\DA+1)%32, (\DB+1)%32
    andc    \DT, \DA, \DB
.endm

### ***************************************************************************
###  EQV virtual doubleword
### ***************************************************************************
###   DT = NOT (DA XOR DB), done as two 32-bit eqv operations: first the
###   low words (register number + 1, modulo 32), then the high words.
### Operands must be identical or non-overlapping (checked at assembly).
.macro _eqvvd DT, DA, DB
    check_overlap3 \DT, \DA, \DB
    eqv (\DT+1)%32, (\DA+1)%32, (\DB+1)%32
    eqv \DT, \DA, \DB
.endm

### ***************************************************************************
###  OR virtual doubleword
### ***************************************************************************
###   DT = DA OR DB, done as two 32-bit operations: first the low words
###   (register number + 1, modulo 32), then the high words.
### Operands must be identical or non-overlapping (checked at assembly).
.macro _orvd DT, DA, DB
    check_overlap3 \DT, \DA, \DB
    or (\DT+1)%32, (\DA+1)%32, (\DB+1)%32
    or \DT, \DA, \DB
.endm

### ***************************************************************************
###  ORC virtual doubleword
### ***************************************************************************
###   DT = DA OR (NOT DB), done as two 32-bit orc operations: first the
###   low words (register number + 1, modulo 32), then the high words.
### Operands must be identical or non-overlapping (checked at assembly).
.macro _orcvd DT, DA, DB
    check_overlap3 \DT, \DA, \DB
    orc (\DT+1)%32, (\DA+1)%32, (\DB+1)%32
    orc \DT, \DA, \DB
.endm

### ***************************************************************************
###  XOR virtual doubleword
### ***************************************************************************
###   DT = DA XOR DB, done as two 32-bit operations: first the low words
###   (register number + 1, modulo 32), then the high words.
### Operands must be identical or non-overlapping (checked at assembly).
.macro _xorvd DT, DA, DB
    check_overlap3 \DT, \DA, \DB
    xor (\DT+1)%32, (\DA+1)%32, (\DB+1)%32
    xor \DT, \DA, \DB
.endm

#endif /* __ASSEMBLER__ */

/// \endcond
        
// Local Variables:
// mode:asm
// End:
        
#endif /* __PPE42_ASM_H__ */