summaryrefslogtreecommitdiffstats
path: root/src/lib/memcpy.c
blob: ab508ea8fce13d0361a9168f1b44dffa273b0568 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
// $Id: memcpy.c,v 1.1.1.1 2013/12/11 20:49:20 bcbrock Exp $
// $Source: /afs/awd/projects/eclipz/KnowledgeBase/.cvsroot/eclipz/chips/p8/working/procedures/lib/memcpy.c,v $
//-----------------------------------------------------------------------------
// *! (C) Copyright International Business Machines Corp. 2013
// *! All Rights Reserved -- Property of IBM
// *! *** IBM Confidential ***
//-----------------------------------------------------------------------------

/// \file memcpy.c
/// \brief The memcpy() function

#include "ssx.h"

/// The memcpy() function copies \a n bytes from memory area \a src to memory
/// area \a dest.  The memory areas should not overlap.  Use memmove(3) if the
/// memory areas do overlap. The memcpy() function returns a pointer to dest.

// This implementation should work well for both 32-bit and 64-bit machines,
// assuming they can handle unaligned accesses. The implementation assumes that
// it is better to avoid the loop setup overhead by a test and branch for
// cases where loops can be bypassed.

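// For reference, the simple bytewise equivalent (kept commented out):
//
//void *
//memcpy(void *dest, const void *src, size_t n)
//{
//    uint8_t *d = (uint8_t *)dest;
//    const uint8_t *s = (const uint8_t *)src;
//
//    while(n--) {
//      *d++ = *s++;
//    }
//
//    return dest;
//}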

// Implementation notes:
//
// The copy proceeds in three phases, always from the lowest address upward:
// unrolled groups of four 64-bit words (32 bytes per iteration), then single
// 64-bit words (at most 3 iterations), then the final 0-7 bytes bytewise.
// A size of 0 copies nothing.  The original \a dest pointer is returned.
//
// The source is only ever read, so it is accessed through pointers to const
// instead of casting the const qualifier away.
//
// NOTE(review): the 64-bit accesses use \a dest and \a src exactly as passed,
// with no alignment adjustment.  This relies on the target tolerating
// unaligned doubleword accesses, as stated in the comments at the top of this
// file -- confirm before reusing on another target.

void *
memcpy(void *dest, const void *src, size_t n)
{
    uint64_t *d64 = (uint64_t *)dest;
    const uint64_t *s64 = (const uint64_t *)src;
    size_t doublewords, octawords;

    // First copy memory 32 bytes at a time. The test before the loop skips
    // the loop setup entirely for short copies.

    octawords = n / 32;
    if (octawords) {
        n -= octawords * 32;
        while (octawords--) {
            *d64++ = *s64++;
            *d64++ = *s64++;
            *d64++ = *s64++;
            *d64++ = *s64++;
        }
    }

    // Now copy memory 8 bytes at a time. This might actually be better done
    // explicitly rather than as a loop because the maximum loop count is 3
    // here.

    doublewords = n / 8;
    if (doublewords) {
        n -= doublewords * 8;
        while (doublewords--) {
            *d64++ = *s64++;
        }
    }

    // Finally finish any remaining memory bytewise (at most 7 bytes),
    // continuing from where the doubleword copy stopped.

    if (n) {
        uint8_t *d8 = (uint8_t *)d64;
        const uint8_t *s8 = (const uint8_t *)s64;

        while (n--) {
            *d8++ = *s8++;
        }
    }

    return dest;
}
OpenPOWER on IntegriCloud