summaryrefslogtreecommitdiffstats
path: root/src/include/usr/ecmddatabuffer/prdfCompressBuffer.H
blob: 9cf927f84449cf24b013ed391e5ac19a4c8e972d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* $Source: src/include/usr/ecmddatabuffer/prdfCompressBuffer.H $         */
/*                                                                        */
/* OpenPOWER HostBoot Project                                             */
/*                                                                        */
/* COPYRIGHT International Business Machines Corp. 2011,2014              */
/*                                                                        */
/* Licensed under the Apache License, Version 2.0 (the "License");        */
/* you may not use this file except in compliance with the License.       */
/* You may obtain a copy of the License at                                */
/*                                                                        */
/*     http://www.apache.org/licenses/LICENSE-2.0                         */
/*                                                                        */
/* Unless required by applicable law or agreed to in writing, software    */
/* distributed under the License is distributed on an "AS IS" BASIS,      */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        */
/* implied. See the License for the specific language governing           */
/* permissions and limitations under the License.                         */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */

/**
 * @file prdfCompressBuffer.H
 *
 * @brief Functions to provide the compression/decompression algorithms
 *
 */

#ifndef __PRDFCOMPRESSBUFFER_H
#define __PRDFCOMPRESSBUFFER_H

//--------------------------------------------------------------------
// Includes
//--------------------------------------------------------------------

#ifdef _AIX
#include <inttypes.h>
#else
#include <stdint.h>
#endif
#include <string.h>

#ifndef   MIN
#define   MIN(a, b) ( ((a) < (b)) ? (a) : (b) )
#endif

/*
 * Prdf Compression Algorithm:
 *        The purpose of these compression routines are to compress the register
 * dumps contained in our error logs.  In large systems, we could possibly have
 * more register data than we could possibly fit in an error log.  These
 * routines will allow us to fit more data into the error logs.  In addition,
 * the algorithms have been designed such that even if the end of the stream
 * is lost (cut short), we can uncompress all of the data up to that point.  We
 * had proposed using the Zlib compression algorithms, but they required the
 * CRC to match and we did not want that requirement.
 *
 *         This compression algorithm is based off the LZ77 compression algorithm.
 * The algorithm consists of a look-behind buffer of 1024 bytes, and a
 * look-ahead buffer of 63 bytes.  The algorithm attempts to find a match from
 * the start of the look-ahead buffer located inside the look-behind buffer.
 * If the longest match is bigger than 2 bytes (2 bytes is the break-even
 * point), it converts the match into a token (pos in look-behind, length) of
 * two bytes (12 bits to pos, 6 to length).  If no match is found, the first
 * character is popped from the look-ahead buffer.  As matches are found (or
 * not found) and popped from the look-ahead buffer, they are added to the
 * end of the look-behind buffer (if the buffer increases over 1024, the start
 * is shifted forward).
 *
 *         As the stream (look-ahead buffer) is converted into tokens, they are
 * bundled into groups of 8.  A special token is added to the beginning of the
 * bundle, recording the size of the following 8 tokens (2 or 1 bytes).  Once
 * the token bundle is complete (or end-of-stream), it is added to the output
 * buffer.
 *
 *        To use these routines, #define PRDF_COMPRESSBUFFER_UNCOMPRESS_FUNCTIONS
 * or PRDF_COMPRESSBUFFER_COMPRESS_FUNCTIONS, depending on usage.  These reduce
 * the code footprint, since only the needed functions are compiled in.
 *
 */

namespace PrdfCompressBuffer
{
    /* size_t compressedBufferMax(size_t)
     *                Determines the maximum size of the compressed buffer (worst
     * case) based on an input buffer size.
     */
    #ifdef PRDF_COMPRESSBUFFER_COMPRESS_FUNCTIONS
    size_t compressedBufferMax(size_t i_bufSize)
        {
            return 1 + ((i_bufSize * 9) / 8);
        };
    #endif

    #if defined PRDF_COMPRESSBUFFER_COMPRESS_FUNCTIONS   || \
        defined PRDF_COMPRESSBUFFER_UNCOMPRESS_FUNCTIONS
    static size_t COMPRESSION_BREAKEVEN = 3;
    #endif

    /* class CompressedToken
     *                Internal class for converting (pos,size) tokens to two char.
     */
    class CompressedToken
    {
        private:
            uint8_t token[2];

        public:
            #ifdef PRDF_COMPRESSBUFFER_UNCOMPRESS_FUNCTIONS
            // Default constructor.
            CompressedToken() {};
            #endif

            #ifdef PRDF_COMPRESSBUFFER_COMPRESS_FUNCTIONS
            /* CompressedToken(size_t, size_t)
             *                Convert position and size to uint8 tokens.
             */
            CompressedToken(size_t i_pos, size_t i_size)
            {
                uint16_t l_token = (i_pos << 6)
                                 | (i_size - COMPRESSION_BREAKEVEN);
                token[1] = l_token & 0xFF;
                token[0] = (l_token >> 8) & 0xFF;
            };
            #endif

            #ifdef PRDF_COMPRESSBUFFER_UNCOMPRESS_FUNCTIONS
            /* void uncompress(uint8_t *, uint8_t *&, size_t&)
             *                Convert uint8 tokens to pos,size values.  Changes
             * o_pos to be a pointer to the string inside the buffer and sets
             * o_size to be the size of the string.
             */
            void uncompress(uint8_t * i_buf, uint8_t * &o_pos, size_t &o_size)
            {
                uint16_t l_token = (token[0] << 8) | token[1];

                o_pos = &i_buf[(l_token & 0xFFC0) >> 6];
                o_size = (l_token & 0x3F) + COMPRESSION_BREAKEVEN;

                return;
            };
            /* void read(uint8_t *)
             *                 Read two bytes from the buffer to keep as tokens.
             * NOTE: Does not modify i_buf.
             */
            void read(uint8_t * i_buf)
            {
                token[0] = i_buf[0];
                token[1] = i_buf[1];
            };
            #endif

            #ifdef PRDF_COMPRESSBUFFER_COMPRESS_FUNCTIONS
            /* void write(uint8_t *)
             *                Copy tokens into the beginning of the buffer.
             */
            void write(uint8_t * o_buf) { memcpy(o_buf, token, 2); };
            #endif

            size_t size() { return 2; };

            /* static size_t minSize()
             *               This function returns minimum size of token.
             *
             * NOTE: This function will be used for buffer size validation.
             *       Currently token size is fixed in nature. But in future
             *       we may go for variable length tokens. Therefore we dont
             *       want to make function "size" static.
             */
            static size_t minSize() { return 2; };
    };

    #ifdef PRDF_COMPRESSBUFFER_COMPRESS_FUNCTIONS
    /* void compressBuffer(uint8_t *, size_t, uint8_t *, size_t &)
     *                Compresses i_buf and stores result into o_buf.
     *
     * i_buf         : pointer to input buffer.
     * i_size         : size of input buffer.
     * o_buf        : pointer to output buffer (supplied by caller).
     * o_size        : max size of output buffer.  After function, size of
     *                  compressed buffer.
     *
     * NOTE: The size of the output buffer should be 1 + ((i_size * 9) / 8)
     *             to guarentee no data is lost (worst case for compression).
     */
    void compressBuffer(uint8_t * i_buf, size_t i_size,
                        uint8_t * o_buf, size_t &o_size)
    {
        // Asserts.
        if ((i_buf == NULL) || (o_buf == NULL))
        {
            o_size = 0;
            return;
        }

        uint8_t * l_lookahead = i_buf; // Pointer to the look-behind buf.
        size_t l_laSize = 0;               // Size of look-behind.

        size_t l_tokPos = 0;               // Number of tokens in the bundle.
        uint8_t * l_tokC = o_buf++;    // Store compress bits and advance ptr.
        size_t l_outputSize = 1;       // start with l_tokC.

        while((i_size > 0) && (l_outputSize < o_size))
        {
            size_t l_curLen = 1;
            uint8_t * l_match = NULL;

            // Find the longest match. (2 is our break-even pt,
            // but 3 will provide better compression).
            for (size_t i = 3;
                 i < MIN(i_size, l_laSize) && (i < (64 + COMPRESSION_BREAKEVEN))
                 ; i++)
            {
                uint8_t * l_tmpMatch = NULL;

                l_tmpMatch = (uint8_t *) memmem(l_lookahead, l_laSize,
                                                i_buf, i);

                if (l_tmpMatch != NULL) // found match.
                {
                    l_match = l_tmpMatch;
                    l_curLen = i;
                }
                else
                    i = i_size + l_laSize; // abort for loop.
            }

            // Create token.
            if (l_match != NULL)
            {
                // found match, make sure there is room for the token.
                if (o_size - l_outputSize >= 2)
                {
                    CompressedToken l_token(l_match - l_lookahead, l_curLen);

                    l_token.write(o_buf);
                    o_buf += 2;
                    l_outputSize += 2;

                    (*l_tokC) = (*l_tokC << 1) | 0x1;
                    l_tokPos++;

                    i_buf += l_curLen;
                    l_laSize += l_curLen;
                    i_size -= l_curLen;
                }
                else
                {
                    l_outputSize = o_size;
                }
            }
            else
            {
                // no match, copy if room in the buffer.
                if (o_size - l_outputSize >= 1)
                {
                    o_buf[0] = i_buf[0];
                    o_buf++;
                    l_outputSize++;

                    (*l_tokC) = (*l_tokC << 1) | 0x0;
                    l_tokPos++;

                    i_buf++;
                    l_laSize++;
                    i_size--;
                }
                // else <= 0, so don't mess with l_outputSize.
            }

            // flush out lookahead. (keep at 1024 bytes)
            while(l_laSize >= 1024)
            {
                l_laSize--;
                l_lookahead++;
            }

            // check if bundle complete, create new bundle.
            if (l_tokPos == 8)
            {
                l_tokPos = 0;
                l_tokC = o_buf++;
                l_outputSize++;
            }

        } // end while.

        // Shift our bundle bits correctly. (the uncompressor doesn't know if
        // the bundle was complete, so always assumes so.
        if (l_tokPos != 0)
            (*l_tokC) = (*l_tokC) << (8 - l_tokPos);

        // We never _really_ go past our buffer, but sometimes our size says
        // we did, so fix that up...
        if (l_outputSize <= o_size)
            o_size = l_outputSize;

        return;
    };
    #endif

    #ifdef PRDF_COMPRESSBUFFER_UNCOMPRESS_FUNCTIONS
    /* void uncompressBuffer(uint8_t *, size_t, uint8_t *, size_t &)
     *                Uncompresses i_buf and stores result into o_buf.
     *
     * i_buf         : pointer to input buffer.
     * i_size         : size of input buffer.
     * o_buf        : pointer to output buffer (supplied by caller).
     * o_size        : max size of output buffer.  After function, size of
     *                  uncompressed buffer.
     *
     * NOTE:  The size is never stored in an compressed or uncompressed buffer.
     *               The caller needs to keep track of those kind of things and add
     *               whatever kind of header they need.  If o_size isn't big enough
     *              you're not going to get the whole stream.  These functions will
     *              not overrun the buffer.
     */
    void uncompressBuffer(uint8_t * i_buf, size_t i_size,
                          uint8_t * o_buf, size_t & o_size)
    {
        // Asserts.
        if ((i_buf == NULL) || (o_buf == NULL))
        {
            o_size = 0;
            return;
        }

        // Initilize output buffer with 0
        memset( o_buf, 0, o_size);

        uint8_t * l_lookahead = o_buf;  // Look-behind buffer.
        size_t l_laSize = 0;                // look-behind size.

        uint8_t l_tokC = 0;                // Bundle bits.
        uint8_t l_tokPos = 8;                // Number of tokens from the bundle
                                        // thus far.  (start at 8 to get a new
                                        // bundle right away).

        size_t l_outputSize = 0;        // Size of output buffer.

        while ((i_size > 0) && (o_size > 0))  // while we're at the end of a buf.
        {
            // Check if we need to get a new bundle.
            if (l_tokPos == 8)
            {
                l_tokPos = 0;
                l_tokC = i_buf[0];
                i_buf++;
                i_size--;
                continue;
            }


            // Check if token was compressed or not.
            if ((l_tokC >> (7 - l_tokPos)) & 0x1)
            {
                // check if input buffer has tokens
                if ( i_size < CompressedToken::minSize() )
                {
                    // set exit condition
                    i_size = 0;
                    continue;
                }
                // compressed token...

                size_t l_matchSize;
                uint8_t * l_match;
                CompressedToken l_tok;

                // read token from stream.
                l_tok.read(i_buf);
                i_buf += l_tok.size();
                i_size -= l_tok.size();

                // get pointer to lookahead buffer, copy into output buffer.
                l_tok.uncompress(l_lookahead, l_match, l_matchSize);
                memcpy(o_buf, l_match, MIN(l_matchSize, o_size));

                // fix up all our sizes and pointers.
                l_laSize += l_matchSize;
                l_outputSize += MIN (l_matchSize, o_size);
                o_size -= MIN (l_matchSize, o_size);
                o_buf += l_matchSize;
            }
            else
            {
                // uncompressed token... just copy the byte.

                o_buf[0] = i_buf[0];
                o_size--;
                o_buf++;
                i_buf++;
                i_size--;

                l_laSize++;
                l_outputSize++;
            }
            l_tokPos++; // just did a token, so inc the bundle count.

            // Advance the look-behind buffer as needed.
            while (l_laSize >= 1024)
            {
                l_lookahead++;
                l_laSize--;
            }
        }

        // fix up o_size (since we've been decrementing it...)
        o_size = l_outputSize;
    };
    #endif

}; // end namespace.

#endif
OpenPOWER on IntegriCloud