diff options
author | Jeremy Kerr <jk@ozlabs.org> | 2014-09-23 14:46:06 +0800 |
---|---|---|
committer | Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> | 2014-09-23 16:47:58 +1000 |
commit | 3aef1b6d1f465596ebf7883a50efcf4d6f0ffcf4 (patch) | |
tree | c1ff2d5ccb4ba0d5b0ef1af0f02bcad528ce7d5b /lib/fold/fold.c | |
parent | 73ee21af6d0a379a104a21b7569331284b3659b7 (diff) | |
download | talos-petitboot-3aef1b6d1f465596ebf7883a50efcf4d6f0ffcf4.tar.gz talos-petitboot-3aef1b6d1f465596ebf7883a50efcf4d6f0ffcf4.zip |
lib/fold: Add support for multibyte strings
Currently, the fold_text function doesn't understand multibyte strings,
so it may break a line in the middle of a multibyte sequence.
This change adds multibyte-awareness to the fold code, and uses proper
width calculations for the contents of the folded string.
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Diffstat (limited to 'lib/fold/fold.c')
-rw-r--r-- | lib/fold/fold.c | 70 |
1 files changed, 59 insertions, 11 deletions
diff --git a/lib/fold/fold.c b/lib/fold/fold.c index ec10c8c..8bf133c 100644 --- a/lib/fold/fold.c +++ b/lib/fold/fold.c @@ -1,4 +1,12 @@ +#define _GNU_SOURCE + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <wchar.h> +#include <wctype.h> + #include "fold/fold.h" void fold_text(const char *text, @@ -7,38 +15,78 @@ void fold_text(const char *text, void *arg) { const char *start, *end, *sep; - int rc = 0; + size_t sep_bytes, len; + int col, rc = 0; + mbstate_t ps; + /* start, end and sep are byte-positions in the string, and should always + * lie on the start of a multibyte sequence */ start = end = sep = text; + sep_bytes = 0; + col = 0; + len = strlen(text); + memset(&ps, 0, sizeof(ps)); while (!rc) { + size_t bytes; + wchar_t wc; + int width; + + bytes = mbrtowc(&wc, end, len - (end - text), &ps); + + assert(bytes >= 0); + + /* we'll get a zero size for the nul terminator */ + if (!bytes) { + line_cb(arg, start, end - start); + break; + } - if (*end == '\n') { + if (wc == L'\n') { rc = line_cb(arg, start, end - start); - start = sep = ++end; + start = sep = end += bytes; + sep_bytes = 0; + col = 0; + continue; + } + + width = wcwidth(wc); - } else if (*end == '\0') { + /* we should have caught this in the !bytes check... */ + if (width == 0) { line_cb(arg, start, end - start); - rc = 1; + break; + } - } else if (end - start >= linelen - 1) { + /* unprintable character? 
just add it to the current line */ + if (width < 0) { + end += bytes; + continue; + } + + col += width; + + if (col > linelen) { if (sep != start) { /* split on a previous word boundary, if * possible */ rc = line_cb(arg, start, sep - start); - start = end = ++sep; + end = sep + sep_bytes; } else { /* otherwise, break the word */ - end++; rc = line_cb(arg, start, end - start); - start = sep = end; } + sep_bytes = 0; + start = sep = end; + col = 0; } else { - end++; /* record our last separator */ - if (*end == ' ') + if (wc == L' ') { sep = end; + sep_bytes = bytes; + } + end += bytes; } } } |