summary refs log tree commit diff stats
path: root/lib/fold/fold.c
diff options
context:
space:
mode:
author Jeremy Kerr <jk@ozlabs.org> 2014-09-23 14:46:06 +0800
committer Samuel Mendoza-Jonas <sam.mj@au1.ibm.com> 2014-09-23 16:47:58 +1000
commit 3aef1b6d1f465596ebf7883a50efcf4d6f0ffcf4 (patch)
tree c1ff2d5ccb4ba0d5b0ef1af0f02bcad528ce7d5b /lib/fold/fold.c
parent 73ee21af6d0a379a104a21b7569331284b3659b7 (diff)
download talos-petitboot-3aef1b6d1f465596ebf7883a50efcf4d6f0ffcf4.tar.gz
talos-petitboot-3aef1b6d1f465596ebf7883a50efcf4d6f0ffcf4.zip
lib/fold: Add support for multibyte strings
Currently, the fold_text function doesn't understand multibyte strings, so it may break a line in the middle of a multibyte sequence. This change adds multibyte-awareness to the fold code, and uses proper width calculations for the contents of the folded string.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Diffstat (limited to 'lib/fold/fold.c')
-rw-r--r-- lib/fold/fold.c 70
1 files changed, 59 insertions, 11 deletions
diff --git a/lib/fold/fold.c b/lib/fold/fold.c
index ec10c8c..8bf133c 100644
--- a/lib/fold/fold.c
+++ b/lib/fold/fold.c
@@ -1,4 +1,12 @@
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <wchar.h>
+#include <wctype.h>
+
#include "fold/fold.h"
void fold_text(const char *text,
@@ -7,38 +15,78 @@ void fold_text(const char *text,
void *arg)
{
const char *start, *end, *sep;
- int rc = 0;
+ size_t sep_bytes, len;
+ int col, rc = 0;
+ mbstate_t ps;
+ /* start, end and sep are byte-positions in the string, and should always
+ * lie on the start of a multibyte sequence */
start = end = sep = text;
+ sep_bytes = 0;
+ col = 0;
+ len = strlen(text);
+ memset(&ps, 0, sizeof(ps));
while (!rc) {
+ size_t bytes;
+ wchar_t wc;
+ int width;
+
+ bytes = mbrtowc(&wc, end, len - (end - text), &ps);
+
+ assert(bytes >= 0);
+
+ /* we'll get a zero size for the nul terminator */
+ if (!bytes) {
+ line_cb(arg, start, end - start);
+ break;
+ }
- if (*end == '\n') {
+ if (wc == L'\n') {
rc = line_cb(arg, start, end - start);
- start = sep = ++end;
+ start = sep = end += bytes;
+ sep_bytes = 0;
+ col = 0;
+ continue;
+ }
+
+ width = wcwidth(wc);
- } else if (*end == '\0') {
+ /* we should have caught this in the !bytes check... */
+ if (width == 0) {
line_cb(arg, start, end - start);
- rc = 1;
+ break;
+ }
- } else if (end - start >= linelen - 1) {
+ /* unprintable character? just add it to the current line */
+ if (width < 0) {
+ end += bytes;
+ continue;
+ }
+
+ col += width;
+
+ if (col > linelen) {
if (sep != start) {
/* split on a previous word boundary, if
* possible */
rc = line_cb(arg, start, sep - start);
- start = end = ++sep;
+ end = sep + sep_bytes;
} else {
/* otherwise, break the word */
- end++;
rc = line_cb(arg, start, end - start);
- start = sep = end;
}
+ sep_bytes = 0;
+ start = sep = end;
+ col = 0;
} else {
- end++;
/* record our last separator */
- if (*end == ' ')
+ if (wc == L' ') {
sep = end;
+ sep_bytes = bytes;
+ }
+ end += bytes;
}
}
}
OpenPOWER on IntegriCloud