summaryrefslogtreecommitdiffstats
path: root/fs/isofs/joliet.c
diff options
context:
space:
mode:
authorAlan Stern <stern@rowland.harvard.edu>2009-04-30 10:08:18 -0400
committerGreg Kroah-Hartman <gregkh@suse.de>2009-06-15 21:44:43 -0700
commit74675a58507e769beee7d949dbed788af3c4139d (patch)
treed4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6 /fs/isofs/joliet.c
parenta853a3d4eb2edb066248a39f0634f6f5858816a0 (diff)
downloadtalos-obmc-linux-74675a58507e769beee7d949dbed788af3c4139d.tar.gz
talos-obmc-linux-74675a58507e769beee7d949dbed788af3c4139d.zip
NLS: update handling of Unicode
This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: They don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding. The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code. Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediate after the pointers they refer to, and the inputs precede the outputs). Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Clemens Ladisch <clemens@ladisch.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs/isofs/joliet.c')
-rw-r--r--fs/isofs/joliet.c36
1 files changed, 3 insertions, 33 deletions
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index 92c14b850e9c..a048de81c093 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
return (op - ascii);
}
-/* Convert big endian wide character string to utf8 */
-static int
-wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
-{
- const __u8 *ip;
- __u8 *op;
- int size;
- __u16 c;
-
- op = s;
- ip = pwcs;
- while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) {
- c = (*ip << 8) | ip[1];
- if (c > 0x7f) {
- size = utf8_wctomb(op, c, maxlen);
- if (size == -1) {
- /* Ignore character and move on */
- maxlen--;
- } else {
- op += size;
- maxlen -= size;
- }
- } else {
- *op++ = (__u8) c;
- }
- ip += 2;
- inlen--;
- }
- return (op - s);
-}
-
int
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
{
@@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
if (utf8) {
- len = wcsntombs_be(outname, de->name,
- de->name_len[0] >> 1, PAGE_SIZE);
+ len = utf16s_to_utf8s((const wchar_t *) de->name,
+ de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
+ outname, PAGE_SIZE);
} else {
len = uni16_to_x8(outname, (__be16 *) de->name,
de->name_len[0] >> 1, nls);
OpenPOWER on IntegriCloud