diff options
| author | Richard Smith <richard-llvm@metafoo.co.uk> | 2012-03-08 21:59:28 +0000 |
|---|---|---|
| committer | Richard Smith <richard-llvm@metafoo.co.uk> | 2012-03-08 21:59:28 +0000 |
| commit | 812924502bb7fbe0525757576aa2d16072ab5a87 (patch) | |
| tree | cf817c7931b543509af5f99d86be261a19e92b85 /clang/lib/Basic | |
| parent | 0ef86b0ea3392c672dd3ce69e32aa6d3d33603dd (diff) | |
| download | bcm5719-llvm-812924502bb7fbe0525757576aa2d16072ab5a87.tar.gz bcm5719-llvm-812924502bb7fbe0525757576aa2d16072ab5a87.zip | |
When checking the encoding of an 8-bit string literal, don't just check the
first codepoint! Also, don't reject empty raw string literals for spurious
"encoding" issues. Also, don't rely on undefined behavior in ConvertUTF.c.
llvm-svn: 152344
Diffstat (limited to 'clang/lib/Basic')
| -rw-r--r-- | clang/lib/Basic/ConvertUTF.c | 22 |
1 file changed, 19 insertions, 3 deletions
```diff
diff --git a/clang/lib/Basic/ConvertUTF.c b/clang/lib/Basic/ConvertUTF.c
index b3fa9169344..e1970039e16 100644
--- a/clang/lib/Basic/ConvertUTF.c
+++ b/clang/lib/Basic/ConvertUTF.c
@@ -387,7 +387,7 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
  */
 Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
     int length = trailingBytesForUTF8[*source]+1;
-    if (source+length > sourceEnd) {
+    if (length > sourceEnd - source) {
         return false;
     }
     return isLegalUTF8(source, length);
@@ -395,6 +395,22 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
 
 /* --------------------------------------------------------------------- */
 
+/*
+ * Exported function to return whether a UTF-8 string is legal or not.
+ * This is not used here; it's just exported.
+ */
+Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd) {
+    while (source != sourceEnd) {
+        int length = trailingBytesForUTF8[*source] + 1;
+        if (length > sourceEnd - source || !isLegalUTF8(source, length))
+            return false;
+        source += length;
+    }
+    return true;
+}
+
+/* --------------------------------------------------------------------- */
+
 ConversionResult ConvertUTF8toUTF16 (
         const UTF8** sourceStart, const UTF8* sourceEnd,
         UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
@@ -404,7 +420,7 @@ ConversionResult ConvertUTF8toUTF16 (
     while (source < sourceEnd) {
         UTF32 ch = 0;
         unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
-        if (source + extraBytesToRead >= sourceEnd) {
+        if (extraBytesToRead >= sourceEnd - source) {
             result = sourceExhausted; break;
         }
         /* Do this check whether lenient or strict */
@@ -477,7 +493,7 @@ ConversionResult ConvertUTF8toUTF32 (
     while (source < sourceEnd) {
         UTF32 ch = 0;
         unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
-        if (source + extraBytesToRead >= sourceEnd) {
+        if (extraBytesToRead >= sourceEnd - source) {
             result = sourceExhausted; break;
         }
         /* Do this check whether lenient or strict */
```

