summary | refs | log | tree | commit | diff | stats
path: root/clang
diff options
context:
space:
mode:
author Richard Smith <richard-llvm@metafoo.co.uk> 2012-03-08 21:59:28 +0000
committer Richard Smith <richard-llvm@metafoo.co.uk> 2012-03-08 21:59:28 +0000
commit 812924502bb7fbe0525757576aa2d16072ab5a87 (patch)
tree cf817c7931b543509af5f99d86be261a19e92b85 /clang
parent 0ef86b0ea3392c672dd3ce69e32aa6d3d33603dd (diff)
download bcm5719-llvm-812924502bb7fbe0525757576aa2d16072ab5a87.tar.gz
bcm5719-llvm-812924502bb7fbe0525757576aa2d16072ab5a87.zip
When checking the encoding of an 8-bit string literal, don't just check the
first codepoint! Also, don't reject empty raw string literals for spurious
"encoding" issues. Also, don't rely on undefined behavior in ConvertUTF.c.

llvm-svn: 152344
Diffstat (limited to 'clang')
-rw-r--r-- clang/include/clang/Basic/ConvertUTF.h 4
-rw-r--r-- clang/lib/Basic/ConvertUTF.c 22
-rw-r--r-- clang/lib/Lex/LiteralSupport.cpp 21
-rw-r--r-- clang/test/Lexer/cxx0x_raw_string_delim_length.cpp 8
-rw-r--r-- clang/test/Lexer/string-literal-encoding.c 15
5 files changed, 51 insertions, 19 deletions
diff --git a/clang/include/clang/Basic/ConvertUTF.h b/clang/include/clang/Basic/ConvertUTF.h
index e376b7d7973..ec6b973e6a7 100644
--- a/clang/include/clang/Basic/ConvertUTF.h
+++ b/clang/include/clang/Basic/ConvertUTF.h
@@ -151,9 +151,11 @@ ConversionResult ConvertUTF16toUTF32 (
ConversionResult ConvertUTF32toUTF16 (
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
-#endif
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
+#endif
+
+Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd);
#ifdef __cplusplus
}
diff --git a/clang/lib/Basic/ConvertUTF.c b/clang/lib/Basic/ConvertUTF.c
index b3fa9169344..e1970039e16 100644
--- a/clang/lib/Basic/ConvertUTF.c
+++ b/clang/lib/Basic/ConvertUTF.c
@@ -387,7 +387,7 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
*/
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
int length = trailingBytesForUTF8[*source]+1;
- if (source+length > sourceEnd) {
+ if (length > sourceEnd - source) {
return false;
}
return isLegalUTF8(source, length);
@@ -395,6 +395,22 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
/* --------------------------------------------------------------------- */
+/*
+ * Exported function to return whether a UTF-8 string is legal or not.
+ * This is not used here; it's just exported.
+ */
+Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd) {
+ while (source != sourceEnd) {
+ int length = trailingBytesForUTF8[*source] + 1;
+ if (length > sourceEnd - source || !isLegalUTF8(source, length))
+ return false;
+ source += length;
+ }
+ return true;
+}
+
+/* --------------------------------------------------------------------- */
+
ConversionResult ConvertUTF8toUTF16 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
@@ -404,7 +420,7 @@ ConversionResult ConvertUTF8toUTF16 (
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- if (source + extraBytesToRead >= sourceEnd) {
+ if (extraBytesToRead >= sourceEnd - source) {
result = sourceExhausted; break;
}
/* Do this check whether lenient or strict */
@@ -477,7 +493,7 @@ ConversionResult ConvertUTF8toUTF32 (
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- if (source + extraBytesToRead >= sourceEnd) {
+ if (extraBytesToRead >= sourceEnd - source) {
result = sourceExhausted; break;
}
/* Do this check whether lenient or strict */
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 901d96d21ae..e0a5ba39d0f 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -333,7 +333,7 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
/// decimal-constant integer-suffix
/// octal-constant integer-suffix
/// hexadecimal-constant integer-suffix
-/// user-defiend-integer-literal: [C++11 lex.ext]
+/// user-defined-integer-literal: [C++11 lex.ext]
/// decimal-literal ud-suffix
/// octal-literal ud-suffix
/// hexadecimal-literal ud-suffix
@@ -1167,17 +1167,14 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
++ThisTokBuf;
++ThisTokBuf; // skip '('
- // remove same number of characters from the end
- if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
- ThisTokEnd -= (ThisTokBuf - Prefix);
+ // Remove same number of characters from the end
+ ThisTokEnd -= ThisTokBuf - Prefix;
+ assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal");
// Copy the string over
- if (CopyStringFragment(StringRef(ThisTokBuf,ThisTokEnd-ThisTokBuf)))
- {
+ if (CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf)))
if (DiagnoseBadString(StringToks[i]))
hadError = true;
- }
-
} else {
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
++ThisTokBuf; // skip "
@@ -1204,11 +1201,9 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
// Copy the character span over.
- if (CopyStringFragment(StringRef(InStart,ThisTokBuf-InStart)))
- {
+ if (CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart)))
if (DiagnoseBadString(StringToks[i]))
hadError = true;
- }
continue;
}
// Is this a Universal Character Name escape?
@@ -1292,8 +1287,8 @@ bool StringLiteralParser::CopyStringFragment(StringRef Fragment) {
ConversionResult result = conversionOK;
// Copy the character span over.
if (CharByteWidth == 1) {
- if (!isLegalUTF8Sequence(reinterpret_cast<const UTF8*>(Fragment.begin()),
- reinterpret_cast<const UTF8*>(Fragment.end())))
+ if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Fragment.begin()),
+ reinterpret_cast<const UTF8*>(Fragment.end())))
result = sourceIllegal;
memcpy(ResultPtr, Fragment.data(), Fragment.size());
ResultPtr += Fragment.size();
diff --git a/clang/test/Lexer/cxx0x_raw_string_delim_length.cpp b/clang/test/Lexer/cxx0x_raw_string_delim_length.cpp
index e7d5c6f8cd2..b9f6d13ab74 100644
--- a/clang/test/Lexer/cxx0x_raw_string_delim_length.cpp
+++ b/clang/test/Lexer/cxx0x_raw_string_delim_length.cpp
@@ -1,3 +1,7 @@
-// RUN: %clang_cc1 -std=c++11 -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
+// RUN: %clang_cc1 -std=c++11 -verify %s
-const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
+const char *str1 = R"(abcdef)"; // ok
+const char *str2 = R"foo()foo"; // ok
+const char *str3 = R"()"; // ok
+// FIXME: recover better than this.
+const char *str4 = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz"; // expected-error {{raw string delimiter longer than 16 characters}} expected-error {{expected expression}}
diff --git a/clang/test/Lexer/string-literal-encoding.c b/clang/test/Lexer/string-literal-encoding.c
index aa7cb73f626..57097dca88a 100644
--- a/clang/test/Lexer/string-literal-encoding.c
+++ b/clang/test/Lexer/string-literal-encoding.c
@@ -15,4 +15,19 @@ void f() {
char const *g = "Àéîõü"; // expected-warning {{illegal character encoding in string literal}}
char const *h = u8"Àéîõü"; // expected-error {{illegal character encoding in string literal}}
+ char const *i = R"(Àéîõü)"; // expected-warning {{illegal character encoding in string literal}}
+}
+
+void g() {
+ wchar_t const *a = L"foo Àéîõü"; // expected-error {{illegal character encoding in string literal}}
+
+ char16_t const *b = u"foo Àéîõü"; // expected-error {{illegal character encoding in string literal}}
+ char32_t const *c = U"foo Àéîõü"; // expected-error {{illegal character encoding in string literal}}
+ wchar_t const *d = LR"(foo Àéîõü)"; // expected-error {{illegal character encoding in string literal}}
+ char16_t const *e = uR"(foo Àéîõü)"; // expected-error {{illegal character encoding in string literal}}
+ char32_t const *f = UR"(foo Àéîõü)"; // expected-error {{illegal character encoding in string literal}}
+
+ char const *g = "foo Àéîõü"; // expected-warning {{illegal character encoding in string literal}}
+ char const *h = u8"foo Àéîõü"; // expected-error {{illegal character encoding in string literal}}
+ char const *i = R"(foo Àéîõü)"; // expected-warning {{illegal character encoding in string literal}}
}
OpenPOWER on IntegriCloud