When a bad UTF-8 encoding or bogus escape sequence is encountered in a string literal, produce a diagnostic pointing at the erroneous character range, not at the start of the literal.

llvm-svn: 163459
author: Richard Smith <richard-llvm@metafoo.co.uk> 2012-09-08 07:16:20 +0000
committer: Richard Smith <richard-llvm@metafoo.co.uk> 2012-09-08 07:16:20 +0000
commit: 639b8d05dd2432f0609774f7963b5780dd5366e1 (patch)
tree: de4121f4117db133f5da18d62912166d624fa12e /clang/lib/Basic/ConvertUTFWrapper.cpp
parent: 3e41a5bb3176c2163f1646f313e91c9674658e77 (diff)
download: bcm5719-llvm-639b8d05dd2432f0609774f7963b5780dd5366e1.tar.gz
bcm5719-llvm-639b8d05dd2432f0609774f7963b5780dd5366e1.zip
1 files changed, 12 insertions, 6 deletions
diff --git a/clang/lib/Basic/ConvertUTFWrapper.cpp b/clang/lib/Basic/ConvertUTFWrapper.cpp
index a1b3f7fd9da..6be3828d286 100644
--- a/clang/lib/Basic/ConvertUTFWrapper.cpp
+++ b/clang/lib/Basic/ConvertUTFWrapper.cpp
@@ -13,16 +13,19 @@
 namespace clang {
 
 bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
-                       char *&ResultPtr) {
+                       char *&ResultPtr, const UTF8 *&ErrorPtr) {
   assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
   ConversionResult result = conversionOK;
   // Copy the character span over.
   if (WideCharWidth == 1) {
-    if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Source.begin()),
-                           reinterpret_cast<const UTF8*>(Source.end())))
+    const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.begin());
+    if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.end()))) {
       result = sourceIllegal;
-    memcpy(ResultPtr, Source.data(), Source.size());
-    ResultPtr += Source.size();
+      ErrorPtr = Pos;
+    } else {
+      memcpy(ResultPtr, Source.data(), Source.size());
+      ResultPtr += Source.size();
+    }
   } else if (WideCharWidth == 2) {
     const UTF8 *sourceStart = (const UTF8*)Source.data();
     // FIXME: Make the type of the result buffer correct instead of
@@ -34,6 +37,8 @@ bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
         &targetStart, targetStart + 2*Source.size(), flags);
     if (result == conversionOK)
       ResultPtr = reinterpret_cast<char*>(targetStart);
+    else
+      ErrorPtr = sourceStart;
   } else if (WideCharWidth == 4) {
     const UTF8 *sourceStart = (const UTF8*)Source.data();
     // FIXME: Make the type of the result buffer correct instead of
@@ -45,6 +50,8 @@ bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
         &targetStart, targetStart + 4*Source.size(), flags);
     if (result == conversionOK)
       ResultPtr = reinterpret_cast<char*>(targetStart);
+    else
+      ErrorPtr = sourceStart;
   }
   assert((result != targetExhausted)
          && "ConvertUTF8toUTFXX exhausted target buffer");
@@ -67,4 +74,3 @@ bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
 }
 
 } // end namespace clang
-
author	Richard Smith <richard-llvm@metafoo.co.uk>	2012-09-08 07:16:20 +0000
committer	Richard Smith <richard-llvm@metafoo.co.uk>	2012-09-08 07:16:20 +0000
commit	639b8d05dd2432f0609774f7963b5780dd5366e1 (patch)
tree	de4121f4117db133f5da18d62912166d624fa12e /clang/lib/Basic/ConvertUTFWrapper.cpp
parent	3e41a5bb3176c2163f1646f313e91c9674658e77 (diff)
download	bcm5719-llvm-639b8d05dd2432f0609774f7963b5780dd5366e1.tar.gz bcm5719-llvm-639b8d05dd2432f0609774f7963b5780dd5366e1.zip