summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CodeGenModule.cpp
diff options
context:
space:
mode:
authorDaniel Dunbar <daniel@zuster.org>2009-07-23 23:41:22 +0000
committerDaniel Dunbar <daniel@zuster.org>2009-07-23 23:41:22 +0000
commit91ade1419748a8b092a6e634d2a126a01fb05f2d (patch)
tree414c0a8a2b8220a88e733c17d31c6810524caf1f /clang/lib/CodeGen/CodeGenModule.cpp
parent8ce4021a40ec7efaa809dbfaade406d88632c52a (diff)
downloadbcm5719-llvm-91ade1419748a8b092a6e634d2a126a01fb05f2d.tar.gz
bcm5719-llvm-91ade1419748a8b092a6e634d2a126a01fb05f2d.zip
Output UTF-16 string literals independent of host byte order.
- Steve, can you take a look at this? It seems like this code should live elsewhere, and there is a FIXME about having Sema validates the UTF-8 to UTF-16 conversion. llvm-svn: 76915
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp33
1 files changed, 24 insertions, 9 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 994f60b0156..803df31a6f6 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1193,6 +1193,7 @@ static void appendFieldAndPadding(CodeGenModule &CGM,
static llvm::StringMapEntry<llvm::Constant*> &
GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map,
const StringLiteral *Literal,
+ bool TargetIsLSB,
bool &IsUTF16,
unsigned &StringLength) {
unsigned NumBytes = Literal->getByteLength();
@@ -1223,15 +1224,28 @@ GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map,
StringLength));
}
- // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings
- // without doing more surgery to this routine. Since we aren't explicitly
- // checking for endianness here, it's also a bug (when generating code for
- // a target that doesn't match the host endianness). Modeling this as an
- // i16 array is likely the cleanest solution.
+ // ConvertUTF8toUTF16 returns the length in ToPtr.
StringLength = ToPtr - &ToBuf[0];
+
+ // Render the UTF-16 string into a byte array and convert to the target byte
+ // order.
+ //
+ // FIXME: This isn't something we should need to do here.
+ llvm::SmallString<128> AsBytes;
+ AsBytes.reserve(StringLength * 2);
+ for (unsigned i = 0; i != StringLength; ++i) {
+ unsigned short Val = ToBuf[i];
+ if (TargetIsLSB) {
+ AsBytes.push_back(Val & 0xFF);
+ AsBytes.push_back(Val >> 8);
+ } else {
+ AsBytes.push_back(Val >> 8);
+ AsBytes.push_back(Val & 0xFF);
+ }
+ }
+
IsUTF16 = true;
- return Map.GetOrCreateValue(llvm::StringRef((char *)&ToBuf[0],
- StringLength * 2));
+ return Map.GetOrCreateValue(llvm::StringRef(AsBytes.data(), AsBytes.size()));
}
llvm::Constant *
@@ -1239,8 +1253,9 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
unsigned StringLength = 0;
bool isUTF16 = false;
llvm::StringMapEntry<llvm::Constant*> &Entry =
- GetConstantCFStringEntry(CFConstantStringMap, Literal, isUTF16,
- StringLength);
+ GetConstantCFStringEntry(CFConstantStringMap, Literal,
+ getTargetData().isLittleEndian(),
+ isUTF16, StringLength);
if (llvm::Constant *C = Entry.getValue())
return C;
OpenPOWER on IntegriCloud