diff options
author | charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-03-26 07:43:18 +0000 |
---|---|---|
committer | charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-03-26 07:43:18 +0000 |
commit | b67a70de8ab854b29584e4189198e5b40efa4cd5 (patch) | |
tree | 75dbd24d57fb4aa9b026b8565de51a5e073092d2 /gcc/ada/g-byorma.adb | |
parent | 0bf99ff62ae6445221072af0805f462eb2cb0d53 (diff) | |
download | ppe42-gcc-b67a70de8ab854b29584e4189198e5b40efa4cd5.tar.gz ppe42-gcc-b67a70de8ab854b29584e4189198e5b40efa4cd5.zip |
2008-03-26 Robert Dewar <dewar@adacore.com>
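* g-byorma.adb (Read_BOM): Reorder tests so that UTF_32 is recognized. The UTF-32 (little-endian) BOM FF FE 00 00 begins with the UTF-16 (little-endian) BOM FF FE, so the 4-byte UTF-32 sequences must be tested before the 2-byte UTF-16 ones.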
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@133584 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/ada/g-byorma.adb')
-rwxr-xr-x | gcc/ada/g-byorma.adb | 39 |
1 file changed, 21 insertions, 18 deletions
diff --git a/gcc/ada/g-byorma.adb b/gcc/ada/g-byorma.adb index 9cc6f08b519..6bbaedf8b71 100755 --- a/gcc/ada/g-byorma.adb +++ b/gcc/ada/g-byorma.adb @@ -44,27 +44,13 @@ package body GNAT.Byte_Order_Mark is XML_Support : Boolean := False) is begin - -- UTF-16 (big-endian) - - if Str'Length >= 2 - and then Str (Str'First) = Character'Val (16#FE#) - and then Str (Str'First + 1) = Character'Val (16#FF#) - then - Len := 2; - BOM := UTF16_BE; - - -- UTF-16 (little-endian) - - elsif Str'Length >= 2 - and then Str (Str'First) = Character'Val (16#FF#) - and then Str (Str'First + 1) = Character'Val (16#FE#) - then - Len := 2; - BOM := UTF16_LE; + -- Note: the order of these tests is important, because in some cases + -- one sequence is a prefix of a longer sequence, and we must test for + -- the longer sequence first -- UTF-32 (big-endian) - elsif Str'Length >= 4 + if Str'Length >= 4 and then Str (Str'First) = Character'Val (16#00#) and then Str (Str'First + 1) = Character'Val (16#00#) and then Str (Str'First + 2) = Character'Val (16#FE#) @@ -84,6 +70,23 @@ package body GNAT.Byte_Order_Mark is Len := 4; BOM := UTF32_LE; + -- UTF-16 (big-endian) + + elsif Str'Length >= 2 + and then Str (Str'First) = Character'Val (16#FE#) + and then Str (Str'First + 1) = Character'Val (16#FF#) + then + Len := 2; + BOM := UTF16_BE; + + -- UTF-16 (little-endian) + + elsif Str'Length >= 2 + and then Str (Str'First) = Character'Val (16#FF#) + and then Str (Str'First + 1) = Character'Val (16#FE#) + then + Len := 2; + BOM := UTF16_LE; -- UTF-8 (endian-independent) elsif Str'Length >= 3 |