diff options
author | Pavel Labath <labath@google.com> | 2018-02-14 11:06:39 +0000 |
---|---|---|
committer | Pavel Labath <labath@google.com> | 2018-02-14 11:06:39 +0000 |
commit | 918f60056a57aa2aa1322ff3a6207b79e40a56e3 (patch) | |
tree | ef57d31a780ec1d5ce69ab81a234cb48e7a780f1 /llvm/utils/unicode-case-fold.py | |
parent | 7beea3ab73b35e9dd3bd2fc2f4232cf41e41adc4 (diff) | |
download | bcm5719-llvm-918f60056a57aa2aa1322ff3a6207b79e40a56e3.tar.gz bcm5719-llvm-918f60056a57aa2aa1322ff3a6207b79e40a56e3.zip |
Revert r325107 (case folding DJB hash) and subsequent build fix
The "knownValuesUnicode" test in the patch fails on ppc64 and arm64
bots. Reverting while I investigate.
llvm-svn: 325115
Diffstat (limited to 'llvm/utils/unicode-case-fold.py')
-rwxr-xr-x | llvm/utils/unicode-case-fold.py | 137 |
1 files changed, 0 insertions, 137 deletions
# diff --git a/llvm/utils/unicode-case-fold.py b/llvm/utils/unicode-case-fold.py
# deleted file mode 100755
# index 98c56839c6c..00000000000
# --- a/llvm/utils/unicode-case-fold.py
# +++ /dev/null
# @@ -1,137 +0,0 @@
#!/usr/bin/env python
"""
Unicode case folding database conversion utility

Parses the database and generates a C++ function which implements the case
folding algorithm. The database entries are of the form:

  <code>; <status>; <mapping>; # <name>

<status> can be one of four characters:
  C - Common mappings
  S - mappings for Simple case folding
  F - mappings for Full case folding
  T - special case for Turkish I characters

Right now this generates a function which implements simple case folding (C+S
entries).

Usage: unicode-case-fold.py <url-of-case-folding-database>
"""

import contextlib
import re
import sys

# urllib2 only exists on Python 2; fall back so the script runs on both.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

# Accumulates the body of the generated C++ function. dump_block() appends to
# it; main() reads it when rendering the final output.
body = ""

# Matches Common (C) and Simple (S) entries only; F and T entries are skipped.
_MAPPING_RE = re.compile(r'^(.*); [CS]; (.*); # (.*)')


def mappings(f):
    """Yield (from_char, to_char, from_name) for every C/S entry in *f*.

    *f* is any iterable of lines. Lines may be text or bytes (urlopen() yields
    bytes on Python 3); bytes are decoded as UTF-8. Lines that are not C/S
    entries are ignored.

    Raises ValueError (an Exception subclass, as before) if the entries are
    not strictly increasing by code point, because the generated code relies
    on that ordering.
    """
    previous_from = -1
    for line in f:
        if not isinstance(line, str):
            line = line.decode('utf-8')
        m = _MAPPING_RE.match(line)
        if not m:
            continue
        from_char = int(m.group(1), 16)
        to_char = int(m.group(2), 16)
        from_name = m.group(3)

        if from_char <= previous_from:
            raise ValueError("Duplicate or unsorted characters in input")
        yield from_char, to_char, from_name
        previous_from = from_char


def shift(mapping):
    """Return the shift (to_char - from_char) of a mapping."""
    return mapping[1] - mapping[0]


def stride2(mapping1, mapping2):
    """Return the stride (from_char2 - from_char1) between two mappings."""
    return mapping2[0] - mapping1[0]


def stride(block):
    """Return the stride of a list of mappings.

    The list must have at least two mappings; all of them are assumed to
    share the same stride, so only the first two are inspected.
    """
    return stride2(block[0], block[1])


def format_block(b):
    """Return the C++ snippet that folds the characters in block *b*.

    *b* is a list of mappings that all have the same shift and, when there is
    more than one, a constant stride between adjacent mappings. An empty
    block produces an empty string.
    """
    if not b:
        return ""

    if len(b) == 1:
        # Special case for handling blocks of length 1. We don't even need to
        # emit the "if (C < X) return C" check below as all characters in this
        # range will be caught by the "C < X" check emitted by the first
        # non-trivial block.
        return " // {2}\n if (C == {0:#06x})\n return {1:#06x};\n".format(*b[0])

    step = stride(b)
    delta = shift(b[0])
    first = b[0][0]
    last = first + step * (len(b) - 1)
    modulo = first % step

    if step == 2 and delta == 1 and modulo == 0:
        # We can elide the modulo-check because the expression "C|1" will map
        # the intervening characters to themselves.
        pattern = " if (C <= {0:#06x})\n return C | 1;\n"
    elif step == 1:
        # X % 1 is always zero, so don't emit the modulo-check.
        pattern = " if (C <= {0:#06x})\n return C + {3};\n"
    else:
        # Generic pattern: check upper bound (lower bound is checked by the
        # "C < first" test emitted below) and modulo of C, return C+shift.
        pattern = " if (C <= {0:#06x} && C % {1} == {2})\n return C + {3};\n"

    return "".join([
        # All characters before this block map to themselves.
        " if (C < {0:#06x})\n return C;\n".format(first),
        " // {0} characters\n".format(len(b)),
        pattern.format(last, step, modulo, delta),
    ])


def dump_block(b):
    """Append the C++ snippet for block *b* to the module-level ``body``."""
    global body
    body += format_block(b)


def group_blocks(ms):
    """Split an iterable of mappings into maximal runs that share one shift
    and one stride, yielding each run as a list."""
    current = []
    for m in ms:
        compatible = (
            current
            and shift(current[0]) == shift(m)
            and (len(current) == 1
                 or stride(current) == stride2(current[-1], m))
        )
        if compatible:
            current.append(m)
            continue
        # Incompatible shift or stride (or first mapping): start a new block.
        if current:
            yield current
        current = [m]
    if current:
        yield current


def render_source(url, function_body):
    """Return the complete generated C++ file as one string.

    *url* is the database location recorded in the header comment and
    *function_body* is the text placed inside foldCharSimple().
    """
    lines = [
        '//===---------- Support/UnicodeCaseFold.cpp -------------------------------===//',
        '//',
        '// This file was generated by utils/unicode-case-fold.py from the Unicode',
        '// case folding database at',
        # Same bytes as the old "print '// ', url": literal plus separator.
        '// ' + ' ' + url,
        '//',
        '// To regenerate this file, run:',
        '// utils/unicode-case-fold.py \\',
        '// "{}" \\'.format(url),
        '// > lib/Support/UnicodeCaseFold.cpp',
        '//',
        '//===----------------------------------------------------------------------===//',
        '',
        '#include "llvm/Support/Unicode.h"',
        '',
        "int llvm::sys::unicode::foldCharSimple(int C) {",
        function_body,
        " return C;",
        "}",
    ]
    return "\n".join(lines) + "\n"


def main(argv=None):
    """Download the database named by argv[1] and print the generated C++.

    Returns a process exit status: 0 on success, 1 when the URL is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write("usage: {} <case-folding-database-url>\n".format(argv[0]))
        return 1

    url = argv[1]
    # closing() guarantees the response is released even if parsing raises,
    # and works with Python 2 responses that are not context managers.
    with contextlib.closing(urlopen(url)) as f:
        for block in group_blocks(mappings(f)):
            dump_block(block)

    sys.stdout.write(render_source(url, body))
    return 0


if __name__ == "__main__":
    sys.exit(main())