Clean up and reduce visual clutter in the makeunicode.py script. (GH-7558)

author: Stefan Behnel <stefan_ml@behnel.de> 2019-06-01 21:49:03 +0200
committer: GitHub <noreply@github.com> 2019-06-01 21:49:03 +0200
commit: faa2948654d15a859bc4317e00730ff213295764 (patch)
tree: 75cdfdd3687fab464d7583d941a8521b969a41c8 /Tools/unicode
parent: bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) (diff)
download: cpython-faa2948654d15a859bc4317e00730ff213295764.tar.gz
cpython-faa2948654d15a859bc4317e00730ff213295764.tar.bz2
cpython-faa2948654d15a859bc4317e00730ff213295764.zip
1 files changed, 275 insertions, 263 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 5418eec588c..5b9427acd39 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -31,6 +31,7 @@ import sys
 import zipfile
 
 from textwrap import dedent
+from functools import partial
 
 SCRIPT = sys.argv[0]
 VERSION = "3.3"
@@ -106,11 +107,11 @@ cjk_ranges = [
     ('2CEB0', '2EBE0'),
 ]
 
+
 def maketables(trace=0):
 
     print("--- Reading", UNICODE_DATA % "", "...")
 
-    version = ""
     unicode = UnicodeData(UNIDATA_VERSION)
 
     print(len(list(filter(None, unicode.table))), "characters")
@@ -125,6 +126,7 @@ def maketables(trace=0):
     makeunicodedata(unicode, trace)
     makeunicodetype(unicode, trace)
 
+
 # --------------------------------------------------------------------
 # unicode character properties
 
@@ -258,124 +260,125 @@ def makeunicodedata(unicode, trace):
 
     print("--- Writing", FILE, "...")
 
-    fp = open(FILE, "w")
-    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
-    print(file=fp)
-    print('#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION, file=fp)
-    print("/* a list of unique database records */", file=fp)
-    print("const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {", file=fp)
-    for item in table:
-        print("    {%d, %d, %d, %d, %d, %d}," % item, file=fp)
-    print("};", file=fp)
-    print(file=fp)
-
-    print("/* Reindexing of NFC first characters. */", file=fp)
-    print("#define TOTAL_FIRST",total_first, file=fp)
-    print("#define TOTAL_LAST",total_last, file=fp)
-    print("struct reindex{int start;short count,index;};", file=fp)
-    print("static struct reindex nfc_first[] = {", file=fp)
-    for start,end in comp_first_ranges:
-        print("    { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp)
-    print("    {0,0,0}", file=fp)
-    print("};\n", file=fp)
-    print("static struct reindex nfc_last[] = {", file=fp)
-    for start,end in comp_last_ranges:
-        print("  { %d, %d, %d}," % (start,end-start,comp_last[start]), file=fp)
-    print("  {0,0,0}", file=fp)
-    print("};\n", file=fp)
-
-    # FIXME: <fl> the following tables could be made static, and
-    # the support code moved into unicodedatabase.c
-
-    print("/* string literals */", file=fp)
-    print("const char *_PyUnicode_CategoryNames[] = {", file=fp)
-    for name in CATEGORY_NAMES:
-        print("    \"%s\"," % name, file=fp)
-    print("    NULL", file=fp)
-    print("};", file=fp)
-
-    print("const char *_PyUnicode_BidirectionalNames[] = {", file=fp)
-    for name in BIDIRECTIONAL_NAMES:
-        print("    \"%s\"," % name, file=fp)
-    print("    NULL", file=fp)
-    print("};", file=fp)
-
-    print("const char *_PyUnicode_EastAsianWidthNames[] = {", file=fp)
-    for name in EASTASIANWIDTH_NAMES:
-        print("    \"%s\"," % name, file=fp)
-    print("    NULL", file=fp)
-    print("};", file=fp)
-
-    print("static const char *decomp_prefix[] = {", file=fp)
-    for name in decomp_prefix:
-        print("    \"%s\"," % name, file=fp)
-    print("    NULL", file=fp)
-    print("};", file=fp)
-
-    # split record index table
-    index1, index2, shift = splitbins(index, trace)
-
-    print("/* index tables for the database records */", file=fp)
-    print("#define SHIFT", shift, file=fp)
-    Array("index1", index1).dump(fp, trace)
-    Array("index2", index2).dump(fp, trace)
-
-    # split decomposition index table
-    index1, index2, shift = splitbins(decomp_index, trace)
-
-    print("/* decomposition data */", file=fp)
-    Array("decomp_data", decomp_data).dump(fp, trace)
-
-    print("/* index tables for the decomposition data */", file=fp)
-    print("#define DECOMP_SHIFT", shift, file=fp)
-    Array("decomp_index1", index1).dump(fp, trace)
-    Array("decomp_index2", index2).dump(fp, trace)
-
-    index, index2, shift = splitbins(comp_data, trace)
-    print("/* NFC pairs */", file=fp)
-    print("#define COMP_SHIFT", shift, file=fp)
-    Array("comp_index", index).dump(fp, trace)
-    Array("comp_data", index2).dump(fp, trace)
-
-    # Generate delta tables for old versions
-    for version, table, normalization in unicode.changed:
-        cversion = version.replace(".","_")
-        records = [table[0]]
-        cache = {table[0]:0}
-        index = [0] * len(table)
-        for i, record in enumerate(table):
-            try:
-                index[i] = cache[record]
-            except KeyError:
-                index[i] = cache[record] = len(records)
-                records.append(record)
+    with open(FILE, "w") as fp:
+        fprint = partial(print, file=fp)
+
+        fprint("/* this file was generated by %s %s */" % (SCRIPT, VERSION))
+        fprint()
+        fprint('#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION)
+        fprint("/* a list of unique database records */")
+        fprint("const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {")
+        for item in table:
+            fprint("    {%d, %d, %d, %d, %d, %d}," % item)
+        fprint("};")
+        fprint()
+
+        fprint("/* Reindexing of NFC first characters. */")
+        fprint("#define TOTAL_FIRST",total_first)
+        fprint("#define TOTAL_LAST",total_last)
+        fprint("struct reindex{int start;short count,index;};")
+        fprint("static struct reindex nfc_first[] = {")
+        for start,end in comp_first_ranges:
+            fprint("    { %d, %d, %d}," % (start,end-start,comp_first[start]))
+        fprint("    {0,0,0}")
+        fprint("};\n")
+        fprint("static struct reindex nfc_last[] = {")
+        for start,end in comp_last_ranges:
+            fprint("  { %d, %d, %d}," % (start,end-start,comp_last[start]))
+        fprint("  {0,0,0}")
+        fprint("};\n")
+
+        # FIXME: <fl> the following tables could be made static, and
+        # the support code moved into unicodedatabase.c
+
+        fprint("/* string literals */")
+        fprint("const char *_PyUnicode_CategoryNames[] = {")
+        for name in CATEGORY_NAMES:
+            fprint("    \"%s\"," % name)
+        fprint("    NULL")
+        fprint("};")
+
+        fprint("const char *_PyUnicode_BidirectionalNames[] = {")
+        for name in BIDIRECTIONAL_NAMES:
+            fprint("    \"%s\"," % name)
+        fprint("    NULL")
+        fprint("};")
+
+        fprint("const char *_PyUnicode_EastAsianWidthNames[] = {")
+        for name in EASTASIANWIDTH_NAMES:
+            fprint("    \"%s\"," % name)
+        fprint("    NULL")
+        fprint("};")
+
+        fprint("static const char *decomp_prefix[] = {")
+        for name in decomp_prefix:
+            fprint("    \"%s\"," % name)
+        fprint("    NULL")
+        fprint("};")
+
+        # split record index table
         index1, index2, shift = splitbins(index, trace)
-        print("static const change_record change_records_%s[] = {" % cversion, file=fp)
-        for record in records:
-            print("    { %s }," % ", ".join(map(str,record)), file=fp)
-        print("};", file=fp)
-        Array("changes_%s_index" % cversion, index1).dump(fp, trace)
-        Array("changes_%s_data" % cversion, index2).dump(fp, trace)
-        print("static const change_record* get_change_%s(Py_UCS4 n)" % cversion, file=fp)
-        print("{", file=fp)
-        print("    int index;", file=fp)
-        print("    if (n >= 0x110000) index = 0;", file=fp)
-        print("    else {", file=fp)
-        print("        index = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
-        print("        index = changes_%s_data[(index<<%d)+(n & %d)];" % \
-              (cversion, shift, ((1<<shift)-1)), file=fp)
-        print("    }", file=fp)
-        print("    return change_records_%s+index;" % cversion, file=fp)
-        print("}\n", file=fp)
-        print("static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion, file=fp)
-        print("{", file=fp)
-        print("    switch(n) {", file=fp)
-        for k, v in normalization:
-            print("    case %s: return 0x%s;" % (hex(k), v), file=fp)
-        print("    default: return 0;", file=fp)
-        print("    }\n}\n", file=fp)
-
-    fp.close()
+
+        fprint("/* index tables for the database records */")
+        fprint("#define SHIFT", shift)
+        Array("index1", index1).dump(fp, trace)
+        Array("index2", index2).dump(fp, trace)
+
+        # split decomposition index table
+        index1, index2, shift = splitbins(decomp_index, trace)
+
+        fprint("/* decomposition data */")
+        Array("decomp_data", decomp_data).dump(fp, trace)
+
+        fprint("/* index tables for the decomposition data */")
+        fprint("#define DECOMP_SHIFT", shift)
+        Array("decomp_index1", index1).dump(fp, trace)
+        Array("decomp_index2", index2).dump(fp, trace)
+
+        index, index2, shift = splitbins(comp_data, trace)
+        fprint("/* NFC pairs */")
+        fprint("#define COMP_SHIFT", shift)
+        Array("comp_index", index).dump(fp, trace)
+        Array("comp_data", index2).dump(fp, trace)
+
+        # Generate delta tables for old versions
+        for version, table, normalization in unicode.changed:
+            cversion = version.replace(".","_")
+            records = [table[0]]
+            cache = {table[0]:0}
+            index = [0] * len(table)
+            for i, record in enumerate(table):
+                try:
+                    index[i] = cache[record]
+                except KeyError:
+                    index[i] = cache[record] = len(records)
+                    records.append(record)
+            index1, index2, shift = splitbins(index, trace)
+            fprint("static const change_record change_records_%s[] = {" % cversion)
+            for record in records:
+                fprint("    { %s }," % ", ".join(map(str,record)))
+            fprint("};")
+            Array("changes_%s_index" % cversion, index1).dump(fp, trace)
+            Array("changes_%s_data" % cversion, index2).dump(fp, trace)
+            fprint("static const change_record* get_change_%s(Py_UCS4 n)" % cversion)
+            fprint("{")
+            fprint("    int index;")
+            fprint("    if (n >= 0x110000) index = 0;")
+            fprint("    else {")
+            fprint("        index = changes_%s_index[n>>%d];" % (cversion, shift))
+            fprint("        index = changes_%s_data[(index<<%d)+(n & %d)];" % \
+                   (cversion, shift, ((1<<shift)-1)))
+            fprint("    }")
+            fprint("    return change_records_%s+index;" % cversion)
+            fprint("}\n")
+            fprint("static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion)
+            fprint("{")
+            fprint("    switch(n) {")
+            for k, v in normalization:
+                fprint("    case %s: return 0x%s;" % (hex(k), v))
+            fprint("    default: return 0;")
+            fprint("    }\n}\n")
+
 
 # --------------------------------------------------------------------
 # unicode character type tables
@@ -404,7 +407,6 @@ def makeunicodetype(unicode, trace):
             bidirectional = record[4]
             properties = record[16]
             flags = 0
-            delta = True
             if category in ["Lm", "Lt", "Lu", "Ll", "Lo"]:
                 flags |= ALPHA_MASK
             if "Lowercase" in properties:
@@ -505,90 +507,91 @@ def makeunicodetype(unicode, trace):
 
     print("--- Writing", FILE, "...")
 
-    fp = open(FILE, "w")
-    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
-    print(file=fp)
-    print("/* a list of unique character type descriptors */", file=fp)
-    print("const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {", file=fp)
-    for item in table:
-        print("    {%d, %d, %d, %d, %d, %d}," % item, file=fp)
-    print("};", file=fp)
-    print(file=fp)
-
-    print("/* extended case mappings */", file=fp)
-    print(file=fp)
-    print("const Py_UCS4 _PyUnicode_ExtendedCase[] = {", file=fp)
-    for c in extra_casing:
-        print("    %d," % c, file=fp)
-    print("};", file=fp)
-    print(file=fp)
-
-    # split decomposition index table
-    index1, index2, shift = splitbins(index, trace)
-
-    print("/* type indexes */", file=fp)
-    print("#define SHIFT", shift, file=fp)
-    Array("index1", index1).dump(fp, trace)
-    Array("index2", index2).dump(fp, trace)
-
-    # Generate code for _PyUnicode_ToNumeric()
-    numeric_items = sorted(numeric.items())
-    print('/* Returns the numeric value as double for Unicode characters', file=fp)
-    print(' * having this property, -1.0 otherwise.', file=fp)
-    print(' */', file=fp)
-    print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp)
-    print('{', file=fp)
-    print('    switch (ch) {', file=fp)
-    for value, codepoints in numeric_items:
-        # Turn text into float literals
-        parts = value.split('/')
-        parts = [repr(float(part)) for part in parts]
-        value = '/'.join(parts)
-
-        codepoints.sort()
-        for codepoint in codepoints:
-            print('    case 0x%04X:' % (codepoint,), file=fp)
-        print('        return (double) %s;' % (value,), file=fp)
-    print('    }', file=fp)
-    print('    return -1.0;', file=fp)
-    print('}', file=fp)
-    print(file=fp)
-
-    # Generate code for _PyUnicode_IsWhitespace()
-    print("/* Returns 1 for Unicode characters having the bidirectional", file=fp)
-    print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp)
-    print(" */", file=fp)
-    print('int _PyUnicode_IsWhitespace(const Py_UCS4 ch)', file=fp)
-    print('{', file=fp)
-    print('    switch (ch) {', file=fp)
-
-    for codepoint in sorted(spaces):
-        print('    case 0x%04X:' % (codepoint,), file=fp)
-    print('        return 1;', file=fp)
-
-    print('    }', file=fp)
-    print('    return 0;', file=fp)
-    print('}', file=fp)
-    print(file=fp)
-
-    # Generate code for _PyUnicode_IsLinebreak()
-    print("/* Returns 1 for Unicode characters having the line break", file=fp)
-    print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp)
-    print(" * type 'B', 0 otherwise.", file=fp)
-    print(" */", file=fp)
-    print('int _PyUnicode_IsLinebreak(const Py_UCS4 ch)', file=fp)
-    print('{', file=fp)
-    print('    switch (ch) {', file=fp)
-    for codepoint in sorted(linebreaks):
-        print('    case 0x%04X:' % (codepoint,), file=fp)
-    print('        return 1;', file=fp)
-
-    print('    }', file=fp)
-    print('    return 0;', file=fp)
-    print('}', file=fp)
-    print(file=fp)
-
-    fp.close()
+    with open(FILE, "w") as fp:
+        fprint = partial(print, file=fp)
+
+        fprint("/* this file was generated by %s %s */" % (SCRIPT, VERSION))
+        fprint()
+        fprint("/* a list of unique character type descriptors */")
+        fprint("const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {")
+        for item in table:
+            fprint("    {%d, %d, %d, %d, %d, %d}," % item)
+        fprint("};")
+        fprint()
+
+        fprint("/* extended case mappings */")
+        fprint()
+        fprint("const Py_UCS4 _PyUnicode_ExtendedCase[] = {")
+        for c in extra_casing:
+            fprint("    %d," % c)
+        fprint("};")
+        fprint()
+
+        # split type index table
+        index1, index2, shift = splitbins(index, trace)
+
+        fprint("/* type indexes */")
+        fprint("#define SHIFT", shift)
+        Array("index1", index1).dump(fp, trace)
+        Array("index2", index2).dump(fp, trace)
+
+        # Generate code for _PyUnicode_ToNumeric()
+        numeric_items = sorted(numeric.items())
+        fprint('/* Returns the numeric value as double for Unicode characters')
+        fprint(' * having this property, -1.0 otherwise.')
+        fprint(' */')
+        fprint('double _PyUnicode_ToNumeric(Py_UCS4 ch)')
+        fprint('{')
+        fprint('    switch (ch) {')
+        for value, codepoints in numeric_items:
+            # Turn text into float literals
+            parts = value.split('/')
+            parts = [repr(float(part)) for part in parts]
+            value = '/'.join(parts)
+
+            codepoints.sort()
+            for codepoint in codepoints:
+                fprint('    case 0x%04X:' % (codepoint,))
+            fprint('        return (double) %s;' % (value,))
+        fprint('    }')
+        fprint('    return -1.0;')
+        fprint('}')
+        fprint()
+
+        # Generate code for _PyUnicode_IsWhitespace()
+        fprint("/* Returns 1 for Unicode characters having the bidirectional")
+        fprint(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.")
+        fprint(" */")
+        fprint('int _PyUnicode_IsWhitespace(const Py_UCS4 ch)')
+        fprint('{')
+        fprint('    switch (ch) {')
+
+        for codepoint in sorted(spaces):
+            fprint('    case 0x%04X:' % (codepoint,))
+        fprint('        return 1;')
+
+        fprint('    }')
+        fprint('    return 0;')
+        fprint('}')
+        fprint()
+
+        # Generate code for _PyUnicode_IsLinebreak()
+        fprint("/* Returns 1 for Unicode characters having the line break")
+        fprint(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional")
+        fprint(" * type 'B', 0 otherwise.")
+        fprint(" */")
+        fprint('int _PyUnicode_IsLinebreak(const Py_UCS4 ch)')
+        fprint('{')
+        fprint('    switch (ch) {')
+        for codepoint in sorted(linebreaks):
+            fprint('    case 0x%04X:' % (codepoint,))
+        fprint('        return 1;')
+
+        fprint('    }')
+        fprint('    return 0;')
+        fprint('}')
+        fprint()
+
 
 # --------------------------------------------------------------------
 # unicode name database
@@ -727,63 +730,63 @@ def makeunicodename(unicode, trace):
 
     print("--- Writing", FILE, "...")
 
-    fp = open(FILE, "w")
-    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
-    print(file=fp)
-    print("#define NAME_MAXLEN", 256, file=fp)
-    print(file=fp)
-    print("/* lexicon */", file=fp)
-    Array("lexicon", lexicon).dump(fp, trace)
-    Array("lexicon_offset", lexicon_offset).dump(fp, trace)
-
-    # split decomposition index table
-    offset1, offset2, shift = splitbins(phrasebook_offset, trace)
-
-    print("/* code->name phrasebook */", file=fp)
-    print("#define phrasebook_shift", shift, file=fp)
-    print("#define phrasebook_short", short, file=fp)
-
-    Array("phrasebook", phrasebook).dump(fp, trace)
-    Array("phrasebook_offset1", offset1).dump(fp, trace)
-    Array("phrasebook_offset2", offset2).dump(fp, trace)
-
-    print("/* name->code dictionary */", file=fp)
-    codehash.dump(fp, trace)
-
-    print(file=fp)
-    print('static const unsigned int aliases_start = %#x;' %
-          NAME_ALIASES_START, file=fp)
-    print('static const unsigned int aliases_end = %#x;' %
-          (NAME_ALIASES_START + len(unicode.aliases)), file=fp)
-
-    print('static const unsigned int name_aliases[] = {', file=fp)
-    for name, codepoint in unicode.aliases:
-        print('    0x%04X,' % codepoint, file=fp)
-    print('};', file=fp)
-
-    # In Unicode 6.0.0, the sequences contain at most 4 BMP chars,
-    # so we are using Py_UCS2 seq[4].  This needs to be updated if longer
-    # sequences or sequences with non-BMP chars are added.
-    # unicodedata_lookup should be adapted too.
-    print(dedent("""
-        typedef struct NamedSequence {
-            int seqlen;
-            Py_UCS2 seq[4];
-        } named_sequence;
-        """), file=fp)
-
-    print('static const unsigned int named_sequences_start = %#x;' %
-          NAMED_SEQUENCES_START, file=fp)
-    print('static const unsigned int named_sequences_end = %#x;' %
-          (NAMED_SEQUENCES_START + len(unicode.named_sequences)), file=fp)
-
-    print('static const named_sequence named_sequences[] = {', file=fp)
-    for name, sequence in unicode.named_sequences:
-        seq_str = ', '.join('0x%04X' % cp for cp in sequence)
-        print('    {%d, {%s}},' % (len(sequence), seq_str), file=fp)
-    print('};', file=fp)
-
-    fp.close()
+    with open(FILE, "w") as fp:
+        fprint = partial(print, file=fp)
+
+        fprint("/* this file was generated by %s %s */" % (SCRIPT, VERSION))
+        fprint()
+        fprint("#define NAME_MAXLEN", 256)
+        fprint()
+        fprint("/* lexicon */")
+        Array("lexicon", lexicon).dump(fp, trace)
+        Array("lexicon_offset", lexicon_offset).dump(fp, trace)
+
+        # split phrasebook offset table
+        offset1, offset2, shift = splitbins(phrasebook_offset, trace)
+
+        fprint("/* code->name phrasebook */")
+        fprint("#define phrasebook_shift", shift)
+        fprint("#define phrasebook_short", short)
+
+        Array("phrasebook", phrasebook).dump(fp, trace)
+        Array("phrasebook_offset1", offset1).dump(fp, trace)
+        Array("phrasebook_offset2", offset2).dump(fp, trace)
+
+        fprint("/* name->code dictionary */")
+        codehash.dump(fp, trace)
+
+        fprint()
+        fprint('static const unsigned int aliases_start = %#x;' %
+               NAME_ALIASES_START)
+        fprint('static const unsigned int aliases_end = %#x;' %
+               (NAME_ALIASES_START + len(unicode.aliases)))
+
+        fprint('static const unsigned int name_aliases[] = {')
+        for name, codepoint in unicode.aliases:
+            fprint('    0x%04X,' % codepoint)
+        fprint('};')
+
+        # In Unicode 6.0.0, the sequences contain at most 4 BMP chars,
+        # so we are using Py_UCS2 seq[4].  This needs to be updated if longer
+        # sequences or sequences with non-BMP chars are added.
+        # unicodedata_lookup should be adapted too.
+        fprint(dedent("""
+            typedef struct NamedSequence {
+                int seqlen;
+                Py_UCS2 seq[4];
+            } named_sequence;
+            """))
+
+        fprint('static const unsigned int named_sequences_start = %#x;' %
+               NAMED_SEQUENCES_START)
+        fprint('static const unsigned int named_sequences_end = %#x;' %
+               (NAMED_SEQUENCES_START + len(unicode.named_sequences)))
+
+        fprint('static const named_sequence named_sequences[] = {')
+        for name, sequence in unicode.named_sequences:
+            seq_str = ', '.join('0x%04X' % cp for cp in sequence)
+            fprint('    {%d, {%s}},' % (len(sequence), seq_str))
+        fprint('};')
 
 
 def merge_old_version(version, new, old):
@@ -882,6 +885,7 @@ def merge_old_version(version, new, old):
                                           numeric_changes)),
                         normalization_changes))
 
+
 def open_data(template, version):
     local = template % ('-'+version,)
     if not os.path.exists(local):
@@ -898,6 +902,7 @@ def open_data(template, version):
         # Unihan.zip
         return open(local, 'rb')
 
+
 # --------------------------------------------------------------------
 # the following support code is taken from the unidb utilities
 # Copyright (c) 1999-2000 by Secret Labs AB
@@ -1150,6 +1155,7 @@ class UnicodeData:
         # restrict character range to ISO Latin 1
         self.chars = list(range(256))
 
+
 # hash table tools
 
 # this is a straight-forward reimplementation of Python's built-in
@@ -1165,6 +1171,7 @@ def myhash(s, magic):
             h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff
     return h
 
+
 SIZES = [
     (4,3), (8,3), (16,3), (32,5), (64,3), (128,3), (256,29), (512,17),
     (1024,9), (2048,5), (4096,83), (8192,27), (16384,43), (32768,3),
@@ -1172,6 +1179,7 @@ SIZES = [
     (2097152,5), (4194304,3), (8388608,33), (16777216,27)
 ]
 
+
 class Hash:
     def __init__(self, name, data, magic):
         # turn a (key, value) list into a static hash table structure
@@ -1202,7 +1210,7 @@ class Hash:
             if v is None:
                 table[i] = value
                 continue
-            incr = (h ^ (h >> 3)) & mask;
+            incr = (h ^ (h >> 3)) & mask
             if not incr:
                 incr = mask
             while 1:
@@ -1236,6 +1244,7 @@ class Hash:
         file.write("#define %s_size %d\n" % (self.name, self.size))
         file.write("#define %s_poly %d\n" % (self.name, self.poly))
 
+
 # stuff to deal with arrays of unsigned integers
 
 class Array:
@@ -1270,6 +1279,7 @@ class Array:
                 file.write(s.rstrip() + "\n")
         file.write("};\n\n")
 
+
 def getsize(data):
     # return smallest possible integer size for the given array
     maxdata = max(data)
@@ -1280,6 +1290,7 @@ def getsize(data):
     else:
         return 4
 
+
 def splitbins(t, trace=0):
     """t, trace=0 -> (t1, t2, shift).  Split a table to save space.
 
@@ -1299,8 +1310,8 @@ def splitbins(t, trace=0):
         def dump(t1, t2, shift, bytes):
             print("%d+%d bins at shift %d; %d bytes" % (
                 len(t1), len(t2), shift, bytes), file=sys.stderr)
-        print("Size of original table:", len(t)*getsize(t), \
-                            "bytes", file=sys.stderr)
+        print("Size of original table:", len(t)*getsize(t), "bytes",
+              file=sys.stderr)
     n = len(t)-1    # last valid index
     maxshift = 0    # the most we can shift n and still have something left
     if n > 0:
@@ -1341,5 +1352,6 @@ def splitbins(t, trace=0):
             assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
     return best
 
+
 if __name__ == "__main__":
     maketables(1)
author	Stefan Behnel <stefan_ml@behnel.de>	2019-06-01 21:49:03 +0200
committer	GitHub <noreply@github.com>	2019-06-01 21:49:03 +0200
commit	faa2948654d15a859bc4317e00730ff213295764 (patch)
tree	75cdfdd3687fab464d7583d941a8521b969a41c8 /Tools/unicode
parent	bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) (diff)
download	cpython-faa2948654d15a859bc4317e00730ff213295764.tar.gz cpython-faa2948654d15a859bc4317e00730ff213295764.tar.bz2 cpython-faa2948654d15a859bc4317e00730ff213295764.zip