| | |
| |
|
| | import json |
| | import sys |
| | from dataclasses import dataclass |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
# Load the entity definitions into `ents`. JSON text is UTF-8 by
# specification, so decode it explicitly instead of relying on the
# platform's default text encoding.
try:
    with open('entities.json', encoding='utf-8') as json_data:
        ents = json.load(json_data)
except FileNotFoundError:
    # Tell the user how to fetch the input file, then exit with a
    # failure status.
    print('entities.json not found, try curl -LJO',
          'https://html.spec.whatwg.org/entities.json')
    sys.exit(1)
| |
|
def to_cchars(s):
    """Convert the string *s* into a list of C initializer elements.

    The string is encoded with ``str.encode()`` and each resulting byte is
    mapped to one list element: bytes in the range 0x20..0x7E, except the
    single quote and the backslash, become a quoted character literal such
    as ``"'a'"``; every other byte is kept as its integer value.
    """
    # These two bytes are emitted numerically rather than as a quoted
    # literal.
    numeric_only = (ord("'"), ord('\\'))

    return [
        f"'{chr(byte)}'"
        if 0x20 <= byte <= 0x7E and byte not in numeric_only
        else byte
        for byte in s.encode()
    ]
| |
|
@dataclass
class PrefixStackEntry:
    """One level of the prefix stack used while walking the sorted names."""

    # Leading part of the entity name covered by this level; names are
    # tested against it with str.startswith().
    prefix: str
    # Index into `tables` of the table collecting entries under this prefix.
    table_id: int
| |
|
@dataclass
class AlphaFixup:
    """Deferred write of one initial table's range into `alpha`."""

    # Index into `tables` of the initial table created for a first character.
    table_id: int
    # Slot number in `alpha`; computed as ord(first character) % 64.
    char: int
| |
|
@dataclass
class StringFixup:
    """Deferred write of a subtable position into a record in `strings`."""

    # Index into `tables` of the subtable the record refers to.
    table_id: int
    # Index in `strings` of the first of the two placeholder elements.
    string_index: int
    # Index into `tables` of the table that holds the referring record.
    super_table_id: int
    # Position of the referring record within that table.
    super_offset: int
| |
|
| | |
# Keep only the entity strings that end in a semicolon and order them by
# name, i.e. the key without its leading character and the trailing
# semicolon.
keys = sorted(
    (key for key in ents.keys() if key.endswith(';')),
    key=lambda k: k[1:-1],
)

# Generator state.  `tables` starts out with 64 empty tables.
strings = []
tables = [[] for _ in range(64)]
prefix_stack = []
alpha_fixups = []
string_fixups = []
| |
|
# Walk the sorted entity strings and append one record per entity to
# `strings`.  `prefix_stack` holds the chain of prefixes (with their tables)
# that the current name extends.
for position, key in enumerate(keys):
    name = key[1:-1]

    # Name of the entity that follows in sort order, if there is one.
    if position + 1 < len(keys):
        following_name = keys[position + 1][1:-1]
    else:
        following_name = None

    # Discard prefixes that the current name does not start with.
    while prefix_stack and not name.startswith(prefix_stack[-1].prefix):
        prefix_stack.pop()

    # An empty stack means a new first character: open an initial table
    # for it and queue the matching `alpha` fixup.
    if not prefix_stack:
        initial_table_id = len(tables)
        tables.append([])

        prefix_stack.append(PrefixStackEntry(name[0], initial_table_id))
        alpha_fixups.append(AlphaFixup(initial_table_id, ord(name[0]) % 64))

    parent = prefix_stack[-1]
    parent_table = tables[parent.table_id]

    # Register the record that is about to be written in the parent table.
    string_index = len(strings)
    table_index = len(parent_table)
    parent_table.append(string_index)

    # Only the part of the name after the parent's prefix is stored.
    name_chars = to_cchars(name[len(parent.prefix):])
    repl_chars = to_cchars(ents[key]['characters'])

    # First record element: stored name length, with 0x80 set when the
    # entity string without its trailing semicolon is also a key of `ents`.
    header = len(name_chars) | (0x80 if key[:-1] in ents else 0)

    if following_name and following_name.startswith(name):
        # The next name extends this one: flag the record with 0x40 and
        # reserve two elements for the subtable position, patched later.
        strings.extend([
            header | 0x40, *name_chars,
            0, 0,
            len(repl_chars), *repl_chars,
        ])

        subtable_id = len(tables)
        tables.append([])

        # The placeholders sit right after the header and the name.
        string_fixups.append(StringFixup(
            subtable_id,
            string_index + 1 + len(name_chars),
            parent.table_id,
            table_index,
        ))

        prefix_stack.append(PrefixStackEntry(name, subtable_id))
    else:
        strings.extend([
            header, *name_chars,
            len(repl_chars), *repl_chars,
        ])
| |
|
| | |
# Concatenate all tables into `values`.  `ranges[k]` and `ranges[k + 1]`
# delimit the slice of `values` that belongs to table k.
values = [string_index for table in tables for string_index in table]
ranges = [0]
for table in tables:
    ranges.append(ranges[-1] + len(table))
| |
|
| | |
# Build `alpha`: three elements per slot, holding the low byte and the
# remaining high bits of the initial table's start in `values`, followed
# by the number of entries in that table.
alpha = [0] * (59 * 3)
for fixup in alpha_fixups:
    first = ranges[fixup.table_id]
    count = ranges[fixup.table_id + 1] - first
    high, low = divmod(first, 0x100)
    slot = fixup.char * 3
    alpha[slot:slot + 3] = [low, high, count]
| |
|
| | |
# Replace the two placeholder elements of every record that owns a
# subtable with the subtable's start, expressed relative to the record's
# own position in `values`, and the subtable's entry count.
for fixup in string_fixups:
    sub_start = ranges[fixup.table_id]
    sub_count = ranges[fixup.table_id + 1] - sub_start
    owner_position = ranges[fixup.super_table_id] + fixup.super_offset
    slot = fixup.string_index
    strings[slot:slot + 2] = [sub_start - owner_position, sub_count]
| |
|
| | |
| |
|
def gen_table(ctype, cname, values, fmt, elems_per_line):
    """Return the C source of a ``static const`` array definition.

    ctype and cname are the element type and the array name, values the
    elements, fmt the %-format applied to each element, and elems_per_line
    the number of elements placed on one output line.
    """
    count = len(values)
    pieces = []

    for index, value in enumerate(values):
        # Start a new line every elems_per_line elements, otherwise
        # separate with a space; every element but the first follows a comma.
        lead = '\n ' if index % elems_per_line == 0 else ' '
        if index != 0:
            lead = ',' + lead
        pieces.append(lead + fmt % value)

    body = ''.join(pieces)
    return f'static const {ctype} {cname}[{count}] = {{{body}\n}};\n\n'
| |
|
# Write the three generated C tables to the include file, in this order.
with open('codegen/html5ent.inc', 'w') as out:
    table_specs = (
        ('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15),
        ('unsigned short', 'htmlEntValues', values, '%5d', 10),
        ('unsigned char', 'htmlEntStrings', strings, '%3s', 15),
    )
    for spec in table_specs:
        out.write(gen_table(*spec))
| |
|