diff options
Diffstat (limited to 'maintenance/docmatcher.py')
-rw-r--r-- | maintenance/docmatcher.py | 281 |
1 files changed, 281 insertions, 0 deletions
diff --git a/maintenance/docmatcher.py b/maintenance/docmatcher.py new file mode 100644 index 00000000..4b6b94e0 --- /dev/null +++ b/maintenance/docmatcher.py @@ -0,0 +1,281 @@ +#! /usr/bin/python +# Copyright (C) 2010 Gentoo Foundation +# Written by Sebastian Pipping <sebastian@pipping.org> +# Licensed under GPL v2 or later + +from __future__ import print_function +import re +import sys +import os + + +NON_VARIABLES = ('UTF', 'USE', 'TCP', 'SMP', 'PXE', 'PPC', 'MAC', + 'GTK', 'GNU', 'CPU', 'DOS', 'NIC', 'NFS', 'ISO', 'TIMEOUT', + 'TFTP', 'SYSTEM', 'SPARC', 'RAID', 'LABEL', 'PROMPT', 'KERNEL', + 'GRP', 'DOCTYPE', 'DHCP', 'DEFAULT', 'ATARAID', 'APPEND') + +EXTRA_VARIABLES = ['ARCH_OVERRIDE', 'BOOTLOADER', 'CLEAR_CACHE_DIR', 'DEFAULT_KERNEL_SOURCE', 'DISTDIR', 'GK_SHARE', 'BUSYBOX_APPLETS'] +for app in ('DEVICE_MAPPER', 'UNIONFS_FUSE', 'BUSYBOX', 'DMRAID', 'LVM', 'ISCSI', 'FUSE', 'GPG'): + for prop in ('DIR', 'SRCTAR', 'VER'): + EXTRA_VARIABLES.append('%s_%s' % (app, prop)) +EXTRA_VARIABLES = tuple(EXTRA_VARIABLES) + +IGNORE_OPTIONS = ('help', 'version') + + +def exract_gen_cmdline_sh(): + f = open('gen_cmdline.sh', 'r') + gen_cmdline_sh = f.read() + f.close() + + usage_lines = list() + parsing_lines = list() + dest = None + + for line in gen_cmdline_sh.split('\n'): + if line in ('longusage() {', 'usage() {'): + dest = usage_lines + elif line == 'parse_cmdline() {': + dest = parsing_lines + + if dest is not None: + dest.append(line) + + if line == '}': + dest = None + + del gen_cmdline_sh + + + parsing_code = '\n'.join(parsing_lines) + del parsing_lines + + gen_cmdline_sh_parsing_long_params = set() + for match in re.finditer('--([a-z][a-z0-9-]+)', parsing_code): + para_name = match.group(1) + if para_name in IGNORE_OPTIONS: + continue + gen_cmdline_sh_parsing_long_params.add(para_name) + + gen_cmdline_sh_variables = set() + for match in re.finditer('^\s*([A-Z_]+)=', parsing_code, re.MULTILINE): + var_name = match.group(1) + if var_name.startswith('CMD_'): + continue + gen_cmdline_sh_variables.add(var_name) + + del parsing_code + + + gen_cmdline_sh_usage_long_params = set() + for match in re.finditer('--([a-z][a-z0-9-]+)', '\n'.join(usage_lines)): + para_name = match.group(1) + if para_name in IGNORE_OPTIONS: + continue + gen_cmdline_sh_usage_long_params.add(para_name) + del usage_lines + + return gen_cmdline_sh_parsing_long_params, gen_cmdline_sh_usage_long_params, gen_cmdline_sh_variables + + +def extract_genkernel_8_txt(): + f = open(os.path.join('doc', 'genkernel.8.txt'), 'r') + genkernel_8_txt = f.read() + f.close() + + # Preprocess + genkernel_8_txt = genkernel_8_txt.replace('*[*no-*]*','[no-]') + + yes_no = re.compile('^\\[(no-)\\]([a-z0-9-]+)$') + + genkernel_8_txt_long_params = set() + for match in re.finditer('--((?:[a-z]|\\[no-\\])[a-z0-9-]+)', genkernel_8_txt): + para_name = match.group(1) + + # Black list + if para_name == 'no-': + continue + + m = yes_no.match(para_name) + if m: + p_yes = m.group(2) + p_no = m.group(1) + m.group(2) + genkernel_8_txt_long_params.add(p_yes) + genkernel_8_txt_long_params.add(p_no) + else: + genkernel_8_txt_long_params.add(para_name) + + del genkernel_8_txt + + return genkernel_8_txt_long_params + + +def extract_genkernel_xml(genkernel_xml_path, variables_blacklist): + f = open(genkernel_xml_path, 'r') + genkernel_xml = f.read() + f.close() + + # Preprocess + genkernel_xml = genkernel_xml.replace('<c>','').replace('</c>','').replace('<b>','').replace('</b>','') + + yes_no = re.compile('^(no-)([a-z0-9-]+)$') + + genkernel_xml_long_params = set() + for match in re.finditer('--([a-z][a-z0-9-]+)', genkernel_xml): + para_name = match.group(1) + + if para_name in IGNORE_OPTIONS: + continue + + # Fix doc error "--no install" + if para_name == 'no': + para_name = 'no-install' + + m = yes_no.match(para_name) + if m and para_name != 'no-ramdisk-modules': + p_yes = m.group(2) + p_no = m.group(1) + m.group(2) + genkernel_xml_long_params.add(p_yes) + genkernel_xml_long_params.add(p_no) + else: + genkernel_xml_long_params.add(para_name) + + genkernel_xml_variables = set() + for match in re.finditer('[A-Z_]{3,}', genkernel_xml): + var_name = match.group(0) + if var_name in variables_blacklist: + continue + genkernel_xml_variables.add(var_name) + + del genkernel_xml + + return genkernel_xml_long_params, genkernel_xml_variables + + +def extract_gen_determineargs_sh(): + f = open('gen_determineargs.sh', 'r') + gen_determineargs_sh = f.read() + f.close() + + gen_determineargs_sh_variables = set() + for match in re.finditer('set_config_with_override\s+[0-9]+\s+([A-Z_]+)', gen_determineargs_sh): + var_name = match.group(1) + gen_determineargs_sh_variables.add(var_name) + + for match in re.finditer('([A-Z_]+)=`(?:arch|cache)_replace "\\$\\{\\1\\}"`', gen_determineargs_sh): + var_name = match.group(1) + gen_determineargs_sh_variables.add(var_name) + + del gen_determineargs_sh + + return gen_determineargs_sh_variables + + +def extract_genkernel_conf(variables_blacklist): + f = open('genkernel.conf', 'r') + genkernel_conf = f.read() + f.close() + + genkernel_conf_variables = set() + for match in re.finditer('^#*\\s*([A-Z_]{3,})', genkernel_conf, re.MULTILINE): + var_name = match.group(1) + if var_name in variables_blacklist: + continue + genkernel_conf_variables.add(var_name) + + del genkernel_conf + + return genkernel_conf_variables + + +def print_set(s): + if s: + print('\n'.join(('- ' + e) for e in sorted(s))) + else: + print(' NONE') + print() + + +def usage(): + print('USAGE: %s GENTOO/xml/htdocs/doc/en/genkernel.xml' % os.path.basename(sys.argv[0])) + + +def main(): + if len(sys.argv) != 2: + usage() + sys.exit(1) + + gen_cmdline_sh_parsing_long_params, gen_cmdline_sh_usage_long_params, gen_cmdline_sh_variables = exract_gen_cmdline_sh() + genkernel_8_txt_long_params = extract_genkernel_8_txt() + gen_determineargs_sh_variables = extract_gen_determineargs_sh() + + variables_blacklist = set(NON_VARIABLES).difference(gen_determineargs_sh_variables) + known_variales = set(EXTRA_VARIABLES).union(gen_determineargs_sh_variables).union(gen_cmdline_sh_variables) + + genkernel_xml_long_params, genkernel_xml_variables = extract_genkernel_xml(sys.argv[1], variables_blacklist) + genkernel_conf_variables = extract_genkernel_conf(variables_blacklist) + + + # Status quo + print('Options used by parser in *gen_cmdline.sh*:') + print_set(gen_cmdline_sh_parsing_long_params) + + print('Options mentioned in usage of *gen_cmdline.sh*:') + print_set(gen_cmdline_sh_usage_long_params) + + print('Options mentioned in *man page*:') + print_set(genkernel_8_txt_long_params) + + print('Options mentioned in *web page*:') + print_set(genkernel_xml_long_params) + + + print('Variables set by *gen_cmdline.sh*:') + print_set(gen_cmdline_sh_variables) + + print('Variables read by *gen_determineargs.sh*:') + print_set(gen_determineargs_sh_variables) + + print('Variables mentioned in *web page*:') + print_set(genkernel_xml_variables) + + print('Variables used in *genkernel.conf*:') + print_set(genkernel_conf_variables) + + + # Future work (due extensions) + print('Options missing from the *man page*:') + print_set(gen_cmdline_sh_parsing_long_params.difference(genkernel_8_txt_long_params)) + + print('Options missing from *--help*:') + print_set(gen_cmdline_sh_parsing_long_params.difference(gen_cmdline_sh_usage_long_params)) + + print('Options missing from *web page*:') + print_set(gen_cmdline_sh_parsing_long_params.difference(genkernel_xml_long_params)) + + print('Variables missing from *web page*:') + print_set(known_variales.difference(genkernel_xml_variables)) + + print('Variables missing from *genkernel.conf*:') + print_set(known_variales.difference(genkernel_conf_variables)) + + + # Future work (due removal and updates) + print('Removed options still mentioned in the *man page*:') + print_set(genkernel_8_txt_long_params.difference(gen_cmdline_sh_parsing_long_params)) + + print('Removed options still mentioned in *--help*:') + print_set(gen_cmdline_sh_usage_long_params.difference(gen_cmdline_sh_parsing_long_params)) + + print('Removed options still mentioned in *web page*:') + print_set(genkernel_xml_long_params.difference(gen_cmdline_sh_parsing_long_params)) + + print('Removed variables still mentioned in *web page*:') + print_set(genkernel_xml_variables.difference(known_variales)) + + print('Removed variables still mentioned in *genkernel.conf*:') + print_set(genkernel_conf_variables.difference(known_variales)) + + +if __name__ == '__main__': + main() |