diff options
author | heinrich5991 <heinrich5991@gmail.com> | 2020-10-14 17:21:23 +0200 |
---|---|---|
committer | Robert Müller <robytemueller@gmail.com> | 2023-06-16 20:33:16 +0200 |
commit | 468de595f823267ae2d8b6bf6f11e590887ebde6 (patch) | |
tree | 3089cd0fc59ad6adb398a9bcdd130bad78090f91 | |
parent | 73c84cadc9cca00092017caba9f7d6e59926bc8b (diff) |
Add scripts to detect variable name style violations
Currently very user-unfriendly, call like this:
```
env CXXFLAGS="-I<builddir>/src -I<includedir_lib1> -I<includedir_lib2> …" python scripts/extract_identifiers.py src/game/**.cpp src/engine/client/**.cpp src/engine/server/**.cpp src/engine/shared/**.cpp > identifiers
python scripts/check_identifiers.py < identifiers
```
-rw-r--r-- | scripts/check_identifiers.py | 73 | ||||
-rw-r--r-- | scripts/extract_identifiers.py | 170 |
2 files changed, 243 insertions, 0 deletions
diff --git a/scripts/check_identifiers.py b/scripts/check_identifiers.py new file mode 100644 index 000000000..7ad727506 --- /dev/null +++ b/scripts/check_identifiers.py @@ -0,0 +1,73 @@ +import csv +import sys +import re + +def check_name(kind, qualifiers, type, name): + if kind == "variable": + return check_variable_name(qualifiers, type, name) + elif kind in "class struct".split(): + if name[0] not in "CI": + return "should start with 'C' (or 'I' for interfaces)" + if len(name) < 2: + return "must be at least two characters long" + if not name[1].isupper(): + return "must start with an uppercase letter" + elif kind == "enum_constant": + if not name.isupper(): + return "must only contain uppercase letters, digits and underscores" + return None + +ALLOW = set(""" + dx dy + fx fy + mx my + ix iy + px py + sx sy + wx wy + x0 x1 + y0 y1 +""".split()) +def check_variable_name(qualifiers, type, name): + if qualifiers == "" and type == "" and name == "argc": + return None + if qualifiers == "" and type == "pp" and name == "argv": + return None + if qualifiers == "cs": + # Allow all uppercase names for constant statics. + if name.isupper(): + return None + qualifiers = "s" + # Allow single lowercase letters as member and variable names. + if qualifiers in ["m", ""] and len(name) == 1 and name.islower(): + return None + prefix = "".join([qualifiers, "_" if qualifiers else "", type]) + if not name.startswith(prefix): + return "should start with {!r}".format(prefix) + if name in ALLOW: + return None + name = name[len(prefix):] + if not name[0].isupper(): + if prefix: + return "should start with an uppercase letter after the prefix {!r}".format(prefix) + else: + return "should start with an uppercase letter" + return None + +def main(): + import argparse + p = argparse.ArgumentParser(description="Check identifiers (input via stdin in CSV format from extract_identifiers.py) for naming style in Teeworlds code") + args = p.parse_args() + + identifiers = list(csv.DictReader(sys.stdin)) + + unclean = False + for i in identifiers: + error = check_name(i["kind"], i["qualifiers"], i["type"], i["name"]) + if error: + unclean = True + print("{}:{}:{}: {}: {}".format(i["file"], i["line"], i["column"], i["name"], error)) + return unclean + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/extract_identifiers.py b/scripts/extract_identifiers.py new file mode 100644 index 000000000..99f2ed77c --- /dev/null +++ b/scripts/extract_identifiers.py @@ -0,0 +1,170 @@ +import clang.cindex +import csv +import enum +import os +import sys + +from clang.cindex import CursorKind, LinkageKind, StorageClass, TypeKind +from collections import Counter + +try: + from tqdm import tqdm +except ImportError: + def tqdm(it, *args, **kwargs): + return it + +def traverse_namespaced(root, filter_files=None, skip_namespaces=1, namespace=()): + if root.location.file is not None and root.location.file.name not in filter_files: + return + yield namespace, root + if root.displayname != "": + if skip_namespaces > 0: + skip_namespaces -= 1 + else: + namespace += (root.spelling,) + for node in root.get_children(): + yield from traverse_namespaced(node, filter_files, skip_namespaces, namespace) + +INTERESTING_NODE_KINDS = { + CursorKind.CLASS_DECL: "class", + CursorKind.CLASS_TEMPLATE: "class", + CursorKind.ENUM_DECL: "enum", + CursorKind.ENUM_CONSTANT_DECL: "enum_constant", + CursorKind.FIELD_DECL: "variable", + CursorKind.PARM_DECL: "variable", + CursorKind.STRUCT_DECL: "struct", + CursorKind.UNION_DECL: "union", + CursorKind.VAR_DECL: "variable", + CursorKind.FUNCTION_DECL: "function", +} + +def is_array_type(type): + return type.kind in (TypeKind.CONSTANTARRAY, TypeKind.DEPENDENTSIZEDARRAY, TypeKind.INCOMPLETEARRAY) + +def get_complex_type(type): + if type.spelling in ("IOHANDLE", "LOCK"): + return "" + if type.kind == TypeKind.AUTO: + return get_complex_type(type.get_canonical()) + if type.kind == TypeKind.LVALUEREFERENCE: + return get_complex_type(type.get_pointee()) + if type.kind == TypeKind.POINTER: + return "p" + get_complex_type(type.get_pointee()) + if is_array_type(type): + return "a" + get_complex_type(type.element_type) + if type.kind == TypeKind.FUNCTIONPROTO: + return "fn" + if type.kind == TypeKind.TYPEDEF: + return get_complex_type(type.get_declaration().underlying_typedef_type) + if type.kind == TypeKind.ELABORATED: + return get_complex_type(type.get_named_type()) + if type.kind in (TypeKind.UNEXPOSED, TypeKind.RECORD): + if type.get_declaration().spelling in "shared_ptr unique_ptr".split(): + return "p" + get_complex_type(type.get_template_argument_type(0)) + if type.get_declaration().spelling in "array sorted_array".split(): + return "a" + get_complex_type(type.get_template_argument_type(0)) + return "" + +def is_static_member_definition_hack(node): + last_colons = False + for t in node.get_tokens(): + t = t.spelling + if t == "::": + last_colons = True + elif last_colons: + if t.startswith("ms_"): + return True + last_colons = False + if t == "=": + return False + return False + +def is_const(type): + if type.is_const_qualified(): + return True + if is_array_type(type): + return is_const(type.element_type) + return False + +class ParseError(RuntimeError): + pass + +def process_source_file(out, file, extra_args, break_on): + args = extra_args + ["-Isrc"] + if file.endswith(".c"): + header = "{}.h".format(file[:-2]) + elif file.endswith(".cpp"): + header = "{}.h".format(file[:-4]) + else: + raise ValueError("unrecognized source file: {}".format(file)) + + index = clang.cindex.Index.create() + unit = index.parse(file, args=args) + errors = list(unit.diagnostics) + if errors: + for error in errors: + print("{}: {}".format(file, error.format()), file=sys.stderr) + print(args, file=sys.stderr) + raise ParseError("failed parsing {}".format(file)) + + filter_files = frozenset([file, header]) + + for namespace, node in traverse_namespaced(unit.cursor, filter_files=filter_files): + cur_file = None + if node.location.file is not None: + cur_file = node.location.file.name + if cur_file is None or (cur_file != file and cur_file != header): + continue + if node.kind in INTERESTING_NODE_KINDS and node.spelling: + type = get_complex_type(node.type) + qualifiers = "" + if INTERESTING_NODE_KINDS[node.kind] in {"variable", "function"}: + is_member = node.semantic_parent.kind in {CursorKind.CLASS_DECL, CursorKind.CLASS_TEMPLATE, CursorKind.STRUCT_DECL, CursorKind.UNION_DECL} + is_static = node.storage_class == StorageClass.STATIC or is_static_member_definition_hack(node) + if is_static: + qualifiers = "s" + qualifiers + if is_member: + qualifiers = "m" + qualifiers + if is_static and not is_member and is_const(node.type): + qualifiers = "c" + qualifiers + if node.linkage == LinkageKind.EXTERNAL and not is_member: + qualifiers = "g" + qualifiers + out.writerow({ + "file": cur_file, + "line": node.location.line, + "column": node.location.column, + "kind": INTERESTING_NODE_KINDS[node.kind], + "path": "::".join(namespace), + "qualifiers": qualifiers, + "type": type, + "name": node.spelling, + }) + if node.spelling == break_on: + breakpoint() + +def main(): + import argparse + p = argparse.ArgumentParser(description="Extracts identifier data from a Teeworlds source file and its header, outputting the data as CSV to stdout") + p.add_argument("file", metavar="FILE", nargs="+", help="Source file to analyze") + p.add_argument("--break-on", help="Break on a specific variable name, useful to debug issues with the script") + args = p.parse_args() + + extra_args = [] + if "CXXFLAGS" in os.environ: + extra_args = os.environ["CXXFLAGS"].split() + + out = csv.DictWriter(sys.stdout, "file line column kind path qualifiers type name".split()) + out.writeheader() + files = args.file + if len(files) > 1: + files = tqdm(files, leave=False) + error = False + for file in files: + try: + process_source_file(out, file, extra_args, args.break_on) + except ParseError: + error = True + return int(error) + +if __name__ == "__main__": + sys.exit(main()) |