#!/usr/bin/env python3
"""Locate copyright comment blocks in source files and report their owners."""

import os
import re
import sys

from file_types import (
    find_files,
    lang_type,
)

# Editor mode line of the form "-*- mode: ... -*-".
mode_line = re.compile(r"(-\*- *mode:.* *-\*-)")
# A line whose first non-blank character is "#".
shell_comment = re.compile(r"^\s*#")
# NOTE: the next four patterns match anywhere in the line, not only at its
# start.
lisp_comment = re.compile(r";")
cpp_comment = re.compile(r"//")
c_comment_start = re.compile(r"/\*")
c_comment_end = re.compile(r"\*/")


def _line_comment_blocks(lines, comment_re, allow_shebang):
    """Yield ``(start, end)`` for runs of lines matched by *comment_re*.

    Line 0 is skipped when it carries an editor mode line or, if
    *allow_shebang* is true, starts with ``#!``.  Scanning stops at the first
    non-blank, non-comment line that is seen while no block is open.
    """
    start = None
    for i, line in enumerate(lines):
        if i == 0 and (
            (allow_shebang and line.startswith("#!")) or mode_line.search(line)
        ):
            continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            if line.strip():
                return
        else:
            # First non-comment line after a run closes the block.
            yield start, i - 1
            start = None

    # A block that runs to the end of the file has no closing line.
    if start is not None:
        yield start, len(lines) - 1


def _c_family_comment_blocks(lines):
    """Yield ``(start, end)`` for leading ``/* */`` and ``//`` comment blocks.

    ``mode`` is ``"C"`` while inside an open ``/* ... */`` comment, ``"CPP"``
    while inside a run of ``//`` lines, and ``None`` otherwise.
    """
    start = None
    mode = None
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == "C":
            assert start is not None, "on line %d" % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, "on line %d" % (i + 1)
            # Code before the "//" means the header comments are over.
            if line[: cpp_match.start()].strip():
                return
            if mode is None:
                mode = "CPP"
                start = i
            else:
                # A new "Copyright" line inside a "//" run starts a new block.
                text = line[cpp_match.end() :].lstrip()
                if text.startswith("Copyright"):
                    yield start, i - 1
                    start = i
            continue
        elif mode == "CPP":
            assert start is not None, "on line %d" % (i + 1)
            # Blank lines do not end a "//" run.
            if not line.strip():
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, "on line %d" % (i + 1)
            start = i
            # The comment may close on the line that opens it; search after
            # the opener so that "/*/" is not mistaken for a terminator.
            if c_comment_end.search(line, c_match.end()):
                yield start, i
                continue
            mode = "C"

        if mode is None and line.strip():
            return

    # A "//" run that reaches the end of the file is still a block.  An
    # unterminated "/*" comment is not reported.
    if mode == "CPP":
        yield start, len(lines) - 1


def find_copyright_block(lines, lang_type):
    """Yield ``(start, end)`` index pairs of leading comment blocks.

    Args:
        lines: The file's lines.
        lang_type: Language name selecting the comment syntax to look for.

    Yields:
        Inclusive, 0-based ``(start, end)`` indices into *lines*, one pair per
        comment block found before the scan stops at non-comment content.

    Raises:
        AttributeError: If *lang_type* is not one of the handled languages.
            Because this is a generator, the error surfaces on first
            iteration rather than at call time.
    """
    if lang_type in ("python", "make", "shell", "perl", "scons"):
        yield from _line_comment_blocks(lines, shell_comment, True)
    elif lang_type in ("lisp",):
        yield from _line_comment_blocks(lines, lisp_comment, False)
    elif lang_type in (
        "C",
        "C++",
        "swig",
        "isa",
        "asm",
        "slicc",
        "lex",
        "yacc",
    ):
        yield from _c_family_comment_blocks(lines)
    else:
        raise AttributeError(f"Could not handle language {lang_type}")


# A year range such as "2001-2005" or "2001 - 2005".
date_range_re = re.compile(r"([0-9]{4})\s*-\s*([0-9]{4})")
def process_dates(dates):
    """Expand a comma-separated list of years and year ranges.

    Args:
        dates: Text such as ``"2001, 2003-2005"``.

    Returns:
        A set of ints holding every year named, with ranges expanded
        inclusively.  Items that are neither a range nor an integer are
        ignored.
    """
    years = set()
    for item in dates.split(","):
        item = item.strip()
        match = date_range_re.match(item)
        if match:
            first, last = (int(year) for year in match.groups())
            years.update(range(first, last + 1))
            continue
        try:
            years.add(int(item))
        except ValueError:
            # Deliberate best effort: skip fragments that are not a year.
            pass
    return years


# Groups: "(c)"/"(C)" marker, raw date text, owner name.
# NOTE(review): the "A-z" range also covers the punctuation that sits between
# "Z" and "a" in ASCII ("[", "\", "]", "^", "_", "`").
copyright_re = re.compile(
    r"Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-z-,. ]+)", re.DOTALL
)

# First author line ("Authors: name") and the continuation lines after it.
authors_re = re.compile(r"^[\s*#/]*Authors:\s*([A-z .]+)\s*$")
more_authors_re = re.compile(r"^[\s*#/]*([A-z .]+)\s*$")

# Every owner name seen by get_data() so far.
all_owners = set()


def get_data(lang_type, lines):
    """Extract copyright information from the comment blocks of a file.

    Args:
        lang_type: Language name, passed on to find_copyright_block().
        lines: The file's lines.

    Returns:
        A list of ``(owner, dates, authors, start, end)`` tuples, one for each
        comment block containing a copyright notice.  ``dates`` is the set
        returned by process_dates(), ``authors`` is a list of names and
        ``start``/``end`` are line indices into *lines*.

    Side effects:
        Adds each owner to the module-level ``all_owners`` set.
    """
    data = []

    for start, end in find_copyright_block(lines, lang_type):
        # Lines are joined without a separator so that a notice wrapped over
        # several comment lines can still be matched as one string.
        joined = "".join(lines[start : end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        _, dates, owner = match.groups()
        dates = dates.strip()
        owner = owner.strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show the offending text before propagating the error.
            print(dates)
            print(owner)
            raise

        authors = []
        for i in range(start, end + 1):
            line = lines[i]
            if not authors:
                # Still looking for the "Authors:" line.
                match = authors_re.search(line)
                if match:
                    authors.append(match.group(1).strip())
            else:
                match = more_authors_re.search(line)
                if not match:
                    # The author list is over.  Pull `end` back to the first
                    # blank line, or to just before the first "//" line that
                    # still has text after the marker, then stop scanning.
                    for j in range(i, end + 1):
                        line = lines[j].strip()
                        if not line:
                            end = j
                            break
                        if line.startswith("//"):
                            line = line[2:].lstrip()
                            if line:
                                end = j - 1
                                break
                    break
                authors.append(match.group(1).strip())

        data.append((owner, dates, authors, start, end))

    return data


def datestr(dates):
    """Format years as a compact string such as ``"2001-2003,2005"``.

    Args:
        dates: Iterable of integer years.

    Returns:
        The years in ascending order, with runs of consecutive years collapsed
        to ``first-last`` and the pieces joined by commas.  An empty input
        gives an empty string.
    """
    years = sorted(dates)
    if not years:
        return ""

    ranges = []
    first = last = years[0]
    for year in years[1:]:
        if year == last + 1:
            last = year
            continue
        ranges.append((first, last))
        first = last = year
    ranges.append((first, last))

    return ",".join(
        f"{lo}" if lo == hi else f"{lo}-{hi}" for lo, hi in ranges
    )


# NOTE(review): main() also accepts -c and -i <arg>, which this text does not
# mention.
usage_str = """usage: %s [-v] """


def usage(exitcode):
    """Print the usage text and exit with *exitcode* unless it is None."""
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)


def main():
    """Scan the paths on the command line and print one line per owner.

    Options:
        -c        append the number of copyright blocks found for each owner
        -i <arg>  accepted and collected, but not consulted by this code
        -v        report files that could not be parsed
    """
    import getopt

    show_counts = False
    ignore = set()
    verbose = False

    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == "-c":
            show_counts = True
        if o == "-i":
            ignore.add(a)
        if o == "-v":
            verbose = True

    files = []
    for base in args:
        if os.path.isfile(base):
            files += [(base, lang_type(base))]
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError(f"can't access '{base}'")

    copyrights = {}
    counts = {}

    for filename, _lang in files:
        # Undecodable bytes are replaced so that one oddly encoded file does
        # not abort the whole scan.
        with open(filename, "r", encoding="utf-8", errors="replace") as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip("\r\n") for line in lines]

        # The language is detected again, this time with the first line.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Top-level boundary: skip this file and carry on with the rest.
            if verbose:
                if len(e.args) == 1:
                    e.args = (f"{e} ({filename})",)
                print(f"could not parse {filename}: {e}")
            continue

        for owner, dates, _authors, _start, _end in data:
            copyrights.setdefault(owner, set()).update(dates)
            counts[owner] = counts.get(owner, 0) + 1

    # Sort on (count, owner) only.  Letting the date sets take part in the
    # comparison would order ties by subset relation, which is not a total
    # order.
    summary = sorted(
        ((counts[owner], owner, dates) for owner, dates in copyrights.items()),
        key=lambda entry: entry[:2],
        reverse=True,
    )

    for count, owner, dates in summary:
        if show_counts:
            owner = f"{owner} ({count} files)"
        print(f"Copyright (c) {datestr(dates)} {owner}")


if __name__ == "__main__":
    main()