#!/usr/bin/env python3
# Run with directory arguments from any directory, with no special setup
# required.

import os
from pathlib import Path
import re
import sys
from typing import Sequence

VERBOSE = False

# Every distinct copyright notice found so far, as cleaned-up text.
copyrights = set()

# File suffixes (lower-case) that are never scanned for notices.
_UNINTERESTING_EXTENSIONS = frozenset({
    ".bp",
    ".map",
    ".md",
    ".mk",
    ".py",
    ".pyc",
    ".swp",
    ".txt",
})

# File names (lower-case) that are never scanned for notices.
_UNINTERESTING_NAMES = frozenset({"notice", "readme", "pylintrc"})

# Substrings that mark the end of a copyright header.
_COPYRIGHT_ENDINGS = (
    " $FreeBSD: ",
    "$Citrus$",
    "$FreeBSD$",
    "*/",
    "From: @(#)",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
    "\t$Citrus: ",
    "\t$NetBSD: ",
    "\t$OpenBSD: ",
    "\t@(#)",
    "\tcitrus Id: ",
    "\tfrom: @(#)",
    "from OpenBSD:",
)

# Lines that are dropped from the tail of a header before cleaning.
_TRAILING_CRUFT = frozenset({
    " *",
    " * ====================================================",
})

# Cleaned lines that are really just empty comment lines.
_BLANK_COMMENT_LINES = frozenset({"#", " *", "**", "-"})

# Leading " * " of a C block-comment continuation line.
_LEADING_STAR_RE = re.compile(r"^ \* ")


def warn(s):
    """Write a warning message to stderr."""
    sys.stderr.write("warning: %s\n" % s)


def warn_verbose(s):
    """Write a warning message to stderr, but only when VERBOSE is set."""
    if VERBOSE:
        warn(s)


def is_interesting(path_str: str) -> bool:
    """Return True if the file at path_str should be scanned for notices.

    The comparison is case-insensitive: the path is lower-cased before its
    suffix and name are checked against the exclusion tables.
    """
    path = Path(path_str.lower())
    if path.suffix in _UNINTERESTING_EXTENSIONS:
        return False
    if path.name in _UNINTERESTING_NAMES:
        return False
    # Backup files for some editors.
    if path.match("*~"):
        return False
    return True


def is_auto_generated(content):
    """Return True if content carries one of the known generated-file markers."""
    if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:
        return True
    if "This header was automatically generated from a Linux kernel header" in content:
        return True
    return False


def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Return True if line terminates a copyright header.

    Args:
        line: The line to test.
        first_line_was_hash: Whether the header's copyright line started with
            "#". For such headers an empty line also ends the header.
    """
    if first_line_was_hash and not line:
        return True
    return any(ending in line for ending in _COPYRIGHT_ENDINGS)


def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: All lines of the file.
        i: Index of a line containing "Copyright".

    Returns:
        The index of the line that terminated the header (or len(lines) if
        no terminator was found). This equals the input `i` when the
        copyright line is itself a terminator, so callers that loop must
        make sure they still advance.
    """
    first_line_was_hash = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not first_line_was_hash:
        while start > 0:
            if "/*" in lines[start - 1]:
                break
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        if is_copyright_end(lines[i], first_line_was_hash):
            break
        i += 1

    end = i

    # Trim trailing cruft.
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = _LEADING_STAR_RE.sub("", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in _BLANK_COMMENT_LINES:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The list can be empty (or become
    # empty) when the header terminates on its own first line or when every
    # line was filtered out above, so guard each access.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    # Never record an empty notice.
    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i


def do_file(path: str) -> None:
    """Scan one file and add every copyright header it contains to `copyrights`.

    Files that are very short, auto-generated, or contain no "Copyright"
    text are skipped (with a warning where appropriate). Content that is not
    valid UTF-8 is decoded as ISO-8859-1 after a warning.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' %
             (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            next_i = extract_copyright_at(lines, i)
            if next_i <= i:
                # The copyright line is also a header terminator (for
                # example "/* Copyright ... */"), so nothing was consumed.
                # Step past it ourselves, otherwise this loop never ends.
                warn("copyright header in %s ends on its own first line "
                     "(line %d); notice may be incomplete" % (path, i + 1))
                next_i = i + 1
            i = next_i
        else:
            i += 1


def do_dir(arg):
    """Recursively scan every interesting file below the directory arg.

    Directories named ".git" are not entered, and both directories and files
    are visited in sorted order so that warnings come out deterministically.
    """
    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk only honours changes made to the list object it handed us,
        # so prune and sort in place rather than rebinding the name.
        sub_directories[:] = sorted(
            d for d in sub_directories if d != ".git")

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)


def main() -> None:
    """Scan the paths given on the command line and print all notices found.

    With no arguments the current directory is scanned. Directory arguments
    are walked recursively; any other argument is scanned as a single file.
    """
    args = sys.argv[1:] or ["."]

    for arg in args:
        if os.path.isdir(arg):
            do_dir(arg)
        else:
            do_file(arg)

    for notice in sorted(copyrights):
        print(notice)
        print()
        print("-" * 67)
        print()


if __name__ == "__main__":
    main()