URL
https://opencores.org/ocsvn/c0or1k/c0or1k/trunk
Subversion Repositories c0or1k
[/] [c0or1k/] [trunk/] [tools/] [cml2-tools/] [kxref.py] - Rev 2
Compare with Previous | Blame | View Log
#!/usr/bin/env python """ kxref.py -- generate configuration symbol cross-reference for the kernel tree This is a report generator intended to catch problems and inconsistencies in the configuration-symbol namespace. It uses information generated by the CML2 compiler -- notably, it relies on the compiler's scanning of help files. All this does is generate cross-reference reports on configuration symbols. But they can be filtered and presented in various interesting ways. Basic usage is like this: kxref.py [-f filter | -h] [-l] [-x symbol] [-n re] [sourcetree] You can set a filter using a boolean-expression minilanguage. The predicates available are as follows: c -- select all symbols present in code (.c, .h, .S files) m -- select all symbols present in makefiles n -- select all symbols defined in CML2 rulesfiles h -- select all symbols for which help is available (CMl1 convention) H -- select all symbols for which help is available (CML2 convention) d -- select all symbols that occur in defconfigs x -- select all symbols that are derived in CML2. o -- select all symbols present in CML1 configuration files a -- select all symbols declared in CML1 configuration files p -- select all symbols for which autoconfigure.py has a probe D(name) -- select all symbols transitively dependent on name A(name) -- select all symbols transitively ancestral to name T(type) -- select type (trit, bool, string, decimal, hexadecimal) P(property) -- select all symbols with given property V(symbol) -- select all symbols with given symbol in their visibility guard. Operations available are as follows: & -- and (set intersection) | -- or (set intersection) ~ -- not (set complement). You may use parentheses for expression grouping. This program caches a cross-reference database in a file named xref.out, so all reports after the first are generated really fast. You should remove this file whenever you apply a patch. The -i option inverts the report so it's keyed by file, rather than by symbol. The -g option generates a patch removing file lines containing the reported (presumably orphaned) symbols. Use with caution...it's really only safe for hacking defconfigs. The -x option is for debugging. It generates a report on an individual symbol specified as an argument to the option. Flag letters are as above, with f= giving the value of the computed filter predicate. The -h option checks for duplicate or superfluous file inclusions in the source tree. The -l switch suppresses printing printing of cross-references; only symbols matching the given filter(s) are listed. The -n suppresses listing of files with names matching the given regexp. If all the files a symbol occurs in are excluded, it will be omitted from the listings. The -t option produces a listing of symbols which either have inconsistent CML1 types or types that differ between CML1 and CML2. The -k option accepts a file of kill-list symbols to be ignored. The program has some knowledge of file syntax. It ignores the contents of comments in C, CML1, and CML2 files (e.g. does not cross-reference symbols in such comments). Some interesting reports: n&~p&~a -- identifies CML2 symbols no longer declared or defined in CML1 """ import sys, os, re, getopt, cPickle, cml, cmlsystem xrefs = None rulebase = None typefind = choicere = configre = definere = mycml1types = None def suffix(haystack, *needle): "Does a filename have any of the given suffixes?" for suf in needle: if haystack[-len(suf):] == suf: return 1 return 0 def prefix(haystack, *needle): "Does a filename have any of the given prefixes?" for pre in needle: if haystack[len(pre):] == pre: return 1 return 0 # Code for recognizing symbols and stripping out comments # It's OK that this matches _MODULE symbols, we'll filter those out later. configpref = re.compile("(?<![A-Z0-9_])(CONFIG_[a-zA-Z0-9_][a-zA-Z0-9_]+)") # Regular expressions for stripping out C comments. We're aided here by the # fact that we don't care about the contents of most of the file. So by # first stripping out / and characters that are not part of comment # delimiter pairs, we can make detecting comments pretty trivial. This won't # completely strip comments of the form /* aaaa /* bbbb */, but for this # application that's OK -- we don't have to be perfect, just reduce the # exception cases to the point where eyeball checking is feasible. Use # of lookaheads and lookbehinds avoids nipping off anything that might # be a nearby bit of symbol. # randomslash = re.compile("(?<=[^*])/(?=[^*])") randomstar = re.compile("(?<=[^/])\*(?=[^/])") c_comment = re.compile("/\*[^*]*\*/") def c_comment_strip(str): str = randomslash.sub("", str,) str = randomstar.sub("", str) return c_comment.sub("", str) # Shell, config-file, and Makefile-style comments. # hashcomment = re.compile("#.*\n", re.MULTILINE) def hash_comment_strip(str): return hashcomment.sub("", str) # Code for generating the cross-reference def ignore(file): "Return 1 if the file should be ignored for cross-referencing purposes." # Ignore CML files because we look symbols up directly in the rulebase. return suffix(file, ".bak", ".orig", ".rej", ".cml", ".o", ".a", ".out", "log", "Log", ",v", "~") # These are used in the language documentation kill_list = {"CHEER":1, "BOOM":1, "BOGUS":1} def makexref(tree): "Generate a cross-reference dictionary for the given source tree." global typefind, choicere, configre, definere, mycml1types typefind = re.compile(r"(?<!define_)(bool|tristate|int|hex|string)\s+'.*'\s+CONFIG_(\w+)") choicere = re.compile(r"^\s*choice") configre = re.compile(rulebase.prefix + r"(\w*)") definere = re.compile(r"^\s+define_([a-z]*)\s+(\w*)") mycml1types = {} def xrefvisit(dict, dir, files): "Visit a directory on behalf of the cross-referencer." def filevisitor(dict, file): "Visit a file on behalf of the cross-referencer." if file[0] == '.': return fp = open(file) contents = fp.read() fp.close() if suffix(file, ".c", ".h", ".S"): contents = c_comment_strip(contents) elif suffix(file, ".in", ".cml"): contents = hash_comment_strip(contents) for match in configpref.findall(contents): if suffix(match, "_MODULE"): continue match = namestrip(match) if kill_list.has_key(match): continue elif not dict.has_key(match): dict[match] = [] if file not in dict[match]: dict[match].append(file) # Parse file contents for choice symbols if suffix(file, ".in"): lines = contents.split("\n") while lines: if not choicere.match(lines[0]): # First extract type info for ordinary symbols m = typefind.search(lines[0]) if m: symtype = m.group(1) symname = m.group(2) if not mycml1types.has_key(symname): mycml1types[symname] = [] if (symtype, file) not in mycml1types[symname]: mycml1types[symname].append((symtype, file)) # CML1 defines count with other symbols of their type symdef = definere.search(lines[0]) if symdef: symbol = namestrip(symdef.group(2)) type = symdef.group(1) if not mycml1types.has_key(symbol): mycml1types[symbol] = [] if (type, file) not in mycml1types[symbol]: mycml1types[symbol].append((type, file)) lines.pop(0) continue else: lines.pop(0) while lines[0].find(rulebase.prefix) > -1: findit = configre.search(lines[0]) symbol = namestrip(findit.group(0)) if not mycml1types.has_key(symbol): mycml1types[symbol] = [] mycml1types[symbol].append(("choice", file)) if lines[0].find('" ') > -1: break lines.pop(0) for file in files: node = os.path.join(dir, file)[2:] if os.path.isfile(node) and not ignore(node): filevisitor(dict, node) xrefdict = {} here = os.getcwd() os.chdir(sourcetree) os.path.walk(".", xrefvisit, xrefdict) os.chdir(here) # Data reduction -- collapse CML1 cross references of identical type for (key, value) in mycml1types.items(): if len(value) <= 1: continue # Only interested in the multiples else: tdict = {} for (type, file) in value: tdict[type] = [] for (type, file) in value: tdict[type].append(file) reslist = [] for type in tdict.keys(): reslist.append((type, tdict[type])) mycml1types[key] = reslist # Second stage of data reduction -- if a symbol has both a choice # declaration and another of a different type, suppress the non-choice # declaration -- we can assume it came from a CML1 define. for (key, value) in mycml1types.items(): if "choice" in map(lambda x: x[0], value): mycml1types[key]=filter(lambda x: x[0]=="choice", mycml1types[key]) return (xrefdict, mycml1types) probe_table = {} def load_probe_table(): "Build a table of symbols for qhich we have probes." from autoconfigure import get_arch (ARCH, ARCHSYMBOL) = get_arch() TRUE = 1 FALSE = 0 PRESENT = 1 ABSENT = 0 y = m = n = 0 def DEBUG(str): pass def PCI(prefix, symbol): probe_table[symbol] = 1 def PCI_CLASS(match, symbol): probe_table[symbol] = 1 def PNP(match, symbol): probe_table[symbol] = 1 def MCA(match, symbol): probe_table[symbol] = 1 def USBP(match, symbol): probe_table[symbol] = 1 def USBC(match, symbol): probe_table[symbol] = 1 def USBI(match, symbol): probe_table[symbol] = 1 def FS(match, symbol): probe_table[symbol] = 1 def DEV(match, symbol): probe_table[symbol] = 1 def DEVM(match, symbol): probe_table[symbol] = 1 def CONS(match, symbol): probe_table[symbol] = 1 def DMESG(match, symbol, truthval=None): probe_table[symbol] = 1 def NET(match, symbol): probe_table[symbol] = 1 def IDE(match, symbol): probe_table[symbol] = 1 def REQ(match, symbol): probe_table[symbol] = 1 def CPUTYPE(match, symbol): probe_table[symbol] = 1 def CPUINFO(match, symbol, present=None, truthval=None): probe_table[symbol] = 1 def EXISTS(procfile, symbol): probe_table[symbol] = 1 def MODULE(name, symbol): probe_table[symbol] = 1 def GREP(pattern, file, symbol): probe_table[symbol] = 1 execfile(rulesfile) # Predicates for filtering the reports def namestrip(name): if rulebase.prefix and name[:len(rulebase.prefix)] == rulebase.prefix: return name[len(rulebase.prefix):] else: return name def in_code(name): "Does a name occur in code?" if not xrefs.has_key(name): return 0 for file in xrefs[name]: if suffix(file, ".c", ".S") or (suffix(file, ".h") and not suffix(file, "autoconf.h")): return 1 return 0 def in_help(name): "Is there help for a symbol (CML1 convention)?" # Catch choice names that aren't in Configure.help directly. entry = rulebase.dictionary.get(namestrip(name)) if entry and entry.help(): return 1 # This catches names that are in a helpfile but not known to CML2. if not xrefs.has_key(name): return 0 for file in xrefs[name]: if suffix(file, ".help"): return 1 # False negative if there is ever a choice name that CML2 # doesn't know about. return 0 def in_cml2_help(name): "Does a name occur in some help file (CML2 rules)?" entry = rulebase.dictionary.get(namestrip(name)) if entry and entry.helptext: return 1 # This catches names that are in a helpfile but not known to CML2. if not xrefs.has_key(name): return 0 for file in xrefs[name]: if suffix(file, ".help"): return 1 # False negative if there is ever a choice name that CML2 # doesn't know about. return 0 def in_makefile(name): "Does a name occur in a makefile?" if not xrefs.has_key(name): return 0 for file in xrefs[name]: if suffix(file, "akefile"): return 1 return 0 def in_cml1(name): "Does a name occur in a CML1 file?" if not xrefs.has_key(name): return 0 for file in xrefs[name]: if suffix(file, "onfig.in"): return 1 return 0 def cml1_declared(name): "Is a name declared (assigned a type) in a CML1 file?" return mycml1types.has_key(name) def in_defconfig(name): if not xrefs.has_key(name): return 0 "Does a this symbol occur in a defconfig?" for file in xrefs[name]: if file.find("defconfig") > -1 or file.find("configs/") > -1: return 1 return 0 def in_cml2(name): "Is this a valid CML2 symbol?" return rulebase.dictionary.has_key(namestrip(name)) def is_derived(name): "Is this a CML2 derived name?" entry = rulebase.dictionary.get(namestrip(name)) if entry and entry.is_derived(): return 1 else: return 0 def dependent_of(ancestor, name): "Is given symbol a dependent of given ancestor?" ancestor = rulebase.dictionary.get(namestrip(ancestor)) entry = rulebase.dictionary.get(namestrip(name)) if entry and ancestor.ancestor_of(entry): return 1 else: return 0 def ancestor_of(dependent, name): "Is given symbol a an ancestor of given dependent?" dependent = rulebase.dictionary.get(namestrip(dependent)) entry = rulebase.dictionary.get(namestrip(name)) if entry and entry.ancestor_of(dependent): return 1 else: return 0 def type_of(typename, name): "Is given symbol of given tyoe?" entry = rulebase.dictionary.get(namestrip(name)) if entry and entry.type == typename: return 1 else: return 0 def has_property(property, name): "Does given symbol have given property?" entry = rulebase.dictionary.get(namestrip(name)) if entry and property in entry.properties: return 1 else: return 0 def is_probed(name): "Does given symbol have a probe?" entry = rulebase.dictionary.get(namestrip(name)) if not probe_table: load_probe_table() return entry and probe_table.has_key(entry.name) def in_visibility(guard, name): "Does the symbol GUARD occur in the visibility predicate of NAME?" entry = rulebase.dictionary.get(namestrip(name)) if not entry: return 0 guard = rulebase.dictionary.get(namestrip(guard)) return entry.visibility and guard in cml.flatten_expr(entry.visibility) # Report generation def setfilter(filterspec): "Set the filter function." if not filterspec: function = "def myfilter(name): return 1" else: state = 0 expression = "" for c in filterspec: if state == 0: if c == "(" or c == ")": expression += c elif c == " " or c == "\t": pass elif c == "a": expression += " cml1_declared(name)" elif c == "c": expression += " in_code(name)" elif c == "h": expression += " in_help(name)" elif c == "H": expression += " in_cml2_help(name)" elif c == 'm': expression += " in_makefile(name)" elif c == "o": expression += " in_cml1(name)" elif c == "n": expression += " in_cml2(name)" elif c == "d": expression += " in_defconfig(name)" elif c == "x": expression += " is_derived(name)" elif c == "~": expression += " not" elif c == "&": expression += " and" elif c == "|": expression += " or" elif c == "p": expression += " is_probed(name)" elif c == "D": expression += " dependent_of" state = 1 elif c == "A": expression += " ancestor_of" state = 1 elif c == "T": expression += " type_of" state = 1 elif c == "P": expression += " has_property" state = 1 elif c == "V": expression += " in_visibility" state = 1 elif state == 1: if c == ')': expression += '", name)' state = 0 elif c == '(': expression += '("' else: expression += c function = "def myfilter(name): return " + expression #sys.stderr.write("Filter function: " + function + "\n") exec function in globals() def report(keys, norefs=0): "Generate a filtered report on the cross-references." for symbol in keys: refs = filter(lambda x: not (suppress and suppress.search(x)), xrefs[symbol]) if refs: if norefs: print symbol else: sys.stdout.write(symbol + ":") for file in refs: sys.stdout.write(" " + file) sys.stdout.write("\n") def generate_patch(file, symbols): "Generate a patch deleting the given symbols from the given file." pfp = open(file, "rb") contents = pfp.read() pfp.close() for symbol in symbols: contents = re.compile("^.*" + symbol + "[^A-Z0-9].*\n", re.M).sub("", contents) pfp = open(file + ".tweaked", "wb") pfp.write(contents) pfp.close() os.system("diff -u %s %s.tweaked; rm %s.tweaked" % (file, file, file)) # Inclusion checking. This lives here because we use the CML2 rulebase to # check which CONFIG_ symbols are defined (just checking for a CONFIG_ stem # isn't reliable as CML2 doesn't completely own that namespace). includere = re.compile(r'^\s*#\s*include\s*[<"](\S*)[>"]', re.M) def includecheck(sourcetree): "Check the inclusion structure of a source tree." def includevisit(dummy, dir, files): "Visit a directory on behalf of the inclusion checker." def filevisitor(dummy, file): "Visit a file on behalf of the inclusion checker." fp = open(file) contents = fp.read() fp.close() # First get the list of included files inclusions = includere.findall(contents) # This strips slashes, so it has to be done after contents = c_comment_strip(contents) # Check to see if we have defined CONFIG_ symbols in the file matched = [] for match in configpref.findall(contents): if suffix(match, "_MODULE"): match = match[:-7] match = namestrip(match) # Strip prefix if rulebase.dictionary.has_key(match) and match not in matched: matched.append(match) # Check for duplicates dups = {} for header in inclusions: dups[header] = 0 for header in inclusions: dups[header] += 1 for header in inclusions: if dups[header] > 1: print "%s: %s is included %d times" % (file, header, dups[header]) # OK, check to see if we have autoconf inclusion. have_autoconf = 0 for header in inclusions: if header == "autoconf.h" or header == "linux/config.h": have_autoconf = 1 break if not matched and have_autoconf: print "%s: has unnecessary configure file inclusion" % file elif matched and not have_autoconf: print "%s: needs configure file inclusion for %s" % (file, matched) for file in files: if suffix(file, ".c", ".h", ".S"): node = os.path.join(dir, file)[2:] if os.path.isfile(node) and not ignore(node): filevisitor(None, node) here = os.getcwd() os.chdir(sourcetree) os.path.walk(".", includevisit, None) os.chdir(here) # The main program def load_context(tree): "Load context, including CML2 rulebase and cross-reference database." global rulebase, xrefs, mycml1types # Get a CML2 rulebase. if not os.path.exists(os.path.join(tree, "rules.out")): print "This program requires a CML2 rulebase in the source tree." raise SystemExit, 1 else: rulebase = cmlsystem.CMLSystem(os.path.join(tree, "rules.out")) # Try to find a saved cross-reference database. If no such database # exists, generate one and cache it. xref_file = os.path.join(tree, "xref.out") if os.path.exists(xref_file): sys.stderr.write("Reading cross-reference database...") ifp = open(xref_file, "rb") (xrefs, mycml1types) = cPickle.load(ifp) ifp.close() sys.stderr.write("done.\n") else: sys.stderr.write("Regenerating cross-reference database...") (xrefs, mycml1types) = makexref(tree) ofp = open(xref_file, "w") cPickle.dump((xrefs, mycml1types), ofp, 1) ofp.close() sys.stderr.write("done.\n") if __name__ == "__main__": setfilter(None) examine = "" norefs = 0 typecheck = 0 suppress = None rulesfile = None invert = genpatch = checkincludes = 0 (options, arguments) = getopt.getopt(sys.argv[1:], "ef:ghik:ln:r:tx:") for (switch, val) in options: if switch == '-f': setfilter(val) elif switch == '-i': invert = 1 elif switch == '-g': invert = genpatch = 1 elif switch == '-h': checkincludes = 1 elif switch == '-k': fp = open(val, "r") while 1: line = fp.readline() if not line: break kill_list[line.strip()] = 1 elif switch == '-l': norefs = 1 elif switch == '-n': suppress = re.compile(val) elif switch == '-r': rulesfile = val elif switch == '-t': typecheck = 1 elif switch == '-x': examine = val if len(arguments) < 1: sourcetree = "." else: sourcetree = arguments[0] # Load or regenerate the cross-reference database load_context(sourcetree) if not checkincludes: # OK, now filter the database keys = filter(myfilter, xrefs.keys()) keys.sort() # If invert was specified, invert the database so it's keyed by file if invert: inverted = {} for key in keys: for file in xrefs[key]: if not inverted.has_key(file): inverted[file] = [] if key not in inverted[file]: inverted[file].append(key) xrefs = inverted keys = inverted.keys() keys.sort() if genpatch: for file in keys: generate_patch(file, xrefs[file]) elif checkincludes: includecheck(sourcetree) elif examine: shortname = namestrip(examine) if not rulebase.dictionary.has_key(shortname) and not mycml1types.has_key(examine): print "%s: no such symbol" % examine else: print "%s: a=%d c=%d h=%d o=%d n=%d m=%d d=%d x=%s f=%d" % (examine, cml1_declared(examine), in_code(examine), in_help(examine), in_cml1(examine), in_cml2(examine), in_makefile(examine), in_defconfig(examine), is_derived(examine), myfilter(examine)) elif typecheck: print "CML1 type consistency report:" hits = [] ok = 0 for (key, item) in mycml1types.items(): if len(item) == 1: ok += 1 else: hits.append(key) print "%d symbols have consistent type declarations." % ok if hits: print "Non-declared or multiply-declared symbols:" for symbol in hits: print "%s:" % symbol for (type, locs) in mycml1types[symbol]: print " %-8s: %s" % (type, " ".join(locs)) print "CML2 type cross-check:" typematch = 0 missing = 0 matching = 0 typemap = {"bool":"bool", "trit":"tristate", "string":"string", "decimal":"int", "hexadecimal":"hex"} for (key, item) in mycml1types.items(): if not rulebase.dictionary.has_key(namestrip(key)): missing += 1 continue elif len(item) != 1: continue cml2symbol = rulebase.dictionary[namestrip(key)] cml1type = item[0][0] if typemap[cml2symbol.type] == cml1type: matching += 1 elif cml2symbol.menu and cml2symbol.menu.type=="choices" and cml1type=="choice": matching += 1 else: if cml2symbol.is_derived(): derived = "(derived)" else: derived = "" print '"%s", line %d: %s, %s -> %s %s' % (cml2symbol.file, cml2symbol.lineno, key, item[0][0], cml2symbol.type, derived) print "%d CML1 symbols missing, %d type matches" % (missing, matching) else: # OK, list the filtered symbols try: report(keys, norefs) except (KeyboardInterrupt, IOError): pass # In case we break a pipe by interrupting # That's all, folks!