Made summary.py more powerful, dropped -m from size scripts

With more scripts generating CSV files, this moves most CSV manipulation into summary.py, which can now handle more or less any arbitrary CSV file with arbitrary names and fields.

This also includes a bunch of additional, probably unnecessary, tweaks:

- summary.py/coverage.py use a custom fractional type for encoding fractions; this will also be used for test counts.
- Added a smaller diff output for size scripts with the --percent flag.
- Added line and hit info to coverage.py's CSV files.
- Added --tree flag to stack.py to show only the call tree without other noise.
- Renamed structs.py to struct.py.
- Changed a few flags around for consistency between size/summary scripts.
- Added `make sizes` alias.
- Added `make lfs.code.csv` rules.
2025-10-23 18:38:49 +08:00 · 2022-09-14 13:34:59 -05:00
parent 23fba40f20
commit acdea1880e
8 changed files with 3081 additions and 1733 deletions
--- a/scripts/code.py
+++ b/scripts/code.py
@@ -5,71 +5,123 @@
 # by Linux's Bloat-O-Meter.
 #

-import os
+import collections as co
+import csv
 import glob
 import itertools as it
-import subprocess as sp
-import shlex
+import math as m
+import os
 import re
-import csv
-import collections as co
+import shlex
+import subprocess as sp


 OBJ_PATHS = ['*.o']
+NM_TOOL = ['nm']
+TYPE = 'tTrRdD'

-class CodeResult(co.namedtuple('CodeResult', 'code_size')):
+
+# integer fields
+class IntField(co.namedtuple('IntField', 'x')):
    __slots__ = ()
-    def __new__(cls, code_size=0):
-        return super().__new__(cls, int(code_size))
+    def __new__(cls, x):
+        if isinstance(x, IntField):
+            return x
+        if isinstance(x, str):
+            try:
+                x = int(x, 0)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = float('inf')
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = float('-inf')
+                else:
+                    raise
+        return super().__new__(cls, x)
+
+    def __int__(self):
+        assert not m.isinf(self.x)
+        return self.x
+
+    def __float__(self):
+        return float(self.x)
+
+    def __str__(self):
+        if self.x == float('inf'):
+            return '∞'
+        elif self.x == float('-inf'):
+            return '-∞'
+        else:
+            return str(self.x)
+
+    none = '%7s' % '-'
+    def table(self):
+        return '%7s' % (self,)
+
+    diff_none = '%7s' % '-'
+    diff_table = table
+
+    def diff_diff(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        diff = new - old
+        if diff == float('+inf'):
+            return '%7s' % '+∞'
+        elif diff == float('-inf'):
+            return '%7s' % '-∞'
+        else:
+            return '%+7d' % diff
+
+    def ratio(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        if m.isinf(new) and m.isinf(old):
+            return 0.0
+        elif m.isinf(new):
+            return float('+inf')
+        elif m.isinf(old):
+            return float('-inf')
+        elif not old and not new:
+            return 0.0
+        elif not old:
+            return 1.0
+        else:
+            return (new-old) / old

    def __add__(self, other):
-        return self.__class__(self.code_size + other.code_size)
+        return IntField(self.x + other.x)

-    def __sub__(self, other):
-        return CodeDiff(other, self)
+    def __mul__(self, other):
+        return IntField(self.x * other.x)

-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
+    def __lt__(self, other):
+        return self.x < other.x

-    def key(self, **args):
-        if args.get('size_sort'):
-            return -self.code_size
-        elif args.get('reverse_size_sort'):
-            return +self.code_size
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
+
+    def __le__(self, other):
+        return not self.__gt__(other)
+
+    def __ge__(self, other):
+        return not self.__lt__(other)
+
+    def __truediv__(self, n):
+        if m.isinf(self.x):
+            return self
        else:
-            return None
+            return IntField(round(self.x / n))

-    _header = '%7s' % 'size'
-    def __str__(self):
-        return '%7d' % self.code_size
-
-class CodeDiff(co.namedtuple('CodeDiff',  'old,new')):
+# code size results
+class CodeResult(co.namedtuple('CodeResult', 'file,function,code_size')):
    __slots__ = ()
+    def __new__(cls, file, function, code_size):
+        return super().__new__(cls, file, function, IntField(code_size))

-    def ratio(self):
-        old = self.old.code_size if self.old is not None else 0
-        new = self.new.code_size if self.new is not None else 0
-        return (new-old) / old if old else 1.0
-
-    def key(self, **args):
-        return (
-            self.new.key(**args) if self.new is not None else 0,
-            -self.ratio())
-
-    def __bool__(self):
-        return bool(self.ratio())
-
-    _header = '%7s %7s %7s' % ('old', 'new', 'diff')
-    def __str__(self):
-        old = self.old.code_size if self.old is not None else 0
-        new = self.new.code_size if self.new is not None else 0
-        diff = new - old
-        ratio = self.ratio()
-        return '%7s %7s %+7d%s' % (
-            old or "-",
-            new or "-",
-            diff,
-            ' (%+.1f%%)' % (100*ratio) if ratio else '')
+    def __add__(self, other):
+        return CodeResult(self.file, self.function,
+            self.code_size + other.code_size)


 def openio(path, mode='r'):
@@ -81,20 +133,25 @@ def openio(path, mode='r'):
    else:
        return open(path, mode)

-def collect(paths, **args):
-    results = co.defaultdict(lambda: CodeResult())
+def collect(paths, *,
+        nm_tool=NM_TOOL,
+        type=TYPE,
+        build_dir=None,
+        everything=False,
+        **args):
+    results = []
    pattern = re.compile(
        '^(?P<size>[0-9a-fA-F]+)' +
-        ' (?P<type>[%s])' % re.escape(args['type']) +
+        ' (?P<type>[%s])' % re.escape(type) +
        ' (?P<func>.+?)$')
    for path in paths:
        # map to source file
        src_path = re.sub('\.o$', '.c', path)
-        if args.get('build_dir'):
-            src_path = re.sub('%s/*' % re.escape(args['build_dir']), '',
+        if build_dir:
+            src_path = re.sub('%s/*' % re.escape(build_dir), '',
                src_path)
        # note nm-tool may contain extra args
-        cmd = args['nm_tool'] + ['--size-sort', path]
+        cmd = nm_tool + ['--size-sort', path]
        if args.get('verbose'):
            print(' '.join(shlex.quote(c) for c in cmd))
        proc = sp.Popen(cmd,
@@ -107,12 +164,15 @@ def collect(paths, **args):
            if m:
                func = m.group('func')
                # discard internal functions
-                if not args.get('everything') and func.startswith('__'):
+                if not everything and func.startswith('__'):
                    continue
                # discard .8449 suffixes created by optimizer
                func = re.sub('\.[0-9]+', '', func)
-                results[(src_path, func)] += CodeResult(
-                    int(m.group('size'), 16))
+
+                results.append(CodeResult(
+                    src_path, func,
+                    int(m.group('size'), 16)))
+
        proc.wait()
        if proc.returncode != 0:
            if not args.get('verbose'):
@@ -122,12 +182,167 @@ def collect(paths, **args):

    return results

-def main(**args):
+
+def fold(results, *,
+        by=['file', 'function'],
+        **_):
+    folding = co.OrderedDict()
+    for r in results:
+        name = tuple(getattr(r, k) for k in by)
+        if name not in folding:
+            folding[name] = []
+        folding[name].append(r)
+
+    folded = []
+    for rs in folding.values():
+        folded.append(sum(rs[1:], start=rs[0]))
+
+    return folded
+
+
+def table(results, diff_results=None, *,
+        by_file=False,
+        size_sort=False,
+        reverse_size_sort=False,
+        summary=False,
+        all=False,
+        percent=False,
+        **_):
+    all_, all = all, __builtins__.all
+
+    # fold
+    results = fold(results, by=['file' if by_file else 'function'])
+    if diff_results is not None:
+        diff_results = fold(diff_results,
+            by=['file' if by_file else 'function'])
+
+    table = {
+        r.file if by_file else r.function: r
+        for r in results}
+    diff_table = {
+        r.file if by_file else r.function: r
+        for r in diff_results or []}
+
+    # sort, note that python's sort is stable
+    names = list(table.keys() | diff_table.keys())
+    names.sort()
+    if diff_results is not None:
+        names.sort(key=lambda n: -IntField.ratio(
+            table[n].code_size if n in table else None,
+            diff_table[n].code_size if n in diff_table else None))
+    if size_sort:
+        names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
+            reverse=True)
+    elif reverse_size_sort:
+        names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
+            reverse=False)
+
+    # print header
+    print('%-36s' % ('%s%s' % (
+        'file' if by_file else 'function',
+        ' (%d added, %d removed)' % (
+            sum(1 for n in table if n not in diff_table),
+            sum(1 for n in diff_table if n not in table))
+            if diff_results is not None and not percent else '')
+        if not summary else ''),
+        end='')
+    if diff_results is None:
+        print(' %s' % ('size'.rjust(len(IntField.none))))
+    elif percent:
+        print(' %s' % ('size'.rjust(len(IntField.diff_none))))
+    else:
+        print(' %s %s %s' % (
+            'old'.rjust(len(IntField.diff_none)),
+            'new'.rjust(len(IntField.diff_none)),
+            'diff'.rjust(len(IntField.diff_none))))
+
+    # print entries
+    if not summary:
+        for name in names:
+            r = table.get(name)
+            if diff_results is not None:
+                diff_r = diff_table.get(name)
+                ratio = IntField.ratio(
+                    r.code_size if r else None,
+                    diff_r.code_size if diff_r else None)
+                if not ratio and not all_:
+                    continue
+
+            print('%-36s' % name, end='')
+            if diff_results is None:
+                print(' %s' % (
+                    r.code_size.table()
+                        if r else IntField.none))
+            elif percent:
+                print(' %s%s' % (
+                    r.code_size.diff_table()
+                        if r else IntField.diff_none,
+                    ' (%s)' % (
+                        '+∞%' if ratio == float('+inf')
+                        else '-∞%' if ratio == float('-inf')
+                        else '%+.1f%%' % (100*ratio))))
+            else:
+                print(' %s %s %s%s' % (
+                    diff_r.code_size.diff_table()
+                        if diff_r else IntField.diff_none,
+                    r.code_size.diff_table()
+                        if r else IntField.diff_none,
+                    IntField.diff_diff(
+                        r.code_size if r else None,
+                        diff_r.code_size if diff_r else None)
+                        if r or diff_r else IntField.diff_none,
+                    ' (%s)' % (
+                        '+∞%' if ratio == float('+inf')
+                        else '-∞%' if ratio == float('-inf')
+                        else '%+.1f%%' % (100*ratio))
+                        if ratio else ''))
+
+    # print total
+    total = fold(results, by=[])
+    r = total[0] if total else None
+    if diff_results is not None:
+        diff_total = fold(diff_results, by=[])
+        diff_r = diff_total[0] if diff_total else None
+        ratio = IntField.ratio(
+            r.code_size if r else None,
+            diff_r.code_size if diff_r else None)
+
+    print('%-36s' % 'TOTAL', end='')
+    if diff_results is None:
+        print(' %s' % (
+            r.code_size.table()
+                if r else IntField.none))
+    elif percent:
+        print(' %s%s' % (
+            r.code_size.diff_table()
+                if r else IntField.diff_none,
+            ' (%s)' % (
+                '+∞%' if ratio == float('+inf')
+                else '-∞%' if ratio == float('-inf')
+                else '%+.1f%%' % (100*ratio))))
+    else:
+        print(' %s %s %s%s' % (
+            diff_r.code_size.diff_table()
+                if diff_r else IntField.diff_none,
+            r.code_size.diff_table()
+                if r else IntField.diff_none,
+            IntField.diff_diff(
+                r.code_size if r else None,
+                diff_r.code_size if diff_r else None)
+                if r or diff_r else IntField.diff_none,
+            ' (%s)' % (
+                '+∞%' if ratio == float('+inf')
+                else '-∞%' if ratio == float('-inf')
+                else '%+.1f%%' % (100*ratio))
+                if ratio else ''))
+
+
+def main(obj_paths, **args):
    # find sizes
    if not args.get('use', None):
        # find .o files
        paths = []
-        for path in args['obj_paths']:
+        for path in obj_paths:
            if os.path.isdir(path):
                path = path + '/*.o'

@@ -135,127 +350,61 @@ def main(**args):
                paths.append(path)

        if not paths:
-            print('no .obj files found in %r?' % args['obj_paths'])
+            print('no .obj files found in %r?' % obj_paths)
            sys.exit(-1)

        results = collect(paths, **args)
    else:
+        results = []
        with openio(args['use']) as f:
-            r = csv.DictReader(f)
-            results = {
-                (result['file'], result['name']): CodeResult(
-                    *(result[f] for f in CodeResult._fields))
-                for result in r
-                if all(result.get(f) not in {None, ''}
-                    for f in CodeResult._fields)}
+            reader = csv.DictReader(f)
+            for r in reader:
+                try:
+                    results.append(CodeResult(**{
+                        k: v for k, v in r.items()
+                        if k in CodeResult._fields}))
+                except TypeError:
+                    pass

-    # find previous results?
-    if args.get('diff'):
-        try:
-            with openio(args['diff']) as f:
-                r = csv.DictReader(f)
-                prev_results = {
-                    (result['file'], result['name']): CodeResult(
-                        *(result[f] for f in CodeResult._fields))
-                    for result in r
-                    if all(result.get(f) not in {None, ''}
-                        for f in CodeResult._fields)}
-        except FileNotFoundError:
-            prev_results = []
+    # fold to remove duplicates
+    results = fold(results)
+
+    # sort because why not
+    results.sort()

    # write results to CSV
    if args.get('output'):
-        merged_results = co.defaultdict(lambda: {})
-        other_fields = []
-
-        # merge?
-        if args.get('merge'):
-            try:
-                with openio(args['merge']) as f:
-                    r = csv.DictReader(f)
-                    for result in r:
-                        file = result.pop('file', '')
-                        func = result.pop('name', '')
-                        for f in CodeResult._fields:
-                            result.pop(f, None)
-                        merged_results[(file, func)] = result
-                        other_fields = result.keys()
-            except FileNotFoundError:
-                pass
-
-        for (file, func), result in results.items():
-            merged_results[(file, func)] |= result._asdict()
-
        with openio(args['output'], 'w') as f:
-            w = csv.DictWriter(f, ['file', 'name',
-                *other_fields, *CodeResult._fields])
-            w.writeheader()
-            for (file, func), result in sorted(merged_results.items()):
-                w.writerow({'file': file, 'name': func, **result})
+            writer = csv.DictWriter(f, CodeResult._fields)
+            writer.writeheader()
+            for r in results:
+                writer.writerow(r._asdict())

-    # print results
-    def print_header(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
+    # find previous results?
+    if args.get('diff'):
+        diff_results = []
+        try:
+            with openio(args['diff']) as f:
+                reader = csv.DictReader(f)
+                for r in reader:
+                    try:
+                        diff_results.append(CodeResult(**{
+                            k: v for k, v in r.items()
+                            if k in CodeResult._fields}))
+                    except TypeError:
+                        pass
+        except FileNotFoundError:
+            pass

-        if not args.get('diff'):
-            print('%-36s %s' % (by, CodeResult._header))
-        else:
-            old = {entry(k) for k in results.keys()}
-            new = {entry(k) for k in prev_results.keys()}
-            print('%-36s %s' % (
-                '%s (%d added, %d removed)' % (by,
-                        sum(1 for k in new if k not in old),
-                        sum(1 for k in old if k not in new))
-                    if by else '',
-                CodeDiff._header))
+        # fold to remove duplicates
+        diff_results = fold(diff_results)

-    def print_entries(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
-
-        entries = co.defaultdict(lambda: CodeResult())
-        for k, result in results.items():
-            entries[entry(k)] += result
-
-        if not args.get('diff'):
-            for name, result in sorted(entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                print('%-36s %s' % (name, result))
-        else:
-            prev_entries = co.defaultdict(lambda: CodeResult())
-            for k, result in prev_results.items():
-                prev_entries[entry(k)] += result
-
-            diff_entries = {name: entries.get(name) - prev_entries.get(name)
-                for name in (entries.keys() | prev_entries.keys())}
-
-            for name, diff in sorted(diff_entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                if diff or args.get('all'):
-                    print('%-36s %s' % (name, diff))
-
-    if args.get('quiet'):
-        pass
-    elif args.get('summary'):
-        print_header('')
-        print_entries('total')
-    elif args.get('files'):
-        print_header('file')
-        print_entries('file')
-        print_entries('total')
-    else:
-        print_header('function')
-        print_entries('function')
-        print_entries('total')
+    # print table
+    if not args.get('quiet'):
+        table(
+            results,
+            diff_results if args.get('diff') else None,
+            **args)


 if __name__ == "__main__":
@@ -263,42 +412,72 @@ if __name__ == "__main__":
    import sys
    parser = argparse.ArgumentParser(
        description="Find code size at the function level.")
-    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory \
-            or a list of paths. Defaults to %r." % OBJ_PATHS)
-    parser.add_argument('-v', '--verbose', action='store_true',
+    parser.add_argument(
+        'obj_paths',
+        nargs='*',
+        default=OBJ_PATHS,
+        help="Description of where to find *.o files. May be a directory "
+            "or a list of paths. Defaults to %(default)r.")
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
        help="Output commands that run behind the scenes.")
-    parser.add_argument('-q', '--quiet', action='store_true',
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
        help="Don't show anything, useful with -o.")
-    parser.add_argument('-o', '--output',
+    parser.add_argument(
+        '-o', '--output',
        help="Specify CSV file to store results.")
-    parser.add_argument('-u', '--use',
-        help="Don't compile and find code sizes, instead use this CSV file.")
-    parser.add_argument('-d', '--diff',
-        help="Specify CSV file to diff code size against.")
-    parser.add_argument('-m', '--merge',
-        help="Merge with an existing CSV file when writing to output.")
-    parser.add_argument('-a', '--all', action='store_true',
-        help="Show all functions, not just the ones that changed.")
-    parser.add_argument('-A', '--everything', action='store_true',
-        help="Include builtin and libc specific symbols.")
-    parser.add_argument('-s', '--size-sort', action='store_true',
+    parser.add_argument(
+        '-u', '--use',
+        help="Don't parse anything, use this CSV file.")
+    parser.add_argument(
+        '-d', '--diff',
+        help="Specify CSV file to diff against.")
+    parser.add_argument(
+        '-a', '--all',
+        action='store_true',
+        help="Show all, not just the ones that changed.")
+    parser.add_argument(
+        '-p', '--percent',
+        action='store_true',
+        help="Only show percentage change, not a full diff.")
+    parser.add_argument(
+        '-b', '--by-file',
+        action='store_true',
+        help="Group by file. Note this does not include padding "
+            "so sizes may differ from other tools.")
+    parser.add_argument(
+        '-s', '--size-sort',
+        action='store_true',
        help="Sort by size.")
-    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
+    parser.add_argument(
+        '-S', '--reverse-size-sort',
+        action='store_true',
        help="Sort by size, but backwards.")
-    parser.add_argument('-F', '--files', action='store_true',
-        help="Show file-level code sizes. Note this does not include padding! "
-            "So sizes may differ from other tools.")
-    parser.add_argument('-Y', '--summary', action='store_true',
-        help="Only show the total code size.")
-    parser.add_argument('--type', default='tTrRdD',
+    parser.add_argument(
+        '-Y', '--summary',
+        action='store_true',
+        help="Only show the total size.")
+    parser.add_argument(
+        '-A', '--everything',
+        action='store_true',
+        help="Include builtin and libc specific symbols.")
+    parser.add_argument(
+        '--type',
+        default=TYPE,
        help="Type of symbols to report, this uses the same single-character "
            "type-names emitted by nm. Defaults to %(default)r.")
-    parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(),
-        help="Path to the nm tool to use.")
-    parser.add_argument('--build-dir',
-        help="Specify the relative build directory. Used to map object files \
-            to the correct source files.")
+    parser.add_argument(
+        '--nm-tool',
+        type=lambda x: x.split(),
+        default=NM_TOOL,
+        help="Path to the nm tool to use. Defaults to %(default)r")
+    parser.add_argument(
+        '--build-dir',
+        help="Specify the relative build directory. Used to map object files "
+            "to the correct source files.")
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_args()).items()
        if v is not None}))