#!/usr/bin/env python3 # # Script to find data size at the function level. Basically just a bit wrapper # around nm with some extra conveniences for comparing builds. Heavily inspired # by Linux's Bloat-O-Meter. # # Example: # ./scripts/data.py lfs.o lfs_util.o -S # # Copyright (c) 2022, The littlefs authors. # Copyright (c) 2020, Arm Limited. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # import collections as co import csv import glob import itertools as it import math as m import os import re import shlex import subprocess as sp OBJ_PATHS = ['*.o'] NM_TOOL = ['nm'] TYPE = 'dDbB' # integer fields class IntField(co.namedtuple('IntField', 'x')): __slots__ = () def __new__(cls, x=0): if isinstance(x, IntField): return x if isinstance(x, str): try: x = int(x, 0) except ValueError: # also accept +-∞ and +-inf if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x): x = m.inf elif re.match('^\s*-\s*(?:∞|inf)\s*$', x): x = -m.inf else: raise assert isinstance(x, int) or m.isinf(x), x return super().__new__(cls, x) def __str__(self): if self.x == m.inf: return '∞' elif self.x == -m.inf: return '-∞' else: return str(self.x) def __int__(self): assert not m.isinf(self.x) return self.x def __float__(self): return float(self.x) none = '%7s' % '-' def table(self): return '%7s' % (self,) diff_none = '%7s' % '-' diff_table = table def diff_diff(self, other): new = self.x if self else 0 old = other.x if other else 0 diff = new - old if diff == +m.inf: return '%7s' % '+∞' elif diff == -m.inf: return '%7s' % '-∞' else: return '%+7d' % diff def ratio(self, other): new = self.x if self else 0 old = other.x if other else 0 if m.isinf(new) and m.isinf(old): return 0.0 elif m.isinf(new): return +m.inf elif m.isinf(old): return -m.inf elif not old and not new: return 0.0 elif not old: return 1.0 else: return (new-old) / old def __add__(self, other): return IntField(self.x + other.x) def __sub__(self, other): return IntField(self.x - other.x) def __mul__(self, other): return IntField(self.x * other.x) def __lt__(self, other): return self.x < other.x def __gt__(self, other): return self.__class__.__lt__(other, self) def __le__(self, other): return not self.__gt__(other) def __ge__(self, other): return not self.__lt__(other) # data size results class DataResult(co.namedtuple('DataResult', 'file,function,data_size')): __slots__ = () def __new__(cls, file, function, data_size): return super().__new__(cls, file, function, IntField(data_size)) def __add__(self, other): return DataResult(self.file, self.function, self.data_size + other.data_size) def openio(path, mode='r'): if path == '-': if mode == 'r': return os.fdopen(os.dup(sys.stdin.fileno()), 'r') else: return os.fdopen(os.dup(sys.stdout.fileno()), 'w') else: return open(path, mode) def collect(paths, *, nm_tool=NM_TOOL, type=TYPE, build_dir=None, everything=False, **args): results = [] pattern = re.compile( '^(?P[0-9a-fA-F]+)' + ' (?P[%s])' % re.escape(type) + ' (?P.+?)$') for path in paths: # map to source file src_path = re.sub('\.o$', '.c', path) if build_dir: src_path = re.sub('%s/*' % re.escape(build_dir), '', src_path) # note nm-tool may contain extra args cmd = nm_tool + ['--size-sort', path] if args.get('verbose'): print(' '.join(shlex.quote(c) for c in cmd)) proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE if not args.get('verbose') else None, universal_newlines=True, errors='replace') for line in proc.stdout: m = pattern.match(line) if m: func = m.group('func') # discard internal functions if not everything and func.startswith('__'): continue # discard .8449 suffixes created by optimizer func = re.sub('\.[0-9]+', '', func) results.append(DataResult( src_path, func, int(m.group('size'), 16))) proc.wait() if proc.returncode != 0: if not args.get('verbose'): for line in proc.stderr: sys.stdout.write(line) sys.exit(-1) return results def fold(results, *, by=['file', 'function'], **_): folding = co.OrderedDict() for r in results: name = tuple(getattr(r, k) for k in by) if name not in folding: folding[name] = [] folding[name].append(r) folded = [] for rs in folding.values(): folded.append(sum(rs[1:], start=rs[0])) return folded def table(results, diff_results=None, *, by_file=False, size_sort=False, reverse_size_sort=False, summary=False, all=False, percent=False, **_): all_, all = all, __builtins__.all # fold results = fold(results, by=['file' if by_file else 'function']) if diff_results is not None: diff_results = fold(diff_results, by=['file' if by_file else 'function']) table = { r.file if by_file else r.function: r for r in results} diff_table = { r.file if by_file else r.function: r for r in diff_results or []} # sort, note that python's sort is stable names = list(table.keys() | diff_table.keys()) names.sort() if diff_results is not None: names.sort(key=lambda n: -IntField.ratio( table[n].data_size if n in table else None, diff_table[n].data_size if n in diff_table else None)) if size_sort: names.sort(key=lambda n: (table[n].data_size,) if n in table else (), reverse=True) elif reverse_size_sort: names.sort(key=lambda n: (table[n].data_size,) if n in table else (), reverse=False) # print header print('%-36s' % ('%s%s' % ( 'file' if by_file else 'function', ' (%d added, %d removed)' % ( sum(1 for n in table if n not in diff_table), sum(1 for n in diff_table if n not in table)) if diff_results is not None and not percent else '') if not summary else ''), end='') if diff_results is None: print(' %s' % ('size'.rjust(len(IntField.none)))) elif percent: print(' %s' % ('size'.rjust(len(IntField.diff_none)))) else: print(' %s %s %s' % ( 'old'.rjust(len(IntField.diff_none)), 'new'.rjust(len(IntField.diff_none)), 'diff'.rjust(len(IntField.diff_none)))) # print entries if not summary: for name in names: r = table.get(name) if diff_results is not None: diff_r = diff_table.get(name) ratio = IntField.ratio( r.data_size if r else None, diff_r.data_size if diff_r else None) if not ratio and not all_: continue print('%-36s' % name, end='') if diff_results is None: print(' %s' % ( r.data_size.table() if r else IntField.none)) elif percent: print(' %s%s' % ( r.data_size.diff_table() if r else IntField.diff_none, ' (%s)' % ( '+∞%' if ratio == +m.inf else '-∞%' if ratio == -m.inf else '%+.1f%%' % (100*ratio)))) else: print(' %s %s %s%s' % ( diff_r.data_size.diff_table() if diff_r else IntField.diff_none, r.data_size.diff_table() if r else IntField.diff_none, IntField.diff_diff( r.data_size if r else None, diff_r.data_size if diff_r else None) if r or diff_r else IntField.diff_none, ' (%s)' % ( '+∞%' if ratio == +m.inf else '-∞%' if ratio == -m.inf else '%+.1f%%' % (100*ratio)) if ratio else '')) # print total total = fold(results, by=[]) r = total[0] if total else None if diff_results is not None: diff_total = fold(diff_results, by=[]) diff_r = diff_total[0] if diff_total else None ratio = IntField.ratio( r.data_size if r else None, diff_r.data_size if diff_r else None) print('%-36s' % 'TOTAL', end='') if diff_results is None: print(' %s' % ( r.data_size.table() if r else IntField.none)) elif percent: print(' %s%s' % ( r.data_size.diff_table() if r else IntField.diff_none, ' (%s)' % ( '+∞%' if ratio == +m.inf else '-∞%' if ratio == -m.inf else '%+.1f%%' % (100*ratio)))) else: print(' %s %s %s%s' % ( diff_r.data_size.diff_table() if diff_r else IntField.diff_none, r.data_size.diff_table() if r else IntField.diff_none, IntField.diff_diff( r.data_size if r else None, diff_r.data_size if diff_r else None) if r or diff_r else IntField.diff_none, ' (%s)' % ( '+∞%' if ratio == +m.inf else '-∞%' if ratio == -m.inf else '%+.1f%%' % (100*ratio)) if ratio else '')) def main(obj_paths, **args): # find sizes if not args.get('use', None): # find .o files paths = [] for path in obj_paths: if os.path.isdir(path): path = path + '/*.o' for path in glob.glob(path): paths.append(path) if not paths: print('no .obj files found in %r?' % obj_paths) sys.exit(-1) results = collect(paths, **args) else: results = [] with openio(args['use']) as f: reader = csv.DictReader(f, restval='') for r in reader: try: results.append(DataResult(**{ k: v for k, v in r.items() if k in DataResult._fields})) except TypeError: pass # fold to remove duplicates results = fold(results) # sort because why not results.sort() # write results to CSV if args.get('output'): with openio(args['output'], 'w') as f: writer = csv.DictWriter(f, DataResult._fields) writer.writeheader() for r in results: writer.writerow(r._asdict()) # find previous results? if args.get('diff'): diff_results = [] try: with openio(args['diff']) as f: reader = csv.DictReader(f, restval='') for r in reader: try: diff_results.append(DataResult(**{ k: v for k, v in r.items() if k in DataResult._fields})) except TypeError: pass except FileNotFoundError: pass # fold to remove duplicates diff_results = fold(diff_results) # print table if not args.get('quiet'): table( results, diff_results if args.get('diff') else None, **args) if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( description="Find data size at the function level.") parser.add_argument( 'obj_paths', nargs='*', default=OBJ_PATHS, help="Description of where to find *.o files. May be a directory " "or a list of paths. Defaults to %r." % OBJ_PATHS) parser.add_argument( '-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") parser.add_argument( '-q', '--quiet', action='store_true', help="Don't show anything, useful with -o.") parser.add_argument( '-o', '--output', help="Specify CSV file to store results.") parser.add_argument( '-u', '--use', help="Don't parse anything, use this CSV file.") parser.add_argument( '-d', '--diff', help="Specify CSV file to diff against.") parser.add_argument( '-a', '--all', action='store_true', help="Show all, not just the ones that changed.") parser.add_argument( '-p', '--percent', action='store_true', help="Only show percentage change, not a full diff.") parser.add_argument( '-b', '--by-file', action='store_true', help="Group by file. Note this does not include padding " "so sizes may differ from other tools.") parser.add_argument( '-s', '--size-sort', action='store_true', help="Sort by size.") parser.add_argument( '-S', '--reverse-size-sort', action='store_true', help="Sort by size, but backwards.") parser.add_argument( '-Y', '--summary', action='store_true', help="Only show the total size.") parser.add_argument( '-A', '--everything', action='store_true', help="Include builtin and libc specific symbols.") parser.add_argument( '--type', default=TYPE, help="Type of symbols to report, this uses the same single-character " "type-names emitted by nm. Defaults to %r." % TYPE) parser.add_argument( '--nm-tool', type=lambda x: x.split(), default=NM_TOOL, help="Path to the nm tool to use. Defaults to %r." % NM_TOOL) parser.add_argument( '--build-dir', help="Specify the relative build directory. Used to map object files " "to the correct source files.") sys.exit(main(**{k: v for k, v in vars(parser.parse_intermixed_args()).items() if v is not None}))