"""Helper functions to parse C code in heavily constrained scenarios.

Currently supported functionality:

* read_function_declarations: read function declarations from a header file.
"""

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

import re
from typing import Dict, Iterable, Iterator, List, Optional, Tuple


class ArgumentInfo:
    """Information about an argument to an API function.

    Attributes:
        decl: the argument declaration, stripped of surrounding whitespace.
        type: the argument type, with whitespace normalized.
        name: the argument name, or None if the declaration has no name.
        suffix: the trailing array part of the declaration (e.g. ``[16]``),
            or the empty string if there is none.
    """
    #pylint: disable=too-few-public-methods

    # Words that can be the last word of a type. They must not be mistaken
    # for the argument name, so that e.g. "unsigned int" is parsed as an
    # unnamed argument of type "unsigned int".
    _KEYWORDS = [
        'const', 'register', 'restrict',
        'int', 'long', 'short', 'signed', 'unsigned',
    ]
    # The \b after the keyword alternative makes the lookahead reject only
    # whole keywords: identifiers that merely start with a keyword
    # (e.g. "interval", "longer") are valid argument names.
    _DECLARATION_RE = re.compile(
        r'(?P<type>\w[\w\s*]*?)\s*' +
        r'(?!(?:' + r'|'.join(_KEYWORDS) + r')\b)(?P<name>\b\w+\b)?' +
        r'\s*(?P<suffix>\[[^][]*\])?\Z',
        re.A | re.S)

    @classmethod
    def normalize_type(cls, typ: str) -> str:
        """Normalize whitespace in a type.

        Each run of whitespace becomes a single space, and each ``*`` is
        preceded by exactly one space.
        """
        typ = re.sub(r'\s+', r' ', typ)
        typ = re.sub(r'\s*\*', r' *', typ)
        return typ

    def __init__(self, decl: str) -> None:
        """Parse the declaration of one function argument.

        Raise ValueError (with the stripped declaration as its argument)
        if the declaration is not in a supported form.
        """
        self.decl = decl.strip()
        m = self._DECLARATION_RE.match(self.decl)
        if not m:
            raise ValueError(self.decl)
        self.type = self.normalize_type(m.group('type')) #type: str
        self.name = m.group('name') #type: Optional[str]
        self.suffix = m.group('suffix') if m.group('suffix') else '' #type: str


class FunctionInfo:
    """Information about an API function.

    Attributes:
        filename: the name of the file containing the declaration.
        line_number: the line number where the declaration starts.
        qualifiers: the set of qualifiers passed to the constructor.
        return_type: the return type, as written in the declaration.
        name: the function name.
        arguments: a list of ArgumentInfo objects, one per argument.
    """
    #pylint: disable=too-few-public-methods

    # Regex matching the declaration of a function that returns void.
    VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.A)

    def __init__(self, #pylint: disable=too-many-arguments
                 filename: str,
                 line_number: int,
                 qualifiers: Iterable[str],
                 return_type: str,
                 name: str,
                 arguments: List[str]) -> None:
        """Record a function declaration.

        Each element of `arguments` is the declaration of one argument.
        Raise ValueError if one of them cannot be parsed by ArgumentInfo.
        """
        self.filename = filename
        self.line_number = line_number
        self.qualifiers = frozenset(qualifiers)
        self.return_type = return_type
        self.name = name
        self.arguments = [ArgumentInfo(arg) for arg in arguments]

    def returns_void(self) -> bool:
        """Whether the function returns void."""
        return bool(self.VOID_RE.search(self.return_type))


# Match one C comment.
# Note that we match both comment types, so things like // in a /*...*/
# comment are handled correctly.
# A // comment extends up to, but not including, the next newline.
_C_COMMENT_RE = re.compile(r'//[^\n]*|/\*.*?\*/', re.S)
_NOT_NEWLINES_RE = re.compile(r'[^\n]+')

def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]:
    """Read logical lines from a file.

    Logical lines are one or more physical lines, with balanced parentheses.
    Comments are removed before the lines are assembled.

    Yield pairs (line_number, text) where line_number is the 1-based number
    of the first physical line of the logical line, and text is the content
    of the logical line, with its physical lines joined by newlines.
    """
    with open(filename, encoding='utf-8') as inp:
        content = inp.read()
    # Strip comments, but keep newlines for line numbering
    content = _C_COMMENT_RE.sub(
        lambda m: _NOT_NEWLINES_RE.sub("", m.group(0)),
        content)
    lines = enumerate(content.splitlines(), 1)
    for line_number, line in lines:
        # Read a logical line, containing balanced parentheses.
        # We assume that parentheses are balanced (this should be ok
        # since comments have been stripped), otherwise there will be
        # a gigantic logical line at the end.
        paren_level = line.count('(') - line.count(')')
        while paren_level > 0:
            following = next(lines, None)
            if following is None:
                # End of file inside unbalanced parentheses: stop here and
                # yield what has been accumulated. Letting StopIteration
                # escape from a generator would raise RuntimeError.
                break
            _, more = following
            paren_level += more.count('(') - more.count(')')
            line += '\n' + more
        yield line_number, line

_C_FUNCTION_DECLARATION_RE = re.compile(
    r'(?P<qualifiers>(?:(?:extern|inline|static)\b\s*)*)'
    r'(?P<return_type>\w[\w\s*]*?)\s*' +
    r'\b(?P<name>\w+)' +
    r'\s*\((?P<arguments>.*)\)\s*;',
    re.A | re.S)

def read_function_declarations(functions: Dict[str, FunctionInfo],
                               filename: str) -> None:
    """Collect function declarations from a C header file.

    Add one entry to `functions` for each function declaration found in
    `filename`, keyed by function name. Only declarations that start at the
    beginning of a logical line are recognized. The argument list is split
    at each comma, so arguments that themselves contain a comma are not
    supported.

    Raise ValueError if an argument of a recognized declaration cannot be
    parsed.
    """
    for line_number, line in read_logical_lines(filename):
        m = _C_FUNCTION_DECLARATION_RE.match(line)
        if not m:
            continue
        qualifiers = m.group('qualifiers').split()
        return_type = m.group('return_type')
        name = m.group('name')
        arguments = m.group('arguments').split(',')
        # A lone "void" means that the function takes no arguments.
        if len(arguments) == 1 and FunctionInfo.VOID_RE.match(arguments[0]):
            arguments = []
        # Note: we replace any existing declaration for the same name.
        functions[name] = FunctionInfo(filename, line_number,
                                       qualifiers,
                                       return_type,
                                       name,
                                       arguments)