diff options
Diffstat (limited to 'Tools/c-analyzer/c_parser/parser/_func_body.py')
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_func_body.py | 278 |
1 files changed, 278 insertions, 0 deletions
diff --git a/Tools/c-analyzer/c_parser/parser/_func_body.py b/Tools/c-analyzer/c_parser/parser/_func_body.py new file mode 100644 index 00000000000..42fd459e111 --- /dev/null +++ b/Tools/c-analyzer/c_parser/parser/_func_body.py @@ -0,0 +1,278 @@ +import re + +from ._regexes import ( + LOCAL as _LOCAL, + LOCAL_STATICS as _LOCAL_STATICS, +) +from ._common import ( + log_match, + parse_var_decl, + set_capture_groups, + match_paren, +) +from ._compound_decl_body import DECL_BODY_PARSERS + + +LOCAL = set_capture_groups(_LOCAL, ( + 'EMPTY', + 'INLINE_LEADING', + 'INLINE_PRE', + 'INLINE_KIND', + 'INLINE_NAME', + 'STORAGE', + 'VAR_DECL', + 'VAR_INIT', + 'VAR_ENDING', + 'COMPOUND_BARE', + 'COMPOUND_LABELED', + 'COMPOUND_PAREN', + 'BLOCK_LEADING', + 'BLOCK_OPEN', + 'SIMPLE_STMT', + 'SIMPLE_ENDING', + 'BLOCK_CLOSE', +)) +LOCAL_RE = re.compile(rf'^ \s* {LOCAL}', re.VERBOSE) + + +# Note that parse_function_body() still has trouble with a few files +# in the CPython codebase. + +def parse_function_body(source, name, anon_name): + # XXX + raise NotImplementedError + + +def parse_function_body(name, text, resolve, source, anon_name, parent): + raise NotImplementedError + # For now we do not worry about locals declared in for loop "headers". + depth = 1; + while depth > 0: + m = LOCAL_RE.match(text) + while not m: + text, resolve = continue_text(source, text or '{', resolve) + m = LOCAL_RE.match(text) + text = text[m.end():] + ( + empty, + inline_leading, inline_pre, inline_kind, inline_name, + storage, decl, + var_init, var_ending, + compound_bare, compound_labeled, compound_paren, + block_leading, block_open, + simple_stmt, simple_ending, + block_close, + ) = m.groups() + + if empty: + log_match('', m) + resolve(None, None, None, text) + yield None, text + elif inline_kind: + log_match('', m) + kind = inline_kind + name = inline_name or anon_name('inline-') + data = [] # members + # We must set the internal "text" from _iter_source() to the + # start of the inline compound body, + # Note that this is effectively like a forward reference that + # we do not emit. + resolve(kind, None, name, text, None) + _parse_body = DECL_BODY_PARSERS[kind] + before = [] + ident = f'{kind} {name}' + for member, inline, text in _parse_body(text, resolve, source, anon_name, ident): + if member: + data.append(member) + if inline: + yield from inline + # un-inline the decl. Note that it might not actually be inline. + # We handle the case in the "maybe_inline_actual" branch. + text = f'{inline_leading or ""} {inline_pre or ""} {kind} {name} {text}' + # XXX Should "parent" really be None for inline type decls? + yield resolve(kind, data, name, text, None), text + elif block_close: + log_match('', m) + depth -= 1 + resolve(None, None, None, text) + # XXX This isn't great. Calling resolve() should have + # cleared the closing bracket. However, some code relies + # on the yielded value instead of the resolved one. That + # needs to be fixed. + yield None, text + elif compound_bare: + log_match('', m) + yield resolve('statement', compound_bare, None, text, parent), text + elif compound_labeled: + log_match('', m) + yield resolve('statement', compound_labeled, None, text, parent), text + elif compound_paren: + log_match('', m) + try: + pos = match_paren(text) + except ValueError: + text = f'{compound_paren} {text}' + #resolve(None, None, None, text) + text, resolve = continue_text(source, text, resolve) + yield None, text + else: + head = text[:pos] + text = text[pos:] + if compound_paren == 'for': + # XXX Parse "head" as a compound statement. + stmt1, stmt2, stmt3 = head.split(';', 2) + data = { + 'compound': compound_paren, + 'statements': (stmt1, stmt2, stmt3), + } + else: + data = { + 'compound': compound_paren, + 'statement': head, + } + yield resolve('statement', data, None, text, parent), text + elif block_open: + log_match('', m) + depth += 1 + if block_leading: + # An inline block: the last evaluated expression is used + # in place of the block. + # XXX Combine it with the remainder after the block close. + stmt = f'{block_open}{{<expr>}}...;' + yield resolve('statement', stmt, None, text, parent), text + else: + resolve(None, None, None, text) + yield None, text + elif simple_ending: + log_match('', m) + yield resolve('statement', simple_stmt, None, text, parent), text + elif var_ending: + log_match('', m) + kind = 'variable' + _, name, vartype = parse_var_decl(decl) + data = { + 'storage': storage, + 'vartype': vartype, + } + after = () + if var_ending == ',': + # It was a multi-declaration, so queue up the next one. + _, qual, typespec, _ = vartype.values() + text = f'{storage or ""} {qual or ""} {typespec} {text}' + yield resolve(kind, data, name, text, parent), text + if var_init: + _data = f'{name} = {var_init.strip()}' + yield resolve('statement', _data, None, text, parent), text + else: + # This should be unreachable. + raise NotImplementedError + + +############################# +# static local variables + +LOCAL_STATICS = set_capture_groups(_LOCAL_STATICS, ( + 'INLINE_LEADING', + 'INLINE_PRE', + 'INLINE_KIND', + 'INLINE_NAME', + 'STATIC_DECL', + 'STATIC_INIT', + 'STATIC_ENDING', + 'DELIM_LEADING', + 'BLOCK_OPEN', + 'BLOCK_CLOSE', + 'STMT_END', +)) +LOCAL_STATICS_RE = re.compile(rf'^ \s* {LOCAL_STATICS}', re.VERBOSE) + + +def parse_function_statics(source, func, anon_name): + # For now we do not worry about locals declared in for loop "headers". + depth = 1; + while depth > 0: + for srcinfo in source: + m = LOCAL_STATICS_RE.match(srcinfo.text) + if m: + break + else: + # We ran out of lines. + if srcinfo is not None: + srcinfo.done() + return + for item, depth in _parse_next_local_static(m, srcinfo, + anon_name, func, depth): + if callable(item): + parse_body = item + yield from parse_body(source) + elif item is not None: + yield item + + +def _parse_next_local_static(m, srcinfo, anon_name, func, depth): + (inline_leading, inline_pre, inline_kind, inline_name, + static_decl, static_init, static_ending, + _delim_leading, + block_open, + block_close, + stmt_end, + ) = m.groups() + remainder = srcinfo.text[m.end():] + + if inline_kind: + log_match('func inline', m) + kind = inline_kind + name = inline_name or anon_name('inline-') + # Immediately emit a forward declaration. + yield srcinfo.resolve(kind, name=name, data=None), depth + + # un-inline the decl. Note that it might not actually be inline. + # We handle the case in the "maybe_inline_actual" branch. + srcinfo.nest( + remainder, + f'{inline_leading or ""} {inline_pre or ""} {kind} {name}' + ) + def parse_body(source): + _parse_body = DECL_BODY_PARSERS[kind] + + data = [] # members + ident = f'{kind} {name}' + for item in _parse_body(source, anon_name, ident): + if item.kind == 'field': + data.append(item) + else: + yield item + # XXX Should "parent" really be None for inline type decls? + yield srcinfo.resolve(kind, data, name, parent=None) + + srcinfo.resume() + yield parse_body, depth + + elif static_decl: + log_match('local variable', m) + _, name, data = parse_var_decl(static_decl) + + yield srcinfo.resolve('variable', data, name, parent=func), depth + + if static_init: + srcinfo.advance(f'{name} {static_init} {remainder}') + elif static_ending == ',': + # It was a multi-declaration, so queue up the next one. + _, qual, typespec, _ = data.values() + srcinfo.advance(f'static {qual or ""} {typespec} {remainder}') + else: + srcinfo.advance('') + + else: + log_match('func other', m) + if block_open: + depth += 1 + elif block_close: + depth -= 1 + elif stmt_end: + pass + else: + # This should be unreachable. + raise NotImplementedError + srcinfo.advance(remainder) + yield None, depth |