diff options
Diffstat (limited to 'Tools/c-analyzer/c_parser/parser')
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/__init__.py | 212 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_alt.py | 6 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_common.py | 115 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_compound_decl_body.py | 158 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_delim.py | 54 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_func_body.py | 278 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_global.py | 179 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_info.py | 168 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_regexes.py | 796 |
9 files changed, 1966 insertions, 0 deletions
diff --git a/Tools/c-analyzer/c_parser/parser/__init__.py b/Tools/c-analyzer/c_parser/parser/__init__.py new file mode 100644 index 00000000000..7cb34caf09e --- /dev/null +++ b/Tools/c-analyzer/c_parser/parser/__init__.py @@ -0,0 +1,212 @@ +"""A simple non-validating parser for C99. + +The functions and regex patterns here are not entirely suitable for +validating C syntax. Please rely on a proper compiler for that. +Instead our goal here is merely matching and extracting information from +valid C code. + +Furthermore, the grammar rules for the C syntax (particularly as +described in the K&R book) actually describe a superset, of which the +full C language is a proper subset. Here are some of the extra +conditions that must be applied when parsing C code: + +* ... + +(see: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf) + +We have taken advantage of the elements of the C grammar that are used +only in a few limited contexts, mostly as delimiters. They allow us to +focus the regex patterns confidently. 
Here are the relevant tokens and +in which grammar rules they are used: + +separators: +* ";" + + (decl) struct/union: at end of each member decl + + (decl) declaration: at end of each (non-compound) decl + + (stmt) expr stmt: at end of each stmt + + (stmt) for: between exprs in "header" + + (stmt) goto: at end + + (stmt) continue: at end + + (stmt) break: at end + + (stmt) return: at end +* "," + + (decl) struct/union: between member declators + + (decl) param-list: between params + + (decl) enum: between enumerators + + (decl) initializer (compound): between initializers + + (expr) postfix: between func call args + + (expr) expression: between "assignment" exprs +* ":" + + (decl) struct/union: in member declators + + (stmt) label: between label and stmt + + (stmt) case: between expression and stmt + + (stmt) default: between "default" and stmt +* "=" + + (decl) declaration: between decl and initializer + + (decl) enumerator: between identifier and "initializer" + + (expr) assignment: between "var" and expr + +wrappers: +* "(...)" + + (decl) declarator (func ptr): to wrap ptr/name + + (decl) declarator (func ptr): around params + + (decl) declarator: around sub-declarator (for readability) + + (expr) postfix (func call): around args + + (expr) primary: around sub-expr + + (stmt) if: around condition + + (stmt) switch: around source expr + + (stmt) while: around condition + + (stmt) do-while: around condition + + (stmt) for: around "header" +* "{...}" + + (decl) enum: around enumerators + + (decl) func: around body + + (stmt) compound: around stmts +* "[...]" + * (decl) declarator: for arrays + * (expr) postfix: array access + +other: +* "*" + + (decl) declarator: for pointer types + + (expr) unary: for pointer deref + + +To simplify the regular expressions used here, we've taken some +shortcuts and made certain assumptions about the code we are parsing. +Some of these allow us to skip context-sensitive matching (e.g. 
braces) +or otherwise still match arbitrary C code unambiguously. However, in +some cases there are certain corner cases where the patterns are +ambiguous relative to arbitrary C code. However, they are still +unambiguous in the specific code we are parsing. + +Here are the cases where we've taken shortcuts or made assumptions: + +* there is no overlap syntactically between the local context (func + bodies) and the global context (other than variable decls), so we + do not need to worry about ambiguity due to the overlap: + + the global context has no expressions or statements + + the local context has no function definitions or type decls +* no "inline" type declarations (struct, union, enum) in function + parameters ~(including function pointers)~ +* no "inline" type decls in function return types +* no superfluous parentheses in declarators +* var decls in for loops are always "simple" (e.g. no inline types) +* only inline struct/union/enum decls may be anonymous (without a name) +* no function pointers in function pointer parameters +* for loop "headers" do not have curly braces (e.g. compound init) +* syntactically, variable decls do not overlap with stmts/exprs, except + in the following case: + spam (*eggs) (...) + This could be either a function pointer variable named "eggs" + or a call to a function named "spam", which returns a function + pointer that gets called. The only differentiator is the + syntax used in the "..." part. It will be comma-separated + parameters for the former and comma-separated expressions for + the latter. Thus, if we expect such decls or calls then we must + parse the decl params. 
+""" + +""" +TODO: +* extract CPython-specific code +* drop include injection (or only add when needed) +* track position instead of slicing "text" +* Parser class instead of the _iter_source() mess +* alt impl using a state machine (& tokenizer or split on delimiters) +""" + +from ..info import ParsedItem +from ._info import SourceInfo + + +def parse(srclines): + if isinstance(srclines, str): # a filename + raise NotImplementedError + + anon_name = anonymous_names() + for result in _parse(srclines, anon_name): + yield ParsedItem.from_raw(result) + + +# XXX Later: Add a separate function to deal with preprocessor directives +# parsed out of raw source. + + +def anonymous_names(): + counter = 1 + def anon_name(prefix='anon-'): + nonlocal counter + name = f'{prefix}{counter}' + counter += 1 + return name + return anon_name + + +############################# +# internal impl + +import logging + + +_logger = logging.getLogger(__name__) + + +def _parse(srclines, anon_name): + from ._global import parse_globals + + source = _iter_source(srclines) + #source = _iter_source(srclines, showtext=True) + for result in parse_globals(source, anon_name): + # XXX Handle blocks here insted of in parse_globals(). + yield result + + +def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False): + filestack = [] + allinfo = {} + # "lines" should be (fileinfo, data), as produced by the preprocessor code. 
+ for fileinfo, line in lines: + if fileinfo.filename in filestack: + while fileinfo.filename != filestack[-1]: + filename = filestack.pop() + del allinfo[filename] + filename = fileinfo.filename + srcinfo = allinfo[filename] + else: + filename = fileinfo.filename + srcinfo = SourceInfo(filename) + filestack.append(filename) + allinfo[filename] = srcinfo + + _logger.debug(f'-> {line}') + srcinfo._add_line(line, fileinfo.lno) + if len(srcinfo.text) > maxtext: + break + if srcinfo.end - srcinfo.start > maxlines: + break + while srcinfo._used(): + yield srcinfo + if showtext: + _logger.debug(f'=> {srcinfo.text}') + else: + if not filestack: + srcinfo = SourceInfo('???') + else: + filename = filestack[-1] + srcinfo = allinfo[filename] + while srcinfo._used(): + yield srcinfo + if showtext: + _logger.debug(f'=> {srcinfo.text}') + yield srcinfo + if showtext: + _logger.debug(f'=> {srcinfo.text}') + if not srcinfo._ready: + return + # At this point either the file ended prematurely + # or there's "too much" text. + filename, lno, text = srcinfo.filename, srcinfo._start, srcinfo.text + if len(text) > 500: + text = text[:500] + '...' + raise Exception(f'unmatched text ({filename} starting at line {lno}):\n{text}') diff --git a/Tools/c-analyzer/c_parser/parser/_alt.py b/Tools/c-analyzer/c_parser/parser/_alt.py new file mode 100644 index 00000000000..05a9101b4f5 --- /dev/null +++ b/Tools/c-analyzer/c_parser/parser/_alt.py @@ -0,0 +1,6 @@ + +def _parse(srclines, anon_name): + text = ' '.join(l for _, l in srclines) + + from ._delim import parse + yield from parse(text, anon_name) diff --git a/Tools/c-analyzer/c_parser/parser/_common.py b/Tools/c-analyzer/c_parser/parser/_common.py new file mode 100644 index 00000000000..40c36039f3f --- /dev/null +++ b/Tools/c-analyzer/c_parser/parser/_common.py @@ -0,0 +1,115 @@ +import re + +from ._regexes import ( + _ind, + STRING_LITERAL, + VAR_DECL as _VAR_DECL, +) + + +def log_match(group, m): + from . 
import _logger + _logger.debug(f'matched <{group}> ({m.group(0)})') + + +############################# +# regex utils + +def set_capture_group(pattern, group, *, strict=True): + old = f'(?: # <{group}>' + if strict and f'(?: # <{group}>' not in pattern: + raise ValueError(f'{old!r} not found in pattern') + return pattern.replace(old, f'( # <{group}>', 1) + + +def set_capture_groups(pattern, groups, *, strict=True): + for group in groups: + pattern = set_capture_group(pattern, group, strict=strict) + return pattern + + +############################# +# syntax-related utils + +_PAREN_RE = re.compile(rf''' + (?: + (?: + [^'"()]* + {_ind(STRING_LITERAL, 3)} + )* + [^'"()]* + (?: + ( [(] ) + | + ( [)] ) + ) + ) + ''', re.VERBOSE) + + +def match_paren(text, depth=0): + pos = 0 + while (m := _PAREN_RE.match(text, pos)): + pos = m.end() + _open, _close = m.groups() + if _open: + depth += 1 + else: # _close + depth -= 1 + if depth == 0: + return pos + else: + raise ValueError(f'could not find matching parens for {text!r}') + + +VAR_DECL = set_capture_groups(_VAR_DECL, ( + 'STORAGE', + 'TYPE_QUAL', + 'TYPE_SPEC', + 'DECLARATOR', + 'IDENTIFIER', + 'WRAPPED_IDENTIFIER', + 'FUNC_IDENTIFIER', +)) + + +def parse_var_decl(decl): + m = re.match(VAR_DECL, decl, re.VERBOSE) + (storage, typequal, typespec, declarator, + name, + wrappedname, + funcptrname, + ) = m.groups() + if name: + kind = 'simple' + elif wrappedname: + kind = 'wrapped' + name = wrappedname + elif funcptrname: + kind = 'funcptr' + name = funcptrname + else: + raise NotImplementedError + abstract = declarator.replace(name, '') + vartype = { + 'storage': storage, + 'typequal': typequal, + 'typespec': typespec, + 'abstract': abstract, + } + return (kind, name, vartype) + + +############################# +# parser state utils + +# XXX Drop this or use it! 
+def iter_results(results): + if not results: + return + if callable(results): + results = results() + + for result, text in results(): + if result: + yield result, text diff --git a/Tools/c-analyzer/c_parser/parser/_compound_decl_body.py b/Tools/c-analyzer/c_parser/parser/_compound_decl_body.py new file mode 100644 index 00000000000..eb5bc67607b --- /dev/null +++ b/Tools/c-analyzer/c_parser/parser/_compound_decl_body.py @@ -0,0 +1,158 @@ +import re + +from ._regexes import ( + STRUCT_MEMBER_DECL as _STRUCT_MEMBER_DECL, + ENUM_MEMBER_DECL as _ENUM_MEMBER_DECL, +) +from ._common import ( + log_match, + parse_var_decl, + set_capture_groups, +) + + +############################# +# struct / union + +STRUCT_MEMBER_DECL = set_capture_groups(_STRUCT_MEMBER_DECL, ( + 'COMPOUND_TYPE_KIND', + 'COMPOUND_TYPE_NAME', + 'SPECIFIER_QUALIFIER', + 'DECLARATOR', + 'SIZE', + 'ENDING', + 'CLOSE', +)) +STRUCT_MEMBER_RE = re.compile(rf'^ \s* {STRUCT_MEMBER_DECL}', re.VERBOSE) + + +def parse_struct_body(source, anon_name, parent): + done = False + while not done: + done = True + for srcinfo in source: + m = STRUCT_MEMBER_RE.match(srcinfo.text) + if m: + break + else: + # We ran out of lines. + if srcinfo is not None: + srcinfo.done() + return + for item in _parse_struct_next(m, srcinfo, anon_name, parent): + if callable(item): + parse_body = item + yield from parse_body(source) + else: + yield item + done = False + + +def _parse_struct_next(m, srcinfo, anon_name, parent): + (inline_kind, inline_name, + qualspec, declarator, + size, + ending, + close, + ) = m.groups() + remainder = srcinfo.text[m.end():] + + if close: + log_match('compound close', m) + srcinfo.advance(remainder) + + elif inline_kind: + log_match('compound inline', m) + kind = inline_kind + name = inline_name or anon_name('inline-') + # Immediately emit a forward declaration. + yield srcinfo.resolve(kind, name=name, data=None) + + # un-inline the decl. Note that it might not actually be inline. 
+ # We handle the case in the "maybe_inline_actual" branch. + srcinfo.nest( + remainder, + f'{kind} {name}', + ) + def parse_body(source): + _parse_body = DECL_BODY_PARSERS[kind] + + data = [] # members + ident = f'{kind} {name}' + for item in _parse_body(source, anon_name, ident): + if item.kind == 'field': + data.append(item) + else: + yield item + # XXX Should "parent" really be None for inline type decls? + yield srcinfo.resolve(kind, data, name, parent=None) + + srcinfo.resume() + yield parse_body + + else: + # not inline (member) + log_match('compound member', m) + if qualspec: + _, name, data = parse_var_decl(f'{qualspec} {declarator}') + if not name: + name = anon_name('struct-field-') + if size: +# data = (data, size) + data['size'] = int(size) + else: + # This shouldn't happen (we expect each field to have a name). + raise NotImplementedError + name = sized_name or anon_name('struct-field-') + data = int(size) + + yield srcinfo.resolve('field', data, name, parent) # XXX Restart? + if ending == ',': + remainder = rf'{qualspec} {remainder}' + srcinfo.advance(remainder) + + +############################# +# enum + +ENUM_MEMBER_DECL = set_capture_groups(_ENUM_MEMBER_DECL, ( + 'CLOSE', + 'NAME', + 'INIT', + 'ENDING', +)) +ENUM_MEMBER_RE = re.compile(rf'{ENUM_MEMBER_DECL}', re.VERBOSE) + + +def parse_enum_body(source, _anon_name, _parent): + ending = None + while ending != '}': + for srcinfo in source: + m = ENUM_MEMBER_RE.match(srcinfo.text) + if m: + break + else: + # We ran out of lines. 
+ if srcinfo is not None: + srcinfo.done() + return + remainder = srcinfo.text[m.end():] + + (close, + name, init, ending, + ) = m.groups() + if close: + ending = '}' + else: + data = init + yield srcinfo.resolve('field', data, name, _parent) + srcinfo.advance(remainder) + + +############################# + +DECL_BODY_PARSERS = { + 'struct': parse_struct_body, + 'union': parse_struct_body, + 'enum': parse_enum_body, +} diff --git a/Tools/c-analyzer/c_parser/parser/_delim.py b/Tools/c-analyzer/c_parser/parser/_delim.py new file mode 100644 index 00000000000..51433a629d3 --- /dev/null +++ b/Tools/c-analyzer/c_parser/parser/_delim.py @@ -0,0 +1,54 @@ +import re +import textwrap + +from ._regexes import _ind, STRING_LITERAL + + +def parse(text, anon_name): + context = None + data = None + for m in DELIMITER_RE.find_iter(text): + before, opened, closed = m.groups() + delim = opened or closed + + handle_segment = HANDLERS[context][delim] + result, context, data = handle_segment(before, delim, data) + if result: + yield result + + +DELIMITER = textwrap.dedent(rf''' + ( + (?: + [^'"()\[\]{};]* + {_ind(STRING_LITERAL, 3)} + }* + [^'"()\[\]{};]+ + )? # <before> + (?: + ( + [(\[{] + ) # <open> + | + ( + [)\]};] + ) # <close> + )? 
+ ''') +DELIMITER_RE = re.compile(DELIMITER, re.VERBOSE) + +_HANDLERS = { + None: { # global + # opened + '{': ..., + '[': None, + '(': None, + # closed + '}': None, + ']': None, + ')': None, + ';': ..., + }, + '': { + }, +} diff --git a/Tools/c-analyzer/c_parser/parser/_func_body.py b/Tools/c-analyzer/c_parser/parser/_func_body.py new file mode 100644 index 00000000000..42fd459e111 --- /dev/null +++ b/Tools/c-analyzer/c_parser/parser/_func_body.py @@ -0,0 +1,278 @@ +import re + +from ._regexes import ( + LOCAL as _LOCAL, + LOCAL_STATICS as _LOCAL_STATICS, +) +from ._common import ( + log_match, + parse_var_decl, + set_capture_groups, + match_paren, +) +from ._compound_decl_body import DECL_BODY_PARSERS + + +LOCAL = set_capture_groups(_LOCAL, ( + 'EMPTY', + 'INLINE_LEADING', + 'INLINE_PRE', + 'INLINE_KIND', + 'INLINE_NAME', + 'STORAGE', + 'VAR_DECL', + 'VAR_INIT', + 'VAR_ENDING', + 'COMPOUND_BARE', + 'COMPOUND_LABELED', + 'COMPOUND_PAREN', + 'BLOCK_LEADING', + 'BLOCK_OPEN', + 'SIMPLE_STMT', + 'SIMPLE_ENDING', + 'BLOCK_CLOSE', +)) +LOCAL_RE = re.compile(rf'^ \s* {LOCAL}', re.VERBOSE) + + +# Note that parse_function_body() still has trouble with a few files +# in the CPython codebase. + +def parse_function_body(source, name, anon_name): + # XXX + raise NotImplementedError + + +def parse_function_body(name, text, resolve, source, anon_name, parent): + raise NotImplementedError + # For now we do not worry about locals declared in for loop "headers". 
+ depth = 1; + while depth > 0: + m = LOCAL_RE.match(text) + while not m: + text, resolve = continue_text(source, text or '{', resolve) + m = LOCAL_RE.match(text) + text = text[m.end():] + ( + empty, + inline_leading, inline_pre, inline_kind, inline_name, + storage, decl, + var_init, var_ending, + compound_bare, compound_labeled, compound_paren, + block_leading, block_open, + simple_stmt, simple_ending, + block_close, + ) = m.groups() + + if empty: + log_match('', m) + resolve(None, None, None, text) + yield None, text + elif inline_kind: + log_match('', m) + kind = inline_kind + name = inline_name or anon_name('inline-') + data = [] # members + # We must set the internal "text" from _iter_source() to the + # start of the inline compound body, + # Note that this is effectively like a forward reference that + # we do not emit. + resolve(kind, None, name, text, None) + _parse_body = DECL_BODY_PARSERS[kind] + before = [] + ident = f'{kind} {name}' + for member, inline, text in _parse_body(text, resolve, source, anon_name, ident): + if member: + data.append(member) + if inline: + yield from inline + # un-inline the decl. Note that it might not actually be inline. + # We handle the case in the "maybe_inline_actual" branch. + text = f'{inline_leading or ""} {inline_pre or ""} {kind} {name} {text}' + # XXX Should "parent" really be None for inline type decls? + yield resolve(kind, data, name, text, None), text + elif block_close: + log_match('', m) + depth -= 1 + resolve(None, None, None, text) + # XXX This isn't great. Calling resolve() should have + # cleared the closing bracket. However, some code relies + # on the yielded value instead of the resolved one. That + # needs to be fixed. 
+ yield None, text + elif compound_bare: + log_match('', m) + yield resolve('statement', compound_bare, None, text, parent), text + elif compound_labeled: + log_match('', m) + yield resolve('statement', compound_labeled, None, text, parent), text + elif compound_paren: + log_match('', m) + try: + pos = match_paren(text) + except ValueError: + text = f'{compound_paren} {text}' + #resolve(None, None, None, text) + text, resolve = continue_text(source, text, resolve) + yield None, text + else: + head = text[:pos] + text = text[pos:] + if compound_paren == 'for': + # XXX Parse "head" as a compound statement. + stmt1, stmt2, stmt3 = head.split(';', 2) + data = { + 'compound': compound_paren, + 'statements': (stmt1, stmt2, stmt3), + } + else: + data = { + 'compound': compound_paren, + 'statement': head, + } + yield resolve('statement', data, None, text, parent), text + elif block_open: + log_match('', m) + depth += 1 + if block_leading: + # An inline block: the last evaluated expression is used + # in place of the block. + # XXX Combine it with the remainder after the block close. + stmt = f'{block_open}{{<expr>}}...;' + yield resolve('statement', stmt, None, text, parent), text + else: + resolve(None, None, None, text) + yield None, text + elif simple_ending: + log_match('', m) + yield resolve('statement', simple_stmt, None, text, parent), text + elif var_ending: + log_match('', m) + kind = 'variable' + _, name, vartype = parse_var_decl(decl) + data = { + 'storage': storage, + 'vartype': vartype, + } + after = () + if var_ending == ',': + # It was a multi-declaration, so queue up the next one. + _, qual, typespec, _ = vartype.values() + text = f'{storage or ""} {qual or ""} {typespec} {text}' + yield resolve(kind, data, name, text, parent), text + if var_init: + _data = f'{name} = {var_init.strip()}' + yield resolve('statement', _data, None, text, parent), text + else: + # This should be unreachable. 
+ raise NotImplementedError + + +############################# +# static local variables + +LOCAL_STATICS = set_capture_groups(_LOCAL_STATICS, ( + 'INLINE_LEADING', + 'INLINE_PRE', + 'INLINE_KIND', + 'INLINE_NAME', + 'STATIC_DECL', + 'STATIC_INIT', + 'STATIC_ENDING', + 'DELIM_LEADING', + 'BLOCK_OPEN', + 'BLOCK_CLOSE', + 'STMT_END', +)) +LOCAL_STATICS_RE = re.compile(rf'^ \s* {LOCAL_STATICS}', re.VERBOSE) + + +def parse_function_statics(source, func, anon_name): + # For now we do not worry about locals declared in for loop "headers". + depth = 1; + while depth > 0: + for srcinfo in source: + m = LOCAL_STATICS_RE.match(srcinfo.text) + if m: + break + else: + # We ran out of lines. + if srcinfo is not None: + srcinfo.done() + return + for item, depth in _parse_next_local_static(m, srcinfo, + anon_name, func, depth): + if callable(item): + parse_body = item + yield from parse_body(source) + elif item is not None: + yield item + + +def _parse_next_local_static(m, srcinfo, anon_name, func, depth): + (inline_leading, inline_pre, inline_kind, inline_name, + static_decl, static_init, static_ending, + _delim_leading, + block_open, + block_close, + stmt_end, + ) = m.groups() + remainder = srcinfo.text[m.end():] + + if inline_kind: + log_match('func inline', m) + kind = inline_kind + name = inline_name or anon_name('inline-') + # Immediately emit a forward declaration. + yield srcinfo.resolve(kind, name=name, data=None), depth + + # un-inline the decl. Note that it might not actually be inline. + # We handle the case in the "maybe_inline_actual" branch. + srcinfo.nest( + remainder, + f'{inline_leading or ""} {inline_pre or ""} {kind} {name}' + ) + def parse_body(source): + _parse_body = DECL_BODY_PARSERS[kind] + + data = [] # members + ident = f'{kind} {name}' + for item in _parse_body(source, anon_name, ident): + if item.kind == 'field': + data.append(item) + else: + yield item + # XXX Should "parent" really be None for inline type decls? 
+ yield srcinfo.resolve(kind, data, name, parent=None) + + srcinfo.resume() + yield parse_body, depth + + elif static_decl: + log_match('local variable', m) + _, name, data = parse_var_decl(static_decl) + + yield srcinfo.resolve('variable', data, name, parent=func), depth + + if static_init: + srcinfo.advance(f'{name} {static_init} {remainder}') + elif static_ending == ',': + # It was a multi-declaration, so queue up the next one. + _, qual, typespec, _ = data.values() + srcinfo.advance(f'static {qual or ""} {typespec} {remainder}') + else: + srcinfo.advance('') + + else: + log_match('func other', m) + if block_open: + depth += 1 + elif block_close: + depth -= 1 + elif stmt_end: + pass + else: + # This should be unreachable. + raise NotImplementedError + srcinfo.advance(remainder) + yield None, depth diff --git a/Tools/c-analyzer/c_parser/parser/_global.py b/Tools/c-analyzer/c_parser/parser/_global.py new file mode 100644 index 00000000000..35947c12998 --- /dev/null +++ b/Tools/c-analyzer/c_parser/parser/_global.py @@ -0,0 +1,179 @@ +import re + +from ._regexes import ( + GLOBAL as _GLOBAL, +) +from ._common import ( + log_match, + parse_var_decl, + set_capture_groups, +) +from ._compound_decl_body import DECL_BODY_PARSERS +#from ._func_body import parse_function_body +from ._func_body import parse_function_statics as parse_function_body + + +GLOBAL = set_capture_groups(_GLOBAL, ( + 'EMPTY', + 'COMPOUND_LEADING', + 'COMPOUND_KIND', + 'COMPOUND_NAME', + 'FORWARD_KIND', + 'FORWARD_NAME', + 'MAYBE_INLINE_ACTUAL', + 'TYPEDEF_DECL', + 'TYPEDEF_FUNC_PARAMS', + 'VAR_STORAGE', + 'FUNC_INLINE', + 'VAR_DECL', + 'FUNC_PARAMS', + 'FUNC_DELIM', + 'FUNC_LEGACY_PARAMS', + 'VAR_INIT', + 'VAR_ENDING', +)) +GLOBAL_RE = re.compile(rf'^ \s* {GLOBAL}', re.VERBOSE) + + +def parse_globals(source, anon_name): + for srcinfo in source: + m = GLOBAL_RE.match(srcinfo.text) + if not m: + # We need more text. 
+ continue + for item in _parse_next(m, srcinfo, anon_name): + if callable(item): + parse_body = item + yield from parse_body(source) + else: + yield item + else: + # We ran out of lines. + if srcinfo is not None: + srcinfo.done() + return + + +def _parse_next(m, srcinfo, anon_name): + ( + empty, + # compound type decl (maybe inline) + compound_leading, compound_kind, compound_name, + forward_kind, forward_name, maybe_inline_actual, + # typedef + typedef_decl, typedef_func_params, + # vars and funcs + storage, func_inline, decl, + func_params, func_delim, func_legacy_params, + var_init, var_ending, + ) = m.groups() + remainder = srcinfo.text[m.end():] + + if empty: + log_match('global empty', m) + srcinfo.advance(remainder) + + elif maybe_inline_actual: + log_match('maybe_inline_actual', m) + # Ignore forward declarations. + # XXX Maybe return them too (with an "isforward" flag)? + if not maybe_inline_actual.strip().endswith(';'): + remainder = maybe_inline_actual + remainder + yield srcinfo.resolve(forward_kind, None, forward_name) + if maybe_inline_actual.strip().endswith('='): + # We use a dummy prefix for a fake typedef. + # XXX Ideally this case would not be caught by MAYBE_INLINE_ACTUAL. + _, name, data = parse_var_decl(f'{forward_kind} {forward_name} fake_typedef_{forward_name}') + yield srcinfo.resolve('typedef', data, name, parent=None) + remainder = f'{name} {remainder}' + srcinfo.advance(remainder) + + elif compound_kind: + kind = compound_kind + name = compound_name or anon_name('inline-') + # Immediately emit a forward declaration. + yield srcinfo.resolve(kind, name=name, data=None) + + # un-inline the decl. Note that it might not actually be inline. + # We handle the case in the "maybe_inline_actual" branch. 
+ srcinfo.nest( + remainder, + f'{compound_leading or ""} {compound_kind} {name}', + ) + def parse_body(source): + _parse_body = DECL_BODY_PARSERS[compound_kind] + + data = [] # members + ident = f'{kind} {name}' + for item in _parse_body(source, anon_name, ident): + if item.kind == 'field': + data.append(item) + else: + yield item + # XXX Should "parent" really be None for inline type decls? + yield srcinfo.resolve(kind, data, name, parent=None) + + srcinfo.resume() + yield parse_body + + elif typedef_decl: + log_match('typedef', m) + kind = 'typedef' + _, name, data = parse_var_decl(typedef_decl) + if typedef_func_params: + return_type = data + # This matches the data for func declarations. + data = { + 'storage': None, + 'inline': None, + 'params': f'({typedef_func_params})', + 'returntype': return_type, + 'isforward': True, + } + yield srcinfo.resolve(kind, data, name, parent=None) + srcinfo.advance(remainder) + + elif func_delim or func_legacy_params: + log_match('function', m) + kind = 'function' + _, name, return_type = parse_var_decl(decl) + func_params = func_params or func_legacy_params + data = { + 'storage': storage, + 'inline': func_inline, + 'params': f'({func_params})', + 'returntype': return_type, + 'isforward': func_delim == ';', + } + + yield srcinfo.resolve(kind, data, name, parent=None) + srcinfo.advance(remainder) + + if func_delim == '{' or func_legacy_params: + def parse_body(source): + yield from parse_function_body(source, name, anon_name) + yield parse_body + + elif var_ending: + log_match('global variable', m) + kind = 'variable' + _, name, vartype = parse_var_decl(decl) + data = { + 'storage': storage, + 'vartype': vartype, + } + yield srcinfo.resolve(kind, data, name, parent=None) + + if var_ending == ',': + # It was a multi-declaration, so queue up the next one. 
+ _, qual, typespec, _ = vartype.values() + remainder = f'{storage or ""} {qual or ""} {typespec} {remainder}' + srcinfo.advance(remainder) + + if var_init: + _data = f'{name} = {var_init.strip()}' + yield srcinfo.resolve('statement', _data, name=None) + + else: + # This should be unreachable. + raise NotImplementedError diff --git a/Tools/c-analyzer/c_parser/parser/_info.py b/Tools/c-analyzer/c_parser/parser/_info.py new file mode 100644 index 00000000000..2dcd5e5e760 --- /dev/null +++ b/Tools/c-analyzer/c_parser/parser/_info.py @@ -0,0 +1,168 @@ +from ..info import KIND, ParsedItem, FileInfo + + +class TextInfo: + + def __init__(self, text, start=None, end=None): + # immutable: + if not start: + start = 1 + self.start = start + + # mutable: + lines = text.splitlines() or [''] + self.text = text.strip() + if not end: + end = start + len(lines) - 1 + self.end = end + self.line = lines[-1] + + def __repr__(self): + args = (f'{a}={getattr(self, a)!r}' + for a in ['text', 'start', 'end']) + return f'{type(self).__name__}({", ".join(args)})' + + def add_line(self, line, lno=None): + if lno is None: + lno = self.end + 1 + else: + if isinstance(lno, FileInfo): + fileinfo = lno + if fileinfo.filename != self.filename: + raise NotImplementedError((fileinfo, self.filename)) + lno = fileinfo.lno + # XXX + #if lno < self.end: + # raise NotImplementedError((lno, self.end)) + line = line.lstrip() + self.text += ' ' + line + self.line = line + self.end = lno + + +class SourceInfo: + + _ready = False + + def __init__(self, filename, _current=None): + # immutable: + self.filename = filename + # mutable: + if isinstance(_current, str): + _current = TextInfo(_current) + self._current = _current + start = -1 + self._start = _current.start if _current else -1 + self._nested = [] + self._set_ready() + + def __repr__(self): + args = (f'{a}={getattr(self, a)!r}' + for a in ['filename', '_current']) + return f'{type(self).__name__}({", ".join(args)})' + + @property + def start(self): + if 
self._current is None: + return self._start + return self._current.start + + @property + def end(self): + if self._current is None: + return self._start + return self._current.end + + @property + def text(self): + if self._current is None: + return '' + return self._current.text + + def nest(self, text, before, start=None): + if self._current is None: + raise Exception('nesting requires active source text') + current = self._current + current.text = before + self._nested.append(current) + self._replace(text, start) + + def resume(self, remainder=None): + if not self._nested: + raise Exception('no nested text to resume') + if self._current is None: + raise Exception('un-nesting requires active source text') + if remainder is None: + remainder = self._current.text + self._clear() + self._current = self._nested.pop() + self._current.text += ' ' + remainder + self._set_ready() + + def advance(self, remainder, start=None): + if self._current is None: + raise Exception('advancing requires active source text') + if remainder.strip(): + self._replace(remainder, start, fixnested=True) + else: + if self._nested: + self._replace('', start, fixnested=True) + #raise Exception('cannot advance while nesting') + else: + self._clear(start) + + def resolve(self, kind, data, name, parent=None): + # "field" isn't a top-level kind, so we leave it as-is. + if kind and kind != 'field': + kind = KIND._from_raw(kind) + fileinfo = FileInfo(self.filename, self._start) + return ParsedItem(fileinfo, kind, parent, name, data) + + def done(self): + self._set_ready() + + def _set_ready(self): + if self._current is None: + self._ready = False + else: + self._ready = self._current.text.strip() != '' + + def _used(self): + ready = self._ready + self._ready = False + return ready + + def _clear(self, start=None): + old = self._current + if self._current is not None: + # XXX Fail if self._current wasn't used up? 
# Regular expression patterns for C syntax.
#
# None of these patterns has any capturing.  However, a number of them
# have capturing markers compatible with utils.set_capture_groups().
#
# Every pattern is written for verbose mode (re.VERBOSE): whitespace is
# insignificant and "#" starts a comment that runs to the end of the line.
# The line structure of each pattern therefore matters; a comment must
# always be terminated by a newline before more pattern text follows.

import textwrap


def _ind(text, level=1, edges='both'):
    """Return *text* re-indented for embedding in a larger verbose pattern.

    text:  the (possibly multi-line) pattern text to embed.
    level: number of indent units applied to every line.
    edges: which ends get normalized.  "pre" (or "both") replaces leading
           whitespace with a newline plus the indent.  "post" (or "both")
           replaces trailing whitespace with a newline plus one indent
           level less, so the text that follows the substitution starts
           on a fresh line.

    The trailing newline matters in verbose mode: it guarantees that a
    comment on the last line of *text* cannot swallow whatever follows
    the substitution in the enclosing pattern.
    """
    # NOTE(review): the indent unit is assumed to be four spaces; since the
    # patterns are verbose-mode, the exact width has no effect on matching.
    unit = '    '
    indent = unit * level
    text = textwrap.indent(text, indent)
    if edges in ('pre', 'both'):
        text = '\n' + indent + text.lstrip()
    if edges in ('post', 'both'):
        text = text.rstrip() + '\n' + unit * (level - 1)
    return text


#######################################
# general

# A single hexadecimal digit.  (Only a-f/A-F are hex letters.)
HEX = r'(?: [0-9a-fA-F] )'

# A character literal or a string literal.  The final character-literal
# alternative covers C octal escapes (a backslash followed by one to
# three octal digits), e.g. '\101'.
STRING_LITERAL = textwrap.dedent(rf'''
    (?:
        # character literal
        (?:
            ['] [^'] [']
            |
            ['] \\ . [']
            |
            ['] \\x{HEX}{HEX} [']
            |
            ['] \\0\d\d [']
            |
            (?:
                ['] \\o[01]\d\d [']
                |
                ['] \\o2[0-4]\d [']
                |
                ['] \\o25[0-5] [']
            )
            |
            ['] \\ [0-7] [0-7]? [0-7]? [']
        )
        |
        # string literal
        (?:
            ["] (?: [^"\\]* \\ . )* [^"\\]* ["]
        )
        # end string literal
    )
    ''')

_KEYWORD = textwrap.dedent(r'''
    (?:
        \b
        (?:
            auto |
            extern |
            register |
            static |
            typedef |

            const |
            volatile |

            signed |
            unsigned |
            char |
            short |
            int |
            long |
            float |
            double |
            void |

            struct |
            union |
            enum |

            goto |
            return |
            sizeof |
            break |
            continue |
            if |
            else |
            for |
            do |
            while |
            switch |
            case |
            default |
            entry
        )
        \b
    )
    ''')
KEYWORD = rf'''
    # keyword
    {_KEYWORD}
    # end keyword
    '''
# From here on _KEYWORD is the whitespace-free form, so that it can be
# embedded inline in the single-line patterns below.
_KEYWORD = ''.join(_KEYWORD.split())

IDENTIFIER = r'(?: [a-zA-Z_][a-zA-Z0-9_]* )'
# We use a negative lookahead to filter out keywords.
STRICT_IDENTIFIER = rf'(?: (?! {_KEYWORD} ) \b {IDENTIFIER} \b )'
# Like STRICT_IDENTIFIER but with an optional "-<digits>" suffix
# (presumably the names generated for anonymous types -- confirm).
ANON_IDENTIFIER = rf'(?: (?! {_KEYWORD} ) \b {IDENTIFIER} (?: - \d+ )? \b )'


#######################################
# types

SIMPLE_TYPE = textwrap.dedent(rf'''
    # simple type
    (?:
        \b
        (?:
            void
            |
            (?: signed | unsigned )  # implies int
            |
            (?:
                (?: (?: signed | unsigned ) \s+ )?
                (?: (?: long | short ) \s+ )?
                (?: char | short | int | long | float | double )
            )
        )
        \b
    )
    # end simple type
    ''')

COMPOUND_TYPE_KIND = r'(?: \b (?: struct | union | enum ) \b )'


#######################################
# variable declarations

STORAGE_CLASS = r'(?: \b (?: auto | register | static | extern ) \b )'
TYPE_QUALIFIER = r'(?: \b (?: const | volatile ) \b )'
PTR_QUALIFIER = rf'(?: [*] (?: \s* {TYPE_QUALIFIER} )? )'

TYPE_SPEC = textwrap.dedent(rf'''
    # type spec
    (?:
        {_ind(SIMPLE_TYPE, 2)}
        |
        (?:
            [_]*typeof[_]*
            \s* [(]
            (?: \s* [*&] )*
            \s* {STRICT_IDENTIFIER}
            \s* [)]
        )
        |
        # reference to a compound type
        (?:
            {COMPOUND_TYPE_KIND}
            (?: \s* {ANON_IDENTIFIER} )?
        )
        |
        # reference to a typedef
        {STRICT_IDENTIFIER}
    )
    # end type spec
    ''')

DECLARATOR = textwrap.dedent(rf'''
    # declarator  (possibly abstract)
    (?:
        (?: {PTR_QUALIFIER} \s* )*
        (?:
            (?:
                (?:  # <IDENTIFIER>
                    {STRICT_IDENTIFIER}
                )
                (?: \s* \[ (?: \s* [^\]]+ \s* )? [\]] )*  # arrays
            )
            |
            (?:
                [(] \s*
                (?:  # <WRAPPED_IDENTIFIER>
                    {STRICT_IDENTIFIER}
                )
                (?: \s* \[ (?: \s* [^\]]+ \s* )? [\]] )*  # arrays
                \s* [)]
            )
            |
            # func ptr
            (?:
                [(] (?: \s* {PTR_QUALIFIER} )? \s*
                (?:  # <FUNC_IDENTIFIER>
                    {STRICT_IDENTIFIER}
                )
                (?: \s* \[ (?: \s* [^\]]+ \s* )? [\]] )*  # arrays
                \s* [)]
                # We allow for a single level of paren nesting in parameters.
                \s* [(] (?: [^()]* [(] [^)]* [)] )* [^)]* [)]
            )
        )
    )
    # end declarator
    ''')

VAR_DECL = textwrap.dedent(rf'''
    # var decl (and typedef and func return type)
    (?:
        (?:
            (?:  # <STORAGE>
                {STORAGE_CLASS}
            )
            \s*
        )?
        (?:
            (?:  # <TYPE_QUAL>
                {TYPE_QUALIFIER}
            )
            \s*
        )?
        (?:
            (?:  # <TYPE_SPEC>
                {_ind(TYPE_SPEC, 4)}
            )
        )
        \s*
        (?:
            (?:  # <DECLARATOR>
                {_ind(DECLARATOR, 4)}
            )
        )
    )
    # end var decl
    ''')

INITIALIZER = textwrap.dedent(rf'''
    # initializer
    (?:
        (?:
            [(]
            # no nested parens (e.g. func ptr)
            [^)]*
            [)]
            \s*
        )?
        (?:
            # a string literal
            (?:
                (?: {_ind(STRING_LITERAL, 4)} \s* )*
                {_ind(STRING_LITERAL, 4)}
            )
            |

            # a simple initializer
            (?:
                (?:
                    [^'",;{{]*
                    {_ind(STRING_LITERAL, 4)}
                )*
                [^'",;{{]*
            )
            |

            # a struct/array literal
            (?:
                # We only expect compound initializers with
                # single-variable declarations.
                {{
                (?:
                    [^'";]*?
                    {_ind(STRING_LITERAL, 5)}
                )*
                [^'";]*?
                }}
                (?= \s* ; )  # Note this lookahead.
            )
        )
    )
    # end initializer
    ''')


#######################################
# compound type declarations

STRUCT_MEMBER_DECL = textwrap.dedent(rf'''
    (?:
        # inline compound type decl
        (?:
            (?:  # <COMPOUND_TYPE_KIND>
                {COMPOUND_TYPE_KIND}
            )
            (?:
                \s+
                (?:  # <COMPOUND_TYPE_NAME>
                    {STRICT_IDENTIFIER}
                )
            )?
            \s* {{
        )
        |
        (?:
            # typed member
            (?:
                # Technically it doesn't have to have a type...
                (?:  # <SPECIFIER_QUALIFIER>
                    (?: {TYPE_QUALIFIER} \s* )?
                    {_ind(TYPE_SPEC, 5)}
                )
                (?:
                    # If it doesn't have a declarator then it will have
                    # a size and vice versa.
                    \s*
                    (?:  # <DECLARATOR>
                        {_ind(DECLARATOR, 6)}
                    )
                )?
            )

            # sized member
            (?:
                \s* [:] \s*
                (?:  # <SIZE>
                    \d+
                )
            )?
            \s*
            (?:  # <ENDING>
                [,;]
            )
        )
        |
        (?:
            \s*
            (?:  # <CLOSE>
                }}
            )
        )
    )
    ''')

ENUM_MEMBER_DECL = textwrap.dedent(rf'''
    (?:
        (?:
            \s*
            (?:  # <CLOSE>
                }}
            )
        )
        |
        (?:
            \s*
            (?:  # <NAME>
                {IDENTIFIER}
            )
            (?:
                \s* = \s*
                (?:  # <INIT>
                    {_ind(STRING_LITERAL, 4)}
                    |
                    [^'",}}]+
                )
            )?
            \s*
            (?:  # <ENDING>
                , | }}
            )
        )
    )
    ''')


#######################################
# statements

SIMPLE_STMT_BODY = textwrap.dedent(rf'''
    # simple statement body
    (?:
        (?:
            [^'"{{}};]*
            {_ind(STRING_LITERAL, 3)}
        )*
        [^'"{{}};]*
        #(?= [;{{] )  # Note this lookahead.
    )
    # end simple statement body
    ''')
# In the "variable assignment" branch the member-access separator is a
# literal "." (written "[.]") or "->"; a bare "." would match any character.
SIMPLE_STMT = textwrap.dedent(rf'''
    # simple statement
    (?:
        (?:  # <SIMPLE_STMT>
            # stmt-inline "initializer"
            (?:
                return \b
                (?:
                    \s*
                    {_ind(INITIALIZER, 5)}
                )?
            )
            |
            # variable assignment
            (?:
                (?: [*] \s* )?
                (?:
                    {STRICT_IDENTIFIER} \s*
                    (?: [.] | -> ) \s*
                )*
                {STRICT_IDENTIFIER}
                (?: \s* \[ \s* \d+ \s* \] )?
                \s* = \s*
                {_ind(INITIALIZER, 4)}
            )
            |
            # catchall return statement
            (?:
                return \b
                (?:
                    (?:
                        [^'";]*
                        {_ind(STRING_LITERAL, 6)}
                    )*
                    \s* [^'";]*
                )?
            )
            |
            # simple statement
            (?:
                {_ind(SIMPLE_STMT_BODY, 4)}
            )
        )
        \s*
        (?:  # <SIMPLE_ENDING>
            ;
        )
    )
    # end simple statement
    ''')
COMPOUND_STMT = textwrap.dedent(rf'''
    # compound statement
    (?:
        \b
        (?:
            (?:
                (?:  # <COMPOUND_BARE>
                    else | do
                )
                \b
            )
            |
            (?:
                (?:  # <COMPOUND_LABELED>
                    (?:
                        case \b
                        (?:
                            [^'":]*
                            {_ind(STRING_LITERAL, 7)}
                        )*
                        \s* [^'":]*
                    )
                    |
                    default
                    |
                    {STRICT_IDENTIFIER}
                )
                \s* [:]
            )
            |
            (?:
                (?:  # <COMPOUND_PAREN>
                    for | while | if | switch
                )
                \s* (?= [(] )  # Note this lookahead.
            )
        )
        \s*
    )
    # end compound statement
    ''')


#######################################
# function bodies

LOCAL = textwrap.dedent(rf'''
    (?:
        # an empty statement
        (?:  # <EMPTY>
            ;
        )
        |
        # inline type decl
        (?:
            (?:
                (?:  # <INLINE_LEADING>
                    [^;{{}}]+?
                )
                \s*
            )?
            (?:  # <INLINE_PRE>
                (?: {STORAGE_CLASS} \s* )?
                (?: {TYPE_QUALIFIER} \s* )?
            )?  # </INLINE_PRE>
            (?:  # <INLINE_KIND>
                {COMPOUND_TYPE_KIND}
            )
            (?:
                \s+
                (?:  # <INLINE_NAME>
                    {STRICT_IDENTIFIER}
                )
            )?
            \s* {{
        )
        |
        # var decl
        (?:
            (?:  # <STORAGE>
                {STORAGE_CLASS}
            )?  # </STORAGE>
            (?:
                \s*
                (?:  # <VAR_DECL>
                    {_ind(VAR_DECL, 5)}
                )
            )
            (?:
                (?:
                    # initializer
                    # We expect only basic initializers.
                    \s* = \s*
                    (?:  # <VAR_INIT>
                        {_ind(INITIALIZER, 6)}
                    )
                )?
                (?:
                    \s*
                    (?:  # <VAR_ENDING>
                        [,;]
                    )
                )
            )
        )
        |
        {_ind(COMPOUND_STMT, 2)}
        |
        # start-of-block
        (?:
            (?:  # <BLOCK_LEADING>
                (?:
                    [^'"{{}};]*
                    {_ind(STRING_LITERAL, 5)}
                )*
                [^'"{{}};]*
                # Presumably we will not see "== {{".
                [^\s='"{{}});]
                \s*
            )?  # </BLOCK_LEADING>
            (?:  # <BLOCK_OPEN>
                {{
            )
        )
        |
        {_ind(SIMPLE_STMT, 2)}
        |
        # end-of-block
        (?:  # <BLOCK_CLOSE>
            }}
        )
    )
    ''')

LOCAL_STATICS = textwrap.dedent(rf'''
    (?:
        # inline type decl
        (?:
            (?:
                (?:  # <INLINE_LEADING>
                    [^;{{}}]+?
                )
                \s*
            )?
            (?:  # <INLINE_PRE>
                (?: {STORAGE_CLASS} \s* )?
                (?: {TYPE_QUALIFIER} \s* )?
            )?
            (?:  # <INLINE_KIND>
                {COMPOUND_TYPE_KIND}
            )
            (?:
                \s+
                (?:  # <INLINE_NAME>
                    {STRICT_IDENTIFIER}
                )
            )?
            \s* {{
        )
        |
        # var decl
        (?:
            # We only look for static variables.
            (?:  # <STATIC_DECL>
                static \b
                (?: \s* {TYPE_QUALIFIER} )?
                \s* {_ind(TYPE_SPEC, 4)}
                \s* {_ind(DECLARATOR, 4)}
            )
            \s*
            (?:
                (?:  # <STATIC_INIT>
                    = \s*
                    {_ind(INITIALIZER, 4)}
                    \s*
                    [,;{{]
                )
                |
                (?:  # <STATIC_ENDING>
                    [,;]
                )
            )
        )
        |
        # everything else
        (?:
            (?:  # <DELIM_LEADING>
                (?:
                    [^'"{{}};]*
                    {_ind(STRING_LITERAL, 4)}
                )*
                \s* [^'"{{}};]*
            )
            (?:
                (?:  # <BLOCK_OPEN>
                    {{
                )
                |
                (?:  # <BLOCK_CLOSE>
                    }}
                )
                |
                (?:  # <STMT_END>
                    ;
                )
            )
        )
    )
    ''')


#######################################
# global declarations

GLOBAL = textwrap.dedent(rf'''
    (?:
        # an empty statement
        (?:  # <EMPTY>
            ;
        )
        |

        # compound type decl (maybe inline)
        (?:
            (?:
                (?:  # <COMPOUND_LEADING>
                    [^;{{}}]+?
                )
                \s*
            )?
            (?:  # <COMPOUND_KIND>
                {COMPOUND_TYPE_KIND}
            )
            (?:
                \s+
                (?:  # <COMPOUND_NAME>
                    {STRICT_IDENTIFIER}
                )
            )?
            \s* {{
        )
        |
        # bogus inline decl artifact
        # This simplifies resolving the relative syntactic ambiguity of
        # inline structs.
        (?:
            (?:  # <FORWARD_KIND>
                {COMPOUND_TYPE_KIND}
            )
            \s*
            (?:  # <FORWARD_NAME>
                {ANON_IDENTIFIER}
            )
            (?:  # <MAYBE_INLINE_ACTUAL>
                [^=,;({{[*\]]*
                [=,;({{]
            )
        )
        |

        # typedef
        (?:
            \b typedef \b \s*
            (?:  # <TYPEDEF_DECL>
                {_ind(VAR_DECL, 4)}
            )
            (?:
                # We expect no inline type definitions in the parameters.
                \s* [(] \s*
                (?:  # <TYPEDEF_FUNC_PARAMS>
                    [^{{;]*
                )
                \s* [)]
            )?
            \s* ;
        )
        |

        # func decl/definition & var decls
        # XXX dedicated pattern for funcs (more restricted)?
        (?:
            (?:
                (?:  # <VAR_STORAGE>
                    {STORAGE_CLASS}
                )
                \s*
            )?
            (?:
                (?:  # <FUNC_INLINE>
                    \b inline \b
                )
                \s*
            )?
            (?:  # <VAR_DECL>
                {_ind(VAR_DECL, 4)}
            )
            (?:
                # func decl / definition
                (?:
                    (?:
                        # We expect no inline type definitions in the parameters.
                        \s* [(] \s*
                        (?:  # <FUNC_PARAMS>
                            [^{{;]*
                        )
                        \s* [)] \s*
                        (?:  # <FUNC_DELIM>
                            [{{;]
                        )
                    )
                    |
                    (?:
                        # This is some old-school syntax!
                        \s* [(] \s*
                        # We throw away the bare names:
                        {STRICT_IDENTIFIER}
                        (?: \s* , \s* {STRICT_IDENTIFIER} )*
                        \s* [)] \s*

                        # We keep the trailing param declarations:
                        (?:  # <FUNC_LEGACY_PARAMS>
                            # There's at least one!
                            (?: {TYPE_QUALIFIER} \s* )?
                            {_ind(TYPE_SPEC, 7)}
                            \s*
                            {_ind(DECLARATOR, 7)}
                            \s* ;
                            (?:
                                \s*
                                (?: {TYPE_QUALIFIER} \s* )?
                                {_ind(TYPE_SPEC, 8)}
                                \s*
                                {_ind(DECLARATOR, 8)}
                                \s* ;
                            )*
                        )
                        \s* {{
                    )
                )
                |
                # var / typedef
                (?:
                    (?:
                        # initializer
                        # We expect only basic initializers.
                        \s* = \s*
                        (?:  # <VAR_INIT>
                            {_ind(INITIALIZER, 6)}
                        )
                    )?
                    \s*
                    (?:  # <VAR_ENDING>
                        [,;]
                    )
                )
            )
        )
    )
    ''')