diff options
Diffstat (limited to 'Tools/peg_generator/pegen/parser_generator.py')
-rw-r--r-- | Tools/peg_generator/pegen/parser_generator.py | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py new file mode 100644 index 00000000000..7851a7c90f4 --- /dev/null +++ b/Tools/peg_generator/pegen/parser_generator.py @@ -0,0 +1,188 @@ +import contextlib +import token +from abc import abstractmethod + +from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple + +from pegen import sccutils +from pegen.grammar import ( + Grammar, + Rule, + Rhs, + Alt, + NamedItem, + Plain, + NameLeaf, + StringLeaf, + Gather, +) +from pegen.grammar import GrammarError, GrammarVisitor + + +class RuleCheckingVisitor(GrammarVisitor): + def __init__(self, rules: Dict[str, Rule]): + self.rules = rules + + def visit_NameLeaf(self, node: NameLeaf) -> None: + if node.value not in self.rules and node.value not in token.tok_name.values(): + # TODO: Add line/col info to (leaf) nodes + raise GrammarError(f"Dangling reference to rule {node.value!r}") + + +class ParserGenerator: + + callmakervisitor: GrammarVisitor + + def __init__(self, grammar: Grammar, file: Optional[IO[Text]]): + self.grammar = grammar + self.rules = grammar.rules + if "trailer" not in grammar.metas and "start" not in self.rules: + raise GrammarError("Grammar without a trailer must have a 'start' rule") + checker = RuleCheckingVisitor(self.rules) + for rule in self.rules.values(): + checker.visit(rule) + self.file = file + self.level = 0 + compute_nullables(self.rules) + self.first_graph, self.first_sccs = compute_left_recursives(self.rules) + self.todo = self.rules.copy() # Rules to generate + self.counter = 0 # For name_rule()/name_loop() + self.keyword_counter = 499 # For keyword_type() + + @abstractmethod + def generate(self, filename: str) -> None: + raise NotImplementedError + + @contextlib.contextmanager + def indent(self) -> Iterator[None]: + self.level += 1 + try: + yield + finally: + self.level -= 1 + + def print(self, *args: object) -> None: + if not args: + print(file=self.file) + else: + print(" " * self.level, end="", file=self.file) + print(*args, file=self.file) + + def printblock(self, lines: str) -> None: + for line in lines.splitlines(): + self.print(line) + + def collect_todo(self) -> None: + done: Set[str] = set() + while True: + alltodo = list(self.todo) + todo = [i for i in alltodo if i not in done] + if not todo: + break + for rulename in todo: + self.todo[rulename].collect_todo(self) + done = set(alltodo) + + def keyword_type(self) -> int: + self.keyword_counter += 1 + return self.keyword_counter + + def name_node(self, rhs: Rhs) -> str: + self.counter += 1 + name = f"_tmp_{self.counter}" # TODO: Pick a nicer name. + self.todo[name] = Rule(name, None, rhs) + return name + + def name_loop(self, node: Plain, is_repeat1: bool) -> str: + self.counter += 1 + if is_repeat1: + prefix = "_loop1_" + else: + prefix = "_loop0_" + name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name. + self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])])) + return name + + def name_gather(self, node: Gather) -> str: + self.counter += 1 + name = f"_gather_{self.counter}" + self.counter += 1 + extra_function_name = f"_loop0_{self.counter}" + extra_function_alt = Alt( + [NamedItem(None, node.separator), NamedItem("elem", node.node),], action="elem", + ) + self.todo[extra_function_name] = Rule( + extra_function_name, None, Rhs([extra_function_alt]), + ) + alt = Alt( + [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name)),], + ) + self.todo[name] = Rule(name, None, Rhs([alt]),) + return name + + +def dedupe(name: str, names: List[str]) -> str: + origname = name + counter = 0 + while name in names: + counter += 1 + name = f"{origname}_{counter}" + names.append(name) + return name + + +def compute_nullables(rules: Dict[str, Rule]) -> None: + """Compute which rules in a grammar are nullable. + + Thanks to TatSu (tatsu/leftrec.py) for inspiration. + """ + for rule in rules.values(): + rule.nullable_visit(rules) + + +def compute_left_recursives( + rules: Dict[str, Rule] +) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]: + graph = make_first_graph(rules) + sccs = list(sccutils.strongly_connected_components(graph.keys(), graph)) + for scc in sccs: + if len(scc) > 1: + for name in scc: + rules[name].left_recursive = True + # Try to find a leader such that all cycles go through it. + leaders = set(scc) + for start in scc: + for cycle in sccutils.find_cycles_in_scc(graph, scc, start): + ## print("Cycle:", " -> ".join(cycle)) + leaders -= scc - set(cycle) + if not leaders: + raise ValueError( + f"SCC {scc} has no leadership candidate (no element is included in all cycles)" + ) + ## print("Leaders:", leaders) + leader = min(leaders) # Pick an arbitrary leader from the candidates. + rules[leader].leader = True + else: + name = min(scc) # The only element. + if name in graph[name]: + rules[name].left_recursive = True + rules[name].leader = True + return graph, sccs + + +def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: + """Compute the graph of left-invocations. + + There's an edge from A to B if A may invoke B at its initial + position. + + Note that this requires the nullable flags to have been computed. + """ + graph = {} + vertices: Set[str] = set() + for rulename, rhs in rules.items(): + graph[rulename] = names = rhs.initial_names() + vertices |= names + for vertex in vertices: + graph.setdefault(vertex, set()) + return graph |