Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/advanced/extend_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from python_parser import PythonIndenter

GRAMMAR = r"""
%import .python3 (compound_stmt, single_input, file_input, eval_input, test, suite, _NEWLINE, _INDENT, _DEDENT, COMMENT)
%import python (compound_stmt, single_input, file_input, eval_input, test, suite, _NEWLINE, _INDENT, _DEDENT, COMMENT)

%extend compound_stmt: match_stmt

Expand Down
94 changes: 94 additions & 0 deletions examples/advanced/py3to2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
Python 3 to Python 2 converter (tree templates)
===============================================

This example demonstrates how to translate between two trees using tree templates.
It parses Python 3, translates it to a Python 2 AST, and then outputs the result as Python 2 code.

Uses reconstruct_python.py for generating the final Python 2 code.
"""


from lark import Lark
from lark.tree_templates import TemplateConf, TemplateTranslator

from lark.indenter import PythonIndenter
from reconstruct_python import PythonReconstructor


#
# 1. Define a Python parser that also accepts template vars in the code (in the form of $var)
#
TEMPLATED_PYTHON = r"""
%import python (single_input, file_input, eval_input, atom, var, stmt, expr, testlist_star_expr, _NEWLINE, _INDENT, _DEDENT, COMMENT, NAME)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a perfect usecase for an %include statement.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking that too

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You didn't submit a PR for this, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is part of #998, but that is not a finished implementation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm okay. Anyway, we can change it to %include when it's ready.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.


%extend atom: TEMPLATE_NAME -> var

TEMPLATE_NAME: "$" NAME

?template_start: (stmt | testlist_star_expr _NEWLINE)

%ignore /[\t \f]+/ // WS
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
%ignore COMMENT
"""

parser = Lark(TEMPLATED_PYTHON, parser='lalr', start=['single_input', 'file_input', 'eval_input', 'template_start'], postlex=PythonIndenter(), maybe_placeholders=False)


def parse_template(s):
    """Parse the template snippet ``s`` with the ``template_start`` rule.

    A newline is appended to ``s`` before it is handed to the parser.
    """
    source = s + '\n'
    return parser.parse(source, start='template_start')

def parse_code(s):
    """Parse the source text ``s`` with the ``file_input`` rule.

    A newline is appended to ``s`` before it is handed to the parser.
    """
    source = s + '\n'
    return parser.parse(source, start='file_input')


#
# 2. Define translations using templates (each template code is parsed to a template tree)
#

pytemplate = TemplateConf(parse=parse_template)

# Translation table: each key is a template for a Python 3 construct and each
# value is the template that replaces it. Names written as `$name` are
# template variables (TEMPLATE_NAME in the grammar above).
translations_3to2 = {
'yield from $a':
'for _tmp in $a: yield _tmp',

'raise $e from $x':
'raise $e',

'$a / $b':
'float($a) / $b',
}
# Turn both sides of every entry into Template objects by calling `pytemplate`
# on the source strings; the name is rebound to the converted mapping.
translations_3to2 = {pytemplate(k): pytemplate(v) for k, v in translations_3to2.items()}

#
# 3. Translate and reconstruct Python 3 code into valid Python 2 code
#

python_reconstruct = PythonReconstructor(parser)

def translate_py3to2(code):
    """Translate the Python 3 source ``code`` and return the reconstructed text.

    The code is parsed with ``parse_code``, every entry of
    ``translations_3to2`` is applied to the resulting tree, and the tree is
    turned back into text by ``python_reconstruct``.
    """
    parsed = parse_code(code)
    translator = TemplateTranslator(translations_3to2)
    return python_reconstruct.reconstruct(translator.translate(parsed))


#
# Test Code
#

_TEST_CODE = '''
if a / 2 > 1:
yield from [1,2,3]
else:
raise ValueError(a) from e

'''

def test():
    """Print the sample code, a separator line, and the translated result."""
    for chunk in (_TEST_CODE, ' -----> '):
        print(chunk)
    print(translate_py3to2(_TEST_CODE))

if __name__ == '__main__':
test()
10 changes: 1 addition & 9 deletions examples/advanced/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,9 @@
import glob, time

from lark import Lark
from lark.indenter import Indenter
from lark.indenter import PythonIndenter


class PythonIndenter(Indenter):
    """Indenter subclass carrying the token names used by the Python grammar."""

    # Names of the newline, indent and dedent token types.
    NL_type = '_NEWLINE'
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'

    # Bracket token types; the two lists are in matching open/close order.
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']

    # Presumably the number of columns a tab counts for -- confirm against Indenter.
    tab_len = 8

kwargs = dict(postlex=PythonIndenter(), start='file_input')

# Official Python grammar by Lark
Expand Down
21 changes: 17 additions & 4 deletions examples/advanced/reconstruct_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@

"""

from lark import Token
from lark import Lark, Token
from lark.reconstruct import Reconstructor
from lark.indenter import PythonIndenter

from python_parser import python_parser3
# Official Python grammar by Lark
kwargs = dict(parser='lalr', postlex=PythonIndenter(), start='file_input', maybe_placeholders=False)
python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'], **kwargs)


SPACE_AFTER = set(',+-*/~@<>="|:')
Expand Down Expand Up @@ -53,16 +56,26 @@ def postproc(items):
yield "\n"


python_reconstruct = Reconstructor(python_parser3, {'_NEWLINE': special, '_DEDENT': special, '_INDENT': special})
class PythonReconstructor:
    """Wrap a ``Reconstructor`` built for the given parser.

    The ``_NEWLINE``, ``_DEDENT`` and ``_INDENT`` terminals are all mapped to
    ``special``, and ``postproc`` is passed along on every ``reconstruct`` call.
    """

    def __init__(self, parser):
        term_subs = dict.fromkeys(('_NEWLINE', '_DEDENT', '_INDENT'), special)
        self._recons = Reconstructor(parser, term_subs)

    def reconstruct(self, tree):
        """Reconstruct ``tree``, passing ``postproc`` as the second argument."""
        recons = self._recons
        return recons.reconstruct(tree, postproc)


def test():
python_reconstructor = PythonReconstructor(python_parser3)

self_contents = open(__file__).read()

tree = python_parser3.parse(self_contents+'\n')
output = python_reconstruct.reconstruct(tree, postproc)
output = python_reconstructor.reconstruct(tree)

tree_new = python_parser3.parse(output)
print(tree.pretty())
print(tree_new.pretty())

assert tree == tree_new

print(output)
Expand Down
9 changes: 9 additions & 0 deletions lark/indenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,12 @@ def tab_len(self) -> int:
raise NotImplementedError()

###}


class PythonIndenter(Indenter):
    """Indenter subclass carrying the token names used by the Python grammar."""

    # Names of the newline, indent and dedent token types.
    NL_type = '_NEWLINE'
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'

    # Bracket token types; the two lists are in matching open/close order.
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']

    # Concrete value for the ``tab_len`` that the base class leaves unimplemented.
    tab_len = 8
154 changes: 154 additions & 0 deletions lark/tree_templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""This module defines utilities for matching and translation tree templates.

A tree template is a tree that contains nodes that are template variables.

"""

from typing import Union, Optional, Mapping

from lark import Tree, Transformer

TreeOrCode = Union[Tree, str]

class TemplateConf:
    """Template Configuration

    Allows customization for different uses of Template
    """

    def __init__(self, parse=None):
        # Optional callable used by ``_get_tree`` to turn template source
        # code (a string) into a Tree.
        self._parse = parse

    def test_var(self, var: Union[Tree, str]) -> Optional[str]:
        """Given a tree node, if it is a template variable return its name. Otherwise, return None.

        This method may be overridden for customization

        Parameters:
            var: Tree | str - The tree node to test

        """
        if isinstance(var, str) and var.startswith('$'):
            return var.lstrip('$')

        if isinstance(var, Tree) and var.data == 'var' and var.children:
            first = var.children[0]
            # Only a string child can spell a `$name`; guarding here avoids
            # crashing on a `var` node whose first child is itself a Tree.
            if isinstance(first, str) and first.startswith('$'):
                return first.lstrip('$')

        return None

    def _get_tree(self, template: TreeOrCode):
        """Return ``template`` as a Tree, parsing it first if it is a string.

        Parsing requires that a ``parse`` callable was given to the constructor.
        """
        if isinstance(template, str):
            assert self._parse
            template = self._parse(template)

        assert isinstance(template, Tree)
        return template

    def __call__(self, template):
        """Build a Template from ``template`` that is tied to this configuration."""
        return Template(template, conf=self)

    def _match_tree_template(self, template, tree):
        """Match ``template`` against ``tree``.

        Returns a dict mapping variable names to the nodes they matched, or
        None if there is no match. If a variable occurs more than once in the
        template, the last matched node is the one kept.
        """
        template_var = self.test_var(template)
        if template_var:
            return {template_var: tree}

        if isinstance(template, str):
            if template == tree:
                return {}
            return None

        assert isinstance(template, Tree), template

        # A Tree template cannot match a leaf (string/token) node. Without
        # this check, reading ``tree.data`` below raises AttributeError.
        if not isinstance(tree, Tree):
            return None

        if template.data != tree.data or len(template.children) != len(tree.children):
            return None

        res = {}
        for t1, t2 in zip(template.children, tree.children):
            matches = self._match_tree_template(t1, t2)
            if matches is None:
                return None

            res.update(matches)

        return res



class _ReplaceVars(Transformer):
    """Transformer that replaces template-variable nodes with their bound values.

    Parameters:
        conf: The TemplateConf whose ``test_var`` identifies variable nodes
        vars: Mapping from variable name to the value to substitute
    """

    def __init__(self, conf, vars):
        # Let the base Transformer set up its own state before adding ours.
        super().__init__()
        self._conf = conf
        self._vars = vars

    def __default__(self, data, children, meta):
        tree = super().__default__(data, children, meta)

        var = self._conf.test_var(tree)
        if not var:
            return tree

        try:
            return self._vars[var]
        except KeyError:
            # Still a KeyError, but one that says which variable is unbound.
            raise KeyError("No mapping for template variable (%s)" % var) from None


class Template:
    """Represents a tree template, tied to a specific configuration

    A tree template is a tree that contains nodes that are template variables.
    Those variables will match any tree.
    (future versions may support annotations on the variables, to allow more complex templates)
    """

    def __init__(self, tree: TreeOrCode, conf = TemplateConf()):
        # ``tree`` may be a Tree or source code; the configuration parses
        # source code into a Tree if it was given a parse function.
        self.conf = conf
        self.tree = conf._get_tree(tree)

    def match(self, tree: TreeOrCode):
        """Match a tree template to a tree.

        A tree template without variables will only match ``tree`` if it is equal to the template.

        Parameters:
            tree (Tree): The tree to match to the template

        Returns:
            Optional[Dict[str, Tree]]: If match is found, returns a dictionary mapping
                template variable names to their matching tree nodes.
                If no match was found, returns None.
        """
        tree = self.conf._get_tree(tree)
        return self.conf._match_tree_template(self.tree, tree)

    def search(self, tree: TreeOrCode):
        """Search for all occurrences of the tree template inside ``tree``.

        Yields ``(subtree, vars)`` pairs, where ``vars`` is the result of
        ``match`` for that subtree.
        """
        tree = self.conf._get_tree(tree)
        for subtree in tree.iter_subtrees():
            res = self.match(subtree)
            # Compare against None explicitly: a template without variables
            # matches with an empty (falsy) dict, which is still a match.
            if res is not None:
                yield subtree, res

    def apply_vars(self, vars: Mapping[str, Tree]):
        """Apply vars to the template tree
        """
        return _ReplaceVars(self.conf, vars).transform(self.tree)


def translate(t1: Template, t2: Template, tree: TreeOrCode):
    """Search ``tree`` and rewrite each occurrence of ``t1`` as ``t2``.

    ``tree`` is first converted to a Tree through ``t1``'s configuration
    (parsed if it is source code). Every node found by ``t1.search`` is
    updated through its ``set`` method, and the tree is returned.
    """
    root = t1.conf._get_tree(tree)
    for node, bindings in t1.search(root):
        replacement = t2.apply_vars(bindings)
        node.set(replacement.data, replacement.children)
    return root



class TemplateTranslator:
    """Utility class for translating a collection of patterns

    Parameters:
        translations: Mapping from each pattern Template to the Template
            that replaces it. Both keys and values must be Template instances.
    """

    def __init__(self, translations: Mapping[Template, Template]):
        # The annotation now agrees with this check: only Template objects
        # are accepted, not raw trees or source strings.
        assert all( isinstance(k, Template) and isinstance(v, Template) for k, v in translations.items() )
        self.translations = translations

    def translate(self, tree: Tree):
        """Apply every translation to ``tree``, in mapping order, and return the result."""
        for k, v in self.translations.items():
            tree = translate(k, v, tree)
        return tree
2 changes: 1 addition & 1 deletion tests/test_nearley/nearley