From 93f35581d03b270b10ff3feed3f8afbf9233bddb Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Mon, 23 Sep 2024 14:10:22 -0400
Subject: [PATCH 01/14] Create an initial PoC for reversing parsed HCL back
 into text

---
 .gitignore       |  3 +++
 hcl2/__init__.py |  2 +-
 hcl2/api.py      | 24 +++++++++++++++++-
 hcl2/parser.py   | 65 ++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5a2fc4a4..aea11ed0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -121,3 +121,6 @@ node_modules/
 # Don't commit the generated parser
 lark_parser.py
 .lark_cache.bin
+
+# ASDF tool-versions file
+.tool-versions
\ No newline at end of file
diff --git a/hcl2/__init__.py b/hcl2/__init__.py
index 8b41bd87..7ea2ac6b 100644
--- a/hcl2/__init__.py
+++ b/hcl2/__init__.py
@@ -5,4 +5,4 @@
 except ImportError:
     __version__ = "unknown"
 
-from .api import load, loads
+from .api import load, loads, parse, parses, writes
diff --git a/hcl2/api.py b/hcl2/api.py
index 9079fce8..d06e36ab 100644
--- a/hcl2/api.py
+++ b/hcl2/api.py
@@ -1,7 +1,8 @@
 """The API that will be exposed to users of this package"""
 from typing import TextIO
 
-from hcl2.parser import hcl2
+from lark.tree import Tree
+from hcl2.parser import hcl2, hcl2_reconstructor
 from hcl2.transformer import DictTransformer
 
 
@@ -26,3 +27,24 @@ def loads(text: str, with_meta=False) -> dict:
     # Append a new line as a temporary fix
     tree = hcl2.parse(text + "\n")
     return DictTransformer(with_meta=with_meta).transform(tree)
+
+
+def parse(file: TextIO) -> Tree:
+    """Load HCL2 syntax tree from a file.
+    :param file: File with hcl2 to be loaded as a dict.
+    """
+    return parses(file.read())
+
+
+def parses(text: str) -> Tree:
+    """Load HCL2 syntax tree from a string.
+    :param text: Text with hcl2 to be loaded as a dict.
+    """
+    return hcl2.parse(text + "\n")
+
+
+def writes(ast: Tree) -> str:
+    """Convert an HCL2 syntax tree to a string.
+    :param ast: HCL2 syntax tree.
+    """
+    return hcl2_reconstructor.reconstruct(ast)
diff --git a/hcl2/parser.py b/hcl2/parser.py
index e6d810bd..488cde56 100644
--- a/hcl2/parser.py
+++ b/hcl2/parser.py
@@ -1,7 +1,8 @@
 """A parser for HCL2 implemented using the Lark parser"""
 from pathlib import Path
 
-from lark import Lark
+from lark import Lark, Token
+from lark.reconstruct import Reconstructor
 
 
 PARSER_FILE = Path(__file__).absolute().resolve().parent / ".lark_cache.bin"
@@ -10,7 +11,67 @@
 hcl2 = Lark.open(
     "hcl2.lark",
     parser="lalr",
-    cache=str(PARSER_FILE),  # Disable/Delete file to effect changes to the grammar
+    # cache=str(PARSER_FILE),  # Disable/Delete file to effect changes to the grammar
     rel_to=__file__,
     propagate_positions=True,
+    maybe_placeholders=False,  # Needed for reconstruction
 )
+
+SPACE_AFTER = set(',+-*/~@<>="|:')
+SPACE_BEFORE = (SPACE_AFTER - set(",:")) | set("'")
+
+
+def _special_symbol(sym):
+    return Token("SPECIAL", sym.name)
+
+
+def _postprocess_reconstruct(items):
+    stack = ["\n"]
+    actions = []
+    last_was_whitespace = True
+    for item in items:
+        if isinstance(item, Token) and item.type == "SPECIAL":
+            actions.append(item.value)
+        else:
+            if actions:
+                assert (
+                    actions[0] == "_NEWLINE" and "_NEWLINE" not in actions[1:]
+                ), actions
+
+                for a in actions[1:]:
+                    if a == "_INDENT":
+                        stack.append(stack[-1] + " " * 4)
+                    else:
+                        assert a == "_DEDENT"
+                        stack.pop()
+                actions.clear()
+                yield stack[-1]
+                last_was_whitespace = True
+            if not last_was_whitespace:
+                if item[0] in SPACE_BEFORE:
+                    yield " "
+            yield item
+            last_was_whitespace = item[-1].isspace()
+            if not last_was_whitespace:
+                if item[-1] in SPACE_AFTER:
+                    yield " "
+                    last_was_whitespace = True
+    yield "\n"
+
+
+class HCLReconstructor:
+    def __init__(self, parser):
+        self._recons = Reconstructor(
+            parser,
+            {
+                "_NEWLINE": _special_symbol,
+                "_DEDENT": _special_symbol,
+                "_INDENT": _special_symbol,
+            },
+        )
+
+    def reconstruct(self, tree):
+        return self._recons.reconstruct(tree, _postprocess_reconstruct)
+
+
+hcl2_reconstructor = HCLReconstructor(hcl2)

From e05bd9cfee42e4eb5852e529a3f68f087e6175f2 Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Mon, 23 Sep 2024 14:16:21 -0400
Subject: [PATCH 02/14] Reclassify multi-line comments as "comment" instead of
 ignored tokens

---
 hcl2/hcl2.lark | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark
index 0d6b40b5..15c381d0 100644
--- a/hcl2/hcl2.lark
+++ b/hcl2/hcl2.lark
@@ -3,7 +3,7 @@ body : (new_line_or_comment? (attribute | block))* new_line_or_comment?
 attribute : identifier "=" expression
 block : identifier (identifier | STRING_LIT)* new_line_or_comment? "{" body "}"
 new_line_and_or_comma: new_line_or_comment | "," | "," new_line_or_comment
-new_line_or_comment: ( /\n/ | /#.*\n/ | /\/\/.*\n/ )+
+new_line_or_comment: ( /\n/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ )+
 
 identifier : /[a-zA-Z_][a-zA-Z0-9_-]*/ | IN | FOR | IF | FOR_EACH
 IF : "if"
@@ -78,4 +78,3 @@ full_splat : "[*]" (get_attr | index)*
 !for_cond : "if" new_line_or_comment? expression
 
 %ignore /[ \t]+/
-%ignore /\/\*(.|\n)*?(\*\/)/

From 44a280af15e8cbe55ec5d13a5e38a5a635674660 Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Tue, 24 Sep 2024 16:42:31 -0400
Subject: [PATCH 03/14] Keep track of indentation so it can be reconstructed

---
 hcl2/__init__.py |  2 +-
 hcl2/api.py      |  8 ++++----
 hcl2/hcl2.lark   | 10 +++++++++-
 hcl2/indenter.py | 15 +++++++++++++++
 hcl2/parser.py   | 19 +++++++++++--------
 5 files changed, 40 insertions(+), 14 deletions(-)
 create mode 100644 hcl2/indenter.py

diff --git a/hcl2/__init__.py b/hcl2/__init__.py
index 7ea2ac6b..e6fff56e 100644
--- a/hcl2/__init__.py
+++ b/hcl2/__init__.py
@@ -5,4 +5,4 @@
 except ImportError:
     __version__ = "unknown"
 
-from .api import load, loads, parse, parses, writes
+from .api import load, loads, parse, parses, writes, AST
diff --git a/hcl2/api.py b/hcl2/api.py
index d06e36ab..17c00aa4 100644
--- a/hcl2/api.py
+++ b/hcl2/api.py
@@ -1,7 +1,7 @@
 """The API that will be exposed to users of this package"""
 from typing import TextIO
 
-from lark.tree import Tree
+from lark.tree import Tree as AST
 from hcl2.parser import hcl2, hcl2_reconstructor
 from hcl2.transformer import DictTransformer
 
@@ -29,21 +29,21 @@ def loads(text: str, with_meta=False) -> dict:
     return DictTransformer(with_meta=with_meta).transform(tree)
 
 
-def parse(file: TextIO) -> Tree:
+def parse(file: TextIO) -> AST:
     """Load HCL2 syntax tree from a file.
     :param file: File with hcl2 to be loaded as a dict.
     """
     return parses(file.read())
 
 
-def parses(text: str) -> Tree:
+def parses(text: str) -> AST:
     """Load HCL2 syntax tree from a string.
     :param text: Text with hcl2 to be loaded as a dict.
     """
     return hcl2.parse(text + "\n")
 
 
-def writes(ast: Tree) -> str:
+def writes(ast: AST) -> str:
     """Convert an HCL2 syntax tree to a string.
     :param ast: HCL2 syntax tree.
     """
diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark
index 15c381d0..473925bd 100644
--- a/hcl2/hcl2.lark
+++ b/hcl2/hcl2.lark
@@ -3,7 +3,14 @@ body : (new_line_or_comment? (attribute | block))* new_line_or_comment?
 attribute : identifier "=" expression
 block : identifier (identifier | STRING_LIT)* new_line_or_comment? "{" body "}"
 new_line_and_or_comma: new_line_or_comment | "," | "," new_line_or_comment
-new_line_or_comment: ( /\n/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ )+
+new_line_or_comment: ( new_line | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ | indent | dedent )+
+// these rules are needed (rather than including the terminals in the above rule)
+// because information about the terminals is discarded when the AST is built,
+// making reconstruction impossible without using named rules
+new_line: _NL
+indent: _INDENT
+dedent: _DEDENT
+_NL: /\n[ \t]*/
 
 identifier : /[a-zA-Z_][a-zA-Z0-9_-]*/ | IN | FOR | IF | FOR_EACH
 IF : "if"
@@ -78,3 +85,4 @@ full_splat : "[*]" (get_attr | index)*
 !for_cond : "if" new_line_or_comment? expression
 
 %ignore /[ \t]+/
+%declare _INDENT _DEDENT
\ No newline at end of file
diff --git a/hcl2/indenter.py b/hcl2/indenter.py
new file mode 100644
index 00000000..c31da696
--- /dev/null
+++ b/hcl2/indenter.py
@@ -0,0 +1,15 @@
+from lark.indenter import Indenter
+
+
+class TerraformIndenter(Indenter):
+    """A postlexer that "injects" _INDENT/_DEDENT tokens based on indentation, according to Terraform syntax.
+
+    See also: the ``postlex`` option in `Lark`.
+    """
+
+    NL_type = "_NL"
+    OPEN_PAREN_types = ["LPAR"]
+    CLOSE_PAREN_types = ["RPAR"]
+    INDENT_type = "_INDENT"
+    DEDENT_type = "_DEDENT"
+    tab_len = 2
diff --git a/hcl2/parser.py b/hcl2/parser.py
index 488cde56..e0ea4980 100644
--- a/hcl2/parser.py
+++ b/hcl2/parser.py
@@ -4,6 +4,7 @@
 from lark import Lark, Token
 from lark.reconstruct import Reconstructor
 
+from .indenter import TerraformIndenter
 
 PARSER_FILE = Path(__file__).absolute().resolve().parent / ".lark_cache.bin"
 
@@ -15,6 +16,7 @@
     rel_to=__file__,
     propagate_positions=True,
     maybe_placeholders=False,  # Needed for reconstruction
+    postlex=TerraformIndenter(),
 )
 
 SPACE_AFTER = set(',+-*/~@<>="|:')
@@ -34,16 +36,17 @@ def _postprocess_reconstruct(items):
             actions.append(item.value)
         else:
             if actions:
-                assert (
-                    actions[0] == "_NEWLINE" and "_NEWLINE" not in actions[1:]
-                ), actions
+                assert actions[0] == "_NL", actions
 
                 for a in actions[1:]:
-                    if a == "_INDENT":
-                        stack.append(stack[-1] + " " * 4)
-                    else:
-                        assert a == "_DEDENT"
+                    if a == "_NL":
+                        yield "\n"
+                    elif a == "_INDENT":
+                        stack.append(stack[-1] + " " * 2)
+                    elif a == "_DEDENT":
                         stack.pop()
+                    else:
+                        assert False, a
                 actions.clear()
                 yield stack[-1]
                 last_was_whitespace = True
@@ -64,7 +67,7 @@ def __init__(self, parser):
         self._recons = Reconstructor(
             parser,
             {
-                "_NEWLINE": _special_symbol,
+                "_NL": _special_symbol,
                 "_DEDENT": _special_symbol,
                 "_INDENT": _special_symbol,
             },

From 51c0972c36efd67094f7ac0ca272286a33b737ac Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Tue, 1 Oct 2024 14:53:11 -0400
Subject: [PATCH 04/14] Simplify reconstruction to just output the spaces as is
 around newlines

---
 hcl2/hcl2.lark   | 12 ++-----
 hcl2/indenter.py | 15 --------
 hcl2/parser.py   | 93 ++++++++++++++++++++++++------------------------
 3 files changed, 48 insertions(+), 72 deletions(-)
 delete mode 100644 hcl2/indenter.py

diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark
index 473925bd..0523b87f 100644
--- a/hcl2/hcl2.lark
+++ b/hcl2/hcl2.lark
@@ -3,14 +3,7 @@ body : (new_line_or_comment? (attribute | block))* new_line_or_comment?
 attribute : identifier "=" expression
 block : identifier (identifier | STRING_LIT)* new_line_or_comment? "{" body "}"
 new_line_and_or_comma: new_line_or_comment | "," | "," new_line_or_comment
-new_line_or_comment: ( new_line | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ | indent | dedent )+
-// these rules are needed (rather than including the terminals in the above rule)
-// because information about the terminals is discarded when the AST is built,
-// making reconstruction impossible without using named rules
-new_line: _NL
-indent: _INDENT
-dedent: _DEDENT
-_NL: /\n[ \t]*/
+new_line_or_comment: ( /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ )+
 
 identifier : /[a-zA-Z_][a-zA-Z0-9_-]*/ | IN | FOR | IF | FOR_EACH
 IF : "if"
@@ -84,5 +77,4 @@ full_splat : "[*]" (get_attr | index)*
 !for_intro : "for" new_line_or_comment? identifier ("," identifier new_line_or_comment?)? new_line_or_comment? "in" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment?
 !for_cond : "if" new_line_or_comment? expression
 
-%ignore /[ \t]+/
-%declare _INDENT _DEDENT
\ No newline at end of file
+%ignore /[ \t]+/
\ No newline at end of file
diff --git a/hcl2/indenter.py b/hcl2/indenter.py
deleted file mode 100644
index c31da696..00000000
--- a/hcl2/indenter.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from lark.indenter import Indenter
-
-
-class TerraformIndenter(Indenter):
-    """A postlexer that "injects" _INDENT/_DEDENT tokens based on indentation, according to Terraform syntax.
-
-    See also: the ``postlex`` option in `Lark`.
-    """
-
-    NL_type = "_NL"
-    OPEN_PAREN_types = ["LPAR"]
-    CLOSE_PAREN_types = ["RPAR"]
-    INDENT_type = "_INDENT"
-    DEDENT_type = "_DEDENT"
-    tab_len = 2
diff --git a/hcl2/parser.py b/hcl2/parser.py
index e0ea4980..3b58f556 100644
--- a/hcl2/parser.py
+++ b/hcl2/parser.py
@@ -3,8 +3,7 @@
 
 from lark import Lark, Token
 from lark.reconstruct import Reconstructor
-
-from .indenter import TerraformIndenter
+from lark.utils import is_id_continue
 
 PARSER_FILE = Path(__file__).absolute().resolve().parent / ".lark_cache.bin"
 
@@ -16,65 +15,65 @@
     rel_to=__file__,
     propagate_positions=True,
     maybe_placeholders=False,  # Needed for reconstruction
-    postlex=TerraformIndenter(),
 )
 
 SPACE_AFTER = set(',+-*/~@<>="|:')
 SPACE_BEFORE = (SPACE_AFTER - set(",:")) | set("'")
-
-
-def _special_symbol(sym):
-    return Token("SPECIAL", sym.name)
-
+DIGITS = set("0123456789")
+IDENT_JOINERS = set(".")
 
 def _postprocess_reconstruct(items):
-    stack = ["\n"]
-    actions = []
-    last_was_whitespace = True
+    """
+    Postprocess the stream of tokens derived from the AST during reconstruction.
+
+    For HCL2, this is used exclusively for adding whitespace in the right locations.
+    """
+    prev_item = ""
     for item in items:
-        if isinstance(item, Token) and item.type == "SPECIAL":
-            actions.append(item.value)
-        else:
-            if actions:
-                assert actions[0] == "_NL", actions
-
-                for a in actions[1:]:
-                    if a == "_NL":
-                        yield "\n"
-                    elif a == "_INDENT":
-                        stack.append(stack[-1] + " " * 2)
-                    elif a == "_DEDENT":
-                        stack.pop()
-                    else:
-                        assert False, a
-                actions.clear()
-                yield stack[-1]
-                last_was_whitespace = True
-            if not last_was_whitespace:
-                if item[0] in SPACE_BEFORE:
-                    yield " "
-            yield item
-            last_was_whitespace = item[-1].isspace()
-            if not last_was_whitespace:
-                if item[-1] in SPACE_AFTER:
-                    yield " "
-                    last_was_whitespace = True
+        # see if we need to add space after the previous identifier
+        if (
+            prev_item
+            and item
+            and prev_item[-1] not in SPACE_AFTER
+            and not prev_item[-1].isspace()
+            and is_id_continue(prev_item[-1])
+            and not item[0] in IDENT_JOINERS
+            and not prev_item[-1] in DIGITS
+        ):
+            yield " "
+            prev_item = " "
+
+        # if the next character expects us to add a space before it
+        if (
+            prev_item
+            and not prev_item[-1].isspace()
+            and prev_item[-1] not in SPACE_AFTER
+        ):
+            if item[0] in SPACE_BEFORE:
+                yield " "
+                prev_item = " "
+
+        # print the actual token
+        yield item
+
+        if item and item[-1] in SPACE_AFTER:
+            yield " "
+
+        # store the previous item as we continue
+        prev_item = item
     yield "\n"
 
 
 class HCLReconstructor:
     def __init__(self, parser):
-        self._recons = Reconstructor(
-            parser,
-            {
-                "_NL": _special_symbol,
-                "_DEDENT": _special_symbol,
-                "_INDENT": _special_symbol,
-            },
-        )
+        self._recons = Reconstructor(parser)
 
     def reconstruct(self, tree):
-        return self._recons.reconstruct(tree, _postprocess_reconstruct)
+        return self._recons.reconstruct(
+            tree,
+            _postprocess_reconstruct,
+            insert_spaces=False,
+        )
 
 
 hcl2_reconstructor = HCLReconstructor(hcl2)

From 3e5e4f47cf03322b9d80be1519f5fca897653225 Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Tue, 1 Oct 2024 16:14:19 -0400
Subject: [PATCH 05/14] Fix failing tests (disable caching)

---
 hcl2/parser.py         |  4 ++++
 test/unit/test_load.py | 20 ++++++++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/hcl2/parser.py b/hcl2/parser.py
index 3b58f556..cf2fd63e 100644
--- a/hcl2/parser.py
+++ b/hcl2/parser.py
@@ -11,6 +11,10 @@
 hcl2 = Lark.open(
     "hcl2.lark",
     parser="lalr",
+    # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed:
+    #
+    #   https://github.com/lark-parser/lark/issues/1472
+    #
     # cache=str(PARSER_FILE),  # Disable/Delete file to effect changes to the grammar
     rel_to=__file__,
     propagate_positions=True,
diff --git a/test/unit/test_load.py b/test/unit/test_load.py
index d21ca02f..762412b4 100644
--- a/test/unit/test_load.py
+++ b/test/unit/test_load.py
@@ -19,15 +19,23 @@ class TestLoad(TestCase):
 
     def test_load_terraform(self):
         """Test parsing a set of hcl2 files and force recreating the parser file"""
-        # delete the parser file to force it to be recreated
-        PARSER_FILE.unlink()
+        # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed:
+        #
+        #   https://github.com/lark-parser/lark/issues/1472
+        #
+        # # delete the parser file to force it to be recreated
+        # PARSER_FILE.unlink()
         for hcl_path in HCL2_FILES:
             yield self.check_terraform, hcl_path
 
-    def test_load_terraform_from_cache(self):
-        """Test parsing a set of hcl2 files from a cached parser file"""
-        for hcl_path in HCL2_FILES:
-            yield self.check_terraform, hcl_path
+    # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed:
+    #
+    #   https://github.com/lark-parser/lark/issues/1472
+    #
+    # def test_load_terraform_from_cache(self):
+    #     """Test parsing a set of hcl2 files from a cached parser file"""
+    #     for hcl_path in HCL2_FILES:
+    #         yield self.check_terraform, hcl_path
 
     def check_terraform(self, hcl_path_str: str):
         """Loads a single hcl2 file, parses it and compares with the expected json"""

From c1e68f621e43b409f19b2f4a623a96c24bee9737 Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Tue, 1 Oct 2024 17:27:14 -0400
Subject: [PATCH 06/14] Fix final spacing inconsistencies, fix failing tests

---
 hcl2/api.py         |  2 +-
 hcl2/hcl2.lark      |  6 ++++--
 hcl2/parser.py      | 51 +++++++++++++++++++++------------------------
 hcl2/transformer.py |  8 +++++--
 4 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/hcl2/api.py b/hcl2/api.py
index 17c00aa4..cb605e53 100644
--- a/hcl2/api.py
+++ b/hcl2/api.py
@@ -40,7 +40,7 @@ def parses(text: str) -> AST:
     """Load HCL2 syntax tree from a string.
     :param text: Text with hcl2 to be loaded as a dict.
     """
-    return hcl2.parse(text + "\n")
+    return hcl2.parse(text)
 
 
 def writes(ast: AST) -> str:
diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark
index 0523b87f..1f964563 100644
--- a/hcl2/hcl2.lark
+++ b/hcl2/hcl2.lark
@@ -1,6 +1,6 @@
 start : body
 body : (new_line_or_comment? (attribute | block))* new_line_or_comment?
-attribute : identifier "=" expression
+attribute : identifier EQ expression
 block : identifier (identifier | STRING_LIT)* new_line_or_comment? "{" body "}"
 new_line_and_or_comma: new_line_or_comment | "," | "," new_line_or_comment
 new_line_or_comment: ( /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ )+
@@ -50,10 +50,12 @@ int_lit : DECIMAL+
             | DECIMAL+ ("." DECIMAL+)? EXP_MARK DECIMAL+
 DECIMAL : "0".."9"
 EXP_MARK : ("e" | "E") ("+" | "-")?
+EQ : /[ \t]*=(?!=|>)/
 
 tuple : "[" (new_line_or_comment* expression new_line_or_comment* ",")* (new_line_or_comment* expression)? new_line_or_comment* "]"
 object : "{" new_line_or_comment? (object_elem (new_line_and_or_comma object_elem )* new_line_and_or_comma?)? "}"
-object_elem : (identifier | expression) ("=" | ":") expression
+object_elem : (identifier | expression) ( EQ | ":") expression
+
 
 heredoc_template : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?(?P=heredoc)/
 heredoc_template_trim : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?(?P=heredoc_trim)/
diff --git a/hcl2/parser.py b/hcl2/parser.py
index cf2fd63e..97801f98 100644
--- a/hcl2/parser.py
+++ b/hcl2/parser.py
@@ -1,7 +1,7 @@
 """A parser for HCL2 implemented using the Lark parser"""
 from pathlib import Path
 
-from lark import Lark, Token
+from lark import Lark
 from lark.reconstruct import Reconstructor
 from lark.utils import is_id_continue
 
@@ -22,7 +22,7 @@
 )
 
 SPACE_AFTER = set(',+-*/~@<>="|:')
-SPACE_BEFORE = (SPACE_AFTER - set(",:")) | set("'")
+SPACE_BEFORE = (SPACE_AFTER - set(",:=")) | set("'")
 DIGITS = set("0123456789")
 IDENT_JOINERS = set(".")
 
@@ -34,39 +34,36 @@ def _postprocess_reconstruct(items):
     """
     prev_item = ""
     for item in items:
-        # see if we need to add space after the previous identifier
+        # these rules apply if we need to add space
         if (
-            prev_item
-            and item
-            and prev_item[-1] not in SPACE_AFTER
+            prev_item  # make sure a previous item exists
+            and item  # and that the current item isn't empty
+            # if the last character of the previous item is a space, we abort
             and not prev_item[-1].isspace()
-            and is_id_continue(prev_item[-1])
-            and not item[0] in IDENT_JOINERS
+            # if the previous character was a number we don't want to break up
+            # a numeric literal, abort
             and not prev_item[-1] in DIGITS
+            # if the next item has a space at the beginning, we don't need to
+            # add one, abort
+            and not item[0].isspace()
+            # now the scenarios when we do not abort are
+            and (
+                # scenario 1, the prev token ended with an identifier character
+                # and the next character is not an "IDENT_JOINER" character
+                (is_id_continue(prev_item[-1]) and not item[0] in IDENT_JOINERS)
+                # scenario 2, the prev token ended with a character that should
+                # be followed by a space
+                or (prev_item[-1] in SPACE_AFTER)
+                # scenario 3, the next token begins with a character that needs
+                # a space preceeding it
+                or (item[0] in SPACE_BEFORE)
+            )
         ):
             yield " "
-            prev_item = " "
 
-        # if the next character expects us to add a space before it
-        if (
-            prev_item
-            and not prev_item[-1].isspace()
-            and prev_item[-1] not in SPACE_AFTER
-        ):
-            if item[0] in SPACE_BEFORE:
-                yield " "
-                prev_item = " "
-
-        # print the actual token
+        # print the actual token, and store it for the next iteraction
         yield item
-
-        if item and item[-1] in SPACE_AFTER:
-            yield " "
-
-        # store the previous item as we continue
         prev_item = item
-    yield "\n"
-
 
 class HCLReconstructor:
     def __init__(self, parser):
diff --git a/hcl2/transformer.py b/hcl2/transformer.py
index ddc7269f..05e5cd07 100644
--- a/hcl2/transformer.py
+++ b/hcl2/transformer.py
@@ -90,10 +90,14 @@ def tuple(self, args: List) -> List:
         return [self.to_string_dollar(arg) for arg in self.strip_new_line_tokens(args)]
 
     def object_elem(self, args: List) -> Dict:
+        print(args)
         # This returns a dict with a single key/value pair to make it easier to merge these
         # into a bigger dict that is returned by the "object" function
         key = self.strip_quotes(args[0])
-        value = self.to_string_dollar(args[1])
+        if len(args) == 3:
+            value = self.to_string_dollar(args[2])
+        else:
+            value = self.to_string_dollar(args[1])
 
         return {key: value}
 
@@ -148,7 +152,7 @@ def attribute(self, args: List) -> Attribute:
         key = str(args[0])
         if key.startswith('"') and key.endswith('"'):
             key = key[1:-1]
-        value = self.to_string_dollar(args[1])
+        value = self.to_string_dollar(args[2])
         return Attribute(key, value)
 
     def conditional(self, args: List) -> str:

From 42b55031c560096f7a58ca0b57c361fc96e5a7d5 Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Wed, 2 Oct 2024 14:53:02 -0400
Subject: [PATCH 07/14] Fix more whitespace handling issues, add extensive
 testing

---
 hcl2/parser.py                |  93 +++++++++++++++++++++++++-----
 test/unit/test_reconstruct.py | 105 ++++++++++++++++++++++++++++++++++
 2 files changed, 185 insertions(+), 13 deletions(-)
 create mode 100644 test/unit/test_reconstruct.py

diff --git a/hcl2/parser.py b/hcl2/parser.py
index 97801f98..9194f5d6 100644
--- a/hcl2/parser.py
+++ b/hcl2/parser.py
@@ -21,10 +21,34 @@
     maybe_placeholders=False,  # Needed for reconstruction
 )
 
-SPACE_AFTER = set(',+-*/~@<>="|:')
-SPACE_BEFORE = (SPACE_AFTER - set(",:=")) | set("'")
+CHAR_SPACE_AFTER = set(',~@<>="|?)]:')
+CHAR_SPACE_BEFORE = (CHAR_SPACE_AFTER - set(",=")) | set("'")
+KEYWORDS_SPACE_AFTER = [
+    "if",
+    "in",
+    "for",
+    "for_each",
+    "==",
+    "!=",
+    "<",
+    ">",
+    "<=",
+    ">=",
+    "-",
+    "*",
+    "/",
+    "%",
+    "&&",
+    "||",
+    "+",
+]
+KEYWORDS_SPACE_BEFORE = KEYWORDS_SPACE_AFTER
 DIGITS = set("0123456789")
-IDENT_JOINERS = set(".")
+NEVER_SPACE_AFTER = set("[(")
+NEVER_SPACE_BEFORE = set("]),.")
+NEVER_COMMA_BEFORE = set("])}")
+# characters that are OK to come right after an identifier with no space between
+IDENT_NO_SPACE = set("()[]")
 
 def _postprocess_reconstruct(items):
     """
@@ -34,37 +58,80 @@ def _postprocess_reconstruct(items):
     """
     prev_item = ""
     for item in items:
-        # these rules apply if we need to add space
+        # first, handle any deferred tokens
+        if type(prev_item) == tuple and prev_item[0] == "_deferred":
+            prev_item = prev_item[1]
+
+            # if the deferred token was a comma, see if we're ending a block
+            if prev_item == ",":
+                if item[0] not in NEVER_COMMA_BEFORE:
+                    yield prev_item
+            else:
+                yield prev_item
+
+        # these rules apply if we need to add space prior to the current token
         if (
             prev_item  # make sure a previous item exists
             and item  # and that the current item isn't empty
             # if the last character of the previous item is a space, we abort
             and not prev_item[-1].isspace()
-            # if the previous character was a number we don't want to break up
-            # a numeric literal, abort
-            and not prev_item[-1] in DIGITS
+            # if the previous character was a number and the next character is
+            # a number. we don't want to break up a numeric literal, abort
+            and not (prev_item[-1] in DIGITS and item[0] in DIGITS)
             # if the next item has a space at the beginning, we don't need to
             # add one, abort
             and not item[0].isspace()
+            # if the previous character should not have a space after it, abort
+            and not prev_item[-1] in NEVER_SPACE_AFTER
+            # if the next character should not have a space before it, abort
+            and not item[0] in NEVER_SPACE_BEFORE
+            # double colons do not get space as they're part of a call to a
+            # namespaced function
+            and not (item == "::" or prev_item == "::")
+            # if both the last character and the next character are
+            # IDENT_NO_SPACE characters, we don't want a space between them either
+            and not (prev_item[-1] in IDENT_NO_SPACE and item[0] in IDENT_NO_SPACE)
             # now the scenarios when we do not abort are
             and (
                 # scenario 1, the prev token ended with an identifier character
-                # and the next character is not an "IDENT_JOINER" character
-                (is_id_continue(prev_item[-1]) and not item[0] in IDENT_JOINERS)
+                # and the next character is not an "IDENT_NO_SPACE" character
+                (is_id_continue(prev_item[-1]) and not item[0] in IDENT_NO_SPACE)
                 # scenario 2, the prev token ended with a character that should
                 # be followed by a space
-                or (prev_item[-1] in SPACE_AFTER)
+                or (
+                    prev_item[-1] in CHAR_SPACE_AFTER
+                    or prev_item in KEYWORDS_SPACE_AFTER
+                )
                 # scenario 3, the next token begins with a character that needs
                 # a space preceeding it
-                or (item[0] in SPACE_BEFORE)
+                or (item[0] in CHAR_SPACE_BEFORE or item in KEYWORDS_SPACE_BEFORE)
+                # scenario 4, the previous token was a block opening brace and
+                # the next token is not a closing brace (so the block is on one
+                # line and not empty)
+                or (prev_item[-1] == "{" and item[0] != "}")
             )
         ):
             yield " "
 
-        # print the actual token, and store it for the next iteraction
-        yield item
+        # in some cases, we may want to defer printing the next token
+        defer_item = False
+
+        # prevent the inclusion of extra commas if they are not intended
+        if item[0] == ",":
+            item = ("_deferred", item)
+            defer_item = True
+
+        # print the actual token
+        if not defer_item:
+            yield item
+
+        # store the previous item for the next token
         prev_item = item
 
+    # if the last token was deferred, print it before continuing
+    if type(prev_item) == tuple and prev_item[0] == "_deferred":
+        yield prev_item[1]
+
 class HCLReconstructor:
     def __init__(self, parser):
         self._recons = Reconstructor(parser)
diff --git a/test/unit/test_reconstruct.py b/test/unit/test_reconstruct.py
new file mode 100644
index 00000000..f03f6aa1
--- /dev/null
+++ b/test/unit/test_reconstruct.py
@@ -0,0 +1,105 @@
+""" Test reconstructing hcl files"""
+
+import json
+from pathlib import Path
+from unittest import TestCase
+
+import hcl2
+
+
+HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers"
+HCL2_DIR = HELPERS_DIR / "terraform-config"
+HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()]
+JSON_DIR = HELPERS_DIR / "terraform-config-json"
+
+
+class TestReconstruct(TestCase):
+    """Test reconstructing a variety of hcl files"""
+
+    # print any differences fully to the console
+    maxDiff = None
+
+    def test_write_terraform(self):
+        """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure"""
+        for hcl_path in HCL2_FILES:
+            yield self.check_terraform, hcl_path
+
+    def test_write_terraform_exact(self):
+        """Test reconstructing a set of hcl2 files, to make sure they reconstruct exactly the same, including whitespace"""
+
+        # the reconstruction process is not precise, so some files do not
+        # reconstruct their whitespace exactly the same, but they are
+        # syntactically equivalent. This list is a target for further
+        # improvements to the whitespace handling of the reconstruction
+        # algorithm.
+        INEXACT_FILES = [
+            # the reconstructor loses commas on the last element in an array,
+            # even if they're in the input file
+            "iam.tf",
+            "variables.tf",
+            # the reconstructor doesn't preserve indentation within comments
+            # perfectly
+            "multiline_expressions.tf",
+            # the reconstructor doesn't preserve the line that a ternary is
+            # broken on.
+            "route_table.tf",
+        ]
+
+        for hcl_path in HCL2_FILES:
+            if hcl_path not in INEXACT_FILES:
+                yield self.check_whitespace, hcl_path
+
+    def check_terraform(self, hcl_path_str: str):
+        """Loads a single hcl2 file, parses it, reconstructs it, parses the reconstructed file, and compares with the expected json"""
+        hcl_path = (HCL2_DIR / hcl_path_str).absolute()
+        json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json")
+        with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file:
+            hcl_file_content = hcl_file.read()
+            try:
+                hcl_ast = hcl2.parses(hcl_file_content)
+            except Exception as exc:
+                assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}"
+
+            try:
+                hcl_reconstructed = hcl2.writes(hcl_ast)
+            except Exception as exc:
+                assert (
+                    False
+                ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}"
+
+            try:
+                hcl2_dict = hcl2.loads(hcl_reconstructed)
+            except Exception as exc:
+                assert (
+                    False
+                ), f"failed to tokenize terraform in file reconstructed from `{hcl_path_str}`: {exc}"
+
+            json_dict = json.load(json_file)
+            self.assertDictEqual(
+                hcl2_dict,
+                json_dict,
+                f"failed comparing {hcl_path_str} with reconstructed version",
+            )
+
+    def check_whitespace(self, hcl_path_str: str):
+        """Tests that the reconstructed file matches the original file exactly."""
+        hcl_path = (HCL2_DIR / hcl_path_str).absolute()
+        with hcl_path.open("r") as hcl_file:
+            hcl_file_content = hcl_file.read()
+            try:
+                hcl_ast = hcl2.parses(hcl_file_content)
+            except Exception as exc:
+                assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}"
+
+            try:
+                hcl_reconstructed = hcl2.writes(hcl_ast)
+            except Exception as exc:
+                assert (
+                    False
+                ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}"
+
+            self.assertMultiLineEqual(
+                hcl_reconstructed,
+                hcl_file_content,
+                f"file {hcl_path_str} does not match its reconstructed version exactly. this is usually whitespace related.",
+            )

From f746a256957f0c973dea824aaca5ec04d4ad70a1 Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Wed, 2 Oct 2024 14:58:03 -0400
Subject: [PATCH 08/14] Clean up a few things, move reconstructor to its own
 file

---
 hcl2/api.py           |   3 +-
 hcl2/parser.py        | 128 +----------------------------------------
 hcl2/reconstructor.py | 131 ++++++++++++++++++++++++++++++++++++++++++
 hcl2/transformer.py   |   1 -
 4 files changed, 134 insertions(+), 129 deletions(-)
 create mode 100644 hcl2/reconstructor.py

diff --git a/hcl2/api.py b/hcl2/api.py
index cb605e53..e41f1681 100644
--- a/hcl2/api.py
+++ b/hcl2/api.py
@@ -2,7 +2,8 @@
 from typing import TextIO
 
 from lark.tree import Tree as AST
-from hcl2.parser import hcl2, hcl2_reconstructor
+from hcl2.parser import hcl2
+from hcl2.reconstructor import hcl2_reconstructor
 from hcl2.transformer import DictTransformer
 
 
diff --git a/hcl2/parser.py b/hcl2/parser.py
index 9194f5d6..8da9f9b6 100644
--- a/hcl2/parser.py
+++ b/hcl2/parser.py
@@ -2,8 +2,7 @@
 from pathlib import Path
 
 from lark import Lark
-from lark.reconstruct import Reconstructor
-from lark.utils import is_id_continue
+
 
 PARSER_FILE = Path(__file__).absolute().resolve().parent / ".lark_cache.bin"
 
@@ -20,128 +19,3 @@
     propagate_positions=True,
     maybe_placeholders=False,  # Needed for reconstruction
 )
-
-CHAR_SPACE_AFTER = set(',~@<>="|?)]:')
-CHAR_SPACE_BEFORE = (CHAR_SPACE_AFTER - set(",=")) | set("'")
-KEYWORDS_SPACE_AFTER = [
-    "if",
-    "in",
-    "for",
-    "for_each",
-    "==",
-    "!=",
-    "<",
-    ">",
-    "<=",
-    ">=",
-    "-",
-    "*",
-    "/",
-    "%",
-    "&&",
-    "||",
-    "+",
-]
-KEYWORDS_SPACE_BEFORE = KEYWORDS_SPACE_AFTER
-DIGITS = set("0123456789")
-NEVER_SPACE_AFTER = set("[(")
-NEVER_SPACE_BEFORE = set("]),.")
-NEVER_COMMA_BEFORE = set("])}")
-# characters that are OK to come right after an identifier with no space between
-IDENT_NO_SPACE = set("()[]")
-
-def _postprocess_reconstruct(items):
-    """
-    Postprocess the stream of tokens derived from the AST during reconstruction.
-
-    For HCL2, this is used exclusively for adding whitespace in the right locations.
-    """
-    prev_item = ""
-    for item in items:
-        # first, handle any deferred tokens
-        if type(prev_item) == tuple and prev_item[0] == "_deferred":
-            prev_item = prev_item[1]
-
-            # if the deferred token was a comma, see if we're ending a block
-            if prev_item == ",":
-                if item[0] not in NEVER_COMMA_BEFORE:
-                    yield prev_item
-            else:
-                yield prev_item
-
-        # these rules apply if we need to add space prior to the current token
-        if (
-            prev_item  # make sure a previous item exists
-            and item  # and that the current item isn't empty
-            # if the last character of the previous item is a space, we abort
-            and not prev_item[-1].isspace()
-            # if the previous character was a number and the next character is
-            # a number. we don't want to break up a numeric literal, abort
-            and not (prev_item[-1] in DIGITS and item[0] in DIGITS)
-            # if the next item has a space at the beginning, we don't need to
-            # add one, abort
-            and not item[0].isspace()
-            # if the previous character should not have a space after it, abort
-            and not prev_item[-1] in NEVER_SPACE_AFTER
-            # if the next character should not have a space before it, abort
-            and not item[0] in NEVER_SPACE_BEFORE
-            # double colons do not get space as they're part of a call to a
-            # namespaced function
-            and not (item == "::" or prev_item == "::")
-            # if both the last character and the next character are
-            # IDENT_NO_SPACE characters, we don't want a space between them either
-            and not (prev_item[-1] in IDENT_NO_SPACE and item[0] in IDENT_NO_SPACE)
-            # now the scenarios when we do not abort are
-            and (
-                # scenario 1, the prev token ended with an identifier character
-                # and the next character is not an "IDENT_NO_SPACE" character
-                (is_id_continue(prev_item[-1]) and not item[0] in IDENT_NO_SPACE)
-                # scenario 2, the prev token ended with a character that should
-                # be followed by a space
-                or (
-                    prev_item[-1] in CHAR_SPACE_AFTER
-                    or prev_item in KEYWORDS_SPACE_AFTER
-                )
-                # scenario 3, the next token begins with a character that needs
-                # a space preceeding it
-                or (item[0] in CHAR_SPACE_BEFORE or item in KEYWORDS_SPACE_BEFORE)
-                # scenario 4, the previous token was a block opening brace and
-                # the next token is not a closing brace (so the block is on one
-                # line and not empty)
-                or (prev_item[-1] == "{" and item[0] != "}")
-            )
-        ):
-            yield " "
-
-        # in some cases, we may want to defer printing the next token
-        defer_item = False
-
-        # prevent the inclusion of extra commas if they are not intended
-        if item[0] == ",":
-            item = ("_deferred", item)
-            defer_item = True
-
-        # print the actual token
-        if not defer_item:
-            yield item
-
-        # store the previous item for the next token
-        prev_item = item
-
-    # if the last token was deferred, print it before continuing
-    if type(prev_item) == tuple and prev_item[0] == "_deferred":
-        yield prev_item[1]
-
-class HCLReconstructor:
-    def __init__(self, parser):
-        self._recons = Reconstructor(parser)
-
-    def reconstruct(self, tree):
-        return self._recons.reconstruct(
-            tree,
-            _postprocess_reconstruct,
-            insert_spaces=False,
-        )
-
-
-hcl2_reconstructor = HCLReconstructor(hcl2)
diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py
new file mode 100644
index 00000000..aabcbb1a
--- /dev/null
+++ b/hcl2/reconstructor.py
@@ -0,0 +1,131 @@
+from lark.reconstruct import Reconstructor
+from lark.utils import is_id_continue
+
+from hcl2.parser import hcl2
+
+CHAR_SPACE_AFTER = set(',~@<>="|?)]:')
+CHAR_SPACE_BEFORE = (CHAR_SPACE_AFTER - set(",=")) | set("'")
+KEYWORDS_SPACE_AFTER = [
+    "if",
+    "in",
+    "for",
+    "for_each",
+    "==",
+    "!=",
+    "<",
+    ">",
+    "<=",
+    ">=",
+    "-",
+    "*",
+    "/",
+    "%",
+    "&&",
+    "||",
+    "+",
+]
+KEYWORDS_SPACE_BEFORE = KEYWORDS_SPACE_AFTER
+DIGITS = set("0123456789")
+NEVER_SPACE_AFTER = set("[(")
+NEVER_SPACE_BEFORE = set("]),.")
+NEVER_COMMA_BEFORE = set("])}")
+# characters that are OK to come right after an identifier with no space between
+IDENT_NO_SPACE = set("()[]")
+
+
+def _postprocess_reconstruct(items):
+    """
+    Postprocess the stream of tokens derived from the AST during reconstruction.
+
+    For HCL2, this is used exclusively for adding whitespace in the right locations.
+    """
+    prev_item = ""
+    for item in items:
+        # first, handle any deferred tokens
+        if type(prev_item) == tuple and prev_item[0] == "_deferred":
+            prev_item = prev_item[1]
+
+            # if the deferred token was a comma, see if we're ending a block
+            if prev_item == ",":
+                if item[0] not in NEVER_COMMA_BEFORE:
+                    yield prev_item
+            else:
+                yield prev_item
+
+        # these rules apply if we need to add space prior to the current token
+        if (
+            prev_item  # make sure a previous item exists
+            and item  # and that the current item isn't empty
+            # if the last character of the previous item is a space, we abort
+            and not prev_item[-1].isspace()
+            # if the previous character was a number and the next character is
+            # a number. we don't want to break up a numeric literal, abort
+            and not (prev_item[-1] in DIGITS and item[0] in DIGITS)
+            # if the next item has a space at the beginning, we don't need to
+            # add one, abort
+            and not item[0].isspace()
+            # if the previous character should not have a space after it, abort
+            and not prev_item[-1] in NEVER_SPACE_AFTER
+            # if the next character should not have a space before it, abort
+            and not item[0] in NEVER_SPACE_BEFORE
+            # double colons do not get space as they're part of a call to a
+            # namespaced function
+            and not (item == "::" or prev_item == "::")
+            # if both the last character and the next character are
+            # IDENT_NO_SPACE characters, we don't want a space between them either
+            and not (prev_item[-1] in IDENT_NO_SPACE and item[0] in IDENT_NO_SPACE)
+            # now the scenarios when we do not abort are
+            and (
+                # scenario 1, the prev token ended with an identifier character
+                # and the next character is not an "IDENT_NO_SPACE" character
+                (is_id_continue(prev_item[-1]) and not item[0] in IDENT_NO_SPACE)
+                # scenario 2, the prev token ended with a character that should
+                # be followed by a space
+                or (
+                    prev_item[-1] in CHAR_SPACE_AFTER
+                    or prev_item in KEYWORDS_SPACE_AFTER
+                )
+                # scenario 3, the next token begins with a character that needs
+                # a space preceeding it
+                or (item[0] in CHAR_SPACE_BEFORE or item in KEYWORDS_SPACE_BEFORE)
+                # scenario 4, the previous token was a block opening brace and
+                # the next token is not a closing brace (so the block is on one
+                # line and not empty)
+                or (prev_item[-1] == "{" and item[0] != "}")
+            )
+        ):
+            yield " "
+
+        # in some cases, we may want to defer printing the next token
+        defer_item = False
+
+        # prevent the inclusion of extra commas if they are not intended
+        if item[0] == ",":
+            item = ("_deferred", item)
+            defer_item = True
+
+        # print the actual token
+        if not defer_item:
+            yield item
+
+        # store the previous item for the next token
+        prev_item = item
+
+    # if the last token was deferred, print it before continuing
+    if type(prev_item) == tuple and prev_item[0] == "_deferred":
+        yield prev_item[1]
+
+
+class HCLReconstructor:
+    def __init__(self, parser):
+        self._recons = Reconstructor(parser)
+
+    def reconstruct(self, tree):
+        return self._recons.reconstruct(
+            tree,
+            _postprocess_reconstruct,
+            insert_spaces=False,
+        )
+
+
+hcl2_reconstructor = HCLReconstructor(hcl2)
diff --git a/hcl2/transformer.py b/hcl2/transformer.py
index 05e5cd07..866cef24 100644
--- a/hcl2/transformer.py
+++ b/hcl2/transformer.py
@@ -90,7 +90,6 @@ def tuple(self, args: List) -> List:
         return [self.to_string_dollar(arg) for arg in self.strip_new_line_tokens(args)]
 
     def object_elem(self, args: List) -> Dict:
-        print(args)
         # This returns a dict with a single key/value pair to make it easier to merge these
         # into a bigger dict that is returned by the "object" function
         key = self.strip_quotes(args[0])

From e2cbb1b349a9de286ac9bc7d808c938dbfd86fc8 Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Wed, 2 Oct 2024 15:03:33 -0400
Subject: [PATCH 09/14] Add a method to transform an AST into a dictionary
 after any modification

---
 hcl2/api.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hcl2/api.py b/hcl2/api.py
index e41f1681..dd891abf 100644
--- a/hcl2/api.py
+++ b/hcl2/api.py
@@ -44,8 +44,15 @@ def parses(text: str) -> AST:
     return hcl2.parse(text)
 
 
+def transform(ast: AST, with_meta=False) -> dict:
+    """Convert an HCL2 AST to a dictionary.
+    :param ast: HCL2 syntax tree, output from `parse` or `parses`
+    """
+    return DictTransformer(with_meta=with_meta).transform(ast)
+
+
 def writes(ast: AST) -> str:
     """Convert an HCL2 syntax tree to a string.
-    :param ast: HCL2 syntax tree.
+    :param ast: HCL2 syntax tree, output from `parse` or `parses`
     """
     return hcl2_reconstructor.reconstruct(ast)

From afa6dfc5e371656f5cc3dcde522a8959fcc03b1d Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Wed, 2 Oct 2024 15:29:15 -0400
Subject: [PATCH 10/14] Update reconstructor to build a new parser so regular
 parsing methods can retain caching benefits

---
 hcl2/__init__.py      |  2 +-
 hcl2/api.py           | 11 +++++++++--
 hcl2/parser.py        |  7 +------
 hcl2/reconstructor.py | 17 ++++++++++++++++-
 4 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/hcl2/__init__.py b/hcl2/__init__.py
index e6fff56e..f56787b7 100644
--- a/hcl2/__init__.py
+++ b/hcl2/__init__.py
@@ -5,4 +5,4 @@
 except ImportError:
     __version__ = "unknown"
 
-from .api import load, loads, parse, parses, writes, AST
+from .api import load, loads, parse, parses, transform, writes, AST
diff --git a/hcl2/api.py b/hcl2/api.py
index dd891abf..6b717d63 100644
--- a/hcl2/api.py
+++ b/hcl2/api.py
@@ -3,7 +3,6 @@
 
 from lark.tree import Tree as AST
 from hcl2.parser import hcl2
-from hcl2.reconstructor import hcl2_reconstructor
 from hcl2.transformer import DictTransformer
 
 
@@ -41,7 +40,11 @@ def parses(text: str) -> AST:
     """Load HCL2 syntax tree from a string.
     :param text: Text with hcl2 to be loaded as a dict.
     """
-    return hcl2.parse(text)
+    # defer this import until this method is called, due to the performance hit
+    # of rebuilding the grammar without cache
+    from hcl2.reconstructor import hcl2 as uncached_hcl2
+
+    return uncached_hcl2.parse(text)
 
 
 def transform(ast: AST, with_meta=False) -> dict:
@@ -55,4 +58,8 @@ def writes(ast: AST) -> str:
     """Convert an HCL2 syntax tree to a string.
     :param ast: HCL2 syntax tree, output from `parse` or `parses`
     """
+    # defer this import until this method is called, due to the performance hit
+    # of rebuilding the grammar without cache
+    from hcl2.reconstructor import hcl2_reconstructor
+
     return hcl2_reconstructor.reconstruct(ast)
diff --git a/hcl2/parser.py b/hcl2/parser.py
index 8da9f9b6..e6d810bd 100644
--- a/hcl2/parser.py
+++ b/hcl2/parser.py
@@ -10,12 +10,7 @@
 hcl2 = Lark.open(
     "hcl2.lark",
     parser="lalr",
-    # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed:
-    #
-    #   https://github.com/lark-parser/lark/issues/1472
-    #
-    # cache=str(PARSER_FILE),  # Disable/Delete file to effect changes to the grammar
+    cache=str(PARSER_FILE),  # Disable/Delete file to effect changes to the grammar
     rel_to=__file__,
     propagate_positions=True,
-    maybe_placeholders=False,  # Needed for reconstruction
 )
diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py
index aabcbb1a..45d9f4d5 100644
--- a/hcl2/reconstructor.py
+++ b/hcl2/reconstructor.py
@@ -1,7 +1,22 @@
+from lark import Lark
 from lark.reconstruct import Reconstructor
 from lark.utils import is_id_continue
 
-from hcl2.parser import hcl2
+# this is duplicated from `parser` because we need different options here for
+# the reconstructor. please make sure changes are kept in sync between the two
+# if necessary.
+hcl2 = Lark.open(
+    "hcl2.lark",
+    parser="lalr",
+    # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed:
+    #
+    #   https://github.com/lark-parser/lark/issues/1472
+    #
+    # cache=str(PARSER_FILE),  # Disable/Delete file to effect changes to the grammar
+    rel_to=__file__,
+    propagate_positions=True,
+    maybe_placeholders=False,  # Needed for reconstruction
+)
 
 CHAR_SPACE_AFTER = set(',~@<>="|?)]:')
 CHAR_SPACE_BEFORE = (CHAR_SPACE_AFTER - set(",=")) | set("'")

From d8c5def14c175e967c6fa38d371afe8db99e9541 Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Wed, 2 Oct 2024 17:01:01 -0400
Subject: [PATCH 11/14] Fix pylint warnings

---
 hcl2/api.py                   |   8 ++-
 hcl2/reconstructor.py         | 104 ++++++++++++++++++++--------------
 test/unit/test_reconstruct.py |  17 ++++--
 3 files changed, 78 insertions(+), 51 deletions(-)

diff --git a/hcl2/api.py b/hcl2/api.py
index 6b717d63..4a3ec106 100644
--- a/hcl2/api.py
+++ b/hcl2/api.py
@@ -42,7 +42,9 @@ def parses(text: str) -> AST:
     """
     # defer this import until this method is called, due to the performance hit
     # of rebuilding the grammar without cache
-    from hcl2.reconstructor import hcl2 as uncached_hcl2
+    from hcl2.reconstructor import (  # pylint: disable=import-outside-toplevel
+        hcl2 as uncached_hcl2,
+    )
 
     return uncached_hcl2.parse(text)
 
@@ -60,6 +62,8 @@ def writes(ast: AST) -> str:
     """
     # defer this import until this method is called, due to the performance hit
     # of rebuilding the grammar without cache
-    from hcl2.reconstructor import hcl2_reconstructor
+    from hcl2.reconstructor import (  # pylint: disable=import-outside-toplevel
+        hcl2_reconstructor,
+    )
 
     return hcl2_reconstructor.reconstruct(ast)
diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py
index 45d9f4d5..f6bbb9b7 100644
--- a/hcl2/reconstructor.py
+++ b/hcl2/reconstructor.py
@@ -1,3 +1,5 @@
+"""A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality"""
+
 from lark import Lark
 from lark.reconstruct import Reconstructor
 from lark.utils import is_id_continue
@@ -48,6 +50,57 @@
 IDENT_NO_SPACE = set("()[]")
 
 
+def _add_extra_space(prev_item, item):
+    # pylint: disable=too-many-boolean-expressions, too-many-return-statements
+
+    ##### the scenarios where explicitly disallow spaces: #####
+
+    # if we already have a space, don't add another
+    if prev_item[-1].isspace() or item[0].isspace():
+        return False
+
+    # none of the following should be separated by spaces:
+    # - groups of digits
+    # - namespaced::function::calls
+    # - characters within an identifier like array[0]()
+    if (
+        (prev_item[-1] in DIGITS and item[0] in DIGITS)
+        or item == "::"
+        or prev_item == "::"
+        or (prev_item[-1] in IDENT_NO_SPACE and item[0] in IDENT_NO_SPACE)
+    ):
+        return False
+
+    # specific characters are also blocklisted from having spaces
+    if prev_item[-1] in NEVER_SPACE_AFTER or item[0] in NEVER_SPACE_BEFORE:
+        return False
+
+    ##### the scenarios where we add spaces: #####
+
+    # scenario 1, the prev token ended with an identifier character
+    # and the next character is not an "IDENT_NO_SPACE" character
+    if is_id_continue(prev_item[-1]) and not item[0] in IDENT_NO_SPACE:
+        return True
+
+    # scenario 2, the prev token or the next token should be followed by a space
+    if (
+        prev_item[-1] in CHAR_SPACE_AFTER
+        or prev_item in KEYWORDS_SPACE_AFTER
+        or item[0] in CHAR_SPACE_BEFORE
+        or item in KEYWORDS_SPACE_BEFORE
+    ):
+        return True
+
+    # scenario 3, the previous token was a block opening brace and
+    # the next token is not a closing brace (so the block is on one
+    # line and not empty)
+    if prev_item[-1] == "{" and item[0] != "}":
+        return True
+
+    ##### otherwise, we don't add a space #####
+    return False
+
+
 def _postprocess_reconstruct(items):
     """
     Postprocess the stream of tokens derived from the AST during reconstruction.
@@ -57,7 +110,7 @@ def _postprocess_reconstruct(items):
     prev_item = ""
     for item in items:
         # first, handle any deferred tokens
-        if type(prev_item) == tuple and prev_item[0] == "_deferred":
+        if isinstance(prev_item, tuple) and prev_item[0] == "_deferred":
             prev_item = prev_item[1]
 
             # if the deferred token was a comma, see if we're ending a block
@@ -67,48 +120,9 @@ def _postprocess_reconstruct(items):
             else:
                 yield prev_item
 
-        # these rules apply if we need to add space prior to the current token
-        if (
-            prev_item  # make sure a previous item exists
-            and item  # and that the current item isn't empty
-            # if the last character of the previous item is a space, we abort
-            and not prev_item[-1].isspace()
-            # if the previous character was a number and the next character is
-            # a number. we don't want to break up a numeric literal, abort
-            and not (prev_item[-1] in DIGITS and item[0] in DIGITS)
-            # if the next item has a space at the beginning, we don't need to
-            # add one, abort
-            and not item[0].isspace()
-            # if the previous character should not have a space after it, abort
-            and not prev_item[-1] in NEVER_SPACE_AFTER
-            # if the next character should not have a space before it, abort
-            and not item[0] in NEVER_SPACE_BEFORE
-            # double colons do not get space as they're part of a call to a
-            # namespaced function
-            and not (item == "::" or prev_item == "::")
-            # if both the last character and the next character are
-            # IDENT_NO_SPACE characters, we don't want a space between them either
-            and not (prev_item[-1] in IDENT_NO_SPACE and item[0] in IDENT_NO_SPACE)
-            # now the scenarios when we do not abort are
-            and (
-                # scenario 1, the prev token ended with an identifier character
-                # and the next character is not an "IDENT_NO_SPACE" character
-                (is_id_continue(prev_item[-1]) and not item[0] in IDENT_NO_SPACE)
-                # scenario 2, the prev token ended with a character that should
-                # be followed by a space
-                or (
-                    prev_item[-1] in CHAR_SPACE_AFTER
-                    or prev_item in KEYWORDS_SPACE_AFTER
-                )
-                # scenario 3, the next token begins with a character that needs
-                # a space preceeding it
-                or (item[0] in CHAR_SPACE_BEFORE or item in KEYWORDS_SPACE_BEFORE)
-                # scenario 4, the previous token was a block opening brace and
-                # the next token is not a closing brace (so the block is on one
-                # line and not empty)
-                or (prev_item[-1] == "{" and item[0] != "}")
-            )
-        ):
+        # if we're between two tokens, determine if we need to add an extra space
+        # we need the previous item and the current item to exist to evaluate these rules
+        if prev_item and item and _add_extra_space(prev_item, item):
             yield " "
 
         # in some cases, we may want to defer printing the next token
@@ -127,15 +141,17 @@ def _postprocess_reconstruct(items):
         prev_item = item
 
     # if the last token was deferred, print it before continuing
-    if type(prev_item) == tuple and prev_item[0] == "_deferred":
+    if isinstance(prev_item, tuple) and prev_item[0] == "_deferred":
         yield prev_item[1]
 
 
 class HCLReconstructor:
+    """This class converts a Lark.Tree AST back into a string representing the underlying HCL code."""
     def __init__(self, parser):
         self._recons = Reconstructor(parser)
 
     def reconstruct(self, tree):
+        """Convert a Lark.Tree AST back into a string representation of HCL."""
         return self._recons.reconstruct(
             tree,
             _postprocess_reconstruct,
diff --git a/test/unit/test_reconstruct.py b/test/unit/test_reconstruct.py
index f03f6aa1..b9545def 100644
--- a/test/unit/test_reconstruct.py
+++ b/test/unit/test_reconstruct.py
@@ -25,14 +25,17 @@ def test_write_terraform(self):
             yield self.check_terraform, hcl_path
 
     def test_write_terraform_exact(self):
-        """Test reconstructing a set of hcl2 files, to make sure they reconstruct exactly the same, including whitespace"""
+        """
+        Test reconstructing a set of hcl2 files, to make sure they
+        reconstruct exactly the same, including whitespace.
+        """
 
         # the reconstruction process is not precise, so some files do not
         # reconstruct their whitespace exactly the same, but they are
         # syntactically equivalent. This list is a target for further
         # improvements to the whitespace handling of the reconstruction
         # algorithm.
-        INEXACT_FILES = [
+        inexact_files = [
             # the reconstructor loses commas on the last element in an array,
             # even if they're in the input file
             "iam.tf",
@@ -46,11 +49,14 @@ def test_write_terraform_exact(self):
         ]
 
         for hcl_path in HCL2_FILES:
-            if hcl_path not in INEXACT_FILES:
+            if hcl_path not in inexact_files:
                 yield self.check_whitespace, hcl_path
 
     def check_terraform(self, hcl_path_str: str):
-        """Loads a single hcl2 file, parses it, reconstructs it, parses the reconstructed file, and compares with the expected json"""
+        """
+        Loads a single hcl2 file, parses it, reconstructs it,
+        parses the reconstructed file, and compares with the expected json
+        """
         hcl_path = (HCL2_DIR / hcl_path_str).absolute()
         json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json")
         with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file:
@@ -101,5 +107,6 @@ def check_whitespace(self, hcl_path_str: str):
             self.assertMultiLineEqual(
                 hcl_reconstructed,
                 hcl_file_content,
-                f"file {hcl_path_str} does not match its reconstructed version exactly. this is usually whitespace related.",
+                f"file {hcl_path_str} does not match its reconstructed version \
+                    exactly. this is usually whitespace related.",
             )

From b4d59e4ad29435001bed3286f07b3ed5cea7b7cf Mon Sep 17 00:00:00 2001
From: Sam Weaver <sweaver@relaypro.com>
Date: Thu, 3 Oct 2024 15:09:10 -0400
Subject: [PATCH 12/14] Give terminals names to make the AST more readable,
 uncomment caching based tests

---
 hcl2/hcl2.lark         |  9 ++++++---
 test/unit/test_load.py | 20 ++++++--------------
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark
index 1f964563..4a4489a7 100644
--- a/hcl2/hcl2.lark
+++ b/hcl2/hcl2.lark
@@ -3,9 +3,11 @@ body : (new_line_or_comment? (attribute | block))* new_line_or_comment?
 attribute : identifier EQ expression
 block : identifier (identifier | STRING_LIT)* new_line_or_comment? "{" body "}"
 new_line_and_or_comma: new_line_or_comment | "," | "," new_line_or_comment
-new_line_or_comment: ( /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ )+
+new_line_or_comment: ( NL_OR_COMMENT )+
+NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/
 
-identifier : /[a-zA-Z_][a-zA-Z0-9_-]*/ | IN | FOR | IF | FOR_EACH
+identifier : NAME | IN | FOR | IF | FOR_EACH
+NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/
 IF : "if"
 IN : "in"
 FOR : "for"
@@ -18,8 +20,9 @@ conditional : expression "?" new_line_or_comment? expression new_line_or_comment
 ?operation : unary_op | binary_op
 !unary_op : ("-" | "!") expr_term
 binary_op : expression binary_term new_line_or_comment?
-!binary_operator : "==" | "!=" | "<" | ">" | "<=" | ">=" | "-" | "*" | "/" | "%" | "&&" | "||" | "+"
+!binary_operator : BINARY_OP
 binary_term : binary_operator new_line_or_comment? expression
+BINARY_OP : "==" | "!=" | "<" | ">" | "<=" | ">=" | "-" | "*" | "/" | "%" | "&&" | "||" | "+"
 
 expr_term : "(" new_line_or_comment? expression new_line_or_comment? ")"
             | float_lit
diff --git a/test/unit/test_load.py b/test/unit/test_load.py
index 762412b4..d21ca02f 100644
--- a/test/unit/test_load.py
+++ b/test/unit/test_load.py
@@ -19,23 +19,15 @@ class TestLoad(TestCase):
 
     def test_load_terraform(self):
         """Test parsing a set of hcl2 files and force recreating the parser file"""
-        # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed:
-        #
-        #   https://github.com/lark-parser/lark/issues/1472
-        #
-        # # delete the parser file to force it to be recreated
-        # PARSER_FILE.unlink()
+        # delete the parser file to force it to be recreated
+        PARSER_FILE.unlink()
         for hcl_path in HCL2_FILES:
             yield self.check_terraform, hcl_path
 
-    # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed:
-    #
-    #   https://github.com/lark-parser/lark/issues/1472
-    #
-    # def test_load_terraform_from_cache(self):
-    #     """Test parsing a set of hcl2 files from a cached parser file"""
-    #     for hcl_path in HCL2_FILES:
-    #         yield self.check_terraform, hcl_path
+    def test_load_terraform_from_cache(self):
+        """Test parsing a set of hcl2 files from a cached parser file"""
+        for hcl_path in HCL2_FILES:
+            yield self.check_terraform, hcl_path
 
     def check_terraform(self, hcl_path_str: str):
         """Loads a single hcl2 file, parses it and compares with the expected json"""

From 6dfde1ea2b09946022dff84e2849ea5d791a7d86 Mon Sep 17 00:00:00 2001
From: Kamil Kozik <115631738+kkozik-amplify@users.noreply.github.com>
Date: Mon, 7 Oct 2024 16:41:44 +0200
Subject: [PATCH 13/14] Add newline to hcl2.lark

---
 hcl2/hcl2.lark | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark
index 4a4489a7..d26acc1b 100644
--- a/hcl2/hcl2.lark
+++ b/hcl2/hcl2.lark
@@ -82,4 +82,4 @@ full_splat : "[*]" (get_attr | index)*
 !for_intro : "for" new_line_or_comment? identifier ("," identifier new_line_or_comment?)? new_line_or_comment? "in" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment?
 !for_cond : "if" new_line_or_comment? expression
 
-%ignore /[ \t]+/
\ No newline at end of file
+%ignore /[ \t]+/

From d3f5e9537dab6adf149722a55b94991e3850fb8c Mon Sep 17 00:00:00 2001
From: Kamil Kozik <115631738+kkozik-amplify@users.noreply.github.com>
Date: Mon, 7 Oct 2024 16:41:57 +0200
Subject: [PATCH 14/14] Add newline to .gitignore

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index aea11ed0..75af5e4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -123,4 +123,4 @@ lark_parser.py
 .lark_cache.bin
 
 # ASDF tool-versions file
-.tool-versions
\ No newline at end of file
+.tool-versions