Skip to content

Commit

Permalink
Fixes Issue #18 by adding a new conversion pre-processor (#39)
Browse files Browse the repository at this point in the history
* Adds pre-process phase to recipe conversion work

- This will enable us to address issues that are easily fixed by modifying the
  recipe before parsing. This should be used as a last-resort or for problems
  where the cost of implementing a fix is outweighed by how many recipes are
  afflicted by the problem.
- Simplifies some logic in `convert.py`

* Adds environ[] replacement to the pre-processor step

* Adds initial unit tests for pre-processor

* Update conda_recipe_manager/parser/recipe_parser_convert.py

Co-authored-by: Bianca Henderson <[email protected]>

* Update conda_recipe_manager/parser/recipe_parser_convert.py

Co-authored-by: Bianca Henderson <[email protected]>

---------

Co-authored-by: Bianca Henderson <[email protected]>
  • Loading branch information
schuylermartin45 and beeankha authored Apr 30, 2024
1 parent 6d75c8d commit ae34991
Show file tree
Hide file tree
Showing 6 changed files with 229 additions and 28 deletions.
89 changes: 62 additions & 27 deletions conda_recipe_manager/commands/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class ExitCode(IntEnum):
PARSE_EXCEPTION = 102
RENDER_EXCEPTION = 103
READ_EXCEPTION = 104
PRE_PROCESS_EXCEPTION = 105


@dataclass
Expand Down Expand Up @@ -74,13 +75,38 @@ def set_return_code(self) -> None:
self.code = ExitCode.RENDER_WARNINGS


def _record_unrecoverable_failure(
conversion_result: ConversionResult,
exit_code: ExitCode,
e_msg: str,
print_output: bool,
e: Optional[Exception] = None,
) -> ConversionResult:
"""
Convenience function that streamlines the process of recording an unrecoverable conversion failure.
:param conversion_result: Conversion result instance to use. This is passed into aggregate any other messages that
could be logged prior to reaching this fatal error case.
:param exit_code: Exit code to return for this error case.
:param e_msg: Error message to display, if enabled.
:param print_output: Prints the recipe to STDERR if the output file is not specified and this flag is `True`.
:param e: (Optional) Exception instance to capture, if applicable
:returns: The final `conversion_result` instance that should be returned immediately.
"""
print_err(e_msg, print_enabled=print_output)
if e is not None:
print_err(e, print_enabled=print_output)
conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
conversion_result.code = exit_code
return conversion_result


def convert_file(file_path: Path, output: Optional[Path], print_output: bool) -> ConversionResult:
"""
Converts a single recipe file to the new format, tracking results.
:param file_path: Path to the recipe file to convert
:param output: If specified, the file contents are written to this file path. Otherwise, the file is dumped to
STDOUT IF `print_output` is set to `True`.
:param print_output: Prints the recipe to STDOUT if the output file is not specified and this flag is `True`.
:param print_output: Prints the recipe to STDOUT/STDERR if the output file is not specified and this flag is `True`.
:returns: A struct containing the results of the conversion process, including debugging metadata.
"""
conversion_result = ConversionResult(
Expand All @@ -89,43 +115,52 @@ def convert_file(file_path: Path, output: Optional[Path], print_output: bool) ->

recipe_content = None
try:
with open(file_path, "r", encoding="utf-8") as file:
recipe_content = file.read()
except IOError:
pass

if recipe_content is None:
conversion_result.code = ExitCode.READ_EXCEPTION
e_msg = f"EXCEPTION: Failed to read: {file_path}"
print_err(e_msg, print_enabled=print_output)
conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
return conversion_result
recipe_content = Path(file_path).read_text(encoding="utf-8")
except Exception as e: # pylint: disable=broad-exception-caught
return _record_unrecoverable_failure(
conversion_result, ExitCode.READ_EXCEPTION, f"EXCEPTION: Failed to read: {file_path}", print_output, e
)

# Pre-process the recipe
try:
recipe_content = RecipeParserConvert.pre_process_recipe_text(recipe_content)
except Exception as e: # pylint: disable=broad-exception-caught
return _record_unrecoverable_failure(
conversion_result,
ExitCode.PRE_PROCESS_EXCEPTION,
"EXCEPTION: An exception occurred while pre-processing the recipe file",
print_output,
e,
)

# Parse the recipe
parser: RecipeParserConvert
try:
parser = RecipeParserConvert(recipe_content)
except Exception as e: # pylint: disable=broad-exception-caught
e_msg = "EXCEPTION: An exception occurred while parsing the recipe file"
print_err(e_msg, print_enabled=print_output)
print_err(e, print_enabled=print_output)
conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
conversion_result.code = ExitCode.PARSE_EXCEPTION
return conversion_result
return _record_unrecoverable_failure(
conversion_result,
ExitCode.PARSE_EXCEPTION,
"EXCEPTION: An exception occurred while parsing the recipe file",
print_output,
e,
)

# Convert the recipe
try:
conversion_result.content, conversion_result.msg_tbl = parser.render_to_new_recipe_format()
except Exception as e: # pylint: disable=broad-exception-caught
e_msg = "EXCEPTION: An exception occurred while converting to the new recipe file"
print_err(e_msg, print_enabled=print_output)
print_err(e, print_enabled=print_output)
conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
conversion_result.code = ExitCode.RENDER_EXCEPTION
return conversion_result
return _record_unrecoverable_failure(
conversion_result,
ExitCode.RENDER_EXCEPTION,
"EXCEPTION: An exception occurred while converting to the new recipe file",
print_output,
e,
)

# Print or dump the results to a file. Printing is disabled for bulk operations.
if output is None:
print_out(conversion_result.content, print_enabled=print_output)
else:
print_out(conversion_result.content, print_enabled=print_output and (output is None))
if output is not None:
print_err(
"WARNING: File is not called `recipe.yaml`.",
print_enabled=print_output and os.path.basename(output) != "recipe.yaml",
Expand Down
7 changes: 6 additions & 1 deletion conda_recipe_manager/parser/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,12 @@ class Regex:
# Pattern to detect Jinja variable names and functions
_JINJA_VAR_FUNCTION_PATTERN: Final[str] = r"[a-zA-Z_][a-zA-Z0-9_\|\'\"\(\)\, =\.\-]*"

# Jinja regular expressions
## Pre-process conversion tooling regular expressions ##
# Finds `environ[]` used by a some recipe files. Requires a whitespace character to prevent matches with
# `os.environ[]`, which is very rare.
PRE_PROCESS_ENVIRON: Final[re.Pattern[str]] = re.compile(r"\s+environ\[(\"|')(.*)(\"|')\]")

## Jinja regular expressions ##
JINJA_SUB: Final[re.Pattern[str]] = re.compile(r"{{\s*" + _JINJA_VAR_FUNCTION_PATTERN + r"\s*}}")
JINJA_FUNCTION_LOWER: Final[re.Pattern[str]] = re.compile(r"\|\s*lower")
JINJA_LINE: Final[re.Pattern[str]] = re.compile(r"({%.*%}|{#.*#})\n")
Expand Down
35 changes: 35 additions & 0 deletions conda_recipe_manager/parser/recipe_parser_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,41 @@ def _upgrade_multi_output(self, base_package_paths: list[str]) -> None:
# found at the top-level. So for consistency, we sort on that ordering.
self._sort_subtree_keys(output_path, TOP_LEVEL_KEY_SORT_ORDER)

@staticmethod
def pre_process_recipe_text(content: str) -> str:
"""
Takes the content of a recipe file and performs manipulations prior to the parsing stage. This should be
used sparingly for solving conversion issues.
Ideally the pre-processor phase is only used when:
- There is no other feasible way to solve a conversion issue.
- There is a proof-of-concept fix that would be easier to develop as a pre-processor step that could be
refactored into the parser later.
- The number of recipes afflicted by an issue does not justify the engineering effort required to handle
the issue in the parsing phase.
:param content: Recipe file contents to pre-process
:returns: Pre-processed recipe file contents
"""
# Convert the old JINJA `environ[""]` variable usage to the new `get.env("")` syntax.
# NOTE:
# - This is mostly used by Bioconda recipes and R-based-packages in the `license_file` field.
# - From our search, it looks like we never deal with more than one set of outer quotes within the brackets
replacements: list[tuple[str, str]] = []
for groups in cast(list[str], Regex.PRE_PROCESS_ENVIRON.findall(content)):
# Each match should return ["<quote char>", "<key>", "<quote_char>"]
quote_char = groups[0]
key = groups[1]
replacements.append(
(
f"environ[{quote_char}{key}{quote_char}]",
f"env.get({quote_char}{key}{quote_char})",
)
)
for old, new in replacements:
content = content.replace(old, new, 1)

return content

def render_to_new_recipe_format(self) -> tuple[str, MessageTable]:
"""
Takes the current recipe representation and renders it to the new format WITHOUT modifying the current recipe
Expand Down
20 changes: 20 additions & 0 deletions tests/parser/test_recipe_parser_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,30 @@

import pytest

from conda_recipe_manager.parser.recipe_parser_convert import RecipeParserConvert
from conda_recipe_manager.parser.types import MessageCategory
from tests.file_loading import TEST_FILES_PATH, load_file, load_recipe_convert


@pytest.mark.parametrize(
"input_file,expected_file",
[
("simple-recipe_environ.yaml", "pre-processed-simple-recipe_environ.yaml"),
("simple-recipe.yaml", "simple-recipe.yaml"), # Unchanged file
],
)
def test_pre_process_recipe_text(input_file: str, expected_file: str) -> None:
"""
Validates the pre-processor phase of the conversion process. A recipe file should come in
as a string and return a modified string, if applicable.
:param input_file: Test input recipe file name
:param expected_file: Name of the file containing the expected output of a test instance
"""
assert RecipeParserConvert.pre_process_recipe_text(load_file(f"{TEST_FILES_PATH}/{input_file}")) == load_file(
f"{TEST_FILES_PATH}/{expected_file}"
)


@pytest.mark.parametrize(
"file_base,errors,warnings",
[
Expand Down
53 changes: 53 additions & 0 deletions tests/test_aux_files/pre-processed-simple-recipe_environ.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{% set zz_non_alpha_first = 42 %}
{% set name = "types-toml" %}
{% set version = "0.10.8.6" %}

package:
name: {{ name|lower }} # [unix]

build:
number: 0
skip: true # [py<37]
is_true: true

# Comment above a top-level structure
requirements:
empty_field1:
host:
- setuptools # [unix]
- fakereq # [unix] selector with comment
empty_field2: # [unix and win] # selector with comment with comment symbol
run:
- python # not a selector
empty_field3:

about:
summary: This is a small recipe for testing
description: |
This is a PEP '561 type stub package for the toml package.
It can be used by type-checking tools like mypy, pyright,
pytype, PyCharm, etc. to check code that uses toml.
license: Apache-2.0 AND MIT

multi_level:
list_1:
- foo
# Ensure a comment in a list is supported
- bar
list_2:
- cat
- {{ env.get('baz') }}
- mat
list_3:
- ls
- sl
- cowsay

test_var_usage:
foo: {{ version }}
bar:
- baz
- {{ env.get("foobar") }}
- blah
- This {{ name }} is silly
- last
53 changes: 53 additions & 0 deletions tests/test_aux_files/simple-recipe_environ.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{% set zz_non_alpha_first = 42 %}
{% set name = "types-toml" %}
{% set version = "0.10.8.6" %}

package:
name: {{ name|lower }} # [unix]

build:
number: 0
skip: true # [py<37]
is_true: true

# Comment above a top-level structure
requirements:
empty_field1:
host:
- setuptools # [unix]
- fakereq # [unix] selector with comment
empty_field2: # [unix and win] # selector with comment with comment symbol
run:
- python # not a selector
empty_field3:

about:
summary: This is a small recipe for testing
description: |
This is a PEP '561 type stub package for the toml package.
It can be used by type-checking tools like mypy, pyright,
pytype, PyCharm, etc. to check code that uses toml.
license: Apache-2.0 AND MIT

multi_level:
list_1:
- foo
# Ensure a comment in a list is supported
- bar
list_2:
- cat
- {{ environ['baz'] }}
- mat
list_3:
- ls
- sl
- cowsay

test_var_usage:
foo: {{ version }}
bar:
- baz
- {{ environ["foobar"] }}
- blah
- This {{ name }} is silly
- last

0 comments on commit ae34991

Please sign in to comment.