Skip to content

Commit

Permalink
Fixes parsing issue with * characters
Browse files Browse the repository at this point in the history
- Quick hacky solution to solve a string quoting issue in parsing/rendering
- When our bulk conversion script is run against `aggregate`, this fix
  increases our success rate from 44% to 58%
  • Loading branch information
schuylermartin45 committed Mar 22, 2024
1 parent 7743774 commit b962512
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 2 deletions.
9 changes: 7 additions & 2 deletions conda_recipe_manager/parser/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from __future__ import annotations

import json
from typing import cast
from typing import Final, cast

from conda_recipe_manager.parser._types import (
RECIPE_MANAGER_SUB_MARKER,
Expand All @@ -17,6 +17,9 @@
from conda_recipe_manager.parser.types import TAB_AS_SPACES, MultilineVariant, NodeValue
from conda_recipe_manager.types import H, SentinelType

# Commonly used special-case strings (compared against the entire value) that must be quoted when rendered as a YAML
# string.
_TO_QUOTE_SPECIAL_CASES: Final[set[str]] = {"*"}


def str_to_stack_path(path: str) -> StrStack:
"""
Expand Down Expand Up @@ -120,8 +123,10 @@ def stringify_yaml(
# quoting all YAML strings. Although not wrong, it does not follow our common practices. Quote escaping is not
# required for multiline strings. We do not escape quotes for Jinja value statements. We make an exception for
# strings containing the NEW recipe format syntax, ${{ }}, which is valid YAML.
#
# In addition, there are a handful of special cases that need to be quoted in order to produce valid YAML.
if multiline_variant == MultilineVariant.NONE and isinstance(val, str) and not Regex.JINJA_SUB.match(val):
if "${{" not in val and ("'" in val or '"' in val):
if val in _TO_QUOTE_SPECIAL_CASES or ("${{" not in val and ("'" in val or '"' in val)):
# The PyYaml equivalent function injects newlines, hence why we abuse the JSON library to write our YAML
return json.dumps(val)
return val
Expand Down
1 change: 1 addition & 0 deletions tests/parser/test_recipe_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def test_loading_obj_in_list() -> None:
"huggingface_hub.yaml", # Contains blank lines in a multiline string
"simple-recipe_multiline_strings.yaml", # Contains multiple multiline strings, using various operators
"curl.yaml", # Complex, multi-output recipe
"gsm-amzn2-aarch64.yaml", # Regression test: Contains `- '*'` string that failed to parse
],
)
def test_round_trip(file: str) -> None:
Expand Down
50 changes: 50 additions & 0 deletions tests/test_aux_files/gsm-amzn2-aarch64.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package:
name: gsm-amzn2-aarch64
version: 1.0.13

source:
- url: https://graviton-rpms.s3.amazonaws.com/amzn2-core_2021_01_26/amzn2-core/gsm-1.0.13-11.amzn2.0.2.aarch64.rpm
sha256: 7700d84e3f08ac1d119e4e1b28a9b744ee130a283b8e69c1051da0cd77abab64
no_hoist: true
folder: binary
- url: https://graviton-rpms.s3.amazonaws.com/amzn2-core-source_2021_01_26/amzn2-core-source/gsm-1.0.13-11.amzn2.0.2.src.rpm
sha256: 6fc1395957fa593720a0b6a91898f946573c2ce2b898179f691407e8ecabe88d
no_hoist: true
folder: source

build:
number: 5
noarch: generic
missing_dso_whitelist:
- "*"

requirements:
build:
- glibc-amzn2-aarch64 ==2.26
host:
- glibc-amzn2-aarch64 ==2.26
run:
- glibc-amzn2-aarch64 ==2.26

about:
home: http://www.quut.com/gsm/
license: MIT
license_family: MIT
description: |
Contains runtime shared libraries for libgsm, an implementation of the
European GSM 06.10 provisional standard for full-rate speech transcoding, prI-
ETS 300 036, which uses RPE/LTP (residual pulse excitation/long term
prediction) coding at 13 kbit/s. GSM 06.10 compresses frames of 162 13-bit
samples (8 kHz sampling rate, i.e. a frame rate of 50 Hz) into 260 bits; for
compatibility with typical UNIX applications, our implementation turns frames
of 160 16-bit linear samples into 33-byte frames (1650 Bytes/s). The quality
of the algorithm is good enough for reliable speaker recognition; even music
often survives transcoding in recognizable form (given the bandwidth
limitations of 8 kHz sampling rate). The interfaces offered are a front end
modelled after compress(1), and a library API. Compression and decompression
run faster than realtime on most SPARCstations. The implementation has been
verified against the ETSI standard test patterns.
summary: (CDT) Shared libraries for GSM speech compressor

extras:
rpm_name: gsm

0 comments on commit b962512

Please sign in to comment.