Skip to content

Commit

Permalink
registry parser changes to support quotes, fatiando#357
Browse files Browse the repository at this point in the history
  • Loading branch information
bmcfee committed Mar 17, 2023
1 parent a965902 commit 1132182
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 14 deletions.
44 changes: 30 additions & 14 deletions pooch/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
import os
import time
import contextlib
import re
from pathlib import Path
import shlex
import shutil

import requests
Expand Down Expand Up @@ -654,23 +654,39 @@ def load_registry(self, fname):

line = line.strip()
# skip line comments
if line.startswith("#"):
if line.startswith("#") or len(line) == 0:
continue

elements = shlex.split(line)
if not len(elements) in [0, 2, 3]:
try:
# First try to split off the last token
prefix, suffix = line.rsplit(maxsplit=1)
except ValueError as exc:
raise OSError(
f"Invalid entry in Pooch registry file '{fname}': "
f"expected 2 or 3 elements in line {linenum + 1} but got "
f"{len(elements)}. Offending entry: '{line}'"
)
if elements:
file_name = elements[0]
file_checksum = elements[1]
if len(elements) == 3:
file_url = elements[2]
self.urls[file_name] = file_url
self.registry[file_name] = file_checksum.lower()
f"expected at least 2 elements in line {linenum + 1}. "
f"Offending entry: '{line}'"
) from exc

# Is suffix a url?
if re.match('^(https?|s?ftp|doi):', suffix):
file_url = suffix
try:
file_name, file_checksum = prefix.rsplit(maxsplit=1)
except ValueError as exc:
raise OSError(
f"Invalid entry in Pooch registry file '{fname}': "
f"expected at least 3 elements in line {linenum + 1}. "
f"Offending entry: '{line}'"
) from exc
else:
# Not a url, we only have hash and filename
file_name = prefix
file_checksum = suffix
file_url = None

if file_url is not None:
self.urls[file_name] = file_url
self.registry[file_name] = file_checksum.lower()

def load_registry_from_doi(self):
"""
Expand Down
13 changes: 13 additions & 0 deletions pooch/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,19 @@ def test_pooch_load_registry_with_spaces():
assert "other with spaces.txt" in pup.registry


def test_pooch_load_registry_with_quotes():
    """Check that registry entries with quotes in the file name load."""
    registry_file = os.path.join(DATA_DIR, "registry-quotes.txt")
    pup = Pooch(path="", base_url="")
    pup.load_registry(registry_file)

    # Each name must end up as a key of the registry exactly as written,
    # quote characters included.
    expected_names = (
        "foo'.txt",  # plain single quote, no spaces
        "bar'baz quux'.txt",  # single quotes combined with spaces
        '"foo bar.txt"',  # double quotes around a name with spaces
        '"foo bar2.txt"',  # double quotes, spaces, and a url
        '"foobar3.txt"',  # double quotes, no spaces, and a url
        "foo'2.txt",  # single quote, no spaces, and a url
    )
    for name in expected_names:
        assert name in pup.registry



@pytest.mark.network
def test_check_availability():
"Should correctly check availability of existing and non existing files"
Expand Down

0 comments on commit 1132182

Please sign in to comment.