Skip to content

Commit 80a3ff8

Browse files
committed
closes #191
1 parent 8230ea3 commit 80a3ff8

File tree

3 files changed

+23
-0
lines changed

3 files changed

+23
-0
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
The `pycldf` package adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
44

55

6+
## Unreleased
7+
8+
- `Dataset.copy` now also copies all local media files listed in the dataset's `MediaTable`.
9+
10+
611
## [1.41.0] - 2025-02-15
712

813
- Added a utility function to query SQLite DBs using user-defined functions, aggregates or collations.

src/pycldf/dataset.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -957,6 +957,8 @@ def copy(self, dest: typing.Union[str, pathlib.Path], mdname: str = None) -> pat
957957
... if 'with_examples' in ds.directory.name:
958958
... ds.copy('some_directory', mdname='md.json')
959959
"""
960+
from pycldf.media import MediaTable
961+
960962
dest = pathlib.Path(dest)
961963
if not dest.exists():
962964
dest.mkdir(parents=True)
@@ -983,6 +985,12 @@ def copy(self, dest: typing.Union[str, pathlib.Path], mdname: str = None) -> pat
983985
mdpath = dest.joinpath(
984986
mdname or # noqa: W504
985987
(self.tablegroup.base.split('/')[-1] if from_url else self.tablegroup._fname.name))
988+
if 'MediaTable' in self:
989+
for f in MediaTable(self):
990+
if f.scheme == 'file' and f.local_path().exists():
991+
target = dest / f.relpath
992+
target.parent.mkdir(parents=True, exist_ok=True)
993+
shutil.copy(f.local_path(), target)
986994
if from_url:
987995
del ds.tablegroup.at_props['base'] # pragma: no cover
988996
ds.write_metadata(fname=mdpath)

tests/test_dataset.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
Generic, Wordlist, StructureDataset, Dictionary, ParallelText, Dataset, TextCorpus,
1515
GitRepository, make_column, get_modules, iter_datasets, SchemaError)
1616
from pycldf.sources import Sources
17+
from pycldf.media import MediaTable
1718

1819

1920
@pytest.fixture
@@ -925,6 +926,15 @@ def test_Dataset_copy(tmp_path):
925926
assert Dataset.from_metadata(tmp_path / 'moved' / 'md.json').validate()
926927

927928

929+
def test_Dataset_copy_with_media(tmp_path, dataset_with_media):
    """`Dataset.copy` must copy local media files along with the dataset.

    The dataset provided by the `dataset_with_media` fixture is copied into
    `tmp_path`; every file listed in the copy's media table must then be
    readable and have the same content as the file with the same id in the
    original dataset.
    """
    dataset_with_media.copy(tmp_path, mdname='md.json')
    filecontent = {f.id: f.read() for f in MediaTable(dataset_with_media)}
    ds = Dataset.from_metadata(tmp_path / 'md.json')
    copied = list(MediaTable(ds))
    # Check the count first: with an empty media table the comparison loop
    # below would pass vacuously. (The previous `assert i > 1` after the loop
    # raised a NameError in that case, because the loop variable was never
    # bound.) More than two files are required, as before.
    assert len(copied) > 2
    for f in copied:
        assert f.read() == filecontent[f.id]
936+
937+
928938
def test_Dataset_rename_column(ds):
929939
lt = ds.add_component('LanguageTable')
930940
lt.aboutUrl = URITemplate('{#ID}.md')

0 commit comments

Comments
 (0)