Package the BPE vocabulary inside the imagebind package and resolve its path
through package resources instead of a path relative to the working directory.

NOTE(review): this patch was recovered from a whitespace-collapsed copy. Blank
context lines and indentation were reconstructed from the hunk header counts;
verify against the target tree before applying.
NOTE(review): pkg_resources is deprecated by setuptools in favour of
importlib.resources; consider migrating in a follow-up change.

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..629bd764
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include imagebind/bpe/*
diff --git a/bpe/bpe_simple_vocab_16e6.txt.gz b/imagebind/bpe/bpe_simple_vocab_16e6.txt.gz
similarity index 100%
rename from bpe/bpe_simple_vocab_16e6.txt.gz
rename to imagebind/bpe/bpe_simple_vocab_16e6.txt.gz
diff --git a/imagebind/data.py b/imagebind/data.py
index 70a7a03b..d9ec534b 100644
--- a/imagebind/data.py
+++ b/imagebind/data.py
@@ -7,6 +7,7 @@
 
 import logging
 import math
+import pkg_resources
 
 import torch
 import torch.nn as nn
@@ -22,6 +23,7 @@
 
 DEFAULT_AUDIO_FRAME_SHIFT_MS = 10  # in milliseconds
 
+BPE_PACKAGE = "imagebind"
 BPE_PATH = "bpe/bpe_simple_vocab_16e6.txt.gz"
 
 
@@ -105,7 +107,8 @@ def load_and_transform_vision_data(image_paths, device):
 def load_and_transform_text(text, device):
     if text is None:
         return None
-    tokenizer = SimpleTokenizer(bpe_path=BPE_PATH)
+    bpe_path = pkg_resources.resource_filename(BPE_PACKAGE, BPE_PATH)
+    tokenizer = SimpleTokenizer(bpe_path=bpe_path)
     tokens = [tokenizer(t).unsqueeze(0).to(device) for t in text]
     tokens = torch.cat(tokens, dim=0)
     return tokens
diff --git a/setup.py b/setup.py
index 0706a923..1e4865ba 100644
--- a/setup.py
+++ b/setup.py
@@ -7,6 +7,7 @@
     name='imagebind',
     version='0.1.0',
     packages=find_packages(),
+    include_package_data=True,
     description='A brief description of the package',
     long_description=open('README.md').read(),
     long_description_content_type="text/markdown",
@@ -17,4 +18,4 @@
     ],
     install_requires=required,
     dependency_links=['https://download.pytorch.org/whl/cu113'],
-)
\ No newline at end of file
+)