diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..629bd764
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include imagebind/bpe/*
diff --git a/bpe/bpe_simple_vocab_16e6.txt.gz b/imagebind/bpe/bpe_simple_vocab_16e6.txt.gz
similarity index 100%
rename from bpe/bpe_simple_vocab_16e6.txt.gz
rename to imagebind/bpe/bpe_simple_vocab_16e6.txt.gz
diff --git a/imagebind/data.py b/imagebind/data.py
index 70a7a03b..d9ec534b 100644
--- a/imagebind/data.py
+++ b/imagebind/data.py
@@ -7,6 +7,7 @@
 
 import logging
 import math
+import os
 
 import torch
 import torch.nn as nn
@@ -22,6 +23,7 @@
 
 DEFAULT_AUDIO_FRAME_SHIFT_MS = 10  # in milliseconds
 
+# Vocabulary path, relative to the directory that contains this module.
 BPE_PATH = "bpe/bpe_simple_vocab_16e6.txt.gz"
 
 
@@ -105,7 +107,10 @@ def load_and_transform_vision_data(image_paths, device):
 def load_and_transform_text(text, device):
     if text is None:
         return None
-    tokenizer = SimpleTokenizer(bpe_path=BPE_PATH)
+    # Resolve the vocabulary next to this module instead of the current working
+    # directory, without a runtime dependency on setuptools' pkg_resources.
+    bpe_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), BPE_PATH)
+    tokenizer = SimpleTokenizer(bpe_path=bpe_path)
     tokens = [tokenizer(t).unsqueeze(0).to(device) for t in text]
     tokens = torch.cat(tokens, dim=0)
     return tokens
diff --git a/setup.py b/setup.py
index 0706a923..1e4865ba 100644
--- a/setup.py
+++ b/setup.py
@@ -7,6 +7,7 @@
     name='imagebind',
     version='0.1.0',
     packages=find_packages(),
+    include_package_data=True,
     description='A brief description of the package',
     long_description=open('README.md').read(),
     long_description_content_type="text/markdown",
@@ -17,4 +18,4 @@
     ],
     install_requires=required,
     dependency_links=['https://download.pytorch.org/whl/cu113'],
-)
\ No newline at end of file
+)