Skip to content

Commit 08e9fe2

Browse files
authored
Merge pull request #2 from dClimate/evan-dev
Evan dev
2 parents da4ee9f + eddab39 commit 08e9fe2

File tree

9 files changed

+1115
-0
lines changed

9 files changed

+1115
-0
lines changed

.gitignore

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/#use-with-ide
110+
.pdm.toml
111+
112+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113+
__pypackages__/
114+
115+
# Celery stuff
116+
celerybeat-schedule
117+
celerybeat.pid
118+
119+
# SageMath parsed files
120+
*.sage.py
121+
122+
# Environments
123+
.env
124+
.venv
125+
env/
126+
venv/
127+
ENV/
128+
env.bak/
129+
venv.bak/
130+
131+
# Spyder project settings
132+
.spyderproject
133+
.spyproject
134+
135+
# Rope project settings
136+
.ropeproject
137+
138+
# mkdocs documentation
139+
/site
140+
141+
# mypy
142+
.mypy_cache/
143+
.dmypy.json
144+
dmypy.json
145+
146+
# Pyre type checker
147+
.pyre/
148+
149+
# pytype static type analyzer
150+
.pytype/
151+
152+
# Cython debug symbols
153+
cython_debug/

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# python-iamap
2+
3+
## Python implementation of a HAMT, adapted from rvagg's JavaScript version
4+
5+
See https://github.com/rvagg/iamap#readme for details on the JavaScript implementation this code is based on. This Python version is adapted from rvagg's code; there are one-to-one mappings between functions/classes in this repo and those in the JS one. As a result, the JS code can serve as a canonical guide to implementation and functionality.
6+
7+
See https://ipld.io/specs/advanced-data-layouts/hamt/spec/ for information on the concept of a HAMT and how it fits into the IPLD/IPFS ecosystem.
8+
9+
10+
## Motivation
11+
12+
dClimate uses HAMTs as key/value stores that can be distributed across multiple nodes and used without the whole data structure being loaded into memory. This is extremely useful in the context of [zarrs](https://zarr.readthedocs.io/en/stable/), where metadata mapping coordinates to chunks containing the actual data can stretch into the 10s or even 100s of MBs. Because IPFS imposes a limit on the sizes of blocks that can be transferred from peer to peer, it is not feasible to store all this metadata in a single IPFS object. Instead, a HAMT can be used to provide efficient lookups in a data structure distributed across many IPFS objects, with only the parts of the HAMT needed for the lookup ever being accessed.
13+
14+
See [ipldstore](https://github.com/dClimate/ipldstore/tree/hamt) for an example of this HAMT implementation in action.

examples/ipfs_store.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import requests
2+
import dag_cbor
3+
import cbor2
4+
from multiformats import CID
5+
6+
7+
class HamtIPFSStore:
    """Example HAMT backing store that keeps nodes as dag-cbor blocks on an IPFS node.

    All traffic goes through the node's HTTP RPC API (`dag/put` to save,
    `block/get` to load).

    Args:
        api_url: Base URL of the RPC API. Defaults to the local daemon address
            that was previously hard-coded.
        timeout: Value forwarded to `requests` as the request timeout.
            `None` (the default) waits indefinitely, matching the previous
            behaviour.
    """

    DEFAULT_API_URL = "http://localhost:5001/api/v0"

    # Class-level fallbacks so instances created without `__init__` still work.
    api_url = DEFAULT_API_URL
    timeout = None

    def __init__(self, api_url=DEFAULT_API_URL, timeout=None):
        # Strip a trailing slash so endpoint paths can be appended uniformly.
        self.api_url = api_url.rstrip("/")
        self.timeout = timeout

    def save(self, obj):
        """Encode `obj` as dag-cbor, store it via `dag/put` and return its CID.

        The block is stored unpinned (`pin=False`).

        Raises:
            requests.HTTPError: if the node answers with an error status.
        """
        encoded = dag_cbor.encode(obj)
        res = requests.post(
            f"{self.api_url}/dag/put",
            params={"store-codec": "dag-cbor", "input-codec": "dag-cbor", "pin": False},
            files={"dummy": encoded},
            timeout=self.timeout,
        )
        res.raise_for_status()
        return CID.decode(res.json()["Cid"]["/"])

    def load(self, id):
        """Fetch the block identified by `id` via `block/get` and decode it from dag-cbor.

        `id` may be a CID or a `cbor2.CBORTag` wrapping one.

        Raises:
            requests.HTTPError: if the node answers with an error status.
        """
        if isinstance(id, cbor2.CBORTag):
            # The first byte of the tag value is skipped before decoding;
            # presumably the multibase identity prefix of a CBOR link - TODO confirm.
            id = CID.decode(id.value[1:])
        res = requests.post(
            f"{self.api_url}/block/get",
            params={"arg": str(id)},
            timeout=self.timeout,
        )
        res.raise_for_status()
        return dag_cbor.decode(res.content)

    def is_equal(self, id1: CID, id2: CID):
        """Return True when both ids have the same string representation."""
        return str(id1) == str(id2)

    def is_link(self, obj: CID):
        """Return True when `obj` is a CID whose codec is dag-cbor."""
        return isinstance(obj, CID) and obj.codec.name == "dag-cbor"

py_hamt/__init__.py

Whitespace-only changes.

py_hamt/bit_utils.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import math
2+
3+
4+
def extract_bits(hash_obj: bytes, depth: int, nbits: int) -> int:
    """Extract `nbits` bits from `hash_obj`, beginning at position `depth * nbits`,
    and convert them into an unsigned integer value.

    Bit positions are counted from the most significant bit of `hash_obj[0]`.

    Args:
        hash_obj (bytes): binary hash to extract bit sequence from
        depth (int): depth of the node containing the hash
        nbits (int): number of bits consumed per level of depth

    Returns:
        int: An unsigned integer version of the bit sequence

    Raises:
        IndexError: if `hash_obj` is too short to hold the requested bits
    """
    start = depth * nbits
    start_offset = start % 8  # bits to skip at the front of the first byte

    byte_count = math.ceil((start_offset + nbits) / 8)
    byte_start = start >> 3
    # bits to drop at the tail of the last byte; always in the range 0..7
    end_offset = byte_count * 8 - nbits - start_offset

    result = 0

    for i in range(byte_count):
        local = hash_obj[byte_start + i]
        shift = 0
        local_bit_length = 8

        if i == 0:
            # the first byte loses its leading `start_offset` bits
            local_bit_length -= start_offset

        if i == byte_count - 1:
            # the last byte loses its trailing `end_offset` bits (this used to
            # subtract `start_offset`, which corrupted the mask below)
            local_bit_length -= end_offset
            shift = end_offset
            local >>= shift  # take off the trailing bits

        if local_bit_length < 8:
            # mask off the leading bits
            local &= (1 << local_bit_length) - 1

        # make room for the bits contributed by this byte, then append them
        result = (result << (8 - shift)) | local

    return result
47+
48+
49+
def set_bit(bitmap: bytes, position: int, to_set: bool) -> bytes:
    """Return `bitmap` with the bit at `position` forced to 1 (truthy `to_set`)
    or 0 (falsey `to_set`).

    Args:
        bitmap (bytes): bitmap to derive the result from
        position (int): index of the bit to change
        to_set (bool): desired state of the bit

    Returns:
        bytes: `bitmap` itself when the bit already has the requested state,
        otherwise a modified copy
    """
    # Nothing to do when the bit is already in the requested state.
    if bool(to_set) == bitmap_has(bitmap, position):
        return bitmap

    index, shift = divmod(position, 8)
    flag = 1 << shift

    # bytes are immutable, so work on a mutable copy
    updated = bytearray(bitmap)
    if to_set:
        updated[index] = bitmap[index] | flag
    else:
        # the bit is known to be 1 here, so XOR clears it
        updated[index] = bitmap[index] ^ flag
    return bytes(updated)
76+
77+
78+
def bitmap_has(bitmap: bytes, position: int) -> bool:
    """Tell whether the bit at `position` in `bitmap` is `1`.

    Within each byte, bit 0 is the least significant bit.

    Args:
        bitmap (bytes): bytes to inspect
        position (int): index of the bit to read

    Returns:
        bool: True when the addressed bit is 1, False otherwise
    """
    index, shift = divmod(position, 8)
    return bool((bitmap[index] >> shift) & 1)
94+
95+
96+
def rank(bitmap: bytes, position: int) -> int:
    """Count the `1` bits of `bitmap` that sit below `position`.

    The count tells us where in the compacted element array an element
    should live.

    Args:
        bitmap (bytes): bitmap whose set bits are counted
        position (int): exclusive upper bound of the bit indices examined

    Returns:
        int: number of set bits among indices `0 .. position - 1`
    """
    return sum(1 for bit in range(position) if bitmap_has(bitmap, bit))

0 commit comments

Comments
 (0)