Skip to content

Commit 08e9fe2

Browse files
authoredMar 7, 2023
Merge pull request #2 from dClimate/evan-dev
Evan dev
2 parents da4ee9f + eddab39 commit 08e9fe2

9 files changed

+1115
-0
lines changed
 

‎.gitignore

+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/#use-with-ide
110+
.pdm.toml
111+
112+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113+
__pypackages__/
114+
115+
# Celery stuff
116+
celerybeat-schedule
117+
celerybeat.pid
118+
119+
# SageMath parsed files
120+
*.sage.py
121+
122+
# Environments
123+
.env
124+
.venv
125+
env/
126+
venv/
127+
ENV/
128+
env.bak/
129+
venv.bak/
130+
131+
# Spyder project settings
132+
.spyderproject
133+
.spyproject
134+
135+
# Rope project settings
136+
.ropeproject
137+
138+
# mkdocs documentation
139+
/site
140+
141+
# mypy
142+
.mypy_cache/
143+
.dmypy.json
144+
dmypy.json
145+
146+
# Pyre type checker
147+
.pyre/
148+
149+
# pytype static type analyzer
150+
.pytype/
151+
152+
# Cython debug symbols
153+
cython_debug/

‎README.md

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# python-iamap
2+
3+
## Python implementation of a HAMT, adapted from rvagg's JavaScript version
4+
5+
See https://github.com/rvagg/iamap#readme for details on the JavaScript implementation this code is based on. This Python version is adapted from rvagg's code, with one-to-one mappings between the functions/classes in this repo and those in the JS one. As a result, the JS code can serve as a canonical guide to implementation and functionality.
6+
7+
See https://ipld.io/specs/advanced-data-layouts/hamt/spec/ for information on the concept of a HAMT and how it fits into the IPLD/IPFS ecosystem.
8+
9+
10+
## Motivation
11+
12+
dClimate uses HAMTs as key/value stores that can be distributed across multiple nodes and used without loading the whole data structure into memory. This is extremely useful in the context of [Zarr](https://zarr.readthedocs.io/en/stable/) datasets, where the metadata mapping coordinates to the chunks containing the actual data can stretch into the tens or even hundreds of megabytes. Because IPFS imposes a limit on the size of blocks that can be transferred from peer to peer, it is not feasible to store all this metadata in a single IPFS object. Instead, a HAMT can be used to provide efficient lookups in a data structure distributed across many IPFS objects, with only the parts of the HAMT needed for a given lookup ever being accessed.
13+
14+
See [ipldstore](https://github.com/dClimate/ipldstore/tree/hamt) for an example of this HAMT implementation in action.

‎examples/ipfs_store.py

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import requests
2+
import dag_cbor
3+
import cbor2
4+
from multiformats import CID
5+
6+
7+
class HamtIPFSStore:
    """Example HAMT backing store that keeps nodes as dag-cbor blocks in IPFS.

    Nodes are written and read through the HTTP RPC API of an IPFS daemon.
    The daemon address and the request timeout are configurable; the defaults
    reproduce the previous hard-coded behaviour (local daemon, no timeout).
    """

    # Class-level fallbacks so subclasses that define their own ``__init__``
    # without calling ``super().__init__()`` keep working unchanged.
    api_url = "http://localhost:5001"
    timeout = None

    def __init__(self, api_url="http://localhost:5001", timeout=None):
        """
        Args:
            api_url (str): base URL of the IPFS daemon's HTTP RPC API
            timeout (float | tuple | None): timeout handed to `requests.post`;
                `None` waits indefinitely
        """
        self.api_url = api_url.rstrip("/")
        self.timeout = timeout

    def save(self, obj):
        """Encode `obj` as dag-cbor, store it via `dag/put`, and return its CID.

        Raises:
            requests.HTTPError: if the daemon answers with an error status
        """
        encoded = dag_cbor.encode(obj)
        res = requests.post(
            f"{self.api_url}/api/v0/dag/put",
            params={"store-codec": "dag-cbor", "input-codec": "dag-cbor", "pin": False},
            files={"dummy": encoded},
            timeout=self.timeout,
        )
        res.raise_for_status()
        return CID.decode(res.json()["Cid"]["/"])

    def load(self, id):
        """Fetch the block identified by `id` and return its decoded dag-cbor content.

        Args:
            id: a CID, or a `cbor2.CBORTag` wrapping CID bytes

        Raises:
            requests.HTTPError: if the daemon answers with an error status
        """
        if isinstance(id, cbor2.CBORTag):
            # The first byte of the tag payload is dropped before decoding;
            # presumably it is the multibase identity prefix used for CBOR
            # links -- TODO confirm against the producer of these tags.
            id = CID.decode(id.value[1:])
        res = requests.post(
            f"{self.api_url}/api/v0/block/get",
            params={"arg": str(id)},
            timeout=self.timeout,
        )
        res.raise_for_status()
        return dag_cbor.decode(res.content)

    def is_equal(self, id1: CID, id2: CID):
        """Return whether two identifiers have the same string form."""
        return str(id1) == str(id2)

    def is_link(self, obj: CID):
        """Return whether `obj` is a CID whose codec is dag-cbor."""
        return isinstance(obj, CID) and obj.codec.name == "dag-cbor"

‎py_hamt/__init__.py

Whitespace-only changes.

‎py_hamt/bit_utils.py

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import math
2+
3+
4+
def extract_bits(hash_obj: bytes, depth: int, nbits: int) -> int:
5+
"""Extract `nbits` bits from `hash_obj`, beginning at position `depth * nbits`,
6+
and convert them into an unsigned integer value.
7+
8+
Args:
9+
hash_obj (bytes): binary hash to extract bit sequence from
10+
depth (int): depth of the node containing the hash
11+
nbits (int): bit width of hash
12+
13+
Returns:
14+
int: An unsigned integer version of the bit sequence
15+
"""
16+
start = depth * nbits
17+
start_offset = start % 8
18+
19+
byte_count = math.ceil((start_offset + nbits) / 8)
20+
byte_start = start >> 3
21+
end_offset = byte_count * 8 - nbits - start_offset
22+
23+
result = 0
24+
25+
for i in range(byte_count):
26+
local = hash_obj[byte_start + i]
27+
shift = 0
28+
local_bit_length = 8
29+
30+
if i == 0:
31+
local_bit_length -= start_offset
32+
33+
if i == byte_count - 1:
34+
local_bit_length -= start_offset
35+
shift = end_offset
36+
local >>= shift
37+
38+
if local_bit_length < 8:
39+
m = (1 << local_bit_length) - 1
40+
local &= m
41+
42+
if shift < 8:
43+
result = result << (8 - shift)
44+
result |= local
45+
46+
return result
47+
48+
49+
def set_bit(bitmap: bytes, position: int, to_set: bool) -> bytes:
    """Return `bitmap` with the bit at `position` forced to `to_set`.

    Args:
        bitmap (bytes): bitmap to modify
        position (int): index of the bit to change
        to_set (bool): truthy to turn the bit on, falsey to turn it off

    Returns:
        bytes: the original `bitmap` object when the bit already has the
            requested value, otherwise a modified copy
    """
    # Nothing to do when the bit is already in the requested state.
    if bitmap_has(bitmap, position) == bool(to_set):
        return bitmap

    byte_index, bit_index = divmod(position, 8)
    # bytes are immutable, so flip the bit on a mutable copy; the bit is known
    # to differ from the target, so toggling it both sets and clears correctly.
    updated = bytearray(bitmap)
    updated[byte_index] ^= 1 << bit_index
    return bytes(updated)
76+
77+
78+
def bitmap_has(bitmap: bytes, position: int) -> bool:
    """Report whether the bit at `position` in `bitmap` is set.

    Bit 0 is the least significant bit of `bitmap[0]`; bit 8 is the least
    significant bit of `bitmap[1]`, and so on.

    Args:
        bitmap (bytes): bytes to inspect
        position (int): index of the bit to read

    Returns:
        bool: True when the addressed bit is 1, False otherwise
    """
    byte_index, bit_index = divmod(position, 8)
    return bool(bitmap[byte_index] & (1 << bit_index))
94+
95+
96+
def rank(bitmap: bytes, position: int) -> int:
    """Count the `1` bits of `bitmap` that sit below `position`.

    The count tells where in the compacted element array the element for
    `position` should live.

    Args:
        bitmap (bytes): bitmap whose set bits are counted
        position (int): exclusive upper bound of the bit indices examined

    Returns:
        int: number of set bits at indices `0 .. position - 1`
    """
    return sum(1 for index in range(position) if bitmap_has(bitmap, index))

‎py_hamt/hamt.py

+668
Large diffs are not rendered by default.

‎pyproject.toml

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# pyproject.toml
2+
[build-system]
3+
requires = [
4+
"setuptools>=45",
5+
"wheel",
6+
"setuptools_scm>=6.2"
7+
]
8+
build-backend = "setuptools.build_meta"
9+
[tool.setuptools_scm]
10+
version_scheme = "post-release"
11+
local_scheme = "no-local-version"

‎setup.cfg

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
2+
3+
# https://packaging.python.org/en/latest/tutorials/packaging-projects/
4+
# https://setuptools.pypa.io/en/latest/userguide/declarative_config.html
5+
# https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#setup-cfg
6+
7+
8+
[metadata]
9+
name = py-hamt
10+
url = https://github.com/dClimate/py-iamap
11+
12+
[options]
13+
packages = find:
14+
python_requires = >=3.8
15+
16+
[options.extras_require]
17+
dev =
18+
flake8
19+
black
20+
21+
[options.package_data]
22+
* = py.typed, *.json
23+
24+
[options.packages.find]
25+
exclude =
26+
test
27+
examples
28+
29+
[flake8]
30+
max-line-length = 119
31+
32+
[tool.black]
33+
line-length = 119

‎test/bit_utils_test.py

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
from py_hamt import bit_utils
2+
3+
# extract_bits: the leading five bits of a single byte (depth 0)
for byte, expected in (
    (0b11111111, 0b11111),
    (0b10101010, 0b10101),
    (0b10000000, 0b10000),
    (0b00010000, 0b00010),
):
    assert bit_utils.extract_bits(bytes([byte]), 0, 5) == expected

# bitmap_has: (bitmap byte, bit position, expected truthiness)
for byte, position, expected in (
    (0b0, 0, False),
    (0b0, 1, False),
    (0b1, 0, True),
    (0b1, 1, False),
    (0b101010, 2, False),
    (0b101010, 3, True),
    (0b101010, 4, False),
    (0b101010, 5, True),
    (0b100000, 5, True),
    (0b0100000, 5, True),
    (0b00100000, 5, True),
):
    assert bool(bit_utils.bitmap_has(bytes([byte]), position)) == expected
print("bitmap_has tests passed")

# rank: (bitmap byte, position, expected number of set bits below position)
for byte, position, expected in (
    (0b111111, 0, 0),
    (0b111111, 1, 1),
    (0b111111, 2, 2),
    (0b111111, 4, 4),
    (0b111100, 2, 0),
    (0b111101, 4, 3),
    (0b111001, 4, 2),
    (0b111000, 4, 1),
    (0b110000, 4, 0),
    # new node with an empty bitmap: insertion always lands at the start
    (0b000000, 0, 0),
    (0b000000, 1, 0),
    (0b000000, 2, 0),
    (0b000000, 3, 0),
):
    assert bit_utils.rank(bytes([byte]), position) == expected
print("index tests passed")

# set_bit: (input bytes, bit position, value to set, expected bytes)
four_bytes = [0b11000010, 0b11010010, 0b01001010, 0b0000001]
for before, position, value, after in (
    ([0b0], 0, 1, [0b00000001]),
    ([0b0], 0, 1, [0b00000001]),
    ([0b0], 1, 1, [0b00000010]),
    ([0b0], 7, 1, [0b10000000]),
    ([0b11111111], 0, 1, [0b11111111]),
    ([0b11111111], 7, 1, [0b11111111]),
    ([0b01010101], 1, 1, [0b01010111]),
    ([0b01010101], 7, 1, [0b11010101]),
    ([0b11111111], 0, 0, [0b11111110]),
    ([0b11111111], 1, 0, [0b11111101]),
    ([0b11111111], 7, 0, [0b01111111]),
    ([0b0, 0b11111111], 8 + 0, 1, [0b0, 0b11111111]),
    ([0b0, 0b11111111], 8 + 7, 1, [0b0, 0b11111111]),
    ([0b0, 0b01010101], 8 + 1, 1, [0b0, 0b01010111]),
    ([0b0, 0b01010101], 8 + 7, 1, [0b0, 0b11010101]),
    ([0b0, 0b11111111], 8 + 0, 0, [0b0, 0b11111110]),
    ([0b0, 0b11111111], 8 + 1, 0, [0b0, 0b11111101]),
    ([0b0, 0b11111111], 8 + 7, 0, [0b0, 0b01111111]),
    ([0b0], 0, 0, [0b00000000]),
    ([0b0], 7, 0, [0b00000000]),
    ([0b01010101], 0, 0, [0b01010100]),
    ([0b01010101], 6, 0, [0b00010101]),
    (four_bytes, 0, 0, [0b11000010, 0b11010010, 0b01001010, 0b0000001]),
    (four_bytes, 0, 1, [0b11000011, 0b11010010, 0b01001010, 0b0000001]),
    (four_bytes, 12, 0, [0b11000010, 0b11000010, 0b01001010, 0b0000001]),
    (four_bytes, 12, 1, [0b11000010, 0b11010010, 0b01001010, 0b0000001]),
    (four_bytes, 24, 0, [0b11000010, 0b11010010, 0b01001010, 0b0000000]),
    (four_bytes, 24, 1, [0b11000010, 0b11010010, 0b01001010, 0b0000001]),
):
    assert bit_utils.set_bit(bytes(before), position, value) == bytes(after)
print("all tests passed")

0 commit comments

Comments
 (0)
Please sign in to comment.