Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# python-iamap

## Python implementation of a HAMT, adapted from rvagg's JavaScript version

See https://github.com/rvagg/iamap#readme for details on the JS implementation this code is based on. This Python version is adapted from rvagg's code; there are one-to-one mappings between the functions/classes in this repo and those in the JS one. As a result, the JS code can serve as a canonical guide to implementation and functionality.

See https://ipld.io/specs/advanced-data-layouts/hamt/spec/ for information on the concept of a HAMT and how it fits into the IPLD/IPFS ecosystem.


## Motivation

dClimate uses HAMTs as key/value stores that can be distributed across multiple nodes and used without the whole data structure being loaded into memory. This is extremely useful in the context of [zarrs](https://zarr.readthedocs.io/en/stable/), where metadata mapping coordinates to chunks containing the actual data can stretch into the 10s or even 100s of MBs. Because IPFS imposes a limit on the sizes of blocks that can be transferred from peer to peer, it is not feasible to store all this metadata in a single IPFS object. Instead, a HAMT can be used to provide efficient lookups in a data structure distributed across many IPFS objects, with only the parts of the HAMT needed for the lookup ever being accessed.

See [ipldstore](https://github.com/dClimate/ipldstore/tree/hamt) for an example of this HAMT implementation in action.
31 changes: 31 additions & 0 deletions examples/ipfs_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import requests
import dag_cbor
import cbor2
from multiformats import CID


class HamtIPFSStore:
    """Example HAMT node store backed by the HTTP API of an IPFS daemon.

    Objects are encoded as dag-cbor and exchanged with the daemon through its
    ``dag/put`` and ``block/get`` endpoints.

    Args:
        host: Base URL of the IPFS HTTP API. Defaults to the local daemon
            address, so ``HamtIPFSStore()`` behaves as before.
        timeout: Passed straight through to ``requests.post``. The default
            ``None`` keeps the previous behaviour of waiting indefinitely.
    """

    def __init__(self, host: str = "http://localhost:5001", timeout=None):
        # Strip a trailing slash so "http://host:5001/" and "http://host:5001"
        # produce the same endpoint URLs.
        self.api_url = host.rstrip("/") + "/api/v0"
        self.timeout = timeout

    def save(self, obj):
        """Encode `obj` as dag-cbor, store it via ``dag/put``, and return its CID.

        Raises:
            requests.HTTPError: if the daemon answers with an error status.
        """
        encoded = dag_cbor.encode(obj)
        res = requests.post(
            self.api_url + "/dag/put",
            params={"store-codec": "dag-cbor", "input-codec": "dag-cbor", "pin": False},
            files={"dummy": encoded},
            timeout=self.timeout,
        )
        res.raise_for_status()
        return CID.decode(res.json()["Cid"]["/"])

    def load(self, id):
        """Fetch the block identified by `id` and return it decoded from dag-cbor.

        `id` may be a CID or a ``cbor2.CBORTag`` wrapping one.

        Raises:
            requests.HTTPError: if the daemon answers with an error status.
        """
        if isinstance(id, cbor2.CBORTag):
            # The first byte of the tag payload is dropped before decoding;
            # presumably it is the 0x00 multibase prefix dag-cbor stores in
            # front of the CID bytes -- TODO confirm.
            id = CID.decode(id.value[1:])
        res = requests.post(
            self.api_url + "/block/get",
            params={"arg": str(id)},
            timeout=self.timeout,
        )
        res.raise_for_status()
        return dag_cbor.decode(res.content)

    def is_equal(self, id1: "CID", id2: "CID"):
        """Return True when both ids have the same string representation."""
        return str(id1) == str(id2)

    def is_link(self, obj: object):
        """Return True when `obj` is a CID whose codec is dag-cbor."""
        return isinstance(obj, CID) and obj.codec.name == "dag-cbor"
Empty file added py_hamt/__init__.py
Empty file.
110 changes: 110 additions & 0 deletions py_hamt/bit_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import math


def extract_bits(hash_obj: bytes, depth: int, nbits: int) -> int:
    """Extract `nbits` bits from `hash_obj`, beginning at bit position `depth * nbits`,
    and convert them into an unsigned integer value.

    Bits are counted from the most significant bit of `hash_obj[0]`, and earlier
    bytes end up in the more significant positions of the result.

    Args:
        hash_obj (bytes): binary hash to extract the bit sequence from
        depth (int): depth of the node in the tree; selects which `nbits`-wide
            window of the hash is read
        nbits (int): number of bits to extract per level

    Returns:
        int: An unsigned integer version of the bit sequence

    Raises:
        IndexError: if `hash_obj` is too short to contain the requested window
    """
    start = depth * nbits
    # Number of leading bits to skip in the first byte of the window.
    start_offset = start % 8

    byte_count = math.ceil((start_offset + nbits) / 8)
    byte_start = start >> 3
    # Number of trailing bits to drop from the last byte of the window (0..7).
    end_offset = byte_count * 8 - nbits - start_offset

    result = 0

    for i in range(byte_count):
        local = hash_obj[byte_start + i]
        shift = 0
        # How many bits of this byte belong to the window.
        local_bit_length = 8

        if i == 0:
            local_bit_length -= start_offset

        if i == byte_count - 1:
            # The last byte loses its trailing `end_offset` bits. Subtracting
            # `start_offset` here instead makes the mask too narrow and drops
            # significant bits whenever start_offset > end_offset.
            local_bit_length -= end_offset
            shift = end_offset
            local >>= shift

        if local_bit_length < 8:
            # Mask off the leading bits that precede the window.
            local &= (1 << local_bit_length) - 1

        # Make room for the bits contributed by this byte, then append them.
        result = (result << (8 - shift)) | local

    return result


def set_bit(bitmap: bytes, position: int, to_set: bool) -> bytes:
    """Return `bitmap` with the bit at `position` forced to `to_set`.

    Args:
        bitmap (bytes): bitmap to modify
        position (int): index of the bit to change
        to_set (bool): truthy to turn the bit on, falsey to turn it off

    Returns:
        bytes: the same `bitmap` object when the bit already has the requested
            value, otherwise a modified copy
    """
    currently_set = bitmap_has(bitmap, position)
    # Nothing to do when the bit already matches what was asked for.
    if bool(to_set) == currently_set:
        return bitmap

    byte_index, bit_index = divmod(position, 8)
    # bytes are immutable, so edit a bytearray copy and convert back.
    updated = bytearray(bitmap)
    flag = 1 << bit_index
    if to_set:
        updated[byte_index] = bitmap[byte_index] | flag
    else:
        # The bit is known to be 1 here, so XOR clears it.
        updated[byte_index] = bitmap[byte_index] ^ flag
    return bytes(updated)


def bitmap_has(
    bitmap: bytes,
    position: int,
) -> bool:
    """Report whether the bit at `position` in `bitmap` is a `1`.

    Bit 0 is the least significant bit of `bitmap[0]`.

    Args:
        bitmap (bytes): bytes to inspect
        position (int): index of the bit to read

    Returns:
        bool: True when the bit at `position` is set
    """
    byte_index, bit_index = divmod(position, 8)
    return bool(bitmap[byte_index] & (1 << bit_index))
Comment thread
eschechter marked this conversation as resolved.


def rank(bitmap: bytes, position: int) -> int:
    """Count the `1` bits in `bitmap` that come before `position`.

    The count tells us where in the compacted element array the element for
    `position` should live.

    Args:
        bitmap (bytes): bitmap to count set bits in
        position (int): bit index at which to stop counting (exclusive)

    Returns:
        int: number of set bits among positions `0 .. position - 1`
    """
    return sum(1 for bit in range(position) if bitmap_has(bitmap, bit))
Loading