-
-
Notifications
You must be signed in to change notification settings - Fork 16
[FEATURE]: Implement Canonical Serialization for Blocks and Transactions #67
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 27 commits
0153620
2359fe4
1319419
2e8efb5
90a3194
6625e38
423c4dd
48703eb
50afef2
98d882c
9091307
e2a0cd5
464e951
e91e497
00ed340
08533b5
1f7eddc
e90dd22
f674b2a
8aaee28
7421205
5b9fb54
9033681
ecb7031
128f2c0
ec74fae
c552316
504a9e4
785cedd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,16 @@ | ||
| import time | ||
| import hashlib | ||
| from typing import List, Optional | ||
| from typing import Optional # <-- Removed 'List' as requested | ||
|
|
||
| from .transaction import Transaction | ||
| from .serialization import canonical_json_hash | ||
| from .serialization import canonical_json_hash, canonical_json_bytes | ||
|
|
||
|
|
||
| def _sha256(data: str) -> str: | ||
| return hashlib.sha256(data.encode()).hexdigest() | ||
|
|
||
|
|
||
| def _calculate_merkle_root(transactions: List[Transaction]) -> Optional[str]: | ||
| # <-- Updated 'List' to built-in 'list' | ||
| def _calculate_merkle_root(transactions: list[Transaction]) -> Optional[str]: | ||
| if not transactions: | ||
| return None | ||
|
|
||
|
|
@@ -32,27 +34,27 @@ def _calculate_merkle_root(transactions: List[Transaction]) -> Optional[str]: | |
|
|
||
| return tx_hashes[0] | ||
|
|
||
|
|
||
| class Block: | ||
| def __init__( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like that you are freezing the |
||
| self, | ||
| index: int, | ||
| previous_hash: str, | ||
| transactions: Optional[List[Transaction]] = None, | ||
| transactions: Optional[list[Transaction]] = None, # <-- Updated to built-in 'list' | ||
| timestamp: Optional[float] = None, | ||
| difficulty: Optional[int] = None, | ||
| miner: Optional[str] = None | ||
| ): | ||
| self.index = index | ||
| self.previous_hash = previous_hash | ||
| self.transactions: List[Transaction] = transactions or [] | ||
|
|
||
| # Freeze transactions into an immutable tuple to prevent header/body mismatch | ||
| self.transactions = tuple(transactions) if transactions else () | ||
| self.miner = miner | ||
| # Deterministic timestamp (ms) | ||
| self.timestamp: int = ( | ||
| round(time.time() * 1000) | ||
| if timestamp is None | ||
| else int(timestamp) | ||
| ) | ||
|
|
||
| self.difficulty: Optional[int] = difficulty | ||
| self.nonce: int = 0 | ||
| self.hash: Optional[str] = None | ||
|
|
@@ -64,15 +66,19 @@ def __init__( | |
| # HEADER (used for mining) | ||
| # ------------------------- | ||
| def to_header_dict(self): | ||
| return { | ||
| header = { | ||
| "index": self.index, | ||
| "previous_hash": self.previous_hash, | ||
| "merkle_root": self.merkle_root, | ||
| "timestamp": self.timestamp, | ||
| "difficulty": self.difficulty, | ||
| "nonce": self.nonce, | ||
| } | ||
|
|
||
| # Include miner in header only when present (optional field) <-- Reworded comment | ||
| if self.miner is not None: | ||
| header["miner"] = self.miner | ||
| return header | ||
|
|
||
| # ------------------------- | ||
| # BODY (transactions only) | ||
| # ------------------------- | ||
|
|
@@ -87,11 +93,10 @@ def to_body_dict(self): | |
| # FULL BLOCK | ||
| # ------------------------- | ||
| def to_dict(self): | ||
| return { | ||
| **self.to_header_dict(), | ||
| **self.to_body_dict(), | ||
| "hash": self.hash, | ||
| } | ||
| data = self.to_header_dict() | ||
| data.update(self.to_body_dict()) # Reuses existing serialization logic | ||
| data["hash"] = self.hash | ||
| return data | ||
|
coderabbitai[bot] marked this conversation as resolved.
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| # ------------------------- | ||
| # HASH CALCULATION | ||
|
|
@@ -105,15 +110,43 @@ def from_dict(cls, payload: dict): | |
| Transaction.from_dict(tx_payload) | ||
| for tx_payload in payload.get("transactions", []) | ||
| ] | ||
|
|
||
| # Safely extract and cast difficulty if it exists | ||
| raw_diff = payload.get("difficulty") | ||
| parsed_diff = int(raw_diff) if raw_diff is not None else None | ||
|
|
||
| # Safely extract and cast timestamp if it exists <-- Added explicit timestamp casting | ||
| raw_ts = payload.get("timestamp") | ||
| parsed_ts = int(raw_ts) if raw_ts is not None else None | ||
|
|
||
| block = cls( | ||
| index=payload["index"], | ||
| index=int(payload["index"]), | ||
| previous_hash=payload["previous_hash"], | ||
| transactions=transactions, | ||
| timestamp=payload.get("timestamp"), | ||
| difficulty=payload.get("difficulty"), | ||
| timestamp=parsed_ts, # <-- Passed the casted timestamp | ||
| difficulty=parsed_diff, | ||
| miner=payload.get("miner"), | ||
| ) | ||
| block.nonce = payload.get("nonce", 0) | ||
| block.nonce = int(payload.get("nonce", 0)) | ||
| block.hash = payload.get("hash") | ||
| if "merkle_root" in payload: | ||
| block.merkle_root = payload["merkle_root"] | ||
|
|
||
| # Verify the block hash | ||
| expected_hash = block.compute_hash() | ||
| if block.hash is not None and block.hash != expected_hash: | ||
| raise ValueError("block hash does not match header") | ||
|
|
||
| # Recalculate and verify the Merkle root! | ||
| if "merkle_root" in payload and payload["merkle_root"] != block.merkle_root: | ||
| raise ValueError("merkle_root does not match transactions") | ||
| return block | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| @property | ||
| def canonical_payload(self) -> bytes: | ||
| """Returns the full block (header + body) as canonical bytes for networking.""" | ||
| # Sanity checks to prevent broadcasting invalid blocks | ||
| if self.hash is None: | ||
| raise ValueError("block hash is missing") | ||
| if self.hash != self.compute_hash(): | ||
| raise ValueError("block hash does not match header") | ||
|
|
||
| return canonical_json_bytes(self.to_dict()) | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,7 @@ | |
| import json | ||
| import logging | ||
|
|
||
| from .serialization import canonical_json_hash | ||
| from .serialization import canonical_json_hash, canonical_json_dumps | ||
| from .validators import is_valid_receiver | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
@@ -58,9 +58,7 @@ async def _notify_peer_connected(self, writer, error_message): | |
| async def start(self, port: int = 9000, host: str = "127.0.0.1"): | ||
| """Start listening for incoming peer connections on the given port.""" | ||
| self._port = port | ||
| self._server = await asyncio.start_server( | ||
| self._handle_incoming, host, port | ||
| ) | ||
| self._server = await asyncio.start_server(self._handle_incoming, host, port) | ||
| logger.info("Network: Listening on %s:%d", host, port) | ||
|
|
||
| async def stop(self): | ||
|
|
@@ -208,7 +206,9 @@ def _validate_block_payload(self, payload): | |
| ) | ||
|
|
||
| def _validate_message(self, message): | ||
| # FIX: Check if message is a dictionary first to prevent crashes | ||
| if not isinstance(message, dict): | ||
| logger.warning("Network: Received non-dict message") | ||
| return False | ||
| required_fields = {"type", "data"} | ||
| if not required_fields.issubset(set(message)): | ||
|
|
@@ -307,7 +307,7 @@ async def _listen_to_peer( | |
|
|
||
| async def _broadcast_raw(self, payload: dict): | ||
| """Send a JSON message to every connected peer.""" | ||
| line = (json.dumps(payload) + "\n").encode() | ||
| line = (canonical_json_dumps(payload) + "\n").encode() | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| disconnected = [] | ||
| for reader, writer in self._peers: | ||
| try: | ||
|
|
@@ -336,12 +336,19 @@ async def broadcast_transaction(self, tx): | |
| self._mark_seen("tx", payload["data"]) | ||
| await self._broadcast_raw(payload) | ||
|
|
||
| async def broadcast_block(self, block, miner=None): | ||
| async def broadcast_block(self, block): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You are doing json.loads(block.canonical_payload.decode("utf-8")).
|
||
| """Broadcast a block. Block must have miner populated.""" | ||
| logger.info("Network: Broadcasting Block #%d", block.index) | ||
| block_payload = block.to_dict() | ||
| if miner is not None: | ||
| block_payload["miner"] = miner | ||
| payload = {"type": "block", "data": block_payload} | ||
|
|
||
| # Enforce that the block is fully populated before it enters the network layer | ||
| if getattr(block, "miner", None) is None: | ||
| raise ValueError("block.miner must be populated before broadcasting") | ||
|
|
||
| payload = { | ||
| "type": "block", | ||
| "data": json.loads(block.canonical_payload.decode("utf-8")) | ||
| } | ||
|
|
||
| self._mark_seen("block", payload["data"]) | ||
| await self._broadcast_raw(payload) | ||
|
coderabbitai[bot] marked this conversation as resolved.
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| from minichain.serialization import canonical_json_hash | ||
| from minichain.transaction import Transaction | ||
| from minichain.block import Block | ||
|
|
||
| def test_raw_data_determinism(): | ||
| print("--- Testing Raw Data Determinism ---") | ||
| # Same data, different key order | ||
| data_v1 = {"amount": 100, "nonce": 1, "receiver": "Alice", "sender": "Bob"} | ||
| data_v2 = {"sender": "Bob", "receiver": "Alice", "nonce": 1, "amount": 100} | ||
|
|
||
| hash_1 = canonical_json_hash(data_v1) | ||
| hash_2 = canonical_json_hash(data_v2) | ||
|
|
||
| print(f"Hash 1: {hash_1}") | ||
| print(f"Hash 2: {hash_2}") | ||
| assert hash_1 == hash_2 | ||
| print("Success: Raw hashes match regardless of key order!\n") | ||
|
|
||
| def test_transaction_id_stability(): | ||
| print("--- Testing Transaction ID Stability ---") | ||
| # FIX: Add a fixed timestamp so tx1 and tx2 are identical | ||
| tx_params = {"sender": "Alice", "receiver": "Bob", "amount": 50, "nonce": 1, "timestamp": 123456789} | ||
|
|
||
| tx1 = Transaction(**tx_params) | ||
| tx2 = Transaction(**tx_params) | ||
|
|
||
| print(f"TX ID: {tx1.tx_id}") | ||
| assert tx1.tx_id == tx2.tx_id, "Cross-instance TX IDs must match with same timestamp" | ||
| print("✅ Success: Transaction ID is stable!\n") | ||
|
|
||
| def test_block_serialization_determinism(): | ||
| print("--- Testing Block Serialization & Cross-Instance Determinism ---") | ||
| # FIX: Use fixed timestamps for both transaction and block | ||
| tx_params = {"sender": "A", "receiver": "B", "amount": 10, "nonce": 5, "timestamp": 1000} | ||
|
|
||
| # Create two separate but identical transaction instances | ||
| tx1 = Transaction(**tx_params) | ||
| tx2 = Transaction(**tx_params) | ||
|
|
||
| # Add the miner field | ||
| block1 = Block(index=1, previous_hash="0"*64, transactions=[tx1], difficulty=2, timestamp=999999, miner="a" * 40) | ||
| block2 = Block(index=1, previous_hash="0"*64, transactions=[tx2], difficulty=2, timestamp=999999, miner="a" * 40) | ||
|
|
||
| # Pre-compute the hashes before asserting | ||
| block1.hash = block1.compute_hash() | ||
| block2.hash = block2.compute_hash() | ||
|
|
||
| assert block1.canonical_payload == block2.canonical_payload, "Identical blocks must have identical payloads" | ||
| assert block1.compute_hash() == block2.compute_hash(), "Identical blocks must have identical hashes" | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| print("✅ Success: Block serialization is cross-instance deterministic!\n") | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| def test_block_from_dict_rejects_tampered_payload(): | ||
| print("--- Testing Tamper Rejection ---") | ||
| tx = Transaction(sender="A", receiver="B", amount=10, nonce=5, timestamp=1000) | ||
| block = Block( | ||
| index=1, previous_hash="0"*64, transactions=[tx], | ||
| difficulty=2, timestamp=999999, miner="a"*40 | ||
| ) | ||
| block.hash = block.compute_hash() | ||
|
|
||
| # Test tampered Merkle Root (only one instance needed) | ||
| bad_merkle = block.to_dict() | ||
| bad_merkle["merkle_root"] = "f" * 64 | ||
| try: | ||
| Block.from_dict(bad_merkle) | ||
| raise AssertionError("Expected ValueError for tampered merkle_root") # Robust error | ||
| except ValueError: | ||
| pass | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| # Test tampered Hash | ||
| bad_hash = block.to_dict() | ||
| bad_hash["hash"] = "0" * 64 | ||
| try: | ||
| Block.from_dict(bad_hash) | ||
| raise AssertionError("Expected ValueError for tampered hash") | ||
| except ValueError: | ||
| pass | ||
|
|
||
| print("✅ Success: Tampered payloads are rejected!\n") | ||
|
|
||
| if __name__ == "__main__": | ||
| # Removed try/except so that AssertionErrors 'bubble up' to the test runner | ||
| test_raw_data_determinism() | ||
| test_transaction_id_stability() | ||
| test_block_serialization_determinism() | ||
| test_block_from_dict_rejects_tampered_payload() # <--- ADDED THIS LINE | ||
| print("🚀 ALL CANONICAL TESTS PASSED!") | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
Uh oh!
There was an error while loading. Please reload this page.