Skip to content

Commit f7cc2fb

Browse files
committed
Refactor message content serialization and validation: Introduce a default method to dump message content
1 parent e872112 commit f7cc2fb

File tree

5 files changed

+88
-22
lines changed

5 files changed

+88
-22
lines changed

aleph_message/models/__init__.py

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pydantic import BaseModel, Field, validator
1010
from typing_extensions import TypeAlias
1111

12+
from ..utils import dump_content
1213
from .abstract import BaseContent
1314
from .base import Chain, HashType, MessageType
1415
from .execution.base import MachineType, Payment, PaymentType # noqa
@@ -105,7 +106,7 @@ class ForgetContent(BaseContent):
105106
"""Content of a FORGET message"""
106107

107108
hashes: List[ItemHash]
108-
aggregates: List[ItemHash] = Field(default_factory=list)
109+
aggregates: Optional[List[ItemHash]] = None
109110
reason: Optional[str] = None
110111

111112
def __hash__(self):
@@ -179,6 +180,36 @@ def check_item_content(cls, v: Optional[str], values) -> Optional[str]:
179180
)
180181
return v
181182

183+
@validator("content")
def check_content(cls, v, values):
    """Check that `content` agrees with the JSON held in `item_content`.

    The check only runs when `item_type` is `ItemType.inline`. `v` is
    serialized with `v.json()` and parsed back, then every key of that dump
    is compared with the same key of the decoded `item_content`.

    NOTE(review): the indentation of this method was not visible when it was
    reviewed. The final `raise` is assumed to belong to the "value differs"
    branch, after the list and dict checks — confirm against the repository.

    Raises:
        ValueError: if `item_content` is not valid JSON, does not decode to a
            JSON object, lacks a key present in the dumped content, or holds
            a different value for one of the keys.

    Returns:
        `v`, unchanged.
    """
    item_type = values["item_type"]
    if item_type != ItemType.inline:
        return v

    try:
        item_content = json.loads(values["item_content"])
    except JSONDecodeError as error:
        raise ValueError(
            "Field 'item_content' does not appear to be valid JSON"
        ) from error

    # Indexing by key below only makes sense on a JSON object.
    if not isinstance(item_content, dict):
        raise ValueError("Field 'item_content' does not appear to be a JSON object")

    json_dump = json.loads(v.json())
    for key, value in json_dump.items():
        # Raise ValueError rather than letting a bare KeyError escape, so the
        # failure is reported like every other mismatch in this validator.
        if key not in item_content:
            raise ValueError(
                f"Field 'content.{key}' is missing from 'item_content'"
            )

        expected = item_content[key]
        if value == expected:
            continue

        # The values differ: look for the first offending element to give a
        # more precise message. The isinstance checks on `expected` keep a
        # type mismatch from surfacing as TypeError/AttributeError.
        if isinstance(value, list) and isinstance(expected, list):
            for item in value:
                if item not in expected:
                    raise ValueError(
                        f"Field 'content.{key}' does not match 'item_content.{key}': {item} != {expected}"
                    )
        if isinstance(value, dict) and isinstance(expected, dict):
            for item in value.items():
                if item not in expected.items():
                    raise ValueError(
                        f"Field 'content.{key}' does not match 'item_content.{key}': {value} != {expected}"
                    )
        raise ValueError(
            f"Field 'content.{key}' does not match 'item_content.{key}': {value} != {expected} or type mismatch ({type(value)} != {type(expected)})"
        )
    return v
212+
182213
@validator("item_hash")
183214
def check_item_hash(cls, v: ItemHash, values) -> ItemHash:
184215
item_type = values["item_type"]
@@ -255,20 +286,6 @@ class ProgramMessage(BaseMessage):
255286
type: Literal[MessageType.program]
256287
content: ProgramContent
257288

258-
@validator("content")
259-
def check_content(cls, v, values):
260-
item_type = values["item_type"]
261-
if item_type == ItemType.inline:
262-
item_content = json.loads(values["item_content"])
263-
if v.dict(exclude_none=True) != item_content:
264-
# Print differences
265-
vdict = v.dict(exclude_none=True)
266-
for key, value in item_content.items():
267-
if vdict[key] != value:
268-
print(f"{key}: {vdict[key]} != {value}")
269-
raise ValueError("Content and item_content differ")
270-
return v
271-
272289

273290
class InstanceMessage(BaseMessage):
274291
type: Literal[MessageType.instance]
@@ -315,12 +332,12 @@ def parse_message(message_dict: Dict) -> AlephMessage:
315332

316333

317334
def add_item_content_and_hash(message_dict: Dict, inplace: bool = False):
335+
# TODO: Rework this function. Nothing validates that `message_dict` is
336+
# actually a well-formed message, which can lead to unexpected results.
318337
if not inplace:
319338
message_dict = copy(message_dict)
320339

321-
message_dict["item_content"] = json.dumps(
322-
message_dict["content"], separators=(",", ":")
323-
)
340+
message_dict["item_content"] = dump_content(message_dict["content"])
324341
message_dict["item_hash"] = sha256(
325342
message_dict["item_content"].encode()
326343
).hexdigest()

aleph_message/models/abstract.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from pydantic import BaseModel
22

3+
from aleph_message.utils import dump_content
4+
35

46
def hashable(obj):
57
"""Convert `obj` into a hashable object."""
@@ -23,3 +25,6 @@ class BaseContent(BaseModel):
2325

2426
address: str
2527
time: float
28+
29+
def json(self, *args, **kwargs):
    """Return this content as the JSON string produced by `dump_content`.

    Positional and keyword arguments are accepted but are not forwarded.
    """
    serialized = dump_content(self)
    return serialized

aleph_message/models/execution/volume.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from enum import Enum
55
from typing import Literal, Optional, Union
66

7-
from pydantic import ConstrainedInt
7+
from pydantic import ConstrainedInt, Extra
88

99
from ...utils import Gigabytes, gigabyte_to_mebibyte
1010
from ..abstract import HashableModel
@@ -18,6 +18,11 @@ class AbstractVolume(HashableModel, ABC):
1818
@abstractmethod
1919
def is_read_only(self): ...
2020

21+
class Config:
    # Model configuration: sets `extra` to `Extra.forbid`.
    # This is the only type where we really need to forbid extra fields.
    # Otherwise the pydantic_encoder will take the first allowed type instead of the correct one.
    # NOTE(review): "first allowed type" presumably refers to how a Union of
    # volume types is resolved — confirm.
    extra = Extra.forbid
25+
2126

2227
class ImmutableVolume(AbstractVolume):
2328
ref: ItemHash

aleph_message/tests/test_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,12 +271,12 @@ def test_create_new_message():
271271
"chain": "ETH",
272272
"sender": "0x101d8D16372dBf5f1614adaE95Ee5CCE61998Fc9",
273273
"type": "POST",
274-
"time": "1625652287.017",
274+
"time": 1625652287.017,
275275
"item_type": "inline",
276276
"content": {
277277
"address": "0x101d8D16372dBf5f1614adaE95Ee5CCE61998Fc9",
278278
"type": "test-message",
279-
"time": "1625652287.017",
279+
"time": 1625652287.017,
280280
"content": {
281281
"hello": "world",
282282
},

aleph_message/utils.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
from __future__ import annotations
22

3+
import json
34
import math
4-
from typing import NewType
5+
from datetime import date, datetime, time
6+
from typing import Any, Dict, NewType, Union
7+
8+
from pydantic import BaseModel
9+
from pydantic.json import pydantic_encoder
510

611
Megabytes = NewType("Megabytes", int)
712
Mebibytes = NewType("Mebibytes", int)
@@ -15,3 +20,37 @@ def gigabyte_to_mebibyte(n: Gigabytes) -> Mebibytes:
1520
mebibyte = 2**20
1621
gigabyte = 10**9
1722
return Mebibytes(math.ceil(n * gigabyte / mebibyte))
23+
24+
25+
def extended_json_encoder(obj: Any) -> Any:
    """Encode values that the standard `json` module cannot serialize itself.

    Returns:
        - for a `datetime`: the result of `obj.timestamp()`;
        - for a `date`: the result of `obj.toordinal()`;
        - for a `time`: hours, minutes, seconds and microseconds folded into
          a number of seconds;
        - for anything else: whatever `pydantic_encoder(obj)` returns.
    """
    # `datetime` must be tested before `date`: every datetime is also a date.
    if isinstance(obj, datetime):
        return obj.timestamp()

    if isinstance(obj, date):
        return obj.toordinal()

    if isinstance(obj, time):
        whole_seconds = obj.hour * 3600 + obj.minute * 60 + obj.second
        return whole_seconds + obj.microsecond / 1e6

    return pydantic_encoder(obj)
37+
38+
39+
def dump_content(obj: Union[Dict, BaseModel]) -> str:
    """Serialize message content to a compact JSON string.

    A dict is dumped without its top-level keys whose value is `None`; the
    caller's dict is not modified. A pydantic model is dumped from
    `obj.dict(exclude_none=True)`. Both use `(",", ":")` separators and
    `extended_json_encoder` as the fallback encoder.

    Raises:
        TypeError: if `obj` is neither a dict nor a `BaseModel`.
    """
    if isinstance(obj, dict):
        # Build a filtered copy instead of deleting keys from a copy.
        payload = {key: value for key, value in obj.items() if value is not None}
    elif isinstance(obj, BaseModel):
        payload = obj.dict(exclude_none=True)
    else:
        raise TypeError(f"Invalid type: `{type(obj)}`")

    return json.dumps(payload, separators=(",", ":"), default=extended_json_encoder)

0 commit comments

Comments
 (0)