23
23
import inspect
24
24
from pathlib import Path
25
25
from typing import Any , Dict , List , Optional
26
+ import gzip
26
27
27
28
from pydantic import BaseModel
28
29
from typing_extensions import Literal
30
+ import msgpack
31
+
32
+
33
def msgpk(cls):
    """Class decorator that adds MessagePack (+ gzip) helpers to a Pydantic model.

    The decorated class is mutated in place and returned. It must provide
    ``model_dump()``, ``model_dump_json()`` and the ``model_validate()``
    classmethod (the Pydantic v2 model API).

    Methods attached to ``cls``:
        - to_msgpack_bytes() -> bytes: gzip-compressed MessagePack payload.
        - from_msgpack_bytes(data: bytes) -> cls: inverse of ``to_msgpack_bytes``.
        - to_msgpack_dict() -> dict: ``model_dump()`` reduced to packable types.
        - from_msgpack_dict(data: dict) -> cls: validate a dict into an instance.
        - get_msgpack_size() -> int: length in bytes of ``to_msgpack_bytes()``.
        - get_compression_ratio() -> float: compressed size / JSON size.

    Args:
        cls: The model class to extend.

    Returns:
        The same class object, with the methods above attached.
    """

    def _prepare_for_serialization(obj: Any) -> Any:
        """Recursively convert ``obj`` into values MessagePack can encode.

        ``Path`` becomes ``str``; dict keys and values, list items and tuple
        items are converted recursively; sets and frozensets become lists;
        objects exposing ``model_dump`` are dumped and then converted. Any
        other value is returned unchanged.
        """
        if isinstance(obj, Path):
            return str(obj)
        if isinstance(obj, dict):
            return {
                _prepare_for_serialization(key): _prepare_for_serialization(value)
                for key, value in obj.items()
            }
        if isinstance(obj, list):
            return [_prepare_for_serialization(item) for item in obj]
        if isinstance(obj, tuple):
            return tuple(_prepare_for_serialization(item) for item in obj)
        if isinstance(obj, (set, frozenset)):
            items = [_prepare_for_serialization(item) for item in obj]
            # Set iteration order is arbitrary; sort when the items are
            # mutually comparable so the serialized output is reproducible.
            try:
                return sorted(items)
            except TypeError:
                return items
        # Model instances only: on a model *class* ``model_dump`` is an
        # unbound function and calling it without an instance would fail.
        if hasattr(obj, "model_dump") and not isinstance(obj, type):
            return _prepare_for_serialization(obj.model_dump())
        return obj

    def to_msgpack_bytes(self) -> bytes:
        """Serialize the model to gzip-compressed MessagePack bytes."""
        payload = _prepare_for_serialization(self.model_dump())
        packed = msgpack.packb(payload, use_bin_type=True)
        return gzip.compress(packed)

    @classmethod
    def from_msgpack_bytes(cls_obj, data: bytes):
        """Rebuild an instance from bytes produced by ``to_msgpack_bytes``.

        Errors raised by ``gzip.decompress``, ``msgpack.unpackb`` or
        ``model_validate`` propagate to the caller unchanged.
        """
        packed = gzip.decompress(data)
        # NOTE(review): msgpack >= 1.0 documents strict_map_key=True as the
        # default, which rejects map keys that are not str/bytes -- confirm
        # that no model field uses other key types.
        obj_dict = msgpack.unpackb(packed, raw=False)
        return cls_obj.model_validate(obj_dict)

    def to_msgpack_dict(self) -> dict:
        """Return ``model_dump()`` converted to MessagePack-compatible values."""
        return _prepare_for_serialization(self.model_dump())

    @classmethod
    def from_msgpack_dict(cls_obj, data: dict):
        """Create an instance by validating a MessagePack-compatible dict."""
        return cls_obj.model_validate(data)

    def get_msgpack_size(self) -> int:
        """Return the size in bytes of the gzip-compressed MessagePack payload."""
        return len(self.to_msgpack_bytes())

    def get_compression_ratio(self) -> float:
        """Return compressed MessagePack size divided by UTF-8 JSON size.

        Values below 1.0 mean the binary form is smaller than the JSON form.
        Returns 1.0 if the JSON encoding is empty.
        """
        json_size = len(self.model_dump_json().encode("utf-8"))
        msgpack_gzip_size = self.get_msgpack_size()
        return msgpack_gzip_size / json_size if json_size > 0 else 1.0

    # Attach the helpers to the decorated class.
    cls.to_msgpack_bytes = to_msgpack_bytes
    cls.from_msgpack_bytes = from_msgpack_bytes
    cls.to_msgpack_dict = to_msgpack_dict
    cls.from_msgpack_dict = from_msgpack_dict
    cls.get_msgpack_size = get_msgpack_size
    cls.get_compression_ratio = get_compression_ratio

    return cls
29
105
30
106
31
107
def builder (cls ):
@@ -92,26 +168,9 @@ def build(self):
92
168
93
169
94
170
@builder
171
+ @msgpk
95
172
class PyImport (BaseModel ):
96
- """Represents a Python import statement.
97
-
98
- Attributes:
99
- module (str): The name of the module being imported.
100
- name (str): The name of the imported entity (e.g., function, class).
101
- alias (Optional[str]): An optional alias for the imported entity.
102
- start_line (int): The line number where the import statement starts.
103
- end_line (int): The line number where the import statement ends.
104
- start_column (int): The starting column of the import statement.
105
- end_column (int): The ending column of the import statement.
106
-
107
- Example:
108
- - import numpy as np will be represented as:
109
- PyImport(module="numpy", name="np", alias="np", start_line=1, end_line=1, start_column=0, end_column=16)
110
- - from math import sqrt will be represented as:
111
- PyImport(module="math", name="sqrt", alias=None, start_line=2, end_line=2, start_column=0, end_column=20
112
- - from os.path import join as path_join will be represented as:
113
- PyImport(module="os.path", name="path_join", alias="join", start_line=3, end_line=3, start_column=0, end_column=30)
114
- """
173
+ """Represents a Python import statement."""
115
174
116
175
module : str
117
176
name : str
@@ -123,18 +182,9 @@ class PyImport(BaseModel):
123
182
124
183
125
184
@builder
185
+ @msgpk
126
186
class PyComment (BaseModel ):
127
- """
128
- Represents a Python comment.
129
-
130
- Attributes:
131
- content (str): The actual comment string (without the leading '#').
132
- start_line (int): The line number where the comment starts.
133
- end_line (int): The line number where the comment ends (same as start_line for single-line comments).
134
- start_column (int): The starting column of the comment.
135
- end_column (int): The ending column of the comment.
136
- is_docstring (bool): Whether this comment is actually a docstring (triple-quoted string).
137
- """
187
+ """Represents a Python comment."""
138
188
139
189
content : str
140
190
start_line : int = - 1
@@ -145,20 +195,9 @@ class PyComment(BaseModel):
145
195
146
196
147
197
@builder
198
+ @msgpk
148
199
class PySymbol (BaseModel ):
149
- """
150
- Represents a symbol used or declared in Python code.
151
-
152
- Attributes:
153
- name (str): The name of the symbol (e.g., 'x', 'self.x', 'os.path').
154
- scope (Literal['local', 'nonlocal', 'global', 'class', 'module']): The scope where the symbol is accessed.
155
- kind (Literal['variable', 'parameter', 'attribute', 'function', 'class', 'module']): The kind of symbol.
156
- type (Optional[str]): Inferred or annotated type, if available.
157
- qualified_name (Optional[str]): Fully qualified name (e.g., 'self.x', 'os.path.join').
158
- is_builtin (bool): Whether this is a Python builtin.
159
- lineno (int): Line number where the symbol is accessed or declared.
160
- col_offset (int): Column offset.
161
- """
200
+ """Represents a symbol used or declared in Python code."""
162
201
163
202
name : str
164
203
scope : Literal ["local" , "nonlocal" , "global" , "class" , "module" ]
@@ -171,11 +210,9 @@ class PySymbol(BaseModel):
171
210
172
211
173
212
@builder
213
+ @msgpk
174
214
class PyVariableDeclaration (BaseModel ):
175
- """Represents a Python variable declaration.
176
-
177
- Attributes:
178
- """
215
+ """Represents a Python variable declaration."""
179
216
180
217
name : str
181
218
type : Optional [str ]
@@ -189,18 +226,9 @@ class PyVariableDeclaration(BaseModel):
189
226
190
227
191
228
@builder
229
+ @msgpk
192
230
class PyCallableParameter (BaseModel ):
193
- """Represents a parameter of a Python callable (function/method).
194
-
195
- Attributes:
196
- name (str): The name of the parameter.
197
- type (str): The type of the parameter.
198
- default_value (str): The default value of the parameter, if any.
199
- start_line (int): The line number where the parameter is defined.
200
- end_line (int): The line number where the parameter definition ends.
201
- start_column (int): The column number where the parameter starts.
202
- end_column (int): The column number where the parameter ends.
203
- """
231
+ """Represents a parameter of a Python callable (function/method)."""
204
232
205
233
name : str
206
234
type : Optional [str ] = None
@@ -212,10 +240,9 @@ class PyCallableParameter(BaseModel):
212
240
213
241
214
242
@builder
243
+ @msgpk
215
244
class PyCallsite (BaseModel ):
216
- """
217
- Represents a Python call site (function or method invocation) with contextual metadata.
218
- """
245
+ """Represents a Python call site (function or method invocation) with contextual metadata."""
219
246
220
247
method_name : str
221
248
receiver_expr : Optional [str ] = None
@@ -231,26 +258,9 @@ class PyCallsite(BaseModel):
231
258
232
259
233
260
@builder
261
+ @msgpk
234
262
class PyCallable (BaseModel ):
235
- """Represents a Python callable (function/method).
236
-
237
- Attributes:
238
- name (str): The name of the callable.
239
- signature (str): The fully qualified name of the callable (e.g., module.function_name).
240
- docstring (PyComment): The docstring of the callable.
241
- decorators (List[str]): List of decorators applied to the callable.
242
- parameters (List[PyCallableParameter]): List of parameters for the callable.
243
- return_type (Optional[str]): The type of the return value, if specified.
244
- code (str): The actual code of the callable.
245
- start_line (int): The line number where the callable is defined.
246
- end_line (int): The line number where the callable definition ends.
247
- code_start_line (int): The line number where the code block starts.
248
- accessed_symbols (List[str]): Symbols accessed within the callable.
249
- call_sites (List[str]): Call sites of this callable.
250
- is_entrypoint (bool): Whether this callable is an entry point.
251
- local_variables (List[PyVariableDeclaration]): Local variables within the callable.
252
- cyclomatic_complexity (int): Cyclomatic complexity of the callable.
253
- """
263
+ """Represents a Python callable (function/method)."""
254
264
255
265
name : str
256
266
path : str
@@ -274,16 +284,9 @@ def __hash__(self) -> int:
274
284
275
285
276
286
@builder
287
+ @msgpk
277
288
class PyClassAttribute (BaseModel ):
278
- """Represents a Python class attribute.
279
-
280
- Attributes:
281
- name (str): The name of the attribute.
282
- type (str): The type of the attribute.
283
- docstring (PyComment): The docstring of the attribute.
284
- start_line (int): The line number where the attribute is defined.
285
- end_line (int): The line number where the attribute definition ends.
286
- """
289
+ """Represents a Python class attribute."""
287
290
288
291
name : str
289
292
type : Optional [str ] = None
@@ -293,20 +296,9 @@ class PyClassAttribute(BaseModel):
293
296
294
297
295
298
@builder
299
+ @msgpk
296
300
class PyClass (BaseModel ):
297
- """Represents a Python class.
298
-
299
- Attributes:
300
- name (str): The name of the class.
301
- signature (str): The fully qualified name of the class (e.g., module.class_name).
302
- docstring (PyComment): The docstring of the class.
303
- base_classes (List[str]): List of base class names.
304
- methods (Dict[str, PyCallable]): Mapping of method names to their callable representations.
305
- attributes (Dict[str, PyClassAttribute]): Mapping of attribute names to their variable declarations.
306
- inner_classes (Dict[str, "PyClass"]): Mapping of inner class names to their class representations.
307
- start_line (int): The line number where the class definition starts.
308
- end_line (int): The line number where the class definition ends.
309
- """
301
+ """Represents a Python class."""
310
302
311
303
name : str
312
304
signature : str # e.g., module.class_name
@@ -325,18 +317,9 @@ def __hash__(self):
325
317
326
318
327
319
@builder
320
+ @msgpk
328
321
class PyModule (BaseModel ):
329
- """Represents a Python module.
330
-
331
- Attributes:
332
- file_path (str): The file path of the module.
333
- module_name (str): The name of the module (e.g., module.submodule).
334
- imports (List[PyImport]): List of import statements in the module.
335
- comments (List[PyComment]): List of comments in the module.
336
- classes (Dict[str, PyClass]): Mapping of class names to their class representations.
337
- functions (Dict[str, PyCallable]): Mapping of function names to their callable representations.
338
- variables (List[PyVariableDeclaration]): List of variable declarations in the module.
339
- """
322
+ """Represents a Python module."""
340
323
341
324
file_path : str
342
325
module_name : str
@@ -348,13 +331,8 @@ class PyModule(BaseModel):
348
331
349
332
350
333
@builder
334
+ @msgpk
351
335
class PyApplication (BaseModel ):
352
- """Represents a Python application.
353
-
354
- Attributes:
355
- name (str): The name of the application.
356
- version (str): The version of the application.
357
- description (str): A brief description of the application.
358
- """
336
+ """Represents a Python application."""
359
337
360
338
symbol_table : dict [Path , PyModule ]
0 commit comments