|
1 | 1 | import asyncio
|
2 | 2 | import datetime
|
| 3 | +import struct |
3 | 4 | import uuid as uuid_package
|
4 | 5 | from typing import (
|
5 | 6 | Dict,
|
|
47 | 48 | from weaviate.connect import ConnectionV4
|
48 | 49 | from weaviate.connect.v4 import _ExpectedStatusCodes
|
49 | 50 | from weaviate.logger import logger
|
| 51 | +from weaviate.proto.v1 import base_pb2 |
50 | 52 | from weaviate.types import BEACON, UUID, VECTORS
|
51 | 53 | from weaviate.util import _datetime_to_string, _get_vector_v4
|
52 | 54 | from weaviate.validator import _validate_input, _ValidateArgument
|
|
57 | 59 | from weaviate.exceptions import WeaviateInvalidInputError
|
58 | 60 |
|
59 | 61 |
|
def _pack_named_vectors(vectors: Dict[str, List[float]]) -> List[base_pb2.Vectors]:
    """Convert a mapping of named vectors into ``base_pb2.Vectors`` messages.

    Args:
        vectors: Mapping from vector name to its float components.

    Returns:
        One ``base_pb2.Vectors`` message per entry, in the mapping's iteration
        order, whose ``vector_bytes`` holds the components packed as 32-bit
        floats. An empty mapping yields an empty list.

    Raises:
        struct.error: If a component cannot be packed as a 32-bit float.
    """
    return [
        base_pb2.Vectors(
            name=name,
            # "<" pins little-endian byte order with standard sizes; without a
            # prefix ``struct`` uses the host's native order, so the payload
            # would differ between machines.
            # NOTE(review): the receiver is assumed to decode little-endian
            # float32 - confirm against the gRPC contract.
            vector_bytes=struct.pack("<{}f".format(len(vector)), *vector),
        )
        for name, vector in vectors.items()
    ]
| 70 | + |
| 71 | + |
def _pack_vector(vector: Any) -> bytes:
    """Serialize a single vector into a buffer of packed 32-bit floats.

    Args:
        vector: The vector to serialize. It is first passed through
            ``_get_vector_v4`` (presumably normalizing supported vector types
            to a flat sequence of floats - verify against that helper).

    Returns:
        The components packed back to back as little-endian 32-bit floats,
        four bytes per component.

    Raises:
        struct.error: If a component cannot be packed as a 32-bit float.
    """
    vector_list = _get_vector_v4(vector)
    # "<" pins little-endian byte order with standard sizes; without a prefix
    # ``struct`` uses the host's native order, so the payload would differ
    # between machines.
    # NOTE(review): the receiver is assumed to decode little-endian float32 -
    # confirm against the gRPC contract.
    return struct.pack("<{}f".format(len(vector_list)), *vector_list)
| 75 | + |
| 76 | + |
60 | 77 | class _DataBase:
|
61 | 78 | def __init__(
|
62 | 79 | self,
|
@@ -281,6 +298,42 @@ def with_data_model(self, data_model: Type[TProperties]) -> "_DataCollectionAsyn
|
281 | 298 | data_model,
|
282 | 299 | )
|
283 | 300 |
|
def __validate_vector(
    self,
    idx: int,
    obj: Union[Properties, DataObject[Properties, Optional[ReferenceInputs]]],
) -> Tuple[_BatchObject, Optional[bytes], Optional[List[base_pb2.Vectors]]]:
    """Build the batch entry for one input object and pre-pack its vector(s).

    Args:
        idx: Position of the object in the caller's input sequence; stored as
            the batch object's ``index``.
        obj: Either a ``DataObject`` wrapper or a bare properties mapping.

    Returns:
        A 3-tuple ``(batch_object, vector_bytes, named_vectors)``:
        ``vector_bytes`` is set only when a ``DataObject`` carries a non-dict
        vector, ``named_vectors`` only when it carries a dict of vectors; both
        are ``None`` otherwise.
    """
    # Bare properties: nothing to pack, and a fresh UUID is always generated.
    if not isinstance(obj, DataObject):
        plain_entry = _BatchObject(
            collection=self.name,
            vector=None,
            uuid=str(uuid_package.uuid4()),
            properties=cast(dict, obj),
            tenant=self._tenant,
            references=None,
            index=idx,
        )
        return plain_entry, None, None

    raw_vector = obj.vector
    packed_single: Optional[bytes] = None
    packed_named: Optional[List[base_pb2.Vectors]] = None
    # A dict means named vectors; any other non-None value is a single vector.
    if raw_vector is not None:
        if isinstance(raw_vector, dict):
            packed_named = _pack_named_vectors(raw_vector)
        else:
            packed_single = _pack_vector(raw_vector)

    # Keep a caller-supplied UUID, otherwise mint a random one.
    chosen_uuid = obj.uuid if obj.uuid is not None else uuid_package.uuid4()
    wrapped_entry = _BatchObject(
        collection=self.name,
        vector=raw_vector,
        uuid=str(chosen_uuid),
        properties=cast(dict, obj.properties),
        tenant=self._tenant,
        references=obj.references,
        index=idx,
    )
    return wrapped_entry, packed_single, packed_named
| 336 | + |
284 | 337 | def __parse_vector(self, obj: Dict[str, Any], vector: VECTORS) -> Dict[str, Any]:
|
285 | 338 | if isinstance(vector, dict):
|
286 | 339 | obj["vectors"] = {key: _get_vector_v4(val) for key, val in vector.items()}
|
@@ -360,27 +413,7 @@ async def insert_many(
|
360 | 413 | If every object in the batch fails to be inserted. The exception message contains details about the failure.
|
361 | 414 | """
|
362 | 415 | objs = [
|
363 |
| - ( |
364 |
| - _BatchObject( |
365 |
| - collection=self.name, |
366 |
| - vector=obj.vector, |
367 |
| - uuid=str(obj.uuid if obj.uuid is not None else uuid_package.uuid4()), |
368 |
| - properties=cast(dict, obj.properties), |
369 |
| - tenant=self._tenant, |
370 |
| - references=obj.references, |
371 |
| - index=idx, |
372 |
| - ) |
373 |
| - if isinstance(obj, DataObject) |
374 |
| - else _BatchObject( |
375 |
| - collection=self.name, |
376 |
| - vector=None, |
377 |
| - uuid=str(uuid_package.uuid4()), |
378 |
| - properties=cast(dict, obj), |
379 |
| - tenant=self._tenant, |
380 |
| - references=None, |
381 |
| - index=idx, |
382 |
| - ) |
383 |
| - ) |
| 416 | + self.__validate_vector(idx, obj) |
384 | 417 | for idx, obj in enumerate(objects)
|
385 | 418 | ]
|
386 | 419 | res = await self._batch_grpc.objects(objs, timeout=self._connection.timeout_config.insert)
|
|
0 commit comments