Merged
Changes from 3 commits
6 changes: 6 additions & 0 deletions HISTORY.rst
@@ -3,6 +3,12 @@
History
-------

9.8.2 (2025-03-21)
------------------

- Retrying annotations updates to avoid temporary concurrency issues when
  updating composite sources.

9.8.1 (2025-01-14)
------------------

37 changes: 33 additions & 4 deletions bigml/api_handlers/sourcehandler.py
@@ -24,6 +24,8 @@
import sys
import os
import numbers
import time
import logging

from urllib import parse

@@ -67,8 +69,13 @@
from bigml.api_handlers.resourcehandler import ResourceHandlerMixin, LOGGER
from bigml.fields import Fields

LOG_FORMAT = '%(asctime)-15s: %(message)s'
LOGGER = logging.getLogger('BigML')
CONSOLE = logging.StreamHandler()
CONSOLE.setLevel(logging.WARNING)
LOGGER.addHandler(CONSOLE)

MAX_CHANGES = 500
MAX_CHANGES = 5


def compact_regions(regions):
@@ -508,6 +515,8 @@ def update_composite_annotations(self, source, images_file,
try:
_ = file_list.index(filename)
except ValueError:
LOGGER.error("WARNING: Could not find annotated file (%s)"
" in the composite's sources list", filename)
continue
for key in annotation.keys():
if key == "file":
Expand Down Expand Up @@ -539,9 +548,12 @@ def update_composite_annotations(self, source, images_file,
"components": source_ids})
elif optype == "regions":
for value, source_id in values:
if isinstance(value, dict):
# dictionary should contain the bigml-coco format
value = compact_regions(value)
changes.append(
{"field": field,
"value": compact_regions(value),
"value": value,
"components": [source_id]})
else:
for value, source_id in values:
@@ -550,16 +562,33 @@ def update_composite_annotations(self, source, images_file,
"value": value,
"components": [source_id]})
except Exception:
LOGGER.error("WARNING: Problem adding annotation to %s (%s)",
field, values)
pass

# we need to limit the amount of changes per update
batches_number = int(len(changes) / MAX_CHANGES)
for offset in range(0, batches_number + 1):
new_batch = changes[offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES]
new_batch = changes[
offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES]
if new_batch:
source = self.update_source(source,
{"row_values": new_batch})
self.ok(source)
if source["error"] is not None:
# retrying in case update is temporarily unavailable
time.sleep(1)
Member commented: "i would perhaps wait a little longer, or make two or three retries..."

Member (Author) replied: "ok"
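A minimal sketch of the multi-retry idea suggested above (the helper name `update_with_retries`, the retry count, and the backoff values are illustrative assumptions rather than what this patch implements; `update_source`, `get_source` and `ok` are the handler methods already used in this hunk):

```python
import time


def update_with_retries(handler, source, batch, retries=3, wait=2):
    """Send one batch of row_values, retrying on transient errors."""
    for attempt in range(retries):
        source = handler.update_source(source, {"row_values": batch})
        handler.ok(source)
        if source["error"] is None:
            # the update was accepted; stop retrying
            return source
        # transient failure: back off a little longer each time,
        # refresh the source and try again
        time.sleep(wait * (attempt + 1))
        source = handler.get_source(source)
        handler.ok(source)
    raise Exception(f"Failed to update {len(batch)} annotations.")
```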

source = self.get_source(source)
self.ok(source)
source = self.update_source(source,
{"row_values": new_batch})
if source["error"] is not None:
LOGGER.error("WARNING: Some annotations were not"
" updated (%s)",
new_batch)
if not self.ok(source):
raise Exception(
f"Failed to update {len(new_batch)} annotations.")
time.sleep(0.1)

return source

8 changes: 4 additions & 4 deletions bigml/bigmlconnection.py
@@ -406,7 +406,7 @@ def _create(self, url, body, verify=None, organization=None):
error = json_load(response.content)
LOGGER.error(self.error_message(error, method='create'))
elif code != HTTP_ACCEPTED:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("CREATE Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR
except ValueError as exc:
LOGGER.error("Malformed response: %s", str(exc))
@@ -489,7 +489,7 @@ def _get(self, url, query_string='',
LOGGER.error(self.error_message(error, method='get',
resource_id=resource_id))
else:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("GET Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR

except ValueError as exc:
@@ -582,7 +582,7 @@ def _list(self, url, query_string='', organization=None):
HTTP_TOO_MANY_REQUESTS]:
error = json_load(response.content)
else:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("LIST Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR
except ValueError as exc:
LOGGER.error("Malformed response: %s", str(exc))
@@ -662,7 +662,7 @@ def _update(self, url, body, organization=None, resource_id=None):
LOGGER.error(self.error_message(error, method='update',
resource_id=resource_id))
else:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("UPDATE Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR
except ValueError:
LOGGER.error("Malformed response")
13 changes: 13 additions & 0 deletions bigml/tests/create_dataset_steps.py
@@ -222,3 +222,16 @@ def clone_dataset(step, dataset):
def the_cloned_dataset_is(step, dataset):
"""Checking the dataset is a clone"""
eq_(world.dataset["origin"], dataset)


def check_annotations(step, annotations_field, annotations_num):
"""Checking the dataset contains a number of annotations"""
annotations_num = int(annotations_num)
field = world.dataset["fields"][annotations_field]
if field["optype"] == "regions":
count = field["summary"]["regions"]["sum"]
else:
count = 0
for _, num in field["summary"]["categories"]:
count += num
eq_(count, annotations_num)
36 changes: 36 additions & 0 deletions bigml/tests/test_22_source_args.py
@@ -25,6 +25,7 @@
from .world import world, setup_module, teardown_module, show_doc, \
show_method
from . import create_source_steps as source_create
from . import create_dataset_steps as dataset_create


class TestUploadSource:
@@ -125,3 +126,38 @@ def test_scenario3(self):
source_create.the_source_is_finished(
self, example["source_wait"])
source_create.the_cloned_source_origin_is(self, source)

def test_scenario4(self):
"""
Scenario: Successfully adding annotations to composite source:
Given I create an annotated images data source uploading a "<data>" file
And I wait until the source is ready less than <source_wait> secs
And I create a dataset
And I wait until the dataset is ready less than <dataset_wait> secs
Then the new dataset has <annotations_num> annotations in the <annotations_field> field
"""
headers = ["data", "source_wait", "dataset_wait", "annotations_num",
"annotations_field"]
examples = [
['data/images/metadata.json', '500', '500', '12',
'100002'],
['data/images/metadata_compact.json', '500', '500', '3',
'100003']]
show_doc(self.test_scenario4)
for example in examples:
example = dict(zip(headers, example))
show_method(self, self.bigml["method"], example)
source_create.i_create_annotated_source(
self,
example["data"],
args={"image_analysis": {"enabled": False,
"extracted_features": []}})
source_create.the_source_is_finished(
self, example["source_wait"])
dataset_create.i_create_a_dataset(self)
dataset_create.the_dataset_is_finished_in_less_than(
self, example["dataset_wait"])
dataset_create.check_annotations(self,
example["annotations_field"],
example["annotations_num"])

2 changes: 1 addition & 1 deletion bigml/version.py
@@ -1 +1 @@
__version__ = '9.8.1'
__version__ = '9.8.2'
2 changes: 2 additions & 0 deletions data/images/annotations_compact.json
@@ -0,0 +1,2 @@
[{"file": "f1/fruits1f.png", "my_regions": "[[\"region1\" 0.2 0.2 0.4 0.4]]"},
{"file": "f1/fruits1.png", "my_regions": "[[\"region2\" 0.3 0.3 0.5 0.5] [\"region1\" 0.6 0.6 0.8 0.8]]"}]
5 changes: 5 additions & 0 deletions data/images/metadata_compact.json
@@ -0,0 +1,5 @@
{"description": "Fruit images to test colour distributions with regions",
"images_file": "./fruits_hist.zip",
"new_fields": [{"name": "my_regions", "optype": "regions"}],
"source_id": null,
"annotations": "./annotations_compact.json"}
3 changes: 2 additions & 1 deletion setup.py
@@ -50,7 +50,8 @@
download_url="https://github.com/bigmlcom/python",
license="http://www.apache.org/licenses/LICENSE-2.0",
setup_requires = ['pytest'],
install_requires = ["setuptools==69.0.0", "unidecode", "bigml-chronos>=0.4.3", "requests",
install_requires = ["setuptools==70.0.0", "unidecode",
"bigml-chronos>=0.4.3", "requests",
"requests-toolbelt", "msgpack", "numpy>=1.22", "scipy",
"javascript"],
extras_require={"images": IMAGES_DEPENDENCIES,