Skip to content

Commit

Permalink
clip: show errors if unable to process image
Browse files Browse the repository at this point in the history
  • Loading branch information
dale-wahl committed Jan 8, 2025
1 parent 4912ef4 commit b0889ae
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions processors/machine_learning/clip_categorize_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def process(self):
image_name = ".".join(result_filename.split(".")[:-1])
data = {
"id": image_name,
"filename": result_filename,
"filename": result_data.get("filename"),
"categories": result_data,
"image_metadata": image_metadata.get(image_name, {}) if image_metadata else {},
}
Expand All @@ -265,10 +265,15 @@ def map_item(item):
image_metadata = item.get("image_metadata", {})
# Updates to CLIP output: categories used to be a list of categories, but is now a dict of the form {"predictions": [[category_label, percent_float], ...]}
categories = item.get("categories")
error = None
if type(categories) == list:
pass
elif type(categories) == dict and "predictions" in categories:
categories = categories.get("predictions")
elif type(categories) == dict:
error = categories.get("error", "N/A")
if "predictions" in categories:
categories = categories.get("predictions")
else:
categories = []
else:
raise KeyError("Unexpected categories format; check NDJSON")

Expand All @@ -287,6 +292,7 @@ def map_item(item):
"original_url": image_metadata.get("url", "N/A"),
"post_ids": ", ".join([str(post_id) for post_id in image_metadata.get("post_ids", [])]),
"from_dataset": image_metadata.get("from_dataset", ""),
"error": error,
**all_cats
})

Expand Down

0 comments on commit b0889ae

Please sign in to comment.