Support creating text-mode detectors (#363)

Tyler Romero · Auto-format Bot · web-flow · commit a600a8726092 · 2025-05-15T13:55:58.000-07:00
Co-authored-by: Auto-format Bot <autoformatbot@groundlight.ai>
diff --git a/src/groundlight/experimental_api.py b/src/groundlight/experimental_api.py
@@ -30,6 +30,7 @@
 from groundlight_openapi_client.model.payload_template_request import PayloadTemplateRequest
 from groundlight_openapi_client.model.rule_request import RuleRequest
 from groundlight_openapi_client.model.status_enum import StatusEnum
+from groundlight_openapi_client.model.text_mode_configuration import TextModeConfiguration
 from groundlight_openapi_client.model.webhook_action_request import WebhookActionRequest
 from model import (
     ROI,
@@ -1053,6 +1054,60 @@ def create_bounding_box_detector(  # noqa: PLR0913 # pylint: disable=too-many-ar
         obj = self.detectors_api.create_detector(detector_creation_input, _request_timeout=DEFAULT_REQUEST_TIMEOUT)
         return Detector.parse_obj(obj.to_dict())
 
+    def create_text_recognition_detector(  # noqa: PLR0913 # pylint: disable=too-many-arguments, too-many-locals
+        self,
+        name: str,
+        query: str,
+        *,
+        group_name: Optional[str] = None,
+        confidence_threshold: Optional[float] = None,
+        patience_time: Optional[float] = None,
+        pipeline_config: Optional[str] = None,
+        metadata: Union[dict, str, None] = None,
+    ) -> Detector:
+        """
+        Creates a text recognition detector that can read specified spans of text from images.
+
+        **Example usage**::
+
+            gl = ExperimentalApi()
+
+            # Create a text recognition detector
+            detector = gl.create_text_recognition_detector(
+                name="date_and_time_detector",
+                query="Read the date and time from the bottom left corner of the image.",
+            )
+
+        :param name: A short, descriptive name for the detector.
+        :param query: A question or instruction describing the text to read from the image.
+        :param group_name: Optional name of a group to organize related detectors together.
+        :param confidence_threshold: A value that sets the minimum confidence level required for the ML model's
+                            predictions. If confidence is below this threshold, the query may be sent for human review.
+        :param patience_time: The maximum time in seconds that Groundlight will attempt to generate a
+                            confident prediction before falling back to human review. Defaults to 30 seconds.
+        :param pipeline_config: Advanced usage only. Configuration string needed to instantiate a specific
+                              prediction pipeline for this detector.
+        :param metadata: A dictionary or JSON string containing custom key/value pairs to associate with the detector.
+
+        :return: The created Detector object
+        """
+
+        detector_creation_input = self._prep_create_detector(
+            name=name,
+            query=query,
+            group_name=group_name,
+            confidence_threshold=confidence_threshold,
+            patience_time=patience_time,
+            pipeline_config=pipeline_config,
+            metadata=metadata,
+        )
+        detector_creation_input.mode = ModeEnum.TEXT
+        mode_config = TextModeConfiguration()
+
+        detector_creation_input.mode_configuration = mode_config
+        obj = self.detectors_api.create_detector(detector_creation_input, _request_timeout=DEFAULT_REQUEST_TIMEOUT)
+        return Detector.parse_obj(obj.to_dict())
+
     def _download_mlbinary_url(self, detector: Union[str, Detector]) -> EdgeModelInfo:
         """
         Gets a temporary presigned URL to download the model binaries for the given detector, along
diff --git a/test/conftest.py b/test/conftest.py
@@ -5,11 +5,11 @@
 from model import Detector, ImageQuery, ImageQueryTypeEnum, ResultTypeEnum
 
 
-def pytest_configure(config):
+def pytest_configure(config):  # pylint: disable=unused-argument
     # Run environment check before tests
     gl = Groundlight()
-    if gl._user_is_privileged():
-        raise Exception(
+    if gl._user_is_privileged():  # pylint: disable=protected-access
+        raise RuntimeError(
             "ERROR: You are running tests with a privileged user. Please run tests with a non-privileged user."
         )
 
@@ -31,6 +31,17 @@ def fixture_detector(gl: Groundlight) -> Detector:
     return gl.create_detector(name=name, query=query, pipeline_config=pipeline_config)
 
 
+@pytest.fixture(name="count_detector")
+def fixture_count_detector(gl_experimental: ExperimentalApi) -> Detector:
+    """Creates a new counting detector for tests."""
+    name = f"Test {datetime.utcnow()}"  # Need a unique name
+    query = "How many dogs?"
+    pipeline_config = "never-review-multi"  # always predicts 0
+    return gl_experimental.create_counting_detector(
+        name=name, query=query, class_name="dog", pipeline_config=pipeline_config
+    )
+
+
 @pytest.fixture(name="image_query_yes")
 def fixture_image_query_yes(gl: Groundlight, detector: Detector) -> ImageQuery:
     iq = gl.submit_image_query(detector=detector.id, image="test/assets/dog.jpeg", human_review="NEVER")
@@ -43,9 +54,27 @@ def fixture_image_query_no(gl: Groundlight, detector: Detector) -> ImageQuery:
     return iq
 
 
+@pytest.fixture(name="image_query_one")
+def fixture_image_query_one(gl_experimental: Groundlight, count_detector: Detector) -> ImageQuery:
+    iq = gl_experimental.submit_image_query(
+        detector=count_detector.id, image="test/assets/dog.jpeg", human_review="NEVER"
+    )
+    return iq
+
+
+@pytest.fixture(name="image_query_zero")
+def fixture_image_query_zero(gl_experimental: Groundlight, count_detector: Detector) -> ImageQuery:
+    iq = gl_experimental.submit_image_query(
+        detector=count_detector.id, image="test/assets/no_dogs.jpeg", human_review="NEVER"
+    )
+    return iq
+
+
 @pytest.fixture(name="gl_experimental")
-def _gl() -> ExperimentalApi:
-    return ExperimentalApi()
+def fixture_gl_experimental() -> ExperimentalApi:
+    _gl = ExperimentalApi()
+    _gl.DEFAULT_WAIT = 10
+    return _gl
 
 
 @pytest.fixture(name="initial_iq")
diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py
@@ -72,35 +72,6 @@ def is_valid_display_label(label: str) -> bool:
     return label in VALID_DISPLAY_LABELS
 
 
-@pytest.fixture(name="gl")
-def fixture_gl() -> Groundlight:
-    """Creates a Groundlight client object for testing."""
-    _gl = Groundlight()
-    _gl.DEFAULT_WAIT = 10
-    return _gl
-
-
-@pytest.fixture(name="detector")
-def fixture_detector(gl: Groundlight) -> Detector:
-    """Creates a new Test detector."""
-    name = f"Test {datetime.utcnow()}"  # Need a unique name
-    query = "Is there a dog?"
-    pipeline_config = "never-review"
-    return gl.create_detector(name=name, query=query, pipeline_config=pipeline_config)
-
-
-@pytest.fixture(name="image_query_yes")
-def fixture_image_query_yes(gl: Groundlight, detector: Detector) -> ImageQuery:
-    iq = gl.submit_image_query(detector=detector.id, image="test/assets/dog.jpeg", human_review="NEVER")
-    return iq
-
-
-@pytest.fixture(name="image_query_no")
-def fixture_image_query_no(gl: Groundlight, detector: Detector) -> ImageQuery:
-    iq = gl.submit_image_query(detector=detector.id, image="test/assets/cat.jpeg", human_review="NEVER")
-    return iq
-
-
 @pytest.fixture(name="image")
 def fixture_image() -> str:
     return "test/assets/dog.jpeg"
diff --git a/test/unit/test_experimental.py b/test/unit/test_experimental.py
@@ -66,19 +66,13 @@ def test_update_detector_escalation_type(gl_experimental: ExperimentalApi):
     updated_detector.escalation_type == "STANDARD"
 
 
-@pytest.mark.skip(
-    reason=(
-        "Users currently don't have permission to turn object detection on their own. If you have questions, reach out"
-        " to Groundlight support."
-    )
-)
-def test_submit_roi(gl_experimental: ExperimentalApi, image_query_yes: ImageQuery):
+def test_submit_roi(gl_experimental: ExperimentalApi, image_query_one: ImageQuery):
     """
     verify that we can submit an ROI
     """
     label_name = "dog"
     roi = gl_experimental.create_roi(label_name, (0, 0), (0.5, 0.5))
-    gl_experimental.add_label(image_query_yes.id, "YES", [roi])
+    gl_experimental.add_label(image_query_one.id, 1, [roi])
 
 
 @pytest.mark.skip(
@@ -87,21 +81,21 @@ def test_submit_roi(gl_experimental: ExperimentalApi, image_query_yes: ImageQuer
         " to Groundlight support."
     )
 )
-def test_submit_multiple_rois(gl_experimental: ExperimentalApi, image_query_no: ImageQuery):
+def test_submit_multiple_rois(gl_experimental: ExperimentalApi, image_query_one: ImageQuery):
     """
     verify that we can submit multiple ROIs
     """
     label_name = "dog"
     roi = gl_experimental.create_roi(label_name, (0, 0), (0.5, 0.5))
-    gl_experimental.add_label(image_query_no, "YES", [roi] * 3)
+    gl_experimental.add_label(image_query_one, 3, [roi] * 3)
 
 
 def test_counting_detector(gl_experimental: ExperimentalApi):
     """
     verify that we can create and submit to a counting detector
     """
     name = f"Test {datetime.utcnow()}"
-    created_detector = gl_experimental.create_counting_detector(name, "How many dogs", "dog")
+    created_detector = gl_experimental.create_counting_detector(name, "How many dogs", "dog", confidence_threshold=0.0)
     assert created_detector is not None
     count_iq = gl_experimental.submit_image_query(created_detector, "test/assets/dog.jpeg")
     assert count_iq.result.count is not None
@@ -112,7 +106,7 @@ def test_counting_detector_async(gl_experimental: ExperimentalApi):
     verify that we can create and submit to a counting detector
     """
     name = f"Test {datetime.utcnow()}"
-    created_detector = gl_experimental.create_counting_detector(name, "How many dogs", "dog")
+    created_detector = gl_experimental.create_counting_detector(name, "How many dogs", "dog", confidence_threshold=0.0)
     assert created_detector is not None
     async_iq = gl_experimental.ask_async(created_detector, "test/assets/dog.jpeg")
     # attempting to access fields within the result should raise an exception
@@ -126,27 +120,34 @@ def test_counting_detector_async(gl_experimental: ExperimentalApi):
     assert _image_query.result is not None
 
 
-@pytest.mark.skip(
-    reason=(
-        "General users currently currently can't use multiclass detectors. If you have questions, reach out"
-        " to Groundlight support, or upgrade your plan."
-    )
-)
 def test_multiclass_detector(gl_experimental: ExperimentalApi):
     """
     verify that we can create and submit to a multi-class detector
     """
     name = f"Test {datetime.utcnow()}"
     class_names = ["Golden Retriever", "Labrador Retriever", "Poodle"]
     created_detector = gl_experimental.create_multiclass_detector(
-        name, "What kind of dog is this?", class_names=class_names
+        name, "What kind of dog is this?", class_names=class_names, confidence_threshold=0.0
     )
     assert created_detector is not None
     mc_iq = gl_experimental.submit_image_query(created_detector, "test/assets/dog.jpeg")
     assert mc_iq.result.label is not None
     assert mc_iq.result.label in class_names
 
 
+def test_text_recognition_detector(gl_experimental: ExperimentalApi):
+    """
+    verify that we can create and submit to a text recognition detector
+    """
+    name = f"Test {datetime.utcnow()}"
+    created_detector = gl_experimental.create_text_recognition_detector(
+        name, "What is the date and time?", confidence_threshold=0.0
+    )
+    assert created_detector is not None
+    mc_iq = gl_experimental.submit_image_query(created_detector, "test/assets/dog.jpeg")
+    assert mc_iq.result.text is not None
+
+
 @pytest.mark.skip(
     reason=(
         "General users currently currently can't use bounding box detectors. If you have questions, reach out"
@@ -159,7 +160,7 @@ def test_bounding_box_detector(gl_experimental: ExperimentalApi):
     """
     name = f"Test {datetime.now(timezone.utc)}"
     created_detector = gl_experimental.create_bounding_box_detector(
-        name, "Draw a bounding box around each dog in the image", "dog"
+        name, "Draw a bounding box around each dog in the image", "dog", confidence_threshold=0.0
     )
     assert created_detector is not None
     bbox_iq = gl_experimental.submit_image_query(created_detector, "test/assets/dog.jpeg")
@@ -179,7 +180,7 @@ def test_bounding_box_detector_async(gl_experimental: ExperimentalApi):
     """
     name = f"Test {datetime.now(timezone.utc)}"
     created_detector = gl_experimental.create_bounding_box_detector(
-        name, "Draw a bounding box around each dog in the image", "dog"
+        name, "Draw a bounding box around each dog in the image", "dog", confidence_threshold=0.0
     )
     assert created_detector is not None
     async_iq = gl_experimental.ask_async(created_detector, "test/assets/dog.jpeg")
diff --git a/test/unit/test_labels.py b/test/unit/test_labels.py
@@ -64,3 +64,22 @@ def test_multiclass_labels(gl_experimental: ExperimentalApi):
     assert iq1.result.label == "cherry"
     with pytest.raises(ApiException) as _:
         gl_experimental.add_label(iq1, "MAYBE")
+
+
+def test_text_recognition_labels(gl_experimental: ExperimentalApi):
+    name = f"Test text recognition labels{datetime.utcnow()}"
+    det = gl_experimental.create_text_recognition_detector(name, "test_query")
+    iq1 = gl_experimental.submit_image_query(det, "test/assets/cat.jpeg")
+    gl_experimental.add_label(iq1, "apple text")
+    iq1 = gl_experimental.get_image_query(iq1.id)
+    assert iq1.result.text == "apple text"
+    gl_experimental.add_label(iq1, "banana text")
+    iq1 = gl_experimental.get_image_query(iq1.id)
+    assert iq1.result.text == "banana text"
+    gl_experimental.add_label(iq1, "")
+    iq1 = gl_experimental.get_image_query(iq1.id)
+    assert iq1.result.text == ""
+
+    gl_experimental.add_label(iq1, "UNCLEAR")
+    iq1 = gl_experimental.get_image_query(iq1.id)
+    assert iq1.result.text is None