diff --git a/examples/demo_normal_playwright.py b/examples/demo_normal_playwright.py
index 2237887cbb..2d0a92b173 100644
--- a/examples/demo_normal_playwright.py
+++ b/examples/demo_normal_playwright.py
@@ -37,19 +37,11 @@ def patch_modelhub(modelhub: ModelHub):
     :param modelhub:
     :return:
     """
-
+    modelhub.nested_categories.update({"dog": []})
     modelhub.clip_candidates.update(
         {
-            "the largest animal in real life": [
-                "parrot",
-                "bee",
-                "ladybug",
-                "frog",
-                "crab",
-                "bat",
-                "butterfly",
-                "dragonfly",
-            ]
+            "the largest animal in real life": {"parrot": ["ladybug", "crab", "bee", "frog"]},
+            "dog": {"dog": ["panda", "elephant", "owl", "raccoon", "suit"]},
         }
     )
 
diff --git a/hcaptcha_challenger/agents/playwright/control.py b/hcaptcha_challenger/agents/playwright/control.py
index fe0235eda4..720bc2521c 100644
--- a/hcaptcha_challenger/agents/playwright/control.py
+++ b/hcaptcha_challenger/agents/playwright/control.py
@@ -463,29 +463,30 @@ async def _binary_challenge(self, frame_challenge: FrameLocator, model: ResNetCo
                     await fl.click()
 
     async def _catch_all_binary_challenge(self, frame_challenge: FrameLocator):
-        dl = match_datalake(self.modelhub, self.label)
-        tool = ZeroShotImageClassifier.from_datalake(dl)
-
         # Default to `RESNET.OPENAI` perf_counter 1.794s
         t0 = time.perf_counter()
         model = register_pipline(self.modelhub)
         te = time.perf_counter()
         logger.debug(
-            "handle task",
-            unsupervised="binary",
-            candidate_labels=tool.candidate_labels,
-            prompt=self.prompt,
-            timit=f"{te - t0:.3f}s",
+            "handle task", unsupervised="binary", prompt=self.prompt, timit=f"{te - t0:.3f}s"
         )
 
         # {{< CATCH EXAMPLES >}}
+        dl = match_datalake(self.modelhub, self.label)
+        tool = ZeroShotImageClassifier.from_datalake(dl)
+        target = {}
         if self.example_paths:
             example_path = self.example_paths[-1]
-            results = tool(model, image=Image.open(example_path))
+            results = model(images=[Image.open(example_path)], candidate_labels=dl.candidates)
             target = results[0]
 
+        # {{< CATCH CHALLENGES >}}
+        if point_key := target.get("label", ""):
+            dl = match_datalake(self.modelhub, self.label, point_key)
+            tool = ZeroShotImageClassifier.from_datalake(dl)
+
         # {{< IMAGE CLASSIFICATION >}}
         times = int(len(self.qr.tasklist) / 9)
         for pth in range(times):
diff --git a/hcaptcha_challenger/components/common.py b/hcaptcha_challenger/components/common.py
index d843faaee8..373327a749 100644
--- a/hcaptcha_challenger/components/common.py
+++ b/hcaptcha_challenger/components/common.py
@@ -45,15 +45,41 @@ def rank_models(
     return best_model, model_name
 
 
-def match_datalake(modelhub: ModelHub, label: str) -> DataLake:
+def match_datalake(modelhub: ModelHub, label: str, point_key: str = "") -> DataLake:
+    """
+    Match a challenge label to a binary DataLake.
+    :param modelhub: model registry holding datalake and clip_candidates
+    :param label: the split challenge label
+    :param point_key: focus example label used to narrow the candidates
+    :return:
+    """
     # prelude datalake
     if dl := modelhub.datalake.get(label):
         return dl
 
     # prelude clip_candidates
     for ket in reversed(modelhub.clip_candidates.keys()):
-        if label in ket:
-            candidates = modelhub.clip_candidates[ket]
+        # select challenge prompt
+        if label not in ket:
+            continue
+
+        if exp2pos := modelhub.clip_candidates[ket]:
+            candidates: List[str] = []
+
+            # Select nested example
+            if not point_key:
+                # Multiple nested groups
+                example_candidates = list(exp2pos.keys())
+                # There was only one control group
+                if len(exp2pos) <= 1:
+                    example_candidates.extend(next(iter(exp2pos.values())))
+                candidates = example_candidates
+            # Select challenge-label from example-label
+            elif challenge_candidates := exp2pos.get(point_key, []):
+                candidates.append(point_key)
+                candidates.extend(challenge_candidates)
+
+            # Throw out the prompts
             if candidates and len(candidates) > 2:
                 dl = DataLake.from_binary_labels(candidates[:1], candidates[1:])
                 return dl
diff --git a/hcaptcha_challenger/onnx/modelhub.py b/hcaptcha_challenger/onnx/modelhub.py
index ac46954692..df0bc7b0ac 100644
--- a/hcaptcha_challenger/onnx/modelhub.py
+++ b/hcaptcha_challenger/onnx/modelhub.py
@@ -307,9 +307,19 @@ class ModelHub:
     DEFAULT_CLIP_TEXTUAL_MODEL: str = "textual_CLIP-ViT-L-14-DataComp.XL-s13B-b90K.onnx"
     """
 
-    clip_candidates: Dict[str, List[str]] = field(default_factory=dict)
+    clip_candidates: Dict[str, Dict[str, List[str]]] = field(default_factory=dict)
     """
     CLIP self-supervised candidates
+    ---
+    the largest animal in real life:
+      squirrel:
+        - bee
+        - ladybug
+      parrot:
+        - ladybug
+        - crab
+        - bee
+        - frog
     """
 
     release_url: str = ""
@@ -551,6 +561,11 @@ class DataLake:
     preferably an independent noun or clause
     """
 
+    candidates: List[str] = field(default_factory=list)
+    """
+    positive_labels + negative_labels
+    """
+
     joined_dirs: List[str] | Path | None = None
     """
     Attributes reserved for AutoLabeling
@@ -574,6 +589,9 @@ class DataLake:
     Insert self-supervised prompt
     """
 
+    def __post_init__(self):
+        self.candidates = self.candidates or self.positive_labels + self.negative_labels
+
     @classmethod
     def from_challenge_prompt(cls, raw_prompt: str):
         return cls(raw_prompt=raw_prompt)
diff --git a/src/objects.yaml b/src/objects.yaml
index 1bee4ea5dd..b77b2e2afd 100644
--- a/src/objects.yaml
+++ b/src/objects.yaml
@@ -4,27 +4,47 @@ branches:
 objects_url: https://raw.githubusercontent.com/QIN2DIM/hcaptcha-challenger/main/src/objects.yaml
 circle_seg: appears_only_once_2309_yolov8s-seg.onnx
 clip_candidates:
+  dog:
+    dog:
+      - panda
+      - elephant
+      - owl
+      - raccoon
+      - suit
+  please click on all images of one object that is bigger than other:
+    excavator:
+      - red panda
+  the fastest entity:
+    car:
+      - ladybug
+      - horse
   the largest animal in real life:
-    - parrot
-    - ladybug
-    - crab
-    - bee
-    - frog
-    - bat
-    - butterfly
-    - dragonfly
-    - giraffe
-    - duck
-    - cookie
-    - turtle
-    - dog
-    - cat
-    - tiger
+    squirrel:
+      - bee
+      - ladybug
+    parrot:
+      - ladybug
+      - crab
+      - bee
+      - frog
+    tiger:
+      - cat
+      - dog
+    lion:
+      - cat
+      - dog
+      - fox
+      - goat
+    elephant:
+      - cow
+      - dog
+      - panda
   water vehicle:
     - water vehicle
     - flying vehicle
     - land vehicle
 nested_categories:
+  dog:
   plant:
     - nested_plant2311.onnx
     - plant2319.onnx
diff --git a/tests/test_prompt_handler.py b/tests/test_prompt_handler.py
index 16a971ad7e..b6cd90b672 100644
--- a/tests/test_prompt_handler.py
+++ b/tests/test_prompt_handler.py
@@ -58,6 +58,8 @@ def test_split_binary_prompt():
         "please click on the most similar object to the following reference shape:",
         "please click in the center of an observation wheel",
         "Please select all images of one type that appear warmer in comparison to other images",
+        "Please click on all images of one object that is bigger than other",
+        "Please click on all images containing the fastest entity",
     ]
     for prompt in prompts_:
         print(handle(prompt))
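Reviewer note: below is a minimal, self-contained sketch of the two-stage candidate selection this patch wires up. `match_candidates` and its inline data are hypothetical stand-ins, not part of the library's API (the real path goes through `ModelHub`, `match_datalake`, and `DataLake`); the mapping is trimmed from `src/objects.yaml`. It illustrates how the nested `clip_candidates` schema resolves once without a focus example (`{{< CATCH EXAMPLES >}}`) and once after the example image has been classified (`{{< CATCH CHALLENGES >}}`).

```python
# Sketch only: a hypothetical stand-in mirroring the patched match_datalake.
from typing import Dict, List

clip_candidates: Dict[str, Dict[str, List[str]]] = {
    "the largest animal in real life": {
        "squirrel": ["bee", "ladybug"],
        "parrot": ["ladybug", "crab", "bee", "frog"],
        "tiger": ["cat", "dog"],
    },
    "dog": {"dog": ["panda", "elephant", "owl", "raccoon", "suit"]},
}


def match_candidates(label: str, point_key: str = "") -> List[str]:
    """Return [positive, *negatives] for a prompt, mirroring match_datalake."""
    for ket, exp2pos in clip_candidates.items():
        # Substring match: `label` is the split challenge prompt.
        if label not in ket:
            continue
        if not point_key:
            # Stage 1 ({{< CATCH EXAMPLES >}}): no focus example yet, so the
            # example image is classified against the nested example-labels.
            candidates = list(exp2pos.keys())
            if len(exp2pos) <= 1:
                # A single control group: pull in its negatives directly.
                candidates.extend(next(iter(exp2pos.values())))
            return candidates
        if challenge_candidates := exp2pos.get(point_key):
            # Stage 2 ({{< CATCH CHALLENGES >}}): the example resolved to
            # `point_key`; challenge tiles are point_key vs. its negatives.
            return [point_key, *challenge_candidates]
    return []


# Stage 1: rank the example-labels for the prompt.
print(match_candidates("the largest animal in real life"))
# ['squirrel', 'parrot', 'tiger']

# Stage 2: suppose CLIP scored the example image highest as "parrot".
print(match_candidates("the largest animal in real life", "parrot"))
# ['parrot', 'ladybug', 'crab', 'bee', 'frog']

# Single control group: example-label and its negatives in one pass.
print(match_candidates("dog"))
# ['dog', 'panda', 'elephant', 'owl', 'raccoon', 'suit']
```

In the patched `match_datalake` the result additionally passes the `len(candidates) > 2` guard before `DataLake.from_binary_labels(candidates[:1], candidates[1:])` is built. Note that the new `excavator` entry merges to only two candidates (`excavator`, `red panda`) in both stages and so never clears that guard; worth double-checking whether that is intended.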