Skip to content

Commit 00cf1da

Browse files
committed
Apply PRs google#350 and google#257: CJK tokenizer fix + retry mechanism
PR google#350 — Fix incorrect char_interval for non-ASCII text: - Add _CJK_SCRIPTS constant for Han, Hiragana, Katakana, Hangul detection - Modify _LETTERS_PATTERN with regex V1 set subtraction to exclude CJK - Add _CJK_PATTERN for standalone CJK character matching - Update _TOKEN_PATTERN and _WORD_PATTERN with flags=regex.V1 - Fix trailing whitespace in japanese_extraction.md example PR google#257 — Add retry mechanism for transient API errors: - New retry_utils.py: is_transient_error(), retry_on_transient_errors(), retry_chunk_processing() decorators with exponential backoff + jitter - annotation.py: _process_batch_with_retry(), retry params threaded through annotate_documents/text and single/sequential pass methods - extraction.py: retry params in extract() signature, passed via retry_kwargs - gemini.py: @retry_chunk_processing() decorator on _process_single_prompt - New retry_utils_test.py + AnnotatorRetryPolicyTest in annotation_test.py Upstream: google#350, google#257 Fixes: google#334
1 parent 5a05b0d commit 00cf1da

File tree

8 files changed

+2594
-1831
lines changed

8 files changed

+2594
-1831
lines changed

docs/examples/japanese_extraction.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ for entity in result.extractions:
5151
if entity.char_interval:
5252
start, end = entity.char_interval.start_pos, entity.char_interval.end_pos
5353
position_info = f" (pos: {start}-{end})"
54-
54+
5555
print(f"{entity.extraction_class}: {entity.extraction_text}{position_info}")
5656

5757
# Expected Output:

langextract/annotation.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,139 @@ def __init__(
206206
"Annotator initialized with format_handler: %s", format_handler
207207
)
208208

209+
def _process_batch_with_retry(
    self,
    batch_prompts: list[str],
    batch: list[chunking.TextChunk],
    retry_transient_errors: bool = True,
    max_retries: int = 3,
    retry_initial_delay: float = 1.0,
    retry_backoff_factor: float = 2.0,
    retry_max_delay: float = 60.0,
    **kwargs,
) -> Iterator[list[core_types.ScoredOutput]]:
    """Run a batch through the language model, retrying chunks individually on failure.

    The whole batch is attempted first. If the batch call raises a transient
    error (e.g. a 503 "model overloaded"), each chunk is reprocessed on its
    own with exponential-backoff retries so that successful chunks are not
    lost to a single flaky request. Non-transient errors propagate unchanged.

    Args:
      batch_prompts: Prompts for the batch, one per chunk.
      batch: TextChunk objects aligned position-for-position with the prompts.
      retry_transient_errors: Whether transient errors trigger per-chunk retries.
      max_retries: Maximum retry attempts per chunk.
      retry_initial_delay: Delay before the first retry, in seconds.
      retry_backoff_factor: Multiplier applied to the delay after each attempt.
      retry_max_delay: Upper bound on the delay between retries, in seconds.
      **kwargs: Extra keyword arguments forwarded to the language model.

    Yields:
      Lists of ScoredOutputs, one list per chunk.
    """
    try:
        # Fast path: a single batched inference call.
        outputs = list(
            self._language_model.infer(
                batch_prompts=batch_prompts,
                **kwargs,
            )
        )
        yield from outputs
        return
    except Exception as batch_error:
        # Only transient failures qualify for the per-chunk fallback.
        if not retry_utils.is_transient_error(batch_error):
            raise

        logging.warning(
            "Batch processing failed with transient error: %s. "
            "Falling back to individual chunk processing with retry.",
            str(batch_error),
        )

        recovered = []
        for index, (chunk_prompt, text_chunk) in enumerate(
            zip(batch_prompts, batch)
        ):
            try:
                result = self._process_single_chunk_with_retry(
                    prompt=chunk_prompt,
                    chunk=text_chunk,
                    retry_transient_errors=retry_transient_errors,
                    max_retries=max_retries,
                    retry_initial_delay=retry_initial_delay,
                    retry_backoff_factor=retry_backoff_factor,
                    retry_max_delay=retry_max_delay,
                    **kwargs,
                )
                recovered.append(result)
            except Exception as chunk_error:
                # A chunk that still fails after retries aborts the document;
                # log enough context to identify the offending chunk.
                logging.error(
                    "Failed to process chunk %d after retries: %s. "
                    "Chunk info: document_id=%s, text_length=%d. "
                    "Stopping document processing.",
                    index,
                    str(chunk_error),
                    text_chunk.document_id,
                    len(text_chunk.chunk_text),
                )
                raise

        yield from recovered
289+
290+
def _process_single_chunk_with_retry(
    self,
    prompt: str,
    chunk: chunking.TextChunk,
    retry_transient_errors: bool = True,
    max_retries: int = 3,
    retry_initial_delay: float = 1.0,
    retry_backoff_factor: float = 2.0,
    retry_max_delay: float = 60.0,
    **kwargs,
) -> list[core_types.ScoredOutput]:
    """Infer a single chunk, retrying transient failures with backoff.

    Args:
      prompt: The prompt for this chunk.
      chunk: The TextChunk being processed (used for error reporting).
      retry_transient_errors: Whether retries are enabled at all.
      max_retries: Maximum retry attempts.
      retry_initial_delay: Delay before the first retry, in seconds.
      retry_backoff_factor: Multiplier applied to the delay between attempts.
      retry_max_delay: Upper bound on the delay between retries, in seconds.
      **kwargs: Extra keyword arguments forwarded to the language model.

    Returns:
      The ScoredOutput list produced for this single prompt.

    Raises:
      exceptions.InferenceOutputError: If the model returns no results.
    """

    # Wrap the inference call in the shared retry decorator so the
    # backoff/jitter policy lives in one place (retry_utils).
    @retry_utils.retry_chunk_processing(
        max_retries=max_retries,
        initial_delay=retry_initial_delay,
        backoff_factor=retry_backoff_factor,
        max_delay=retry_max_delay,
        enabled=retry_transient_errors,
    )
    def _attempt_inference():
        results = list(
            self._language_model.infer(
                batch_prompts=[prompt],
                **kwargs,
            )
        )
        if not results:
            raise exceptions.InferenceOutputError(
                f"No results returned for chunk in document {chunk.document_id}"
            )
        return results[0]

    return _attempt_inference()
341+
209342
def annotate_documents(
210343
self,
211344
documents: Iterable[data.Document],

0 commit comments

Comments
 (0)