feat: update the grader training data format to get query and other fields directly from the top level (#161)

jc200808 · web-flow · commit 157edd50f31a · 2026-04-07T19:46:47.000+08:00
* feat: update the grader training data format to read query and other fields directly from the top level of each example

* feat: fix test_minimax_chat_model
diff --git a/cookbooks/training_judge_model/grpo/grader_rl_dataset.py b/cookbooks/training_judge_model/grpo/grader_rl_dataset.py
@@ -405,21 +405,14 @@ def _parse_config(self, config: Union[DictConfig, Dict[str, Any]]):
     def _build_messages(self, example: dict) -> List[dict]:
         """Build chat messages from example - Pointwise mode with text format only."""
         messages = []
-
-        # Check if it's the new JSON structure (has 'input' key with nested structure)
-        if "input" in example and isinstance(example["input"], dict) and "query" in example["input"]:
-            # New JSON format
+        # Check if example has 'query' directly at top level
+        if "query" in example and isinstance(example["query"], str) and example["query"]:
+            query = example["query"]
+            messages.append({"role": "user", "content": query})
+        # Check if example has 'input' key with nested structure
+        elif "input" in example and isinstance(example["input"], dict) and "query" in example["input"]:
             query = example["input"].get("query", "")
-            if query:
-                messages.append({"role": "user", "content": query})
-
-            # Get chosen response (positive example)
-            if "chosen" in example and isinstance(example["chosen"], dict):
-                response_data = example["chosen"].get("response", {})
-                if isinstance(response_data, dict):
-                    response_content = response_data.get("content", "")
-                    if response_content:
-                        messages.append({"role": "assistant", "content": response_content})
+            messages.append({"role": "user", "content": query})
         else:
             # Old format - handle standard structure
             messages = self._build_old_format_messages(example)
@@ -615,8 +608,37 @@ def _format_grader_template(self, messages: List[dict], example: dict, grader_pr
         memory = ""
         action = ""
         reflection = ""
-        if "input" in example and isinstance(example["input"], dict) and "query" in example["input"]:
-            # New JSON format
+        query = ""
+
+        # Check if example has fields directly at top level
+        if "query" in example and isinstance(example["query"], str):
+            query = example.get("query", "")
+            if "context" in example:
+                context = example.get("context", "")
+                if isinstance(context, str):
+                    try:
+                        parsed_data = json.loads(context)
+                        if isinstance(parsed_data, dict):
+                            context = parsed_data.get("task_context", "")
+                            tool_definitions = parsed_data.get("tool_definitions", "")
+                            history = parsed_data.get("history", "")
+                    except (json.JSONDecodeError, TypeError, Exception):
+                        pass
+                elif isinstance(context, dict):
+                    context = context.get("task_context", "")
+                    tool_definitions = context.get("tool_definitions", "")
+                    history = context.get("history", "")
+            reference_response = example.get("reference_response", "")
+            # Extract fields directly from example top level
+            response = example.get("response", "")
+            tool_calls = example.get("tool_calls", "")
+            tool_responses = example.get("tool_responses", "")
+            plan = example.get("plan", "")
+            observation = example.get("observation", "")
+            memory = example.get("memory", "")
+            action = example.get("action", "")
+            reflection = example.get("reflection", "")
+        elif "input" in example and isinstance(example["input"], dict) and "query" in example["input"]:
             query = example["input"].get("query", "")
             context = example["input"].get("context", "")
             if context:
diff --git a/cookbooks/training_judge_model/grpo/pointwise/utils/preprocess_grader_data.py b/cookbooks/training_judge_model/grpo/pointwise/utils/preprocess_grader_data.py
@@ -136,51 +136,53 @@ def process_single_file(data_file: str, split_ratio: float, seed: int, sample_nu
                 is_bin = True
 
             try:
-                if (
-                    item["chosen"]
-                    and item["chosen"]["response"]
-                    and "tool_calls" in item["chosen"]["response"]
-                    and isinstance(item["chosen"]["response"].get("tool_calls", []), list)
-                ):
-                    item["chosen"]["response"]["tool_calls"] = json.dumps(item["chosen"]["response"]["tool_calls"])
-
-                if (
-                    item["rejected"]
-                    and item["rejected"]["response"]
-                    and "tool_calls" in item["rejected"]["response"]
-                    and isinstance(item["rejected"]["response"].get("tool_calls", []), list)
-                ):
-                    item["rejected"]["response"]["tool_calls"] = json.dumps(item["rejected"]["response"]["tool_calls"])
+                # Process chosen response
+                if item["chosen"] and item["chosen"].get("response"):
+                    chosen_response = item["chosen"]["response"]
+                    if "tool_calls" in chosen_response and isinstance(chosen_response.get("tool_calls", []), list):
+                        chosen_response["tool_calls"] = json.dumps(chosen_response["tool_calls"])
+                    if "content" in chosen_response:
+                        chosen_response["response"] = chosen_response.pop("content")
+
+                # Process rejected response
+                if item["rejected"] and item["rejected"].get("response"):
+                    rejected_response = item["rejected"]["response"]
+                    if "tool_calls" in rejected_response and isinstance(rejected_response.get("tool_calls", []), list):
+                        rejected_response["tool_calls"] = json.dumps(rejected_response["tool_calls"])
+                    if "content" in rejected_response:
+                        rejected_response["response"] = rejected_response.pop("content")
 
                 if item["input"] and item["input"].get("context", "") and not isinstance(item["input"]["context"], str):
                     item["input"]["context"] = json.dumps(item["input"]["context"])
             except Exception as e:
                 raise e
+
+            # Create new_item with response key removed
             if "chosen" not in item or not item["chosen"]:
                 print(f"Warning: Missing chosen answer in item {item} from {data_file}. Skipping item.")
             else:
-                output_data.append(
-                    {
-                        "input": item["input"],
-                        "answer": item["chosen"],
-                        "label": 1,  # positive example
-                        "score": 1.0 if is_bin else 5.0,
-                        "task_type": task_type,
-                    }
-                )
+                chosen_response = item["chosen"].get("response", item["chosen"])
+                new_item_chosen = {
+                    **item["input"],
+                    **chosen_response,
+                    "label": 1,  # positive example
+                    "score": 1.0 if is_bin else 5.0,
+                    "task_type": task_type,
+                }
+                output_data.append(new_item_chosen)
 
             if "rejected" not in item or not item["rejected"]:
                 print(f"Warning: Missing rejected answer in item {item} from {data_file}. Skipping item.")
             else:
-                output_data.append(
-                    {
-                        "input": item["input"],
-                        "answer": item["rejected"],
-                        "label": 0,  # negative example
-                        "score": 0.0 if is_bin else 1.0,
-                        "task_type": task_type,
-                    }
-                )
+                rejected_response = item["rejected"].get("response", item["rejected"])
+                new_item_rejected = {
+                    **item["input"],
+                    **rejected_response,
+                    "label": 0,  # negative example
+                    "score": 0.0 if is_bin else 1.0,
+                    "task_type": task_type,
+                }
+                output_data.append(new_item_rejected)
     except KeyError as e:
         print(f"Error: Missing required key {e} in file {data_file}")
         return False