datajuicer · cmgzn · Jun 17, 2026 · Jun 10, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/docs/op_doc_enhance_workflow/utils/model.py b/docs/op_doc_enhance_workflow/utils/model.py
@@ -3,7 +3,7 @@
     prepare_model,
 )
 
-API_MODEL = "qwen3-max"
+API_MODEL = "qwen3.7-max"
 
 
 def chat(messages: list[dict]):

diff --git a/tests/ops/aggregator/test_entity_attribute_aggregator.py b/tests/ops/aggregator/test_entity_attribute_aggregator.py
@@ -40,7 +40,8 @@ def test_default_aggregator(self):
             },
         ]
         op = EntityAttributeAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             entity='李莲花',
             attribute='主要经历'
         )
@@ -59,7 +60,8 @@ def test_input_output(self):
             },
         ]
         op = EntityAttributeAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             entity='李莲花',
             attribute='身份背景',
             input_key='sub_docs',
@@ -80,7 +82,8 @@ def test_max_token_num(self):
             },
         ]
         op = EntityAttributeAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             entity='李莲花',
             attribute='身份背景',
             max_token_num=200
@@ -100,7 +103,8 @@ def test_word_limit_num(self):
             },
         ]
         op = EntityAttributeAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             entity='李莲花',
             attribute='身份背景',
             word_limit=20
@@ -128,7 +132,8 @@ def test_example_prompt(self):
             '孙行者、齐天大圣、美猴王\n'
         )
         op = EntityAttributeAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             entity='李莲花',
             attribute='另外身份',
             example_prompt=example_prompt,

diff --git a/tests/ops/aggregator/test_meta_tags_aggregator.py b/tests/ops/aggregator/test_meta_tags_aggregator.py
@@ -48,7 +48,8 @@ def test_default_aggregator(self):
             },
         ]
         op = MetaTagsAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             meta_tag_key=MetaKeys.query_sentiment_label,
         )
         self._run_helper(op, samples)
@@ -77,7 +78,8 @@ def test_target_tags(self):
             },
         ]
         op = MetaTagsAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             meta_tag_key=MetaKeys.query_sentiment_label,
             target_tags=['开心', '难过', '其他']
         )
@@ -106,7 +108,8 @@ def test_tag_list(self):
             },
         ]
         op = MetaTagsAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             meta_tag_key=MetaKeys.dialog_sentiment_labels,
             target_tags=['开心', '难过', '其他']
         )

diff --git a/tests/ops/aggregator/test_most_relevant_entities_aggregator.py b/tests/ops/aggregator/test_most_relevant_entities_aggregator.py
@@ -42,7 +42,8 @@ def test_default_aggregator(self):
         ]
 
         op = MostRelevantEntitiesAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             entity='李莲花',
             query_entity_type='人物'
         )
@@ -62,7 +63,8 @@ def test_input_output(self):
         ]
 
         op = MostRelevantEntitiesAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             entity='李莲花',
             query_entity_type='人物',
             input_key='events',
@@ -83,7 +85,8 @@ def test_max_token_num(self):
             },
         ]
         op = MostRelevantEntitiesAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             entity='李莲花',
             query_entity_type='人物',
             max_token_num=40

diff --git a/tests/ops/aggregator/test_nested_aggregator.py b/tests/ops/aggregator/test_nested_aggregator.py
@@ -41,7 +41,8 @@ def test_default_aggregator(self):
             },
         ]
         op = NestedAggregator(
-            api_model='qwen2.5-72b-instruct'
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
         )
         self._run_helper(op, samples)
 
@@ -58,7 +59,8 @@ def test_input_output(self):
             },
         ]
         op = NestedAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             input_key='sub_docs',
             output_key='text'
         )
@@ -77,7 +79,8 @@ def test_max_token_num_1(self):
             },
         ]
         op = NestedAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             max_token_num=2
         )
         self._run_helper(op, samples)
@@ -95,7 +98,8 @@ def test_max_token_num_2(self):
             },
         ]
         op = NestedAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             max_token_num=90
         )
         self._run_helper(op, samples)
@@ -113,7 +117,8 @@ def test_max_token_num_3(self):
             },
         ]
         op = NestedAggregator(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             max_token_num=200
         )
         self._run_helper(op, samples)

diff --git a/tests/ops/filter/test_llm_analysis_filter.py b/tests/ops/filter/test_llm_analysis_filter.py
@@ -7,7 +7,7 @@
 
 @skip_if_from_fork("Skipping API-based test because running from a fork repo")
 class LLMAnalysisFilterTest(DataJuicerTestCaseBase):
-    api_or_hf_model = 'qwen2.5-72b-instruct'
+    api_or_hf_model = 'qwen3.7-max'
 
     def _run_test(self, dataset: Dataset, op):
         if Fields.stats not in dataset.features:
@@ -45,28 +45,33 @@ def test_default_case(self):
             'text': "This comprehensive study examines the impact of climate change on global ecosystems, providing detailed analysis supported by extensive data collection over a decade. The research methodology includes rigorous statistical analysis and peer reviews from leading experts in environmental science."
         }]
         dataset = Dataset.from_list(ds_list)
-        op = LLMAnalysisFilter(api_or_hf_model=self.api_or_hf_model)
+        op = LLMAnalysisFilter(
+            api_or_hf_model=self.api_or_hf_model,
+            sampling_params={"enable_thinking": False},
+        )
         dataset = self._run_test(dataset, op)
 
     def test_rft_data(self):
         ds_list = [{
-            "text": "What is the fastest animal?",
-            "analysis": "The fastest animal is fish because they swim very fast in water.",
+            "text": "What is the fastest land animal?",
+            "analysis": "Fish is the fastest land animal because it swims in the ocean, flies above trees, and every animal is the same speed.",
             "answer": "Fish."
         }, {
             "text": "Why do leaves change color in autumn?",
-            "analysis": "Leaves change color because of the decrease in sunlight and temperature. Chlorophyll breaks down, revealing other pigments like yellow and orange.",
-            "answer": "Due to less sunlight and colder temperatures, chlorophyll breaks down, showing other colors."
+            "analysis": "As days get shorter, trees stop replacing chlorophyll. The green color fades and yellow or orange pigments that were already in the leaves become visible, though this skips some details such as red pigments.",
+            "answer": "Shorter daylight reduces chlorophyll, revealing other pigments in the leaves."
         }, {
             "text": "How does photosynthesis work?",
-            "analysis": "Photosynthesis is the process by which plants convert light energy into chemical energy. Chlorophyll absorbs sunlight, which drives the conversion of carbon dioxide and water into glucose and oxygen.",
-            "answer": "Plants use chlorophyll to absorb sunlight, converting carbon dioxide and water into glucose and oxygen."
+            "analysis": "Photosynthesis is the biochemical process by which green plants convert light energy into chemical energy stored in glucose. Chlorophyll in chloroplasts absorbs photons, driving the light-dependent reactions that produce ATP and NADPH. These then fuel the Calvin cycle, fixing CO2 into glyceraldehyde-3-phosphate, which is subsequently converted to glucose. Oxygen is released as a byproduct from water splitting.",
+            "answer": "Plants use chlorophyll to absorb sunlight, converting carbon dioxide and water into glucose and oxygen through light-dependent reactions and the Calvin cycle."
         }]
         dataset = Dataset.from_list(ds_list)
         op = LLMAnalysisFilter(
             api_or_hf_model=self.api_or_hf_model,
             input_keys=['text', 'analysis', 'answer'],
             field_names=['Query', 'Analysis', 'Answer'],
+            min_score=0.7,
+            sampling_params={"enable_thinking": False},
         )
         dataset = self._run_test(dataset, op)
 
@@ -81,7 +86,8 @@ def test_custom_dimension_keys(self):
         dataset = Dataset.from_list(ds_list)
         op = LLMAnalysisFilter(
             api_or_hf_model=self.api_or_hf_model,
-            dim_required_keys=["clarity", "fluency"]
+            dim_required_keys=["clarity", "fluency"],
+            sampling_params={"enable_thinking": False},
         )
         dataset = self._run_test(dataset, op)
 

diff --git a/tests/ops/filter/test_llm_difficulty_score_filter.py b/tests/ops/filter/test_llm_difficulty_score_filter.py
@@ -10,7 +10,7 @@
 
 @skip_if_from_fork("Skipping API-based test because running from a fork repo")
 class LLMDifficultyScoreFilterTest(DataJuicerTestCaseBase):
-    api_or_hf_model = 'qwen2.5-72b-instruct'
+    api_or_hf_model = 'qwen3.7-max'
 
     def _run_test(self, dataset: Dataset, op):
         if Fields.stats not in dataset.features:
@@ -44,7 +44,10 @@ def test_default_case(self):
             "In quantum field theory, renormalization addresses infinities arising from loop integrals in Feynman diagrams. By redefining parameters such as mass and charge, physicists ensure finite predictions align with experimental observations. However, this procedure raises philosophical questions about whether these adjustments reflect physical reality or merely mathematical conveniences."
         }]
         dataset = Dataset.from_list(ds_list)
-        op = LLMDifficultyScoreFilter(api_or_hf_model=self.api_or_hf_model)
+        op = LLMDifficultyScoreFilter(
+            api_or_hf_model=self.api_or_hf_model,
+            sampling_params={'enable_thinking': False},
+        )
         dataset= self._run_test(dataset, op)
 
     def test_rft_data(self):
@@ -66,6 +69,7 @@ def test_rft_data(self):
             api_or_hf_model=self.api_or_hf_model,
             input_keys=['text', 'analysis', 'answer'],
             field_names=['Query', 'Analysis', 'Answer'],
+            sampling_params={'enable_thinking': False},
         )
         dataset= self._run_test(dataset, op)
 

diff --git a/tests/ops/filter/test_llm_quality_score_filter.py b/tests/ops/filter/test_llm_quality_score_filter.py
@@ -10,7 +10,7 @@
 
 @skip_if_from_fork("Skipping API-based test because running from a fork repo")
 class LLMQualityScoreFilterTest(DataJuicerTestCaseBase):
-    api_or_hf_model = 'qwen2.5-72b-instruct'
+    api_or_hf_model = 'qwen3.7-max'
 
     def _run_test(self, dataset: Dataset, op):
         if Fields.stats not in dataset.features:
@@ -44,7 +44,10 @@ def test_default_case(self):
             "Cats are domesticated animals known for their agility, intelligence, and independent nature. Research shows that they spend approximately 70% of their lives sleeping, which helps conserve energy for hunting. Unlike dogs, cats are obligate carnivores, meaning their diet must consist primarily of meat to meet nutritional needs."
         }]
         dataset = Dataset.from_list(ds_list)
-        op = LLMQualityScoreFilter(api_or_hf_model=self.api_or_hf_model)
+        op = LLMQualityScoreFilter(
+            api_or_hf_model=self.api_or_hf_model,
+            sampling_params={'enable_thinking': False},
+        )
         dataset= self._run_test(dataset, op)
 
     def test_rft_data(self):
@@ -66,6 +69,7 @@ def test_rft_data(self):
             api_or_hf_model=self.api_or_hf_model,
             input_keys=['text', 'analysis', 'answer'],
             field_names=['Query', 'Analysis', 'Answer'],
+            sampling_params={'enable_thinking': False},
         )
         dataset= self._run_test(dataset, op)
 

diff --git a/tests/ops/filter/test_llm_task_relevance_filter.py b/tests/ops/filter/test_llm_task_relevance_filter.py
@@ -10,7 +10,7 @@
 
 @skip_if_from_fork("Skipping API-based test because running from a fork repo")
 class LLMTaskRelevanceFilterTest(DataJuicerTestCaseBase):
-    api_or_hf_model = 'qwen2.5-72b-instruct'
+    api_or_hf_model = 'qwen3.7-max'
 
     def _run_test(self, dataset: Dataset, op, tgt_list):
         if Fields.stats not in dataset.features:
@@ -46,6 +46,7 @@ def test_default_case(self):
         task_desc = "To solve high school-level math problems."
         op = LLMTaskRelevanceFilter(
             api_or_hf_model=self.api_or_hf_model,
+            sampling_params={'enable_thinking': False},
         )
         op.prepare_valid_feature(valid_dataset, task_desc)
         self._run_test(dataset, op, tgt_list)

diff --git a/tests/ops/mapper/test_calibrate_qa_mapper.py b/tests/ops/mapper/test_calibrate_qa_mapper.py
@@ -62,12 +62,13 @@ def test(self):
         # before running this test, set below environment variables:
         # export OPENAI_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1/
         # export OPENAI_API_KEY=your_dashscope_key
-        op = CalibrateQAMapper(api_model='qwen2.5-72b-instruct')
+        op = CalibrateQAMapper(api_model='qwen3.7-max', sampling_params={'enable_thinking': False})
         self._run_op(op)
 
     def test_args(self):
         op = CalibrateQAMapper(
-            api_model='qwen2.5-72b-instruct',
+            api_model='qwen3.7-max',
+            sampling_params={'enable_thinking': False},
             api_endpoint=
             'https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
             response_path='choices.0.message.content')

diff --git a/tests/ops/mapper/test_calibrate_query_mapper.py b/tests/ops/mapper/test_calibrate_query_mapper.py
@@ -7,10 +7,11 @@
 
 class CalibrateQueryMapperTest(DataJuicerTestCaseBase):
 
-    def _run_op(self, api_model, response_path=None):
+    def _run_op(self, api_model, response_path=None, sampling_params=None):
 
         op = CalibrateQueryMapper(api_model=api_model,
-                                  response_path=response_path)
+                                  response_path=response_path,
+                                  sampling_params=sampling_params)
 
         reference = """# 角色语言风格
 1. 下面是李莲花的问答样例，你必须贴合他的语言风格：
@@ -66,7 +67,7 @@ def test(self):
         # before running this test, set below environment variables:
         # export OPENAI_API_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
         # export OPENAI_API_KEY=your_key
-        self._run_op('qwen2.5-72b-instruct')
+        self._run_op('qwen3.7-max', sampling_params={'enable_thinking': False})
 
 
 if __name__ == '__main__':

diff --git a/tests/ops/mapper/test_calibrate_response_mapper.py b/tests/ops/mapper/test_calibrate_response_mapper.py
@@ -9,10 +9,11 @@
 
 class CalibrateResponseMapperTest(DataJuicerTestCaseBase):
 
-    def _run_op(self, api_model, response_path=None):
+    def _run_op(self, api_model, response_path=None, sampling_params=None):
 
         op = CalibrateResponseMapper(api_model=api_model,
-                                     response_path=response_path)
+                                     response_path=response_path,
+                                     sampling_params=sampling_params)
 
         reference = """# 角色语言风格
 1. 下面是李莲花的问答样例，你必须贴合他的语言风格：
@@ -68,7 +69,7 @@ def test(self):
         # before running this test, set below environment variables:
         # export OPENAI_API_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
         # export OPENAI_API_KEY=your_key
-        self._run_op('qwen2.5-72b-instruct')
+        self._run_op('qwen3.7-max', sampling_params={'enable_thinking': False})
 
 
 if __name__ == '__main__':