ck draft

tpfz · tpfz · commit 4904c2f7db2d · 2025-10-21T17:34:56.000+08:00
diff --git a/backend/modules/evaluation/infra/repo/experiment/ck/convertor/expt_turn_result_filter.go b/backend/modules/evaluation/infra/repo/experiment/ck/convertor/expt_turn_result_filter.go
@@ -26,11 +26,11 @@ func ExptTurnResultFilterEntity2PO(filterEntity *entity.ExptTurnResultFilterEnti
 	}
 
 	return &model.ExptTurnResultFilter{
-		SpaceID:          stringifyInt64(filterEntity.SpaceID),
-		ExptID:           stringifyInt64(filterEntity.ExptID),
-		ItemID:           stringifyInt64(filterEntity.ItemID),
+		SpaceID:          strconv.FormatInt(filterEntity.SpaceID, 10),
+		ExptID:           strconv.FormatInt(filterEntity.ExptID, 10),
+		ItemID:           strconv.FormatInt(filterEntity.ItemID, 10),
 		ItemIdx:          filterEntity.ItemIdx,
-		TurnID:           stringifyInt64(filterEntity.TurnID),
+		TurnID:           strconv.FormatInt(filterEntity.TurnID, 10),
 		Status:           int32(filterEntity.Status),
 		EvalTargetData:   filterEntity.EvalTargetData,
 		EvaluatorScore:   filterEntity.EvaluatorScore,
@@ -39,6 +39,7 @@ func ExptTurnResultFilterEntity2PO(filterEntity *entity.ExptTurnResultFilterEnti
 		AnnotationString: filterEntity.AnnotationString,
 		CreatedDate:      filterEntity.CreatedDate,
 		EvalSetVersionID: strconv.FormatInt(filterEntity.EvalSetVersionID, 10),
+		UpdatedAt:        filterEntity.UpdatedAt,
 	}
 }
 
@@ -70,11 +71,6 @@ func ExptTurnResultFilterPO2Entity(filterPO *model.ExptTurnResultFilter) *entity
 	}
 }
 
-// stringifyInt64 将 int64 转换为 string
-func stringifyInt64(i int64) string {
-	return string(rune(i))
-}
-
 // ParseStringToInt64 将 string 转换为 int64
 func ParseStringToInt64(s string) int64 {
 	if s == "" {
diff --git a/backend/modules/evaluation/infra/repo/experiment/ck/expt_turn_result_filter.go b/backend/modules/evaluation/infra/repo/experiment/ck/expt_turn_result_filter.go
@@ -6,6 +6,7 @@ package ck
 import (
 	"context"
 	"fmt"
+	"os"
 	"strconv"
 	"strings"
 	"time"
@@ -116,9 +117,19 @@ func (d *exptTurnResultFilterDAOImpl) Save(ctx context.Context, filter []*model.
 // 定义浮点数比较的精度
 const floatEpsilon = 1e-8
 
+// getClickHouseDatabaseName returns the ClickHouse database name, wrapped in backticks, read from the COZE_LOOP_CLICKHOUSE_DATABASE environment variable.
+func getClickHouseDatabaseName() string {
+	dbName := os.Getenv("COZE_LOOP_CLICKHOUSE_DATABASE")
+	if dbName == "" {
+		// Default value used when the variable is unset or empty; kept for backward compatibility.
+		dbName = "cozeloop-clickhouse"
+	}
+	return "`" + dbName + "`" // NOTE(review): dbName is concatenated unescaped; a backtick in the env value would break the quoted identifier — confirm the value is trusted.
+}
+
 func (d *exptTurnResultFilterDAOImpl) QueryItemIDStates(ctx context.Context, cond *ExptTurnResultFilterQueryCond) (map[string]int32, int64, error) {
-	joinSQL, whereSQL, keywordCond, args := d.buildQueryConditions(ctx, cond)
-	sql := d.buildBaseSQL(ctx, joinSQL, whereSQL, keywordCond, cond.EvalSetSyncCkDate, &args)
+	whereSQL, keywordCond, args := d.buildQueryConditions(ctx, cond)
+	sql := d.buildBaseSQL(ctx, whereSQL, keywordCond, &args)
 	total, err := d.getTotalCount(ctx, sql, args)
 	if err != nil {
 		return nil, total, err
@@ -129,8 +140,7 @@ func (d *exptTurnResultFilterDAOImpl) QueryItemIDStates(ctx context.Context, con
 }
 
 // buildQueryConditions 构建查询条件
-func (d *exptTurnResultFilterDAOImpl) buildQueryConditions(ctx context.Context, cond *ExptTurnResultFilterQueryCond) (string, string, string, []interface{}) {
-	joinSQL := ""
+func (d *exptTurnResultFilterDAOImpl) buildQueryConditions(ctx context.Context, cond *ExptTurnResultFilterQueryCond) (string, string, []interface{}) {
 	whereSQL := ""
 	keywordCond := ""
 	args := []interface{}{}
@@ -139,7 +149,7 @@ func (d *exptTurnResultFilterDAOImpl) buildQueryConditions(ctx context.Context,
 	d.buildMapFieldConditions(cond, &whereSQL, &args)
 	d.buildKeywordSearchConditions(ctx, cond, &keywordCond, &args)
 
-	return joinSQL, whereSQL, keywordCond, args
+	return whereSQL, keywordCond, args
 }
 
 // buildMainTableConditions 构建主表字段条件
@@ -364,23 +374,18 @@ func (d *exptTurnResultFilterDAOImpl) buildKeywordSearchConditions(ctx context.C
 }
 
 // buildBaseSQL 构建基础SQL语句
-func (d *exptTurnResultFilterDAOImpl) buildBaseSQL(ctx context.Context, joinSQL, whereSQL, keywordCond, evalSetSyncCkDate string, args *[]interface{}) string {
-	sql := "SELECT  etrf.item_id, etrf.status FROM " + d.configer.GetCKDBName(ctx).ExptTurnResultFilterDBName + ".expt_turn_result_filter etrf"
+func (d *exptTurnResultFilterDAOImpl) buildBaseSQL(ctx context.Context, whereSQL, keywordCond string, args *[]interface{}) string {
+	sql := "SELECT  etrf.item_id, etrf.status FROM " + getClickHouseDatabaseName() + ".expt_turn_result_filter etrf"
 	sql += " WHERE 1=1"
-	if joinSQL != "" || keywordCond != "" {
-		sql += " And dis.sync_ck_date = ?"
+	if keywordCond != "" {
 		// 将 evalSetSyncCkDate 插入到 args 切片的第一个位置
-		newArgs := make([]interface{}, 0, len(*args)+1)
-		newArgs = append(newArgs, evalSetSyncCkDate)
+		newArgs := make([]interface{}, 0, len(*args))
 		newArgs = append(newArgs, *args...)
 		*args = newArgs
 	}
 	if whereSQL != "" {
 		sql += whereSQL
 	}
-	if joinSQL != "" {
-		sql += joinSQL
-	}
 	if keywordCond != "" {
 		sql += keywordCond
 	}
@@ -389,7 +394,7 @@ func (d *exptTurnResultFilterDAOImpl) buildBaseSQL(ctx context.Context, joinSQL,
 
 // getTotalCount 获取总记录数
 func (d *exptTurnResultFilterDAOImpl) getTotalCount(ctx context.Context, sql string, args []interface{}) (int64, error) {
-	countSQL := "SELECT COUNT(DISTINCT etrf.item_id) FROM (" + sql + ")"
+	countSQL := "SELECT COUNT(DISTINCT item_id) FROM (" + sql + ")"
 	var total int64
 	logs.CtxInfo(ctx, "Query count sql: %v, args: %v", countSQL, args)
 	if err := d.db.NewSession(ctx).Raw(countSQL, args...).Scan(&total).Error; err != nil {
@@ -509,7 +514,7 @@ func (d *exptTurnResultFilterDAOImpl) buildGetByExptIDItemIDsSQL(ctx context.Con
 		"etrf.evaluator_score['key9'] as evaluator_score_key_9, " +
 		"etrf.evaluator_score['key10'] as evaluator_score_key_10, " +
 		"etrf.evaluator_score_corrected " +
-		"FROM " + d.configer.GetCKDBName(ctx).ExptTurnResultFilterDBName + ".expt_turn_result_filter" + " etrf " +
+		"FROM " + getClickHouseDatabaseName() + ".expt_turn_result_filter" + " etrf " +
 		"WHERE etrf.space_id = ? AND etrf.expt_id = ? AND etrf.created_date =?"
 	if len(itemIDs) > 0 {
 		sql += " AND etrf.item_id IN (?)"
diff --git a/backend/modules/evaluation/infra/repo/experiment/expt_turn_result_filter_repo_impl.go b/backend/modules/evaluation/infra/repo/experiment/expt_turn_result_filter_repo_impl.go
@@ -6,6 +6,7 @@ package experiment
 import (
 	"context"
 	"strconv"
+	"time"
 
 	"github.com/coze-dev/coze-loop/backend/infra/db"
 	"github.com/coze-dev/coze-loop/backend/modules/evaluation/domain/entity"
@@ -40,6 +41,7 @@ func (e *ExptTurnResultFilterRepoImpl) Save(ctx context.Context, filter []*entit
 	// 转换为 model.ExptTurnResultFilterAccelerator
 	models := make([]*model.ExptTurnResultFilter, 0, len(filter))
 	for _, filterEntity := range filter {
+		filterEntity.UpdatedAt = time.Now()
 		models = append(models, convertor.ExptTurnResultFilterEntity2PO(filterEntity))
 	}
 	logs.CtxInfo(ctx, "ExptTurnResultFilterRepoImpl.Save: %v", json.Jsonify(models))
diff --git a/release/deployment/docker-compose/bootstrap/clickhouse-init/init-sql/evaluation.sql b/release/deployment/docker-compose/bootstrap/clickhouse-init/init-sql/evaluation.sql
@@ -1,11 +1,8 @@
 -- Copyright (c) 2025 coze-dev Authors
 -- SPDX-License-Identifier: Apache-2.0
 
--- Create database if not exists
-CREATE DATABASE IF NOT EXISTS cozeloop_evaluation;
-
--- Create expt_turn_result_filter_local table for docker environment
-CREATE TABLE IF NOT EXISTS cozeloop_evaluation.expt_turn_result_filter_local
+-- Create expt_turn_result_filter table for docker environment
+CREATE TABLE IF NOT EXISTS expt_turn_result_filter
 (
     `space_id` String,
     `expt_id` String,
@@ -28,7 +25,7 @@ CREATE TABLE IF NOT EXISTS cozeloop_evaluation.expt_turn_result_filter_local
     INDEX idx_item_id item_id TYPE bloom_filter() GRANULARITY 1,
     INDEX idx_turn_id turn_id TYPE bloom_filter() GRANULARITY 1
 )
-ENGINE = ReplacingMergeTree()
+ENGINE = ReplacingMergeTree(updated_at)
 PARTITION BY created_date
 ORDER BY (expt_id, item_id, turn_id)
 SETTINGS index_granularity = 8192;
diff --git a/release/deployment/docker-compose/conf/evaluation.yaml b/release/deployment/docker-compose/conf/evaluation.yaml
@@ -49,7 +49,7 @@ expt_export_csv_event_rmq:
 
 # ClickHouse table configuration
 clickhouse_table_config:
-  expt_turn_result_filter_table_name: 'expt_turn_result_filter_local'
+  expt_turn_result_filter_table_name: 'expt_turn_result_filter'
 
 rate_limiter_conf:
   - key_expr: biz_key + string(space_id)
@@ -2202,4 +2202,4 @@ expt_export_white_list:
   allow_all: true
 
 clickhouse_config:
-  expt_turn_result_filter_db_name: "cozeloop_evaluation"
+  expt_turn_result_filter_db_name: "cozeloop-clickhouse"
diff --git a/release/deployment/docker-compose/conf/model_config.yaml b/release/deployment/docker-compose/conf/model_config.yaml
@@ -1,31 +1,159 @@
 models:
+  # reasoning model
   - id: 1
-    name: "doubao"
+    workspace_id: 0 # In the future, there will be the concept of public/private workspaces. Public models are managed by the public workspace, private models by the private workspace. Currently, all models belong to the public workspace, and the public workspace id is temporarily set to 0.
+    name: "deepseek-r1-distill-qwen-32b-250120"
     frame: "eino"
     protocol: "ark"
     protocol_config:
-      api_key: "***"
-      model: "***"
+      api_key: "***" # Placeholder: never commit a real credential; fill in the actual key locally.
+      model: "ep-20250304143659-5bcjt"
     param_config:
       param_schemas:
         - name: "temperature"
           label: "temperature"
-          desc: "Increasing temperature makes model output more diverse and creative, while decreasing it makes output more focused on instructions but less diverse. It's recommended not to adjust this simultaneously with 'Top p'."
+          desc: "Increasing temperature will make the model output more diverse and creative. Conversely, lowering the temperature will make the output more compliant with instructions but reduce diversity. It is recommended not to adjust together with 'Top p'."
           type: "float"
           min: "0"
           max: "1.0"
           default_val: "0.7"
         - name: "max_tokens"
           label: "max_tokens"
-          desc: "Controls the maximum number of tokens in model output. Typically, 100 tokens equals about 150 Chinese characters."
+          desc: "Controls the maximum length of model output tokens. Typically, 100 tokens are about 150 Chinese characters."
           type: "int"
           min: "1"
-          max: "4096"
+          max: "8192"
           default_val: "2048"
+  # multimodal model
+  - id: 2
+    workspace_id: 0 # In the future, there will be the concept of public/private workspaces. Public models are managed by the public workspace, private models by the private workspace. Currently, all models belong to the public workspace, and the public workspace id is temporarily set to 0.
+    name: "doubao-1.5-vision-pro-32k"
+    desc: ""
+    ability:
+      max_context_tokens: 65536
+      max_input_tokens: 65536
+      max_output_tokens: 8192
+      function_call: false
+      json_mode: false
+      multi_modal: true
+      ability_multi_modal:
+        image: true
+        ability_image:
+          url_enabled: true
+          binary_enabled: true
+          max_image_size: 20 # unit MB
+          max_image_count: 20
+    frame: "eino"
+    protocol: "ark"
+    protocol_config:
+      base_url: "https://ark.cn-beijing.volces.com/api/v3"
+      api_key: "***" # Placeholder: never commit a real credential; fill in the actual key locally.
+      model: "ep-20250304145131-ndcct"
+      protocol_config_ark:
+        region: "cn-beijing"
+    scenario_configs:
+      default:
+        scenario: "default"
+        quota:
+          qpm: 0
+          tpm: 0
+        unavailable: false
+      evaluator:
+        scenario: "evaluator"
+        quota:
+          qpm: 0
+          tpm: 0
+        unavailable: false
+    param_config:
+      param_schemas:
+        - name: "temperature"
+          label: "temperature"
+          desc: "Increasing temperature will make the model output more diverse and creative. Conversely, lowering the temperature will make the output more compliant with instructions but reduce diversity. It is recommended not to adjust together with 'Top p'."
+          type: "float"
+          min: "0"
+          max: "1.0"
+          default_val: "0.7"
+        - name: "max_tokens"
+          label: "max_tokens"
+          desc: "Controls the maximum length of model output tokens. Typically, 100 tokens are about 150 Chinese characters."
+          type: "int"
+          min: "1"
+          max: "8192"
+          default_val: "2048"
+  # fc model
+  - id: 3
+    workspace_id: 0 # In the future, there will be the concept of public/private workspaces. Public models are managed by the public workspace, private models by the private workspace. Currently, all models belong to the public workspace, and the public workspace id is temporarily set to 0.
+    name: "doubao-1.5-lite-32k"
+    desc: ""
+    ability:
+      max_context_tokens: 32000
+      max_input_tokens: 32000
+      max_output_tokens: 12000
+      function_call: true
+      json_mode: false
+      multi_modal: false
+    frame: "eino"
+    protocol: "ark"
+    protocol_config:
+      base_url: "https://ark.cn-beijing.volces.com/api/v3"
+      api_key: "***" # Placeholder: never commit a real credential; fill in the actual key locally.
+      model: "ep-20250227201314-frn9m"
+      protocol_config_ark:
+        region: "cn-beijing"
+    scenario_configs:
+      default:
+        scenario: "default"
+        quota:
+          qpm: 0
+          tpm: 0
+        unavailable: false
+      evaluator:
+        scenario: "evaluator"
+        quota:
+          qpm: 0
+          tpm: 0
+        unavailable: false
+    param_config:
+      param_schemas:
+        - name: "temperature"
+          label: "temperature"
+          desc: "Increasing temperature will make the model output more diverse and creative. Conversely, lowering the temperature will make the output more compliant with instructions but reduce diversity. It is recommended not to adjust together with 'Top p'."
+          type: "float"
+          min: "0"
+          max: "1.0"
+          default_val: "0.1"
         - name: "top_p"
           label: "top_p"
-          desc: "Selects the minimum token set with cumulative probability reaching top_p during generation, excluding tokens outside the set, balancing diversity and reasonableness."
+          desc: "The model will consider token results within top_p probability mass."
           type: "float"
-          min: "0.001"
+          min: "0"
           max: "1.0"
-          default_val: "0.7"
+          default_val: "0.1"
+        - name: "max_tokens"
+          label: "max_tokens"
+          desc: "Controls the maximum length of model output tokens. Typically, 100 tokens are about 150 Chinese characters."
+          type: "int"
+          min: "1"
+          max: "8192"
+          default_val: "2048"
+        - name: "top_k"
+          label: "top_k"  # Displayed as a name on the front end
+          desc: "Only sample from the top k tokens with the highest probability to limit the candidate range and improve generation stability." # Displayed as a description on the front end
+          type: "int" # Required. Must be float, int, bool, string
+          min: "1"
+          max: "100"
+          default_val: "50"
+        - name: "frequency_penalty"
+          label: "frequency_penalty"  # Displayed as a name on the front end
+          desc: "Penalizes generated tokens, with higher frequency resulting in higher penalties, suppressing repetitive content." # Displayed as a description on the front end
+          type: "float" # Required. Must be float, int, bool, string
+          min: "0"
+          max: "2.0"
+          default_val: "0"
+        - name: "presence_penalty"
+          label: "presence_penalty"  # Displayed as a name on the front end
+          desc: "Penalizes all tokens that have appeared, preventing the same content from appearing repeatedly, increasing content diversity." # Displayed as a description on the front end
+          type: "float" # Required. Must be float, int, bool, string
+          min: "0"
+          max: "2.0"
+          default_val: "0"
diff --git a/release/deployment/helm-chart/charts/app/bootstrap/init/clickhouse/init-sql/evaluation.sql b/release/deployment/helm-chart/charts/app/bootstrap/init/clickhouse/init-sql/evaluation.sql
@@ -1,11 +1,8 @@
 -- Copyright (c) 2025 coze-dev Authors
 -- SPDX-License-Identifier: Apache-2.0
 
--- Create database if not exists
-CREATE DATABASE IF NOT EXISTS cozeloop_evaluation;
-
--- Create expt_turn_result_filter_local table for kubernetes environment
-CREATE TABLE IF NOT EXISTS cozeloop_evaluation.expt_turn_result_filter_local
+-- Create expt_turn_result_filter table for kubernetes environment
+CREATE TABLE IF NOT EXISTS expt_turn_result_filter
 (
     `space_id` String,
     `expt_id` String,
diff --git a/release/deployment/helm-chart/umbrella/conf/evaluation.yaml b/release/deployment/helm-chart/umbrella/conf/evaluation.yaml
@@ -49,7 +49,7 @@ expt_export_csv_event_rmq:
 
 # ClickHouse table configuration
 clickhouse_table_config:
-  expt_turn_result_filter_table_name: 'expt_turn_result_filter_local'
+  expt_turn_result_filter_table_name: 'expt_turn_result_filter'
 
 rate_limiter_conf:
   - key_expr: biz_key + string(space_id)