Skip to content

Commit d40a561

Browse files
committed
fix(evaluation): panic
fix(evaluation): BatchGetExperimentResult with sorted turn results fix(evaluation): BatchGetExperimentResult panic fix(evaluation): kill expt fix(evaluation): kill expt fix(evaluation): ut fix(evaluation): expt terminating for time cost fix(evaluation): ut
1 parent 0cfcdc3 commit d40a561

38 files changed

+1445
-208
lines changed

backend/kitex_gen/coze/loop/evaluation/domain/expt/expt.go

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/modules/evaluation/application/experiment_app.go

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ import (
77
"context"
88
"fmt"
99
"strconv"
10+
"time"
1011

1112
"github.com/bytedance/gg/gptr"
1213

14+
"github.com/coze-dev/coze-loop/backend/infra/backoff"
1315
"github.com/coze-dev/coze-loop/backend/infra/idgen"
1416
"github.com/coze-dev/coze-loop/backend/kitex_gen/base"
1517
"github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/evaluation"
@@ -28,6 +30,7 @@ import (
2830
"github.com/coze-dev/coze-loop/backend/modules/evaluation/pkg/errno"
2931
"github.com/coze-dev/coze-loop/backend/pkg/errorx"
3032
"github.com/coze-dev/coze-loop/backend/pkg/json"
33+
"github.com/coze-dev/coze-loop/backend/pkg/lang/goroutine"
3134
"github.com/coze-dev/coze-loop/backend/pkg/lang/maps"
3235
"github.com/coze-dev/coze-loop/backend/pkg/lang/ptr"
3336
"github.com/coze-dev/coze-loop/backend/pkg/lang/slices"
@@ -514,27 +517,49 @@ func (e *experimentApplication) RetryExperiment(ctx context.Context, req *expt.R
514517

515518
func (e *experimentApplication) KillExperiment(ctx context.Context, req *expt.KillExperimentRequest) (r *expt.KillExperimentResponse, err error) {
516519
session := entity.NewSession(ctx)
520+
logs.CtxInfo(ctx, "KillExperiment receive req, expt_id: %v, user_id: %v", req.GetExptID(), session.UserID)
517521

518522
got, err := e.manager.Get(ctx, req.GetExptID(), req.GetWorkspaceID(), session)
519523
if err != nil {
520524
return nil, err
521525
}
522526

523-
err = e.auth.AuthorizationWithoutSPI(ctx, &rpc.AuthorizationWithoutSPIParam{
524-
ObjectID: strconv.FormatInt(req.GetExptID(), 10),
525-
SpaceID: req.GetWorkspaceID(),
526-
ActionObjects: []*rpc.ActionObject{{Action: gptr.Of(consts.Run), EntityType: gptr.Of(rpc.AuthEntityType_EvaluationExperiment)}},
527-
OwnerID: gptr.Of(got.CreatedBy),
528-
ResourceSpaceID: req.GetWorkspaceID(),
529-
})
530-
if err != nil {
531-
return nil, err
527+
if got.Status == entity.ExptStatus_Pending {
528+
return nil, errorx.NewByCode(errno.TerminatePendingExperimentErrorCode)
532529
}
533530

534-
if err := e.manager.CompleteExpt(ctx, req.GetExptID(), req.GetWorkspaceID(), session, entity.WithStatus(entity.ExptStatus_Terminated)); err != nil {
531+
if !e.configer.GetMaintainerUserIDs(ctx)[session.UserID] {
532+
if err := e.auth.AuthorizationWithoutSPI(ctx, &rpc.AuthorizationWithoutSPIParam{
533+
ObjectID: strconv.FormatInt(req.GetExptID(), 10),
534+
SpaceID: req.GetWorkspaceID(),
535+
ActionObjects: []*rpc.ActionObject{{Action: gptr.Of(consts.Run), EntityType: gptr.Of(rpc.AuthEntityType_EvaluationExperiment)}},
536+
OwnerID: gptr.Of(got.CreatedBy),
537+
ResourceSpaceID: req.GetWorkspaceID(),
538+
}); err != nil {
539+
return nil, err
540+
}
541+
}
542+
543+
if err := e.manager.SetExptTerminating(ctx, req.GetExptID(), got.LatestRunID, req.GetWorkspaceID(), session); err != nil {
535544
return nil, err
536545
}
537546

547+
kill := func(ctx context.Context, exptID, exptRunID, spaceID int64, session *entity.Session) error {
548+
if err := e.manager.CompleteRun(ctx, exptID, exptRunID, spaceID, session, entity.WithStatus(entity.ExptStatus_Terminated)); err != nil {
549+
return err
550+
}
551+
return e.manager.CompleteExpt(ctx, exptID, spaceID, session,
552+
entity.WithStatus(entity.ExptStatus_Terminated), entity.WithCompleteInterval(time.Second), entity.NoCompleteItemTurn(), entity.NoAggrCalculate())
553+
}
554+
555+
goroutine.Go(ctx, func() {
556+
if err := backoff.RetryWithElapsedTime(ctx, time.Minute*1, func() error {
557+
return kill(ctx, req.GetExptID(), got.LatestRunID, req.GetWorkspaceID(), session)
558+
}); err != nil {
559+
logs.CtxInfo(ctx, "kill expt failed, expt_id: %v, err: %v", req.GetExptID(), err)
560+
}
561+
})
562+
538563
return &expt.KillExperimentResponse{BaseResp: base.NewBaseResp()}, nil
539564
}
540565

backend/modules/evaluation/application/experiment_app_test.go

Lines changed: 151 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
repo_mocks "github.com/coze-dev/coze-loop/backend/modules/evaluation/domain/repo/mocks"
1919

2020
idgenmock "github.com/coze-dev/coze-loop/backend/infra/idgen/mocks"
21+
"github.com/coze-dev/coze-loop/backend/infra/middleware/session"
2122
"github.com/coze-dev/coze-loop/backend/kitex_gen/base"
2223
"github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/data/domain/tag"
2324
"github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/evaluation/domain/common"
@@ -1814,11 +1815,13 @@ func TestExperimentApplication_KillExperiment(t *testing.T) {
18141815
// Create mock objects
18151816
mockManager := servicemocks.NewMockIExptManager(ctrl)
18161817
mockAuth := rpcmocks.NewMockIAuthProvider(ctrl)
1818+
mockConfiger := componentMocks.NewMockIConfiger(ctrl)
18171819

18181820
// Test data
18191821
validWorkspaceID := int64(123)
18201822
validExptID := int64(456)
18211823
validUserID := int64(789)
1824+
validRunID := int64(999)
18221825

18231826
tests := []struct {
18241827
name string
@@ -1828,19 +1831,57 @@ func TestExperimentApplication_KillExperiment(t *testing.T) {
18281831
wantErr bool
18291832
}{
18301833
{
1831-
name: "successfully terminate experiment",
1834+
name: "successfully terminate experiment with maintainer permission",
18321835
req: &exptpb.KillExperimentRequest{
18331836
WorkspaceID: gptr.Of(validWorkspaceID),
18341837
ExptID: gptr.Of(validExptID),
18351838
},
18361839
mockSetup: func() {
18371840
// 获取实验信息
18381841
mockManager.EXPECT().Get(gomock.Any(), validExptID, validWorkspaceID, gomock.Any()).Return(&entity.Experiment{
1839-
ID: validExptID,
1840-
SpaceID: validWorkspaceID,
1841-
CreatedBy: strconv.FormatInt(validUserID, 10),
1842+
ID: validExptID,
1843+
SpaceID: validWorkspaceID,
1844+
CreatedBy: strconv.FormatInt(validUserID, 10),
1845+
LatestRunID: validRunID,
1846+
}, nil)
1847+
1848+
// Maintainer权限检查 - 用户是maintainer
1849+
mockConfiger.EXPECT().GetMaintainerUserIDs(gomock.Any()).Return(map[string]bool{
1850+
strconv.FormatInt(validUserID, 10): true,
1851+
})
1852+
1853+
// 设置终止中状态(实现中同步执行)
1854+
mockManager.EXPECT().SetExptTerminating(gomock.Any(), validExptID, validRunID, validWorkspaceID, gomock.Any()).Return(nil)
1855+
1856+
// 异步终止:允许在后台调用,不校验调用次数
1857+
mockManager.EXPECT().CompleteRun(gomock.Any(), validExptID, validRunID, validWorkspaceID, gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
1858+
mockManager.EXPECT().CompleteExpt(gomock.Any(), validExptID, validWorkspaceID, gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
1859+
},
1860+
wantResp: &exptpb.KillExperimentResponse{
1861+
BaseResp: base.NewBaseResp(),
1862+
},
1863+
wantErr: false,
1864+
},
1865+
{
1866+
name: "successfully terminate experiment with regular permission",
1867+
req: &exptpb.KillExperimentRequest{
1868+
WorkspaceID: gptr.Of(validWorkspaceID),
1869+
ExptID: gptr.Of(validExptID),
1870+
},
1871+
mockSetup: func() {
1872+
// 获取实验信息
1873+
mockManager.EXPECT().Get(gomock.Any(), validExptID, validWorkspaceID, gomock.Any()).Return(&entity.Experiment{
1874+
ID: validExptID,
1875+
SpaceID: validWorkspaceID,
1876+
CreatedBy: strconv.FormatInt(validUserID, 10),
1877+
LatestRunID: validRunID,
18421878
}, nil)
18431879

1880+
// Maintainer权限检查 - 用户不是maintainer
1881+
mockConfiger.EXPECT().GetMaintainerUserIDs(gomock.Any()).Return(map[string]bool{
1882+
"other_user": true,
1883+
})
1884+
18441885
// 权限验证
18451886
mockAuth.EXPECT().AuthorizationWithoutSPI(gomock.Any(), &rpc.AuthorizationWithoutSPIParam{
18461887
ObjectID: strconv.FormatInt(validExptID, 10),
@@ -1850,19 +1891,12 @@ func TestExperimentApplication_KillExperiment(t *testing.T) {
18501891
ResourceSpaceID: validWorkspaceID,
18511892
}).Return(nil)
18521893

1853-
// 终止实验
1854-
mockManager.EXPECT().CompleteExpt(gomock.Any(), validExptID, validWorkspaceID, gomock.Any(), gomock.Any()).DoAndReturn(
1855-
func(ctx context.Context, exptID, spaceID int64, session *entity.Session, opts ...entity.CompleteExptOptionFn) error {
1856-
// 验证传入的 opts 是否包含正确的状态设置
1857-
opt := &entity.CompleteExptOption{}
1858-
for _, fn := range opts {
1859-
fn(opt)
1860-
}
1861-
if opt.Status != entity.ExptStatus_Terminated {
1862-
t.Errorf("expected status %v, got %v", entity.ExptStatus_Terminated, opt.Status)
1863-
}
1864-
return nil
1865-
})
1894+
// 设置终止中状态(实现中同步执行)
1895+
mockManager.EXPECT().SetExptTerminating(gomock.Any(), validExptID, validRunID, validWorkspaceID, gomock.Any()).Return(nil)
1896+
1897+
// 异步终止:允许在后台调用,不校验调用次数
1898+
mockManager.EXPECT().CompleteRun(gomock.Any(), validExptID, validRunID, validWorkspaceID, gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
1899+
mockManager.EXPECT().CompleteExpt(gomock.Any(), validExptID, validWorkspaceID, gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
18661900
},
18671901
wantResp: &exptpb.KillExperimentResponse{
18681902
BaseResp: base.NewBaseResp(),
@@ -1881,6 +1915,99 @@ func TestExperimentApplication_KillExperiment(t *testing.T) {
18811915
wantResp: nil,
18821916
wantErr: true,
18831917
},
1918+
{
1919+
name: "permission validation failed for regular user",
1920+
req: &exptpb.KillExperimentRequest{
1921+
WorkspaceID: gptr.Of(validWorkspaceID),
1922+
ExptID: gptr.Of(validExptID),
1923+
},
1924+
mockSetup: func() {
1925+
// 获取实验信息
1926+
mockManager.EXPECT().Get(gomock.Any(), validExptID, validWorkspaceID, gomock.Any()).Return(&entity.Experiment{
1927+
ID: validExptID,
1928+
SpaceID: validWorkspaceID,
1929+
CreatedBy: strconv.FormatInt(validUserID, 10),
1930+
LatestRunID: validRunID,
1931+
}, nil)
1932+
1933+
// Maintainer权限检查 - 用户不是maintainer
1934+
mockConfiger.EXPECT().GetMaintainerUserIDs(gomock.Any()).Return(map[string]bool{
1935+
"other_user": true,
1936+
})
1937+
1938+
// 权限验证失败
1939+
mockAuth.EXPECT().AuthorizationWithoutSPI(gomock.Any(), &rpc.AuthorizationWithoutSPIParam{
1940+
ObjectID: strconv.FormatInt(validExptID, 10),
1941+
SpaceID: validWorkspaceID,
1942+
ActionObjects: []*rpc.ActionObject{{Action: gptr.Of(consts.Run), EntityType: gptr.Of(rpc.AuthEntityType_EvaluationExperiment)}},
1943+
OwnerID: gptr.Of(strconv.FormatInt(validUserID, 10)),
1944+
ResourceSpaceID: validWorkspaceID,
1945+
}).Return(errorx.NewByCode(errno.CommonNoPermissionCode))
1946+
},
1947+
wantResp: nil,
1948+
wantErr: true,
1949+
},
1950+
{
1951+
name: "complete run failed",
1952+
req: &exptpb.KillExperimentRequest{
1953+
WorkspaceID: gptr.Of(validWorkspaceID),
1954+
ExptID: gptr.Of(validExptID),
1955+
},
1956+
mockSetup: func() {
1957+
// 获取实验信息
1958+
mockManager.EXPECT().Get(gomock.Any(), validExptID, validWorkspaceID, gomock.Any()).Return(&entity.Experiment{
1959+
ID: validExptID,
1960+
SpaceID: validWorkspaceID,
1961+
CreatedBy: strconv.FormatInt(validUserID, 10),
1962+
LatestRunID: validRunID,
1963+
}, nil)
1964+
1965+
// Maintainer权限检查 - 用户是maintainer
1966+
mockConfiger.EXPECT().GetMaintainerUserIDs(gomock.Any()).Return(map[string]bool{
1967+
strconv.FormatInt(validUserID, 10): true,
1968+
})
1969+
1970+
// 设置终止中状态
1971+
mockManager.EXPECT().SetExptTerminating(gomock.Any(), validExptID, validRunID, validWorkspaceID, gomock.Any()).Return(nil)
1972+
1973+
// 异步终止运行失败:允许后台调用
1974+
mockManager.EXPECT().CompleteRun(gomock.Any(), validExptID, validRunID, validWorkspaceID, gomock.Any(), gomock.Any()).Return(
1975+
errorx.NewByCode(errno.CommonInternalErrorCode)).AnyTimes()
1976+
},
1977+
wantResp: &exptpb.KillExperimentResponse{BaseResp: base.NewBaseResp()},
1978+
wantErr: false,
1979+
},
1980+
{
1981+
name: "complete experiment failed",
1982+
req: &exptpb.KillExperimentRequest{
1983+
WorkspaceID: gptr.Of(validWorkspaceID),
1984+
ExptID: gptr.Of(validExptID),
1985+
},
1986+
mockSetup: func() {
1987+
// 获取实验信息
1988+
mockManager.EXPECT().Get(gomock.Any(), validExptID, validWorkspaceID, gomock.Any()).Return(&entity.Experiment{
1989+
ID: validExptID,
1990+
SpaceID: validWorkspaceID,
1991+
CreatedBy: strconv.FormatInt(validUserID, 10),
1992+
LatestRunID: validRunID,
1993+
}, nil)
1994+
1995+
// Maintainer权限检查 - 用户是maintainer
1996+
mockConfiger.EXPECT().GetMaintainerUserIDs(gomock.Any()).Return(map[string]bool{
1997+
strconv.FormatInt(validUserID, 10): true,
1998+
})
1999+
2000+
// 设置终止中状态
2001+
mockManager.EXPECT().SetExptTerminating(gomock.Any(), validExptID, validRunID, validWorkspaceID, gomock.Any()).Return(nil)
2002+
2003+
// 异步终止
2004+
mockManager.EXPECT().CompleteRun(gomock.Any(), validExptID, validRunID, validWorkspaceID, gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
2005+
mockManager.EXPECT().CompleteExpt(gomock.Any(), validExptID, validWorkspaceID, gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(
2006+
errorx.NewByCode(errno.CommonInternalErrorCode)).AnyTimes()
2007+
},
2008+
wantResp: &exptpb.KillExperimentResponse{BaseResp: base.NewBaseResp()},
2009+
wantErr: false,
2010+
},
18842011
}
18852012

18862013
for _, tt := range tests {
@@ -1896,7 +2023,7 @@ func TestExperimentApplication_KillExperiment(t *testing.T) {
18962023
nil, // scheduler
18972024
nil, // recordEval
18982025
nil,
1899-
nil, // configer
2026+
mockConfiger, // configer
19002027
mockAuth,
19012028
nil, // userInfoService
19022029
nil, // evalTargetService
@@ -1907,8 +2034,13 @@ func TestExperimentApplication_KillExperiment(t *testing.T) {
19072034
nil,
19082035
)
19092036

2037+
// 设置 context 中的 UserID,这样 entity.NewSession 才能获取到 UserID
2038+
ctx := session.WithCtxUser(context.Background(), &session.User{
2039+
ID: strconv.FormatInt(validUserID, 10),
2040+
})
2041+
19102042
// 执行测试
1911-
gotResp, err := app.KillExperiment(context.Background(), tt.req)
2043+
gotResp, err := app.KillExperiment(ctx, tt.req)
19122044

19132045
// 验证结果
19142046
if tt.wantErr {

backend/modules/evaluation/domain/component/conf.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@ type IConfiger interface {
1818
GetExptTurnResultFilterBmqProducerCfg(ctx context.Context) *entity.BmqProducerCfg
1919
GetCKDBName(ctx context.Context) *entity.CKDBConfig
2020
GetExptExportWhiteList(ctx context.Context) *entity.ExptExportWhiteList
21+
GetMaintainerUserIDs(ctx context.Context) map[string]bool
2122
}

backend/modules/evaluation/domain/component/mocks/expt_configer.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/modules/evaluation/domain/entity/common.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,3 +355,7 @@ func StorageProviderFromString(s string) (StorageProvider, error) {
355355
}
356356

357357
func StorageProviderPtr(v StorageProvider) *StorageProvider { return &v }
358+
359+
type SystemMaintainerConf struct {
360+
UserIDs []string `json:"user_ids" mapstructure:"user_ids"`
361+
}

backend/modules/evaluation/domain/entity/expt.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ const (
3535
ExptStatus_Terminated ExptStatus = 13
3636
// System terminated
3737
ExptStatus_SystemTerminated ExptStatus = 14
38+
ExptStatus_Terminating ExptStatus = 15
3839

3940
// 流式执行完成,不再接收新的请求
4041
ExptStatus_Draining ExptStatus = 21
@@ -265,8 +266,6 @@ type ExptCalculateStats struct {
265266
SuccessItemCnt int
266267
ProcessingItemCnt int
267268
TerminatedItemCnt int
268-
269-
IncompleteTurnIDs []*ItemTurnID
270269
}
271270

272271
type ItemTurnID struct {

0 commit comments

Comments
 (0)