From 5d02080cfcff7db78f56d69599dafaa5f0f3ff89 Mon Sep 17 00:00:00 2001 From: papysans <807399089@qq.com> Date: Fri, 22 May 2026 17:28:07 +0800 Subject: [PATCH 1/2] feat(catalog): expose upstream zh descriptions via locale-aware API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream catalog/index.json already ships per-entry description_zh alongside description (en); the ingest path now stores them in a new descriptions jsonb column on capability_items and capability_versions, mirroring the JSONB locale-map pattern item_categories already uses. API endpoints resolve description per request locale (?lang= query > Accept-Language > en) and surface the raw descriptions map so clients can re-resolve on locale switch without re-fetching. - migration: add descriptions jsonb DEFAULT '{}' on items + versions - ingest: catalogEntry.DescriptionZh + integral-replacement write - handlers/locale.go: ResolveLocale + PickDescription + list helpers - buildItemResponse + bypass list/search/recommend paths wired up - search SELECT columns include descriptions for vector/hybrid paths - test fixtures (sqlite CREATE TABLE) updated; 24 new unit tests pass Resolved description field stays backwards compatible — callers that omit Accept-Language continue to see the English string. 
--- docs/CATALOG_INGEST.md | 20 +++ server/cmd/migrate/main_test.go | 1 + server/internal/handlers/capability_item.go | 25 ++-- .../internal/handlers/capability_registry.go | 2 + server/internal/handlers/locale.go | 116 ++++++++++++++++ server/internal/handlers/locale_test.go | 128 ++++++++++++++++++ server/internal/handlers/recommend.go | 2 + server/internal/handlers/registry_test.go | 2 + server/internal/handlers/search.go | 20 +++ server/internal/models/models.go | 2 + .../services/catalog_ingest_service.go | 112 ++++++++++++--- .../services/catalog_ingest_service_test.go | 124 +++++++++++++++++ server/internal/services/scan_service_test.go | 5 +- server/internal/services/search_service.go | 6 +- ...000000_add_descriptions_jsonb_to_items.sql | 13 ++ 15 files changed, 542 insertions(+), 36 deletions(-) create mode 100644 server/internal/handlers/locale.go create mode 100644 server/internal/handlers/locale_test.go create mode 100644 server/internal/services/catalog_ingest_service_test.go create mode 100644 server/migrations/20260522000000_add_descriptions_jsonb_to_items.sql diff --git a/docs/CATALOG_INGEST.md b/docs/CATALOG_INGEST.md index c0a29d89..ae918ff4 100644 --- a/docs/CATALOG_INGEST.md +++ b/docs/CATALOG_INGEST.md @@ -241,6 +241,26 @@ bundle 比上次少了大量 entry。检查上游是不是改了某个 source | 代码量 | 540 行 fake-git/fake-registry 逻辑 | 一个 CatalogIngestService(~830 行带详尽注释) | | 增量同步 | 全文件 walk + glob | 按 manifest entry 逐条对账,content_hash 命中即 skip | +## description 多语言(i18n) + +上游 `catalog/index.json` 的每条 entry 同时携带三个描述字段: + +- `description` — LLM 规范化的英文版本(ingest 默认消费) +- `description_zh` — 中文翻译(≈99.8% 覆盖率) +- `description_original` — 源仓库原文(暂不入库) + +ingest 写入 `capability_items.descriptions jsonb` 列(同时给 `capability_versions` 写一份),shape 为 `{"en":"...","zh":"..."}`。`capability_items.description text` 列保留作为向后兼容的英文默认值(embedding 向量也由它生成)。 + +API 侧 `ItemResponse.description` 字段会按请求 locale resolve: + +- 优先级:`?lang=` query > `Accept-Language` header > 默认 `en` +- 归一化:`zh-CN` / `zh-TW` / 
`zh-Hant` / `zh-Hans` → `zh`;`en-*` → `en`;其它 locale 落回 `en` +- 同时在响应 JSON 中暴露原始 `descriptions: {"en":"...","zh":"..."}` 对象,前端切 locale 时无需重新请求 + +前端通过 `pickItemDescription(item, locale)` helper 消费(落回顺序与服务端一致)。 + +新增字段细节见 openspec change:`openspec/changes/add-item-description-i18n/`。 + ## 相关代码与文档 - 下游 service:`server/internal/services/catalog_ingest_service.go` diff --git a/server/cmd/migrate/main_test.go b/server/cmd/migrate/main_test.go index f90fa455..77988d3b 100644 --- a/server/cmd/migrate/main_test.go +++ b/server/cmd/migrate/main_test.go @@ -45,6 +45,7 @@ func newMigrateTestDB(t *testing.T) *gorm.DB { item_type TEXT NOT NULL, name TEXT NOT NULL, description TEXT, + descriptions TEXT NOT NULL DEFAULT '{}', category TEXT, version TEXT DEFAULT '1.0.0', content TEXT, diff --git a/server/internal/handlers/capability_item.go b/server/internal/handlers/capability_item.go index d303a099..74561613 100644 --- a/server/internal/handlers/capability_item.go +++ b/server/internal/handlers/capability_item.go @@ -482,7 +482,8 @@ type ItemResponse struct { Slug string `json:"slug"` ItemType string `json:"itemType"` Name string `json:"name"` - Description string `json:"description"` + Description string `json:"description"` // Resolved per `?lang=` query or Accept-Language header; en is the default. Use `descriptions` for raw locale map. + Descriptions datatypes.JSON `json:"descriptions" swaggertype:"object"` // Raw locale → text map, e.g. {"en":"...","zh":"..."}. 
Category string `json:"category"` Version string `json:"version"` Content string `json:"content"` @@ -594,13 +595,14 @@ func reconcileItemCurrentRevision(db *gorm.DB, item *models.CapabilityItem) { Update("current_revision", latestRevision).Error } -func buildItemResponse(db *gorm.DB, item models.CapabilityItem, userID string) ItemResponse { +func buildItemResponse(c *gin.Context, db *gorm.DB, item models.CapabilityItem, userID string) ItemResponse { reconcileItemCurrentRevision(db, &item) if TagSvc != nil && item.ID != "" && len(item.Tags) == 0 { if tagsMap, err := TagSvc.GetItemTags([]string{item.ID}); err == nil && tagsMap != nil { item.Tags = tagsMap[item.ID] } } + locale := ResolveLocale(c) resp := ItemResponse{ ID: item.ID, RegistryID: item.RegistryID, @@ -608,7 +610,8 @@ func buildItemResponse(db *gorm.DB, item models.CapabilityItem, userID string) I Slug: item.Slug, ItemType: item.ItemType, Name: item.Name, - Description: item.Description, + Description: PickDescription(item.Descriptions, item.Description, locale), + Descriptions: item.Descriptions, Category: item.Category, Version: item.Version, Content: item.Content, @@ -837,6 +840,7 @@ func ListItems(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to fetch items"}) return } + ResolveItemListLocale(c, items) c.JSON(http.StatusOK, gin.H{"items": items, "total": total, "page": page, "pageSize": pageSize, "hasMore": int64((page-1)*pageSize+pageSize) < total}) } @@ -924,7 +928,7 @@ func CreateItem(c *gin.Context) { CategorySvc.EnsureCategory(req.Category, req.CreatedBy) } - c.JSON(http.StatusCreated, buildItemResponse(db, *item, c.GetString(middleware.UserIDKey))) + c.JSON(http.StatusCreated, buildItemResponse(c, db, *item, c.GetString(middleware.UserIDKey))) } // GetItem godoc @@ -945,7 +949,7 @@ func GetItem(c *gin.Context) { c.JSON(http.StatusNotFound, gin.H{"error": "Item not found"}) return } - c.JSON(http.StatusOK, buildItemResponse(db, item, 
c.GetString(middleware.UserIDKey))) + c.JSON(http.StatusOK, buildItemResponse(c, db, item, c.GetString(middleware.UserIDKey))) } // ListItemAssets godoc @@ -1206,7 +1210,7 @@ func (h *ItemHandler) updateItemFromJSON(c *gin.Context) { } } - c.JSON(http.StatusOK, buildItemResponse(db, item, c.GetString(middleware.UserIDKey))) + c.JSON(http.StatusOK, buildItemResponse(c, db, item, c.GetString(middleware.UserIDKey))) } // updateItemFromArchive handles multipart/form-data archive upload item update. @@ -1336,7 +1340,7 @@ func (h *ItemHandler) updateItemFromArchive(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update item"}) return } - c.JSON(http.StatusOK, buildItemResponse(db, item, c.GetString(middleware.UserIDKey))) + c.JSON(http.StatusOK, buildItemResponse(c, db, item, c.GetString(middleware.UserIDKey))) return } @@ -1516,7 +1520,7 @@ func (h *ItemHandler) updateItemFromArchive(c *gin.Context) { }() } - c.JSON(http.StatusOK, buildItemResponse(db, item, c.GetString(middleware.UserIDKey))) + c.JSON(http.StatusOK, buildItemResponse(c, db, item, c.GetString(middleware.UserIDKey))) } // DeleteItem godoc @@ -1841,6 +1845,7 @@ func ListAllItems(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to fetch items"}) return } + ResolveItemListLocale(c, items) // Collect unique repo IDs from preloaded registries and batch-fetch repo names repoIDSet := make(map[string]bool) @@ -2123,7 +2128,7 @@ func (h *ItemHandler) createItemFromJSON(c *gin.Context) { } } - c.JSON(http.StatusCreated, buildItemResponse(h.db, *item, c.GetString(middleware.UserIDKey))) + c.JSON(http.StatusCreated, buildItemResponse(c, h.db, *item, c.GetString(middleware.UserIDKey))) } // cleanupStorageKeys deletes uploaded objects after a later step fails. 
@@ -2388,7 +2393,7 @@ func (h *ItemHandler) createItemFromArchive(c *gin.Context) { } enqueueScanAsync(item.ID, 1, "create") - c.JSON(http.StatusCreated, buildItemResponse(h.db, *item, c.GetString(middleware.UserIDKey))) + c.JSON(http.StatusCreated, buildItemResponse(c, h.db, *item, c.GetString(middleware.UserIDKey))) } // MoveItem godoc diff --git a/server/internal/handlers/capability_registry.go b/server/internal/handlers/capability_registry.go index 1442a011..96bad79c 100644 --- a/server/internal/handlers/capability_registry.go +++ b/server/internal/handlers/capability_registry.go @@ -464,6 +464,8 @@ func ListMyItems(c *gin.Context) { repoVisibilityMap["public"] = "public" } + ResolveItemListLocale(c, items) + // Build response with repo info result := make([]MyItem, len(items)) for i, item := range items { diff --git a/server/internal/handlers/locale.go b/server/internal/handlers/locale.go new file mode 100644 index 00000000..9cc504c3 --- /dev/null +++ b/server/internal/handlers/locale.go @@ -0,0 +1,116 @@ +package handlers + +import ( + "encoding/json" + "strings" + + "github.com/costrict/costrict-web/server/internal/models" + "github.com/gin-gonic/gin" + "gorm.io/datatypes" +) + +// DefaultLocale is the fallback when neither `?lang=` nor `Accept-Language` +// yields a recognized locale, and the safe choice for "no header" callers +// (matches the pre-i18n API behavior so external consumers see no change). +const DefaultLocale = "en" + +// ResolveLocale picks a locale tag from the request. Priority: +// +// 1. `?lang=` query parameter (highest) +// 2. `Accept-Language` header — first language tag +// 3. DefaultLocale ("en") +// +// Returned value is normalized to the primary subtag (zh-CN → zh, en-US → en); +// unrecognized locales fall back to DefaultLocale. 
+func ResolveLocale(c *gin.Context) string { + if c == nil { + return DefaultLocale + } + if q := strings.TrimSpace(c.Query("lang")); q != "" { + return normalizeLocale(q) + } + if h := strings.TrimSpace(c.GetHeader("Accept-Language")); h != "" { + // Accept-Language: zh-CN,en;q=0.9 → take "zh-CN" (first tag), ignore q values. + first := strings.SplitN(h, ",", 2)[0] + first = strings.SplitN(first, ";", 2)[0] + return normalizeLocale(first) + } + return DefaultLocale +} + +// normalizeLocale collapses a BCP-47-ish tag down to the primary subtag we +// actually serve. Upstream currently ships only en + zh translations; any +// other language (ja/ko/de/…) falls back to en until upstream produces a +// matching `description_<lang>` field. +func normalizeLocale(raw string) string { + if raw == "" { + return DefaultLocale + } + primary := strings.ToLower(strings.SplitN(raw, "-", 2)[0]) + primary = strings.SplitN(primary, "_", 2)[0] + switch primary { + case "zh": + return "zh" + case "en": + return "en" + default: + return DefaultLocale + } +} + +// PickDescription resolves a single localized description string from the +// per-item descriptions JSONB map, falling back gracefully to keep +// pre-i18n rows readable. +// +// Order: +// +// 1. descriptions[locale] if non-empty +// 2. descriptions[DefaultLocale] if non-empty +// 3. fallbackText (the legacy capability_items.description column) +// 4. 
"" +func PickDescription(descriptions datatypes.JSON, fallbackText string, locale string) string { + if len(descriptions) > 0 { + var m map[string]string + if err := json.Unmarshal(descriptions, &m); err == nil { + if v := m[locale]; v != "" { + return v + } + if v := m[DefaultLocale]; v != "" { + return v + } + } + } + return fallbackText +} + +// ResolveItemListLocale rewrites the `Description` field of each item in +// place to the locale-resolved value, so list endpoints that serialize +// raw `[]models.CapabilityItem` (or types embedding it) still honor +// Accept-Language without needing to wrap every row in ItemResponse. +// The original `descriptions` JSONB stays on the row so frontends can +// re-resolve on locale switch without re-fetching. +func ResolveItemListLocale(c *gin.Context, items []models.CapabilityItem) { + if len(items) == 0 { + return + } + locale := ResolveLocale(c) + for i := range items { + items[i].Description = PickDescription(items[i].Descriptions, items[i].Description, locale) + } +} + +// ResolveCapabilityItemPointersLocale handles the case where the caller has +// pointers (e.g. inside a result struct field) and wants in-place rewrite +// without exposing the full slice surface to ResolveItemListLocale. 
+func ResolveCapabilityItemPointersLocale(c *gin.Context, items []*models.CapabilityItem) { + if len(items) == 0 { + return + } + locale := ResolveLocale(c) + for _, it := range items { + if it == nil { + continue + } + it.Description = PickDescription(it.Descriptions, it.Description, locale) + } +} diff --git a/server/internal/handlers/locale_test.go b/server/internal/handlers/locale_test.go new file mode 100644 index 00000000..a406877d --- /dev/null +++ b/server/internal/handlers/locale_test.go @@ -0,0 +1,128 @@ +package handlers + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/gin-gonic/gin" + "gorm.io/datatypes" +) + +func newTestContext(t *testing.T, target, acceptLanguage string) *gin.Context { + t.Helper() + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + req := httptest.NewRequest(http.MethodGet, target, nil) + if acceptLanguage != "" { + req.Header.Set("Accept-Language", acceptLanguage) + } + c.Request = req + return c +} + +func TestResolveLocale_QueryParamWins(t *testing.T) { + c := newTestContext(t, "/items?lang=en", "zh-CN") + if got := ResolveLocale(c); got != "en" { + t.Errorf("?lang= should override Accept-Language: got %q want %q", got, "en") + } +} + +func TestResolveLocale_AcceptLanguageZhCN(t *testing.T) { + c := newTestContext(t, "/items", "zh-CN,en;q=0.9") + if got := ResolveLocale(c); got != "zh" { + t.Errorf("zh-CN should normalize to zh: got %q", got) + } +} + +func TestResolveLocale_AcceptLanguageZhTW(t *testing.T) { + c := newTestContext(t, "/items", "zh-TW") + if got := ResolveLocale(c); got != "zh" { + t.Errorf("zh-TW should normalize to zh: got %q", got) + } +} + +func TestResolveLocale_AcceptLanguageZhHant(t *testing.T) { + c := newTestContext(t, "/items", "zh-Hant") + if got := ResolveLocale(c); got != "zh" { + t.Errorf("zh-Hant should normalize to zh: got %q", got) + } +} + +func TestResolveLocale_AcceptLanguageEnUS(t *testing.T) { + c := newTestContext(t, 
"/items", "en-US") + if got := ResolveLocale(c); got != "en" { + t.Errorf("en-US should normalize to en: got %q", got) + } +} + +func TestResolveLocale_UnsupportedFallsBackToEn(t *testing.T) { + c := newTestContext(t, "/items", "ja-JP") + if got := ResolveLocale(c); got != "en" { + t.Errorf("ja-JP should fall back to en: got %q", got) + } +} + +func TestResolveLocale_EmptyDefaultsToEn(t *testing.T) { + c := newTestContext(t, "/items", "") + if got := ResolveLocale(c); got != "en" { + t.Errorf("no header should default to en: got %q", got) + } +} + +func TestResolveLocale_NilContext(t *testing.T) { + if got := ResolveLocale(nil); got != "en" { + t.Errorf("nil context should default to en: got %q", got) + } +} + +func TestResolveLocale_MultipleAcceptLanguageTags(t *testing.T) { + // Accept-Language: ja-JP first, zh-CN second. We only honor the first tag + // — no q-value parsing. + c := newTestContext(t, "/items", "ja-JP,zh-CN;q=0.9,en;q=0.8") + if got := ResolveLocale(c); got != "en" { + t.Errorf("first tag ja-JP should fall back to en: got %q", got) + } +} + +func TestPickDescription_HitsLocaleKey(t *testing.T) { + descs := datatypes.JSON([]byte(`{"en":"Hello","zh":"你好"}`)) + if got := PickDescription(descs, "fallback", "zh"); got != "你好" { + t.Errorf("expected 你好, got %q", got) + } +} + +func TestPickDescription_FallsBackToEn(t *testing.T) { + descs := datatypes.JSON([]byte(`{"en":"Hello"}`)) + if got := PickDescription(descs, "fallback", "zh"); got != "Hello" { + t.Errorf("missing zh should fall back to en: got %q", got) + } +} + +func TestPickDescription_FallsBackToText(t *testing.T) { + descs := datatypes.JSON([]byte(`{}`)) + if got := PickDescription(descs, "Legacy text", "zh"); got != "Legacy text" { + t.Errorf("empty map should fall back to text column: got %q", got) + } +} + +func TestPickDescription_AllEmpty(t *testing.T) { + if got := PickDescription(nil, "", "zh"); got != "" { + t.Errorf("everything empty should return empty string: got %q", got) + } +} 
+ +func TestPickDescription_NilDescriptionsUsesText(t *testing.T) { + if got := PickDescription(nil, "Plain text", "zh"); got != "Plain text" { + t.Errorf("nil descriptions should use text column: got %q", got) + } +} + +func TestPickDescription_LocaleKeyExistsButEmpty(t *testing.T) { + // `"zh": ""` should not be selected — it's semantically the same as the key missing. + descs := datatypes.JSON([]byte(`{"en":"Hi","zh":""}`)) + if got := PickDescription(descs, "fallback", "zh"); got != "Hi" { + t.Errorf("empty zh value should fall back to en: got %q", got) + } +} diff --git a/server/internal/handlers/recommend.go b/server/internal/handlers/recommend.go index ed59f819..9e8c163e 100644 --- a/server/internal/handlers/recommend.go +++ b/server/internal/handlers/recommend.go @@ -101,6 +101,7 @@ func (h *RecommendHandler) GetTrending(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return } + ResolveItemListLocale(c, items) offset := (page - 1) * pageSize c.JSON(http.StatusOK, gin.H{ @@ -137,6 +138,7 @@ func (h *RecommendHandler) GetNewAndNoteworthy(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return } + ResolveItemListLocale(c, items) offset := (page - 1) * pageSize c.JSON(http.StatusOK, gin.H{ diff --git a/server/internal/handlers/registry_test.go b/server/internal/handlers/registry_test.go index 1c4ec4c1..2b422d7d 100644 --- a/server/internal/handlers/registry_test.go +++ b/server/internal/handlers/registry_test.go @@ -81,6 +81,7 @@ func setupTestDB(t *testing.T) func() { item_type TEXT NOT NULL, name TEXT NOT NULL, description TEXT, + descriptions TEXT NOT NULL DEFAULT '{}', category TEXT, version TEXT DEFAULT '1.0.0', content TEXT, @@ -112,6 +113,7 @@ func setupTestDB(t *testing.T) func() { revision INTEGER NOT NULL, name TEXT, description TEXT, + descriptions TEXT NOT NULL DEFAULT '{}', category TEXT, version TEXT, content TEXT NOT NULL, diff --git a/server/internal/handlers/search.go 
b/server/internal/handlers/search.go index c39d08af..bafc292c 100644 --- a/server/internal/handlers/search.go +++ b/server/internal/handlers/search.go @@ -75,6 +75,7 @@ func (h *SearchHandler) SemanticSearch(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return } + resolveSearchResultLocale(c, result) c.JSON(http.StatusOK, result) } @@ -131,6 +132,7 @@ func (h *SearchHandler) HybridSearch(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return } + resolveSearchResultLocale(c, result) c.JSON(http.StatusOK, result) } @@ -164,6 +166,7 @@ func (h *SearchHandler) FindSimilar(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return } + resolveSearchItemSliceLocale(c, items) offset := (page - 1) * pageSize c.JSON(http.StatusOK, gin.H{ @@ -175,6 +178,23 @@ func (h *SearchHandler) FindSimilar(c *gin.Context) { }) } +func resolveSearchResultLocale(c *gin.Context, result *services.SearchResult) { + if result == nil { + return + } + resolveSearchItemSliceLocale(c, result.Items) +} + +func resolveSearchItemSliceLocale(c *gin.Context, items []services.SearchResultItem) { + if len(items) == 0 { + return + } + locale := ResolveLocale(c) + for i := range items { + items[i].Description = PickDescription(items[i].Descriptions, items[i].Description, locale) + } +} + func intersectStrings(a, b []string) []string { m := make(map[string]bool) for _, s := range b { diff --git a/server/internal/models/models.go b/server/internal/models/models.go index 00cc520b..a1677341 100644 --- a/server/internal/models/models.go +++ b/server/internal/models/models.go @@ -349,6 +349,7 @@ type CapabilityItem struct { ItemType string `gorm:"not null;index;uniqueIndex:idx_item_repo_type_slug" json:"itemType"` Name string `gorm:"not null" json:"name"` Description string `json:"description"` + Descriptions datatypes.JSON `gorm:"type:jsonb;not null;default:'{}'" json:"descriptions" 
swaggertype:"object"` // {"en":"...","zh":"..."} — locale → text map; flat `description` is the en/default resolution Category string `json:"category"` Version string `gorm:"default:'1.0.0'" json:"version"` Content string `gorm:"type:text" json:"content"` @@ -426,6 +427,7 @@ type CapabilityVersion struct { Revision int `gorm:"not null;column:revision" json:"revision"` Name string `json:"name"` Description string `json:"description"` + Descriptions datatypes.JSON `gorm:"type:jsonb;not null;default:'{}'" json:"descriptions" swaggertype:"object"` Category string `json:"category"` Version string `json:"version"` Content string `gorm:"type:text;not null" json:"content"` diff --git a/server/internal/services/catalog_ingest_service.go b/server/internal/services/catalog_ingest_service.go index 8d8efe8d..07593520 100644 --- a/server/internal/services/catalog_ingest_service.go +++ b/server/internal/services/catalog_ingest_service.go @@ -172,14 +172,15 @@ type catalogBundleManifest struct { // We deliberately keep this struct small — index.json carries many fields // (evaluation, freshness_label, weak_dims, …) that the DB does not need. 
type catalogEntry struct { - ID string `json:"id"` - Type string `json:"type"` - Source string `json:"source"` - Description string `json:"description"` - Category string `json:"category"` - Tags []string `json:"tags"` - FinalScore float64 `json:"final_score"` - Security *catalogSecurityBlock `json:"security,omitempty"` + ID string `json:"id"` + Type string `json:"type"` + Source string `json:"source"` + Description string `json:"description"` + DescriptionZh string `json:"description_zh"` + Category string `json:"category"` + Tags []string `json:"tags"` + FinalScore float64 `json:"final_score"` + Security *catalogSecurityBlock `json:"security,omitempty"` } // catalogSecurityBlock mirrors the schema written by the upstream LLM @@ -519,6 +520,10 @@ func (s *CatalogIngestService) computeMetadataDelta(item *models.CapabilityItem, if entry.Description != "" && item.Description != entry.Description { return true } + // descriptions JSONB drift: upstream brought a new zh translation, etc. + if !descriptionsJSONEqual(item.Descriptions, buildDescriptionsJSON(entry)) { + return true + } if entry.Category != "" && item.Category != entry.Category { return true } @@ -529,6 +534,28 @@ func (s *CatalogIngestService) computeMetadataDelta(item *models.CapabilityItem, return false } +// descriptionsJSONEqual compares two locale → text JSON maps for semantic +// equality. Byte comparison would be incorrect because a value read back from +// a jsonb column need not match json.Marshal output byte-for-byte. 
+func descriptionsJSONEqual(a, b datatypes.JSON) bool { + var ma, mb map[string]string + if len(a) > 0 { + _ = json.Unmarshal(a, &ma) + } + if len(b) > 0 { + _ = json.Unmarshal(b, &mb) + } + if len(ma) != len(mb) { + return false + } + for k, v := range ma { + if mb[k] != v { + return false + } + } + return true +} + func (s *CatalogIngestService) applyMetadataDelta(item *models.CapabilityItem, entry catalogEntry) error { updates := map[string]any{} if entry.Source != "" { @@ -537,6 +564,13 @@ func (s *CatalogIngestService) applyMetadataDelta(item *models.CapabilityItem, e if entry.Description != "" { updates["description"] = entry.Description } + // descriptions JSONB is replaced wholesale whenever it differs from the + // upstream entry, so a removed upstream zh translation also clears from the + // DB row. Spec: integral replacement, no merge with prior content. + newDescs := buildDescriptionsJSON(entry) + if !descriptionsJSONEqual(item.Descriptions, newDescs) { + updates["descriptions"] = newDescs + } if entry.Category != "" { updates["category"] = entry.Category } @@ -601,6 +635,7 @@ func (s *CatalogIngestService) updateItem( existing.Name = parsed.Name existing.Description = description + existing.Descriptions = buildDescriptionsJSON(entry) existing.Category = category existing.Version = parsed.Version existing.Content = parsed.Content @@ -629,13 +664,18 @@ func (s *CatalogIngestService) updateItem( } ver := &models.CapabilityVersion{ - ID: uuid.New().String(), - ItemID: existing.ID, - Revision: maxRevision + 1, - Content: parsed.Content, - Metadata: metadataJSON(meta), - CommitMsg: fmt.Sprintf("ingest: catalog %s", entry.ID), - CreatedBy: triggerUser, + ID: uuid.New().String(), + ItemID: existing.ID, + Revision: maxRevision + 1, + Name: parsed.Name, + Description: description, + Descriptions: buildDescriptionsJSON(entry), + Category: category, + Version: parsed.Version, + Content: parsed.Content, + Metadata: metadataJSON(meta), + CommitMsg: fmt.Sprintf("ingest: catalog 
%s", entry.ID), + CreatedBy: triggerUser, } if err := s.DB.Create(ver).Error; err != nil { return err @@ -687,6 +727,7 @@ func (s *CatalogIngestService) insertItem( ItemType: parsed.ItemType, Name: parsed.Name, Description: description, + Descriptions: buildDescriptionsJSON(entry), Category: category, Version: parsed.Version, Content: parsed.Content, @@ -712,13 +753,18 @@ func (s *CatalogIngestService) insertItem( } ver := &models.CapabilityVersion{ - ID: uuid.New().String(), - ItemID: newItem.ID, - Revision: 1, - Content: parsed.Content, - Metadata: metadataJSON(meta), - CommitMsg: fmt.Sprintf("ingest: initial import from catalog %s", entry.ID), - CreatedBy: triggerUser, + ID: uuid.New().String(), + ItemID: newItem.ID, + Revision: 1, + Name: parsed.Name, + Description: description, + Descriptions: buildDescriptionsJSON(entry), + Category: category, + Version: parsed.Version, + Content: parsed.Content, + Metadata: metadataJSON(meta), + CommitMsg: fmt.Sprintf("ingest: initial import from catalog %s", entry.ID), + CreatedBy: triggerUser, } if err := s.DB.Create(ver).Error; err != nil { return nil, err @@ -1009,6 +1055,28 @@ func entryDirFromSourcePath(sourcePath string) string { return filepath.Join(parts[0], parts[1]) } +// buildDescriptionsJSON packs the upstream entry's per-locale descriptions +// into a JSONB map. en/zh are written only when the corresponding upstream +// field is non-empty; the resulting map fully replaces the column on write +// so a removed upstream translation also disappears from the DB row. 
+func buildDescriptionsJSON(entry catalogEntry) datatypes.JSON { + m := map[string]string{} + if entry.Description != "" { + m["en"] = entry.Description + } + if entry.DescriptionZh != "" { + m["zh"] = entry.DescriptionZh + } + if len(m) == 0 { + return datatypes.JSON([]byte("{}")) + } + b, err := json.Marshal(m) + if err != nil { + return datatypes.JSON([]byte("{}")) + } + return datatypes.JSON(b) +} + // chooseTags implements the precedence rule: upstream tags win when // non-empty (catalog has authoritative taxonomy), otherwise fall back to // what the per-file parser extracted (e.g. SKILL.md frontmatter). diff --git a/server/internal/services/catalog_ingest_service_test.go b/server/internal/services/catalog_ingest_service_test.go new file mode 100644 index 00000000..99a61443 --- /dev/null +++ b/server/internal/services/catalog_ingest_service_test.go @@ -0,0 +1,124 @@ +package services + +import ( + "encoding/json" + "testing" + + "gorm.io/datatypes" +) + +func decodeDescriptions(t *testing.T, raw datatypes.JSON) map[string]string { + t.Helper() + m := map[string]string{} + if len(raw) == 0 { + return m + } + if err := json.Unmarshal(raw, &m); err != nil { + t.Fatalf("unmarshal descriptions: %v", err) + } + return m +} + +func TestBuildDescriptionsJSON_BothLocales(t *testing.T) { + entry := catalogEntry{ + Description: "A skill that does X", + DescriptionZh: "一个执行 X 的技能", + } + got := decodeDescriptions(t, buildDescriptionsJSON(entry)) + want := map[string]string{ + "en": "A skill that does X", + "zh": "一个执行 X 的技能", + } + if len(got) != len(want) { + t.Fatalf("unexpected key count: got=%v want=%v", got, want) + } + for k, v := range want { + if got[k] != v { + t.Errorf("descriptions[%q] = %q; want %q", k, got[k], v) + } + } +} + +func TestBuildDescriptionsJSON_OnlyEnglish(t *testing.T) { + entry := catalogEntry{Description: "Only english here"} + got := decodeDescriptions(t, buildDescriptionsJSON(entry)) + if _, hasZh := got["zh"]; hasZh { + t.Errorf("zh key 
should not be present when entry.DescriptionZh is empty; got=%v", got) + } + if got["en"] != "Only english here" { + t.Errorf("en mismatch: %q", got["en"]) + } +} + +func TestBuildDescriptionsJSON_OnlyChinese(t *testing.T) { + entry := catalogEntry{DescriptionZh: "只有中文"} + got := decodeDescriptions(t, buildDescriptionsJSON(entry)) + if _, hasEn := got["en"]; hasEn { + t.Errorf("en key should not be present when entry.Description is empty; got=%v", got) + } + if got["zh"] != "只有中文" { + t.Errorf("zh mismatch: %q", got["zh"]) + } +} + +func TestBuildDescriptionsJSON_BothEmptyReturnsEmptyObject(t *testing.T) { + got := buildDescriptionsJSON(catalogEntry{}) + if string(got) != "{}" { + t.Errorf("expected empty object, got %q", string(got)) + } +} + +func TestDescriptionsJSONEqual_KeyOrderInsensitive(t *testing.T) { + a := datatypes.JSON([]byte(`{"en":"hi","zh":"你好"}`)) + b := datatypes.JSON([]byte(`{"zh":"你好","en":"hi"}`)) + if !descriptionsJSONEqual(a, b) { + t.Errorf("equal maps with different key order should be equal") + } +} + +func TestDescriptionsJSONEqual_DifferentValues(t *testing.T) { + a := datatypes.JSON([]byte(`{"en":"hi"}`)) + b := datatypes.JSON([]byte(`{"en":"hello"}`)) + if descriptionsJSONEqual(a, b) { + t.Errorf("different values should not compare equal") + } +} + +func TestDescriptionsJSONEqual_MissingKey(t *testing.T) { + a := datatypes.JSON([]byte(`{"en":"hi","zh":"你好"}`)) + b := datatypes.JSON([]byte(`{"en":"hi"}`)) + if descriptionsJSONEqual(a, b) { + t.Errorf("upstream removed zh should not compare equal — that triggers re-write") + } +} + +func TestDescriptionsJSONEqual_EmptyVsAbsent(t *testing.T) { + a := datatypes.JSON(nil) + b := datatypes.JSON([]byte(`{}`)) + if !descriptionsJSONEqual(a, b) { + t.Errorf("nil and empty-object should compare equal") + } +} + +// TestBuildDescriptionsJSON_IntegralReplacement encodes the "ingest re-writes +// integrally, no merging" semantic from the spec. 
When upstream drops the zh +// translation in a later bundle, the next ingest pass MUST produce a JSON +// without the zh key — not merge with the previously stored value. +func TestBuildDescriptionsJSON_IntegralReplacement(t *testing.T) { + first := decodeDescriptions(t, buildDescriptionsJSON(catalogEntry{ + Description: "v1", + DescriptionZh: "旧", + })) + second := decodeDescriptions(t, buildDescriptionsJSON(catalogEntry{ + Description: "v2", + })) + if first["zh"] != "旧" { + t.Fatalf("setup: expected first.zh='旧', got %q", first["zh"]) + } + if _, hasZh := second["zh"]; hasZh { + t.Errorf("second pass should drop zh key when upstream removed description_zh; got=%v", second) + } + if second["en"] != "v2" { + t.Errorf("second.en should be 'v2', got %q", second["en"]) + } +} diff --git a/server/internal/services/scan_service_test.go b/server/internal/services/scan_service_test.go index b013e4e6..4ef8e80f 100644 --- a/server/internal/services/scan_service_test.go +++ b/server/internal/services/scan_service_test.go @@ -29,7 +29,7 @@ func TestScanItem_PluginSkip(t *testing.T) { `CREATE TABLE capability_items ( id TEXT PRIMARY KEY, registry_id TEXT NOT NULL, repo_id TEXT NOT NULL, slug TEXT NOT NULL, item_type TEXT NOT NULL, name TEXT NOT NULL, - description TEXT, category TEXT, version TEXT DEFAULT '1.0.0', + description TEXT, descriptions TEXT NOT NULL DEFAULT '{}', category TEXT, version TEXT DEFAULT '1.0.0', content TEXT, content_md5 TEXT DEFAULT '', current_revision INTEGER NOT NULL DEFAULT 1, metadata TEXT DEFAULT '{}', source_path TEXT, source_sha TEXT, source_type TEXT DEFAULT 'direct', source TEXT DEFAULT '', @@ -133,6 +133,7 @@ func TestScanItemUpdatesCategoryFromScanResult(t *testing.T) { item_type TEXT NOT NULL, name TEXT NOT NULL, description TEXT, + descriptions TEXT NOT NULL DEFAULT '{}', category TEXT, version TEXT DEFAULT '1.0.0', content TEXT, @@ -319,6 +320,7 @@ func TestScanItemBackfillsBuiltinTagsFromScanResult(t *testing.T) { item_type TEXT NOT 
NULL, name TEXT NOT NULL, description TEXT, + descriptions TEXT NOT NULL DEFAULT '{}', category TEXT, version TEXT DEFAULT '1.0.0', content TEXT, @@ -517,6 +519,7 @@ func newScanShortCircuitTestDB(t *testing.T) *gorm.DB { item_type TEXT NOT NULL, name TEXT NOT NULL, description TEXT, + descriptions TEXT NOT NULL DEFAULT '{}', category TEXT, version TEXT DEFAULT '1.0.0', content TEXT, diff --git a/server/internal/services/search_service.go b/server/internal/services/search_service.go index 410e07ce..d079aed0 100644 --- a/server/internal/services/search_service.go +++ b/server/internal/services/search_service.go @@ -112,7 +112,7 @@ func (s *SearchService) SemanticSearch(ctx context.Context, req SearchRequest) ( sql := ` SELECT * FROM ( - SELECT id, registry_id, slug, item_type, name, description, category, version, + SELECT id, registry_id, slug, item_type, name, description, descriptions, category, version, content, metadata, source_path, source_sha, install_count, status, created_by, updated_by, created_at, updated_at, embedding_updated_at, experience_score, @@ -193,7 +193,7 @@ func (s *SearchService) HybridSearch(ctx context.Context, req SearchRequest) (*S like := database.ILike(s.db) sql := fmt.Sprintf(` - SELECT id, registry_id, slug, item_type, name, description, category, version, + SELECT id, registry_id, slug, item_type, name, description, descriptions, category, version, content, metadata, source_path, source_sha, install_count, status, created_by, updated_by, created_at, updated_at, embedding_updated_at, experience_score, @@ -354,7 +354,7 @@ func (s *SearchService) FindSimilar(ctx context.Context, itemID string, page, pa // Find similar items var similarItems []SearchResultItem sql := ` - SELECT id, registry_id, slug, item_type, name, description, category, version, + SELECT id, registry_id, slug, item_type, name, description, descriptions, category, version, content, metadata, source_path, source_sha, install_count, status, created_by, updated_by, 
created_at, updated_at, embedding_updated_at, experience_score, diff --git a/server/migrations/20260522000000_add_descriptions_jsonb_to_items.sql b/server/migrations/20260522000000_add_descriptions_jsonb_to_items.sql new file mode 100644 index 00000000..96a10e2c --- /dev/null +++ b/server/migrations/20260522000000_add_descriptions_jsonb_to_items.sql @@ -0,0 +1,13 @@ +-- +goose Up +ALTER TABLE capability_items + ADD COLUMN IF NOT EXISTS descriptions JSONB NOT NULL DEFAULT '{}'::jsonb; + +ALTER TABLE capability_versions + ADD COLUMN IF NOT EXISTS descriptions JSONB NOT NULL DEFAULT '{}'::jsonb; + +-- +goose Down +ALTER TABLE capability_versions + DROP COLUMN IF EXISTS descriptions; + +ALTER TABLE capability_items + DROP COLUMN IF EXISTS descriptions; From 3d83ba1a9d63f1cbc9fc6413da01e5c4f984d079 Mon Sep 17 00:00:00 2001 From: papysans <807399089@qq.com> Date: Fri, 22 May 2026 17:45:12 +0800 Subject: [PATCH 2/2] fix(migrate): import google/uuid missing from preexisting backfillOrganizations cmd/migrate/main.go references uuid.New() at line 1261 in backfillOrganizations but never imported the package. Caught when the staging Docker build attempted RUN go build ./cmd/migrate and failed; local go build ./internal/... had been masking it. --- server/cmd/migrate/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/server/cmd/migrate/main.go b/server/cmd/migrate/main.go index 1fe05d93..5189363c 100644 --- a/server/cmd/migrate/main.go +++ b/server/cmd/migrate/main.go @@ -16,6 +16,7 @@ import ( "github.com/costrict/costrict-web/server/internal/services" "github.com/costrict/costrict-web/server/internal/team" migrations "github.com/costrict/costrict-web/server/migrations" + "github.com/google/uuid" "github.com/pressly/goose/v3" "gorm.io/gorm" )