Skip to content

Commit b7ff0d5

Browse files
ako and claude committed
perf(marketplace): fetch search pages concurrently (~5x faster deep scans)
Keyword search has to scan the catalog (the Content API has no server-side filter), and a rare/multi-word query forced a ~23-page sequential walk — ~3m45s in a high-latency environment, slow enough that callers thought it had hung. Search now fetches the first page alone (so a common early match stays a single request) then ramps to concurrent batches (errgroup, bounded at searchConcurrency=8) for the deep-scan tail, preserving catalog order and the stop-at-limit / stop-at-end-of-catalog semantics. Measured ~3m45s -> ~44s on a slow link (network-bound; ~5x); near-instant on a normal link. Tests (race-clean): first-page-alone keeps the common case to one request; pagination still finds a second-page match; end-of-catalog stops early. Promotes golang.org/x/sync to a direct dependency. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 2e8ad25 commit b7ff0d5

3 files changed

Lines changed: 89 additions & 11 deletions

File tree

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ require (
2626
go.mongodb.org/mongo-driver v1.17.9
2727
go.starlark.net v0.0.0-20260102030733-3fee463870c9
2828
go.uber.org/zap v1.28.0
29+
golang.org/x/sync v0.20.0
2930
golang.org/x/term v0.43.0
3031
gopkg.in/yaml.v3 v3.0.1
3132
modernc.org/sqlite v1.51.0
@@ -69,7 +70,6 @@ require (
6970
go.uber.org/multierr v1.10.0 // indirect
7071
golang.org/x/crypto v0.50.0 // indirect
7172
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
72-
golang.org/x/sync v0.20.0 // indirect
7373
golang.org/x/sys v0.44.0 // indirect
7474
golang.org/x/text v0.36.0 // indirect
7575
modernc.org/libc v1.72.3 // indirect

internal/marketplace/client.go

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111
"net/url"
1212
"strconv"
1313
"strings"
14+
15+
"golang.org/x/sync/errgroup"
1416
)
1517

1618
const (
@@ -24,6 +26,10 @@ const (
2426
// (End-of-catalog normally terminates the loop first: an offset past the
2527
// end returns a short/empty page.)
2628
maxSearchPages = 50
29+
// searchConcurrency bounds how many catalog pages are fetched in parallel
30+
// during a deep keyword scan (the first page is always fetched alone so a
31+
// common early match stays a single request).
32+
searchConcurrency = 8
2733
)
2834

2935
// Client is a typed wrapper around the marketplace REST API. Callers
@@ -72,20 +78,40 @@ func (c *Client) Search(ctx context.Context, query string, limit int) (*ContentL
7278
return &out, nil
7379
}
7480

81+
// Fetch the first page alone so a common query that matches early costs a
82+
// single request, then ramp to concurrent batches so a rare/deep match
83+
// (near-full scan) is a handful of round-trips instead of ~23 sequential
84+
// ones. Stops at `limit` matches or end-of-catalog (a short page).
7585
var matched []Content
76-
for page := range maxSearchPages {
77-
items, err := c.fetchContentPage(ctx, page*pageSize)
86+
for page := 0; page < maxSearchPages; {
87+
batch := searchConcurrency
88+
if page == 0 {
89+
batch = 1
90+
}
91+
if page+batch > maxSearchPages {
92+
batch = maxSearchPages - page
93+
}
94+
95+
pages, err := c.fetchPages(ctx, page, batch)
7896
if err != nil {
7997
return nil, err
8098
}
81-
matched = append(matched, filterItems(items, query)...)
99+
100+
endReached := false
101+
for _, items := range pages {
102+
matched = append(matched, filterItems(items, query)...)
103+
if len(items) < pageSize {
104+
endReached = true
105+
}
106+
}
82107
if limit > 0 && len(matched) >= limit {
83108
matched = matched[:limit]
84109
break
85110
}
86-
if len(items) < pageSize {
87-
break // reached the end of the catalog
111+
if endReached {
112+
break
88113
}
114+
page += batch
89115
}
90116
return &ContentList{Items: matched}, nil
91117
}
@@ -101,6 +127,28 @@ func (c *Client) fetchContentPage(ctx context.Context, offset int) ([]Content, e
101127
return out.Items, nil
102128
}
103129

130+
// fetchPages fetches n catalog pages starting at startPage concurrently and
131+
// returns them in page order (so client-side filtering preserves catalog order).
132+
func (c *Client) fetchPages(ctx context.Context, startPage, n int) ([][]Content, error) {
133+
results := make([][]Content, n)
134+
g, gctx := errgroup.WithContext(ctx)
135+
g.SetLimit(searchConcurrency)
136+
for i := range n {
137+
g.Go(func() error {
138+
items, err := c.fetchContentPage(gctx, (startPage+i)*pageSize)
139+
if err != nil {
140+
return err
141+
}
142+
results[i] = items
143+
return nil
144+
})
145+
}
146+
if err := g.Wait(); err != nil {
147+
return nil, err
148+
}
149+
return results, nil
150+
}
151+
104152
// filterItems returns items whose name or publisher contains query
105153
// (case-insensitive substring match).
106154
func filterItems(items []Content, query string) []Content {

internal/marketplace/client_test.go

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"net/http/httptest"
1010
"strconv"
1111
"strings"
12+
"sync"
1213
"testing"
1314
"time"
1415
)
@@ -143,10 +144,13 @@ func contentPage(startID, n int, special string) string {
143144
// page (offset=100) must still be found. The server returns a full page of 100
144145
// non-matching items at offset 0, and the match at offset 100.
145146
func TestSearch_PaginatesPastFirstPage(t *testing.T) {
146-
var offsetsSeen []string
147+
var mu sync.Mutex
148+
offsetsSeen := map[string]bool{}
147149
client, _ := newMockServer(t, func(w http.ResponseWriter, r *http.Request) {
148150
off := r.URL.Query().Get("offset")
149-
offsetsSeen = append(offsetsSeen, off)
151+
mu.Lock()
152+
offsetsSeen[off] = true
153+
mu.Unlock()
150154
switch off {
151155
case "0":
152156
_, _ = w.Write([]byte(contentPage(1, 100, ""))) // full page, no match
@@ -164,9 +168,35 @@ func TestSearch_PaginatesPastFirstPage(t *testing.T) {
164168
if len(result.Items) != 1 || result.Items[0].ContentID != 999999 {
165169
t.Fatalf("expected the second-page match (999999), got %+v", result.Items)
166170
}
167-
// It must have advanced past the first page (offset 0 was a full 100).
168-
if len(offsetsSeen) < 2 || offsetsSeen[1] != "100" {
169-
t.Errorf("expected pagination to request offset=100; offsets seen: %v", offsetsSeen)
171+
// It must have advanced past the first page (offset 0 was a full 100) and
172+
// requested the second page where the match lives. The second batch is
173+
// fetched concurrently, so order is not asserted.
174+
mu.Lock()
175+
defer mu.Unlock()
176+
if !offsetsSeen["0"] || !offsetsSeen["100"] {
177+
t.Errorf("expected requests at offset 0 and 100; offsets seen: %v", offsetsSeen)
178+
}
179+
}
180+
181+
// TestSearch_FirstPageAlone: when enough matches appear on the first (full)
182+
// page, search stops there — a single request — without firing the concurrent
183+
// follow-on batch. This keeps the common case fast.
184+
func TestSearch_FirstPageAlone(t *testing.T) {
185+
var calls int
186+
client, _ := newMockServer(t, func(w http.ResponseWriter, _ *http.Request) {
187+
calls++
188+
// A full page (pageSize items) that includes a match for "filler".
189+
_, _ = w.Write([]byte(contentPage(1, pageSize, "")))
190+
})
191+
res, err := client.Search(context.Background(), "filler", 1)
192+
if err != nil {
193+
t.Fatal(err)
194+
}
195+
if len(res.Items) != 1 {
196+
t.Errorf("expected limit=1 to return 1 match, got %d", len(res.Items))
197+
}
198+
if calls != 1 {
199+
t.Errorf("expected exactly 1 request when the limit is met on page 0, got %d", calls)
170200
}
171201
}
172202

0 commit comments

Comments
 (0)