Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 52dadbc

Browse files
committed
add test for symbols sorted by order of frequency
Signed-off-by: Callum Styan <[email protected]>
1 parent 09b5f47 commit 52dadbc

File tree

2 files changed

+54
-5
lines changed

2 files changed

+54
-5
lines changed

index/index.go

+12-5
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ type symbolFrequencyPair struct {
6464

6565
// symbolFrequencylist is a slice of symbolFrequencyPair values; its methods
// support ordering the pairs by their frequency field.
type symbolFrequencylist []symbolFrequencyPair
6666

67-
func (s symbolFrequencylist) Len() int { return len(s) }
68-
func (s symbolFrequencylist) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
69-
func (s symbolFrequencylist) Less(i, j int) bool { return s[i].frequency < s[j].frequency }
67+
// Len reports how many symbol/frequency pairs the list holds.
func (s symbolFrequencylist) Len() int {
	return len(s)
}
68+
// Swap exchanges the pairs stored at positions i and j, in place.
func (s symbolFrequencylist) Swap(i, j int) {
	held := s[i]
	s[i] = s[j]
	s[j] = held
}
69+
// Greater reports whether the pair at index i has a strictly higher
// frequency than the pair at index j.
func (s symbolFrequencylist) Greater(i, j int) bool {
	left, right := s[i], s[j]
	return right.frequency < left.frequency
}
7070

7171
// indexWriterStage is a uint8-backed type.
// NOTE(review): presumably identifies a phase of the index writer — confirm
// against the constants declared for it.
type indexWriterStage uint8
7272

@@ -355,7 +355,14 @@ func (w *Writer) AddSymbols(sym map[string]int) error {
355355
for k, v := range sym {
356356
symbols = append(symbols, symbolFrequencyPair{k, v})
357357
}
358-
sort.Sort(sort.Reverse(symbols))
358+
sort.Slice(symbols, func(i, j int) bool {
359+
// The symbols arrive as a map, whose iteration order is unspecified, so
360+
// break frequency ties by symbol to keep the resulting order deterministic.
361+
if symbols[i].frequency == symbols[j].frequency {
362+
return symbols[i].symbol > symbols[j].symbol
363+
}
364+
return symbols.Greater(i, j)
365+
})
359366

360367
const headerSize = 4
361368

@@ -874,7 +881,7 @@ func (r *Reader) Symbols() (map[string]int, error) {
874881
res[s] = 0
875882
}
876883
for _, s := range r.symbolSlice {
877-
res[s] = struct{}{}
884+
res[s] = 0
878885
}
879886
return res, nil
880887
}

index/index_test.go

+42
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,48 @@ func TestIndexRW_Postings(t *testing.T) {
232232
testutil.Ok(t, ir.Close())
233233
}
234234

235+
func TestIndexRW_SymbolsOrder(t *testing.T) {
236+
dir, err := ioutil.TempDir("", "test_index_order")
237+
testutil.Ok(t, err)
238+
defer os.RemoveAll(dir)
239+
240+
fn := filepath.Join(dir, "index")
241+
242+
iw, err := NewWriter(fn)
243+
testutil.Ok(t, err)
244+
245+
err = iw.AddSymbols(map[string]int{
246+
"a": 1,
247+
"b": 2,
248+
"c": 1,
249+
"2": 4,
250+
"3": 5,
251+
"4": 3,
252+
})
253+
254+
testutil.Ok(t, err)
255+
testutil.Ok(t, iw.Close())
256+
257+
exp := []string{"3", "2", "4", "b", "c", "a"}
258+
259+
ir, err := NewFileReader(fn)
260+
testutil.Ok(t, err)
261+
262+
err = ir.readSymbols(int(ir.toc.symbols))
263+
testutil.Ok(t, err)
264+
265+
s, err := ir.Symbols()
266+
t.Logf("symbols: %+v", s)
267+
268+
testutil.Equals(t, len(ir.symbolSlice), len(exp))
269+
270+
for i := range ir.symbolSlice {
271+
testutil.Equals(t, ir.symbolSlice[i], exp[i])
272+
}
273+
274+
testutil.Ok(t, ir.Close())
275+
}
276+
235277
func TestPersistence_index_e2e(t *testing.T) {
236278
dir, err := ioutil.TempDir("", "test_persistence_e2e")
237279
testutil.Ok(t, err)

0 commit comments

Comments
 (0)