Skip to content

Commit fa01ba9

Browse files
josharianAndreasArvidssonpre-commit-ci-lite[bot]
authored
avoid allocating hats to the first letter of a token (#1723)
We could get much fancier than this, but after running this with a day it appears to help some, and it is nice and simple. I propose that we declare that it fixes #1658, at least for now. ## Checklist - [/] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [/] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [/] I have not broken the cheatsheet --------- Co-authored-by: Andreas Arvidsson <[email protected]> Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
1 parent 06e973b commit fa01ba9

File tree

15 files changed

+97
-25
lines changed

15 files changed

+97
-25
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
languageId: plaintext
2+
command:
3+
version: 5
4+
spokenForm: take each
5+
action: {name: setSelection}
6+
targets:
7+
- type: primitive
8+
mark: {type: decoratedSymbol, symbolColor: default, character: e}
9+
usePrePhraseSnapshot: false
10+
initialState:
11+
documentContents: hello world
12+
selections:
13+
- anchor: {line: 0, character: 11}
14+
active: {line: 0, character: 11}
15+
marks:
16+
default.e:
17+
start: {line: 0, character: 0}
18+
end: {line: 0, character: 5}
19+
finalState:
20+
documentContents: hello world
21+
selections:
22+
- anchor: {line: 0, character: 0}
23+
active: {line: 0, character: 5}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
languageId: plaintext
2+
command:
3+
version: 7
4+
spokenForm: take each
5+
action:
6+
name: setSelection
7+
target:
8+
type: primitive
9+
mark: {type: decoratedSymbol, symbolColor: default, character: e}
10+
usePrePhraseSnapshot: false
11+
initialState:
12+
documentContents: hello world
13+
selections:
14+
- anchor: {line: 0, character: 11}
15+
active: {line: 0, character: 11}
16+
marks:
17+
default.e:
18+
start: {line: 0, character: 0}
19+
end: {line: 0, character: 5}
20+
finalState:
21+
documentContents: hello world
22+
selections:
23+
- anchor: {line: 0, character: 0}
24+
active: {line: 0, character: 5}

packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer.ts

+2-4
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@ import { getMatcher } from "../../../../tokenizer";
44
const CAMEL_REGEX = /\p{Lu}?\p{Ll}+|\p{Lu}+(?!\p{Ll})|\p{N}+/gu;
55

66
/**
7-
* This class just encapsulates the word-splitting logic from
8-
* {@link WordScopeHandler}. We could probably just inline it into that class,
9-
* but for now we need it here because we can't yet properly mock away vscode
10-
* for the unit tests in subtoken.test.ts.
7+
* This class encapsulates word-splitting logic.
8+
* It is used by the {@link WordScopeHandler} and the hat allocator.
119
*/
1210
export class WordTokenizer {
1311
private wordRegex: RegExp;

packages/cursorless-engine/src/test/fixtures/subtoken.fixture.ts

+4
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,8 @@ export const subtokenFixture: Fixture[] = [
8484
input: "_quickBrownFox_",
8585
expectedOutput: ["quick", "Brown", "Fox"],
8686
},
87+
{
88+
input: "thisIsATest",
89+
expectedOutput: ["this", "Is", "A", "Test"],
90+
},
8791
];

packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts

+6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ export type HatMetric = (hat: HatCandidate) => number;
1515
*/
1616
export const negativePenalty: HatMetric = ({ penalty }) => -penalty;
1717

18+
/**
19+
* @returns A metric that penalizes graphemes that are the first letter of a word within a token
20+
*/
21+
export const avoidFirstLetter: HatMetric = ({ isFirstLetter }) =>
22+
isFirstLetter ? -1 : 0;
23+
1824
/**
1925
* @param hatOldTokenRanks A map from a hat candidate (grapheme+style combination) to the score of the
2026
* token that used the given hat in the previous hat allocation.

packages/cursorless-engine/src/util/allocateHats/allocateHats.ts

+15-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import type {
77
TokenHat,
88
} from "@cursorless/common";
99
import { CompositeKeyMap, DefaultMap, Range } from "@cursorless/common";
10+
import { WordTokenizer } from "../../processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer";
1011
import type {
1112
Grapheme,
1213
TokenGraphemeSplitter,
@@ -19,6 +20,7 @@ export interface HatCandidate {
1920
grapheme: Grapheme;
2021
style: HatStyleName;
2122
penalty: number;
23+
isFirstLetter: boolean;
2224
}
2325

2426
interface AllocateHatsOptions {
@@ -169,12 +171,21 @@ function getTokenRemainingHatCandidates(
169171
graphemeRemainingHatCandidates: DefaultMap<string, HatStyleName[]>,
170172
enabledHatStyles: HatStyleMap,
171173
): HatCandidate[] {
174+
const candidates: HatCandidate[] = [];
175+
const graphemes = tokenGraphemeSplitter.getTokenGraphemes(token.text);
176+
const firstLetterOffsets = new Set(
177+
new WordTokenizer(token.editor.document.languageId)
178+
.splitIdentifier(token.text)
179+
.map((word) => word.index),
180+
);
181+
172182
// Use iteration here instead of functional constructs,
173183
// because this is a hot path and we want to avoid allocating arrays
174184
// and calling tiny functions lots of times.
175-
const candidates: HatCandidate[] = [];
176-
const graphemes = tokenGraphemeSplitter.getTokenGraphemes(token.text);
185+
177186
for (const grapheme of graphemes) {
187+
const isFirstLetter = firstLetterOffsets.has(grapheme.tokenStartOffset);
188+
178189
for (const style of graphemeRemainingHatCandidates.get(grapheme.text)) {
179190
// Allocating and pushing all of these objects is
180191
// the single most expensive thing in hat allocation.
@@ -183,9 +194,11 @@ function getTokenRemainingHatCandidates(
183194
grapheme,
184195
style,
185196
penalty: enabledHatStyles[style].penalty,
197+
isFirstLetter,
186198
});
187199
}
188200
}
201+
189202
return candidates;
190203
}
191204

packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts

+4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import type { HatStability, TokenHat } from "@cursorless/common";
22
import type { HatCandidate } from "./allocateHats";
33
import type { RankingContext } from "./getHatRankingContext";
44
import {
5+
avoidFirstLetter,
56
hatOldTokenRank,
67
isOldTokenHat,
78
minimumTokenRankContainingGrapheme,
@@ -75,6 +76,9 @@ export function chooseTokenHat(
7576
// Narrow to the hats with the lowest penalty
7677
negativePenalty,
7778

79+
// Avoid the first grapheme of the token if possible
80+
avoidFirstLetter,
81+
7882
// Prefer hats that sit on a grapheme that doesn't appear in any highly
7983
// ranked token
8084
minimumTokenRankContainingGrapheme(tokenRank, graphemeTokenRanks),

packages/cursorless-vscode-e2e/src/suite/breakpoints.vscode.test.ts

+4-4
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ async function breakpointHarpAdd() {
3838
mark: {
3939
type: "decoratedSymbol",
4040
symbolColor: "default",
41-
character: "h",
41+
character: "e",
4242
},
4343
},
4444
],
@@ -66,7 +66,7 @@ async function breakpointTokenHarpAdd() {
6666
mark: {
6767
type: "decoratedSymbol",
6868
symbolColor: "default",
69-
character: "h",
69+
character: "e",
7070
},
7171
},
7272
],
@@ -101,7 +101,7 @@ async function breakpointHarpRemove() {
101101
mark: {
102102
type: "decoratedSymbol",
103103
symbolColor: "default",
104-
character: "h",
104+
character: "e",
105105
},
106106
},
107107
],
@@ -136,7 +136,7 @@ async function breakpointTokenHarpRemove() {
136136
mark: {
137137
type: "decoratedSymbol",
138138
symbolColor: "default",
139-
character: "h",
139+
character: "e",
140140
},
141141
},
142142
],

packages/cursorless-vscode-e2e/src/suite/commandHistory.vscode.test.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ suite("commandHistory", function () {
4545
async function testActive(tmpdir: string) {
4646
await injectFakeIsActive(true);
4747
await initalizeEditor();
48-
const command = takeCommand("h");
48+
const command = takeCommand("e");
4949
await runCursorlessCommand(command);
5050

5151
const content = await getLogEntry(tmpdir);
@@ -71,7 +71,7 @@ async function testSanitization(tmpdir: string) {
7171
async function testInactive(tmpdir: string) {
7272
await injectFakeIsActive(false);
7373
await initalizeEditor();
74-
await runCursorlessCommand(takeCommand("h"));
74+
await runCursorlessCommand(takeCommand("e"));
7575

7676
assert.notOk(existsSync(tmpdir));
7777
}

packages/cursorless-vscode-e2e/src/suite/crossCellsSetSelection.vscode.test.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ async function runTest() {
3434
mark: {
3535
type: "decoratedSymbol",
3636
symbolColor: "default",
37-
character: "w",
37+
character: "o",
3838
},
3939
},
4040
],

packages/cursorless-vscode-e2e/src/suite/intraCellSetSelection.vscode.test.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ async function runTest() {
3434
mark: {
3535
type: "decoratedSymbol",
3636
symbolColor: "default",
37-
character: "w",
37+
character: "r",
3838
},
3939
},
4040
],

packages/cursorless-vscode-e2e/src/suite/keyboard/basic.vscode.test.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,8 @@ async function basic() {
211211

212212
await vscode.commands.executeCommand("cursorless.keyboard.modal.modeOn");
213213

214-
// Target default f
215-
await typeText("df");
214+
// Target default o
215+
await typeText("do");
216216

217217
// Target containing function
218218
await typeText("sf");

packages/cursorless-vscode-e2e/src/suite/pourAcrossSplit.vscode.test.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ async function runTest() {
2626
mark: {
2727
type: "decoratedSymbol",
2828
symbolColor: "default",
29-
character: "h",
29+
character: "e",
3030
},
3131
},
3232
],

packages/cursorless-vscode-e2e/src/suite/testCaseRecorder.vscode.test.ts

+7-7
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ async function testCaseRecorderGracefulError() {
9393
}
9494

9595
await initalizeEditor(hatTokenMap);
96-
await takeHarp();
96+
await takeEach();
9797
await stopRecording();
9898
await checkRecordedTest(tmpdir);
9999
} finally {
@@ -108,7 +108,7 @@ async function runAndCheckTestCaseRecorder(
108108
) {
109109
await initalizeEditor(hatTokenMap);
110110
await startRecording(...extraArgs);
111-
await takeHarp();
111+
await takeEach();
112112
await stopRecording();
113113
await checkRecordedTest(tmpdir);
114114
}
@@ -132,10 +132,10 @@ async function stopRecording() {
132132
await vscode.commands.executeCommand("cursorless.recordTestCase");
133133
}
134134

135-
async function takeHarp() {
135+
async function takeEach() {
136136
await runCursorlessCommand({
137137
version: LATEST_VERSION,
138-
spokenForm: "take harp",
138+
spokenForm: "take each",
139139
usePrePhraseSnapshot: false,
140140
action: {
141141
name: "setSelection",
@@ -144,7 +144,7 @@ async function takeHarp() {
144144
mark: {
145145
type: "decoratedSymbol",
146146
symbolColor: "default",
147-
character: "h",
147+
character: "e",
148148
},
149149
},
150150
},
@@ -156,11 +156,11 @@ async function checkRecordedTest(tmpdir: string) {
156156
assert.lengthOf(paths, 1);
157157

158158
const actualRecordedTestPath = paths[0];
159-
assert.equal(path.basename(actualRecordedTestPath), "takeHarp.yml");
159+
assert.equal(path.basename(actualRecordedTestPath), "takeEach.yml");
160160

161161
const expected = (
162162
await readFile(
163-
getFixturePath("recorded/testCaseRecorder/takeHarp.yml"),
163+
getFixturePath("recorded/testCaseRecorder/takeEach.yml"),
164164
"utf8",
165165
)
166166
)

packages/cursorless-vscode-e2e/src/suite/wrapWithSnippetAcrossSplit.vscode.test.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ async function runTest() {
3838
mark: {
3939
type: "decoratedSymbol",
4040
symbolColor: "default",
41-
character: "h",
41+
character: "e",
4242
},
4343
},
4444
},

0 commit comments

Comments
 (0)