File tree 7 files changed +40
-0
lines changed
7 files changed +40
-0
lines changed Original file line number Diff line number Diff line change @@ -235,6 +235,19 @@ const tokenLimit = 10
235
235
const withinTokenLimit = isWithinTokenLimit (text , tokenLimit )
236
236
```
237
237
238
+ ### ` countTokens(text: string | Iterable<ChatMessage>): number `
239
+
240
+ Counts the number of tokens in the input text or chat. Use this method when you need to determine the number of tokens without checking against a limit.
241
+
242
+ Example:
243
+
244
+ ``` typescript
245
+ import { countTokens } from 'gpt-tokenizer'
246
+
247
+ const text = ' Hello, world!'
248
+ const tokenCount = countTokens (text )
249
+ ```
250
+
238
251
### ` encodeChat(chat: ChatMessage[], model?: ModelName): number[] `
239
252
240
253
Encodes the given chat into a sequence of tokens.
Original file line number Diff line number Diff line change @@ -128,6 +128,7 @@ export class GptEncoding {
128
128
this . isWithinTokenLimit = this . isWithinTokenLimit . bind ( this )
129
129
this . encodeChat = this . encodeChat . bind ( this )
130
130
this . encodeChatGenerator = this . encodeChatGenerator . bind ( this )
131
+ this . countTokens = this . countTokens . bind ( this )
131
132
this . modelName = modelName
132
133
}
133
134
@@ -349,6 +350,22 @@ export class GptEncoding {
349
350
return count
350
351
}
351
352
353
+ /**
354
+ * Counts the number of tokens in the input. A string input is tokenized with encodeGenerator and any other input with encodeChatGenerator; the lengths of the yielded token chunks are summed.
355
+ * @returns {number} The number of tokens.
356
+ */
357
+ countTokens ( input : string | Iterable < ChatMessage > ) : number {
358
+ const tokenGenerator =
359
+ typeof input === 'string'
360
+ ? this . encodeGenerator ( input )
361
+ : this . encodeChatGenerator ( input )
362
+ let count = 0
363
+ for ( const tokens of tokenGenerator ) {
364
+ count += tokens . length
365
+ }
366
+ return count
367
+ }
368
+
352
369
decode ( inputTokensToDecode : Iterable < number > ) : string {
353
370
return this . bytePairEncodingCoreProcessor . decodeNative ( inputTokensToDecode )
354
371
}
Original file line number Diff line number Diff line change @@ -13,11 +13,13 @@ const {
13
13
encode,
14
14
encodeGenerator,
15
15
isWithinTokenLimit,
16
+ countTokens,
16
17
encodeChat,
17
18
encodeChatGenerator,
18
19
vocabularySize,
19
20
} = api
20
21
export {
22
+ countTokens ,
21
23
decode ,
22
24
decodeAsyncGenerator ,
23
25
decodeGenerator ,
Original file line number Diff line number Diff line change @@ -13,11 +13,13 @@ const {
13
13
encode,
14
14
encodeGenerator,
15
15
isWithinTokenLimit,
16
+ countTokens,
16
17
encodeChat,
17
18
encodeChatGenerator,
18
19
vocabularySize,
19
20
} = api
20
21
export {
22
+ countTokens ,
21
23
decode ,
22
24
decodeAsyncGenerator ,
23
25
decodeGenerator ,
Original file line number Diff line number Diff line change @@ -13,9 +13,11 @@ const {
13
13
encode,
14
14
encodeGenerator,
15
15
isWithinTokenLimit,
16
+ countTokens,
16
17
vocabularySize,
17
18
} = api
18
19
export {
20
+ countTokens ,
19
21
decode ,
20
22
decodeAsyncGenerator ,
21
23
decodeGenerator ,
Original file line number Diff line number Diff line change @@ -13,9 +13,11 @@ const {
13
13
encode,
14
14
encodeGenerator,
15
15
isWithinTokenLimit,
16
+ countTokens,
16
17
vocabularySize,
17
18
} = api
18
19
export {
20
+ countTokens ,
19
21
decode ,
20
22
decodeAsyncGenerator ,
21
23
decodeGenerator ,
Original file line number Diff line number Diff line change @@ -13,9 +13,11 @@ const {
13
13
encode,
14
14
encodeGenerator,
15
15
isWithinTokenLimit,
16
+ countTokens,
16
17
vocabularySize,
17
18
} = api
18
19
export {
20
+ countTokens ,
19
21
decode ,
20
22
decodeAsyncGenerator ,
21
23
decodeGenerator ,
You can’t perform that action at this time.
0 commit comments