Commit 1df9568

Added cache for EOG and Control tokens to Vocabulary

1 parent 204ba96

2 files changed: +17, -9 lines


LLama/Native/LLamaToken.cs (+2, -8)
@@ -98,10 +98,7 @@ public bool IsControl(SafeLlamaModelHandle model)
     /// <returns></returns>
     public bool IsControl(SafeLlamaModelHandle.Vocabulary vocab)
     {
-        unsafe
-        {
-            return LLamaVocabNative.llama_vocab_is_control(vocab.VocabNative, this);
-        }
+        return vocab.ControlTokens.Contains(this);
     }

     /// <summary>
@@ -121,10 +118,7 @@ public bool IsEndOfGeneration(SafeLlamaModelHandle model)
     /// <returns></returns>
     public bool IsEndOfGeneration(SafeLlamaModelHandle.Vocabulary vocab)
    {
-        unsafe
-        {
-            return LLamaVocabNative.llama_vocab_is_eog(vocab.VocabNative, this);
-        }
+        return vocab.EOGTokens.Contains(this);
     }

     /// <inheritdoc />
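Note: the change above swaps a per-call unsafe P/Invoke for a lookup in the sets now cached on Vocabulary. Below is a minimal sketch (not part of this commit) of a call site that benefits; it assumes the LLama.Native namespace implied by the file paths, and the ShouldStop helper is purely illustrative.

using LLama.Native;

// Illustrative helper: the per-token end-of-generation check a sampling loop
// would run. Before this commit each call crossed into native code via
// llama_vocab_is_eog; after it, the check is a managed
// HashSet<LLamaToken>.Contains lookup on the cached Vocabulary.EOGTokens set.
static bool ShouldStop(LLamaToken token, SafeLlamaModelHandle.Vocabulary vocab)
{
    return token.IsEndOfGeneration(vocab);
}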

LLama/Native/SafeLlamaModelHandle.cs (+15, -1)
@@ -633,15 +633,26 @@ public sealed class Vocabulary
         internal unsafe LLamaVocabNative* VocabNative => llama_model_get_vocab(_model);

         /// <summary>
-        /// Cache of all the tokens in the vocabulary, and their string representation
+        /// Map of each token in this vocabulary to its string representation
         /// </summary>
         public readonly IReadOnlyDictionary<LLamaToken, string> TokenToString;

+        /// <summary>
+        /// Contains unique tokens that are supposed to end the generation (e.g.: EOS, EOT, etc)
+        /// </summary>
+        public readonly HashSet<LLamaToken> EOGTokens;
+
+        /// <summary>
+        /// Contains unique tokens that exist for inference control rather than text output
+        /// </summary>
+        public readonly HashSet<LLamaToken> ControlTokens;
+
         internal Vocabulary(SafeLlamaModelHandle model)
         {
             _model = model;
             TokenToString = GetVocabCache();

+            // Cache the various properties that llama.cpp API exposes about the vocab
             unsafe
             {
                 var vocabNative = llama_model_get_vocab(_model);
@@ -662,6 +673,9 @@ internal Vocabulary(SafeLlamaModelHandle model)
                 DecoderStartToken = Normalize(llama_model_decoder_start_token(_model));
                 ShouldAddBOS = LLamaVocabNative.llama_vocab_get_add_bos(vocabNative);
                 ShouldAddEOS = LLamaVocabNative.llama_vocab_get_add_eos(vocabNative);
+
+                EOGTokens = new HashSet<LLamaToken>(TokenToString.Keys.Where(token => LLamaVocabNative.llama_vocab_is_eog(vocabNative, token)));
+                ControlTokens = new HashSet<LLamaToken>(TokenToString.Keys.Where(token => LLamaVocabNative.llama_vocab_is_control(vocabNative, token)));
             }
         }
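Note: the two HashSet caches are filled once in the Vocabulary constructor by querying llama.cpp for every token already in TokenToString, so later membership checks stay in managed code. Below is a small sketch (not part of this commit) of reading the caches directly; the ToDisplayText helper is illustrative, and obtaining a Vocabulary instance is assumed to happen elsewhere.

using System.Collections.Generic;
using System.Linq;
using LLama.Native;

// Illustrative helper: drop control tokens from displayed output using the
// cached sets. Both ControlTokens and TokenToString are built once in the
// Vocabulary constructor, so this loop makes no native calls per token.
static string ToDisplayText(IEnumerable<LLamaToken> tokens, SafeLlamaModelHandle.Vocabulary vocab)
{
    return string.Concat(tokens
        .Where(t => !vocab.ControlTokens.Contains(t))
        .Select(t => vocab.TokenToString[t]));
}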
