@@ -28,7 +28,7 @@ impl LlamaContext<'_> {
2828 /// * `dest` - The sequence id to copy the cache to.
2929 /// * `size` - The size of the cache to copy.
3030 pub fn copy_cache ( & mut self , src : i32 , dest : i32 , size : i32 ) {
// Forwards `src` and `dest` to the sys-crate sequence-copy call, passing `0` and `size`
// in the last two argument slots (the same slots the ranged variant further down this
// diff fills with `p0` and `p1`).
// This hunk only renames the FFI symbol (`llama_kv_cache_seq_cp` -> `llama_kv_self_seq_cp`);
// the argument list is identical on the `-` and `+` lines.
// NOTE(review): the `unsafe` call presumably relies on `self.context` holding a live
// context pointer -- confirm where `context` is constructed.
31- unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_cp ( self . context . as_ptr ( ) , src, dest, 0 , size) }
31+ unsafe { llama_cpp_sys_2:: llama_kv_self_seq_cp ( self . context . as_ptr ( ) , src, dest, 0 , size) }
3232 }
3333
3434 /// Copy the cache from one sequence to another.
@@ -58,7 +58,7 @@ impl LlamaContext<'_> {
5858 . map_or ( Ok ( -1 ) , i32:: try_from)
5959 . map_err ( KvCacheConversionError :: P1TooLarge ) ?;
6060 unsafe {
61- llama_cpp_sys_2:: llama_kv_cache_seq_cp ( self . context . as_ptr ( ) , src, dest, p0, p1) ;
61+ llama_cpp_sys_2:: llama_kv_self_seq_cp ( self . context . as_ptr ( ) , src, dest, p0, p1) ;
6262 }
6363 Ok ( ( ) )
6464 }
@@ -92,18 +92,18 @@ impl LlamaContext<'_> {
9292 let p1 = p1
9393 . map_or ( Ok ( -1 ) , i32:: try_from)
9494 . map_err ( KvCacheConversionError :: P1TooLarge ) ?;
95- Ok ( unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_rm ( self . context . as_ptr ( ) , src, p0, p1) } )
95+ Ok ( unsafe { llama_cpp_sys_2:: llama_kv_self_seq_rm ( self . context . as_ptr ( ) , src, p0, p1) } )
9696 }
9797
9898 /// Returns the number of used KV cells (i.e. cells that have at least one sequence assigned to them).
9999 #[ must_use]
100100 pub fn get_kv_cache_used_cells ( & self ) -> i32 {
// Thin wrapper: passes the context pointer to the sys crate and returns its `i32` result as-is.
// This hunk swaps the FFI symbol `llama_get_kv_cache_used_cells` for `llama_kv_self_used_cells`;
// the Rust-side method name and signature are untouched.
101- unsafe { llama_cpp_sys_2:: llama_get_kv_cache_used_cells ( self . context . as_ptr ( ) ) }
101+ unsafe { llama_cpp_sys_2:: llama_kv_self_used_cells ( self . context . as_ptr ( ) ) }
102102 }
103103
104104 /// Clear the KV cache.
105105 pub fn clear_kv_cache ( & mut self ) {
// Takes `&mut self`, returns nothing, and only forwards the context pointer to the sys crate.
// This hunk renames the FFI symbol from `llama_kv_cache_clear` to `llama_kv_self_clear`.
106- unsafe { llama_cpp_sys_2:: llama_kv_cache_clear ( self . context . as_ptr ( ) ) }
106+ unsafe { llama_cpp_sys_2:: llama_kv_self_clear ( self . context . as_ptr ( ) ) }
107107 }
108108
109109 /// Removes all tokens that do not belong to the specified sequence
@@ -112,7 +112,7 @@ impl LlamaContext<'_> {
112112 ///
113113 /// * `seq_id` - The sequence id to keep
114114 pub fn llama_kv_cache_seq_keep ( & mut self , seq_id : i32 ) {
// Forwards the context pointer and `seq_id` unchanged to the sys crate; nothing is returned.
// Only the FFI symbol is renamed here (`llama_kv_cache_seq_keep` -> `llama_kv_self_seq_keep`);
// the public Rust method keeps its old `llama_kv_cache_seq_keep` name.
115- unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_keep ( self . context . as_ptr ( ) , seq_id) }
115+ unsafe { llama_cpp_sys_2:: llama_kv_self_seq_keep ( self . context . as_ptr ( ) , seq_id) }
116116 }
117117
118118 #[ allow( clippy:: doc_markdown) ]
@@ -147,7 +147,7 @@ impl LlamaContext<'_> {
147147 . map_or ( Ok ( -1 ) , i32:: try_from)
148148 . map_err ( KvCacheConversionError :: P1TooLarge ) ?;
149149 unsafe {
150- llama_cpp_sys_2:: llama_kv_cache_seq_add ( self . context . as_ptr ( ) , seq_id, p0, p1, delta) ;
150+ llama_cpp_sys_2:: llama_kv_self_seq_add ( self . context . as_ptr ( ) , seq_id, p0, p1, delta) ;
151151 }
152152 Ok ( ( ) )
153153 }
@@ -183,7 +183,7 @@ impl LlamaContext<'_> {
183183 . map_or ( Ok ( -1 ) , i32:: try_from)
184184 . map_err ( KvCacheConversionError :: P1TooLarge ) ?;
185185 let d = c_int:: from ( d. get ( ) ) ;
186- unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_div ( self . context . as_ptr ( ) , seq_id, p0, p1, d) }
186+ unsafe { llama_cpp_sys_2:: llama_kv_self_seq_div ( self . context . as_ptr ( ) , seq_id, p0, p1, d) }
187187 Ok ( ( ) )
188188 }
189189
@@ -194,138 +194,19 @@ impl LlamaContext<'_> {
194194 /// * `seq_id` - The sequence id to get the max position for
195195 #[ must_use]
196196 pub fn kv_cache_seq_pos_max ( & self , seq_id : i32 ) -> i32 {
// Passes the context pointer and `seq_id` to the sys crate and returns the resulting `i32` untouched.
// This hunk renames the FFI symbol from `llama_kv_cache_seq_pos_max` to `llama_kv_self_seq_pos_max`.
// NOTE(review): the value returned for a sequence with no cells is not visible here -- check the C header.
197- unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_pos_max ( self . context . as_ptr ( ) , seq_id) }
197+ unsafe { llama_cpp_sys_2:: llama_kv_self_seq_pos_max ( self . context . as_ptr ( ) , seq_id) }
198198 }
199199
200200 /// Defragment the KV cache.
201201 /// The defragmentation will be applied:
202202 /// - lazily on next [`LlamaContext::decode`]
203203 /// - explicitly with [`Self::kv_cache_update`]
204204 pub fn kv_cache_defrag ( & mut self ) {
// The wrapper itself only forwards the context pointer to the sys crate and returns nothing.
// This hunk renames the FFI symbol from `llama_kv_cache_defrag` to `llama_kv_self_defrag`.
205- unsafe { llama_cpp_sys_2:: llama_kv_cache_defrag ( self . context . as_ptr ( ) ) }
205+ unsafe { llama_cpp_sys_2:: llama_kv_self_defrag ( self . context . as_ptr ( ) ) }
206206 }
207207
208208 /// Apply the KV cache updates (such as K-shifts, defragmentation, etc.).
209209 pub fn kv_cache_update ( & mut self ) {
// Old image of the body and closing brace (both marked `-`). The replacement body is the
// `210+` line calling `llama_kv_self_update`, which the diff renders after the long run of
// deleted lines below; the method header above is kept as context.
210- unsafe { llama_cpp_sys_2:: llama_kv_cache_update ( self . context . as_ptr ( ) ) }
211- }
212-
// Deleted by this change: every line of this method carries the `-` marker and no `+`
// counterpart appears in the visible diff. It was a thin wrapper that returned the `i32`
// produced by `llama_get_kv_cache_token_count` for the context pointer.
213- /// Returns the number of tokens in the KV cache (slow, use only for debug)
214- /// If a KV cell has multiple sequences assigned to it, it will be counted multiple times
215- #[ must_use]
216- pub fn get_kv_cache_token_count ( & self ) -> i32 {
217- unsafe { llama_cpp_sys_2:: llama_get_kv_cache_token_count ( self . context . as_ptr ( ) ) }
218- }
219-
// Deleted by this change (all lines are `-`). The method called
// `llama_kv_cache_view_init` with the context pointer and `n_max_seq`, then wrapped the
// returned sys struct together with a shared borrow of `self` in a `KVCacheView`.
220- /// Create an empty KV cache view. (use only for debugging purposes)
221- ///
222- /// # Parameters
223- ///
224- /// * `n_max_seq` - Maximum number of sequences that can exist in a cell. It's not an error
225- /// if there are more sequences in a cell than this value, however they will
226- /// not be visible in the view `cells_sequences`.
227- #[ must_use]
228- pub fn new_kv_cache_view ( & self , n_max_seq : i32 ) -> KVCacheView {
229- let view =
230- unsafe { llama_cpp_sys_2:: llama_kv_cache_view_init ( self . context . as_ptr ( ) , n_max_seq) } ;
231- KVCacheView { view, ctx : self }
232- }
233- }
234-
// Deleted by this change (all lines are `-`). The struct had a single public field, `pos`,
// typed as the sys crate's `llama_pos`; it is the item type yielded by `KVCacheView::cells`.
235- /// Information associated with an individual cell in the KV cache view.
236- #[ derive( Debug ) ]
237- pub struct KVCacheViewCell {
238- /// The position for this cell. Takes KV cache shifts into account.
239- /// May be negative if the cell is not populated.
240- pub pos : llama_cpp_sys_2:: llama_pos ,
241- }
242-
// Deleted by this change (all lines are `-`). The struct paired a shared borrow of the
// owning `LlamaContext` (`ctx`) with the raw sys-crate view struct (`view`); both fields
// were private. The borrow ties the view's lifetime to the context it was created from.
243- /// An updateable view of the KV cache. (use only for debugging purposes)
244- #[ derive( Debug ) ]
245- pub struct KVCacheView < ' a > {
246- ctx : & ' a LlamaContext < ' a > ,
247- view : llama_cpp_sys_2:: llama_kv_cache_view ,
248- }
249-
// Deleted by this change: the whole inherent impl is marked `-`.
// Summary of what the removed methods did, as visible in the lines below:
//   * `update` calls `llama_kv_cache_view_update` with the context pointer and `&mut self.view`.
//   * `n_cells`, `token_count`, `used_cells`, `max_contiguous` and `max_contiguous_idx`
//     return the same-named fields of the raw view as `i32`.
//   * `cells` builds a slice from the raw `view.cells` pointer with length `n_cells` and
//     maps each element to a `KVCacheViewCell` carrying its `pos`.
//   * `cells_sequences` builds a slice from `view.cells_sequences` of length
//     `n_cells * n_seq_max` and splits it into chunks of `n_seq_max`.
// NOTE(review): in `cells_sequences` the product `n_cells * n_seq_max` is evaluated before
// `usize::try_from`, i.e. in the fields' own integer type (presumably `i32`, as `n_cells()`
// returns `i32`), so an overflowing product would not be caught by the conversion -- verify.
// NOTE(review): `slice::chunks` panics when the chunk size is 0, so a view with
// `n_seq_max == 0` would panic; the `# Panics` section does not list that case.
// NOTE(review): the docs say `n_max_seq` while the field read is `n_seq_max`.
250- impl KVCacheView < ' _ > {
251- /// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
252- pub fn update ( & mut self ) {
253- unsafe {
254- llama_cpp_sys_2:: llama_kv_cache_view_update ( self . ctx . context . as_ptr ( ) , & mut self . view ) ;
255- }
256- }
257-
258- /// Number of KV cache cells. This will be the same as the context size.
259- #[ must_use]
260- pub fn n_cells ( & self ) -> i32 {
261- self . view . n_cells
262- }
263-
264- /// Number of tokens in the cache. For example, if there are two populated
265- /// cells, the first with 1 sequence id in it and the second with 2 sequence
266- /// ids then you'll have 3 tokens.
267- #[ must_use]
268- pub fn token_count ( & self ) -> i32 {
269- self . view . token_count
270- }
271-
272- /// Number of populated cache cells.
273- #[ must_use]
274- pub fn used_cells ( & self ) -> i32 {
275- self . view . used_cells
276- }
277-
278- /// Maximum contiguous empty slots in the cache.
279- #[ must_use]
280- pub fn max_contiguous ( & self ) -> i32 {
281- self . view . max_contiguous
282- }
283-
284- /// Index to the start of the `max_contiguous` slot range. Can be negative
285- /// when cache is full.
286- #[ must_use]
287- pub fn max_contiguous_idx ( & self ) -> i32 {
288- self . view . max_contiguous_idx
289- }
290-
291- /// Information for individual cells.
292- ///
293- /// # Panics
294- ///
295- /// - if `n_cells` does not fit into usize.
296- pub fn cells ( & self ) -> impl Iterator < Item = KVCacheViewCell > {
297- unsafe {
298- std:: slice:: from_raw_parts (
299- self . view . cells ,
300- usize:: try_from ( self . view . n_cells ) . expect ( "failed to fit n_cells into usize" ) ,
301- )
302- }
303- . iter ( )
304- . map ( |& cell| KVCacheViewCell { pos : cell. pos } )
305- }
306-
307- /// The sequences for each cell. There will be `n_max_seq` items per cell.
308- ///
309- /// # Panics
310- ///
311- /// - if `n_cells * n_max_seq` does not fit into usize.
312- /// - if `n_max_seq` does not fit into usize.
313- pub fn cells_sequences ( & self ) -> impl Iterator < Item = & [ llama_cpp_sys_2:: llama_seq_id ] > {
314- unsafe {
315- std:: slice:: from_raw_parts (
316- self . view . cells_sequences ,
317- usize:: try_from ( self . view . n_cells * self . view . n_seq_max )
318- . expect ( "failed to fit n_cells * n_max_seq into usize" ) ,
319- )
320- }
321- . chunks ( usize:: try_from ( self . view . n_seq_max ) . expect ( "failed to fit n_max_seq into usize" ) )
322- }
323- }
324-
// Two images are interleaved in this span:
//   * Old image (`-` lines): `impl Drop for KVCacheView`, whose `drop` passed `&mut self.view`
//     to `llama_kv_cache_view_free`. It is deleted by this change.
//   * New image (`210+` line): the replacement body of `kv_cache_update`, whose header is the
//     context line numbered 209 earlier in this hunk. It calls `llama_kv_self_update` with
//     the context pointer.
// The two trailing brace lines are shared context: in the old image they close `drop` and
// the `Drop` impl; in the new image they close `kv_cache_update` and the enclosing impl.
325- impl Drop for KVCacheView < ' _ > {
326- fn drop ( & mut self ) {
327- unsafe {
328- llama_cpp_sys_2:: llama_kv_cache_view_free ( & mut self . view ) ;
329- }
210+ unsafe { llama_cpp_sys_2:: llama_kv_self_update ( self . context . as_ptr ( ) ) }
330211 }
331212}
0 commit comments