
Commit 00f3bcb

Merge pull request #622 from nkoppel/update-llama-cpp
Update llama.cpp dependency to the latest version; Fix all clippy lints.
2 parents 8d87ea6 + 5dacedf commit 00f3bcb

9 files changed: +114 −131 lines


llama-cpp-2/src/context.rs

Lines changed: 4 additions & 4 deletions
@@ -52,13 +52,13 @@ impl<'model> LlamaContext<'model> {
         }
     }
 
-    /// Gets the max number of logical tokens that can be submitted to decode. Must be greater than or equal to n_ubatch.
+    /// Gets the max number of logical tokens that can be submitted to decode. Must be greater than or equal to [`Self::n_ubatch`].
     #[must_use]
     pub fn n_batch(&self) -> u32 {
         unsafe { llama_cpp_sys_2::llama_n_batch(self.context.as_ptr()) }
     }
 
-    /// Gets the max number of physical tokens (hardware level) to decode in batch. Must be less than or equal to n_batch.
+    /// Gets the max number of physical tokens (hardware level) to decode in batch. Must be less than or equal to [`Self::n_batch`].
    #[must_use]
     pub fn n_ubatch(&self) -> u32 {
         unsafe { llama_cpp_sys_2::llama_n_ubatch(self.context.as_ptr()) }
@@ -318,7 +318,7 @@ impl<'model> LlamaContext<'model> {
         scale: f32,
     ) -> Result<(), LlamaLoraAdapterSetError> {
         let err_code = unsafe {
-            llama_cpp_sys_2::llama_lora_adapter_set(
+            llama_cpp_sys_2::llama_set_adapter_lora(
                 self.context.as_ptr(),
                 adapter.lora_adapter.as_ptr(),
                 scale,
@@ -342,7 +342,7 @@ impl<'model> LlamaContext<'model> {
         adapter: &mut LlamaLoraAdapter,
     ) -> Result<(), LlamaLoraAdapterRemoveError> {
         let err_code = unsafe {
-            llama_cpp_sys_2::llama_lora_adapter_remove(
+            llama_cpp_sys_2::llama_rm_adapter_lora(
                 self.context.as_ptr(),
                 adapter.lora_adapter.as_ptr(),
             )
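
Reviewer note (not part of this commit): the renamed FFI entry points (`llama_set_adapter_lora`, `llama_rm_adapter_lora`) track upstream llama.cpp's rename of the LoRA adapter API, and the doc links now make the `n_batch`/`n_ubatch` relationship navigable. A minimal sketch of that documented invariant, assuming an already-constructed `LlamaContext`; the helper name is hypothetical:

```rust
use llama_cpp_2::context::LlamaContext;

/// Sketch only: `check_batch_limits` is a hypothetical helper, not part of this diff.
fn check_batch_limits(ctx: &LlamaContext<'_>) {
    let n_batch = ctx.n_batch(); // max logical tokens per decode call
    let n_ubatch = ctx.n_ubatch(); // max physical (hardware-level) tokens per batch
    // The doc comments above require n_ubatch <= n_batch.
    assert!(n_ubatch <= n_batch, "n_ubatch must be <= n_batch");
}
```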

llama-cpp-2/src/context/kv_cache.rs

Lines changed: 27 additions & 19 deletions
@@ -6,6 +6,7 @@ use std::num::{NonZeroU8, TryFromIntError};
 
 /// Errors that can occur when attempting to prepare values for the kv cache
 #[derive(Debug, Eq, PartialEq, thiserror::Error)]
+#[allow(clippy::module_name_repetitions)]
 pub enum KvCacheConversionError {
     /// Sequence id conversion to i32 failed
     #[error("Provided sequence id is too large for a i32")]
@@ -33,15 +34,16 @@ impl LlamaContext<'_> {
     /// Copy the cache from one sequence to another.
     ///
     /// # Returns
-    /// A `Result` indicating whether the operation was successful. If the either position exceeds
-    /// the maximum i32 value, no copy is attempted and an `Err` is returned.
+    /// A `Result` indicating whether the operation was successful.
     ///
     /// # Parameters
-    ///
     /// * `src` - The sequence id to copy the cache from.
     /// * `dest` - The sequence id to copy the cache to.
     /// * `p0` - The start position of the cache to clear. If `None`, the entire cache is copied up to `p1`.
     /// * `p1` - The end position of the cache to clear. If `None`, the entire cache is copied starting from `p0`.
+    ///
+    /// # Errors
+    /// If either position exceeds [`i32::MAX`].
     pub fn copy_kv_cache_seq(
         &mut self,
         src: i32,
@@ -51,10 +53,10 @@ impl LlamaContext<'_> {
     ) -> Result<(), KvCacheConversionError> {
         let p0 = p0
             .map_or(Ok(-1), i32::try_from)
-            .map_err(|e| KvCacheConversionError::P0TooLarge(e))?;
+            .map_err(KvCacheConversionError::P0TooLarge)?;
         let p1 = p1
             .map_or(Ok(-1), i32::try_from)
-            .map_err(|e| KvCacheConversionError::P1TooLarge(e))?;
+            .map_err(KvCacheConversionError::P1TooLarge)?;
         unsafe {
             llama_cpp_sys_2::llama_kv_cache_seq_cp(self.context.as_ptr(), src, dest, p0, p1);
         }
@@ -69,10 +71,12 @@ impl LlamaContext<'_> {
     /// either position exceeds the maximum i32 value, no removal is attempted and an `Err` is returned.
     ///
     /// # Parameters
-    ///
     /// * `src` - The sequence id to clear the cache for. If `None`, matches all sequences
     /// * `p0` - The start position of the cache to clear. If `None`, the entire cache is cleared up to `p1`.
     /// * `p1` - The end position of the cache to clear. If `None`, the entire cache is cleared from `p0`.
+    ///
+    /// # Errors
+    /// If the sequence id or either position exceeds [`i32::MAX`].
     pub fn clear_kv_cache_seq(
         &mut self,
         src: Option<u32>,
@@ -81,13 +85,13 @@ impl LlamaContext<'_> {
     ) -> Result<bool, KvCacheConversionError> {
         let src = src
             .map_or(Ok(-1), i32::try_from)
-            .map_err(|e| KvCacheConversionError::SeqIdTooLarge(e))?;
+            .map_err(KvCacheConversionError::SeqIdTooLarge)?;
         let p0 = p0
             .map_or(Ok(-1), i32::try_from)
-            .map_err(|e| KvCacheConversionError::P0TooLarge(e))?;
+            .map_err(KvCacheConversionError::P0TooLarge)?;
         let p1 = p1
             .map_or(Ok(-1), i32::try_from)
-            .map_err(|e| KvCacheConversionError::P1TooLarge(e))?;
+            .map_err(KvCacheConversionError::P1TooLarge)?;
         Ok(unsafe { llama_cpp_sys_2::llama_kv_cache_seq_rm(self.context.as_ptr(), src, p0, p1) })
     }
 
@@ -118,15 +122,17 @@ impl LlamaContext<'_> {
     /// - explicitly with [`Self::kv_cache_update`]
     ///
     /// # Returns
-    /// A `Result` indicating whether the operation was successful. If either position
-    /// exceeds the maximum i32 value, no update is attempted and an `Err` is returned.
+    /// A `Result` indicating whether the operation was successful.
     ///
     /// # Parameters
     ///
     /// * `seq_id` - The sequence id to update
     /// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to `p1`.
     /// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from `p0`.
     /// * `delta` - The relative position to add to the tokens
+    ///
+    /// # Errors
+    /// If either position exceeds [`i32::MAX`].
     pub fn kv_cache_seq_add(
         &mut self,
         seq_id: i32,
@@ -136,10 +142,10 @@ impl LlamaContext<'_> {
     ) -> Result<(), KvCacheConversionError> {
         let p0 = p0
             .map_or(Ok(-1), i32::try_from)
-            .map_err(|e| KvCacheConversionError::P0TooLarge(e))?;
+            .map_err(KvCacheConversionError::P0TooLarge)?;
         let p1 = p1
             .map_or(Ok(-1), i32::try_from)
-            .map_err(|e| KvCacheConversionError::P1TooLarge(e))?;
+            .map_err(KvCacheConversionError::P1TooLarge)?;
         unsafe {
             llama_cpp_sys_2::llama_kv_cache_seq_add(self.context.as_ptr(), seq_id, p0, p1, delta);
         }
@@ -152,15 +158,17 @@ impl LlamaContext<'_> {
     /// - explicitly with [`Self::kv_cache_update`]
     ///
     /// # Returns
-    /// A `Result` indicating whether the operation was successful. If either position
-    /// exceeds the maximum i32 value, no update is attempted and an `Err` is returned.
+    /// A `Result` indicating whether the operation was successful.
     ///
     /// # Parameters
     ///
     /// * `seq_id` - The sequence id to update
     /// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to `p1`.
     /// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from `p0`.
     /// * `d` - The factor to divide the positions by
+    ///
+    /// # Errors
+    /// If either position exceeds [`i32::MAX`].
     pub fn kv_cache_seq_div(
         &mut self,
         seq_id: i32,
@@ -170,10 +178,10 @@ impl LlamaContext<'_> {
     ) -> Result<(), KvCacheConversionError> {
         let p0 = p0
             .map_or(Ok(-1), i32::try_from)
-            .map_err(|e| KvCacheConversionError::P0TooLarge(e))?;
+            .map_err(KvCacheConversionError::P0TooLarge)?;
         let p1 = p1
             .map_or(Ok(-1), i32::try_from)
-            .map_err(|e| KvCacheConversionError::P1TooLarge(e))?;
+            .map_err(KvCacheConversionError::P1TooLarge)?;
         let d = c_int::from(d.get());
         unsafe { llama_cpp_sys_2::llama_kv_cache_seq_div(self.context.as_ptr(), seq_id, p0, p1, d) }
         Ok(())
@@ -239,7 +247,7 @@ pub struct KVCacheView<'a> {
     view: llama_cpp_sys_2::llama_kv_cache_view,
 }
 
-impl<'a> KVCacheView<'a> {
+impl KVCacheView<'_> {
     /// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
     pub fn update(&mut self) {
         unsafe {
@@ -314,7 +322,7 @@ impl<'a> KVCacheView<'a> {
     }
 }
 
-impl<'a> Drop for KVCacheView<'a> {
+impl Drop for KVCacheView<'_> {
     fn drop(&mut self) {
         unsafe {
             llama_cpp_sys_2::llama_kv_cache_view_free(&mut self.view);
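
Reviewer note (not part of this commit): the new `# Errors` sections document behavior that already existed, namely that positions are converted with `i32::try_from` before any FFI call is made. A sketch of that error path, assuming a mutable `LlamaContext`; the sequence ids, positions, and helper name are fabricated for illustration:

```rust
use llama_cpp_2::context::kv_cache::KvCacheConversionError;
use llama_cpp_2::context::LlamaContext;

/// Sketch only: copies a prefix of sequence 0 into sequence 1, then shows the
/// documented failure when a position does not fit in an i32.
fn copy_prefix(ctx: &mut LlamaContext<'_>) -> Result<(), KvCacheConversionError> {
    // Copy positions [0, 128) of sequence 0 into sequence 1.
    ctx.copy_kv_cache_seq(0, 1, Some(0), Some(128))?;

    // u32::MAX exceeds i32::MAX, so no copy is attempted and an Err is returned.
    match ctx.copy_kv_cache_seq(0, 1, Some(0), Some(u32::MAX)) {
        Err(KvCacheConversionError::P1TooLarge(_)) => {} // documented error case
        other => panic!("unexpected result: {other:?}"),
    }
    Ok(())
}
```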

llama-cpp-2/src/llama_batch.rs

Lines changed: 19 additions & 5 deletions
@@ -10,6 +10,7 @@ pub struct LlamaBatch {
     allocated: usize,
     /// The logits that are initialized. Used by [`LlamaContext`] to ensure that only initialized logits are accessed.
     pub(crate) initialized_logits: Vec<i32>,
+    #[allow(clippy::doc_markdown)]
     /// The llama_cpp batch. always initialize by `llama_cpp_sys_2::llama_batch_init(allocated, <unknown>, <unknown>)`
     pub(crate) llama_batch: llama_batch,
 }
@@ -20,7 +21,7 @@ pub enum BatchAddError {
     /// There was not enough space in the batch to add the token.
     #[error("Insufficient Space of {0}")]
     InsufficientSpace(usize),
-    /// Empty buffer is provided for get_one
+    /// Empty buffer is provided for [`LlamaBatch::get_one`]
     #[error("Empty buffer")]
     EmptyBuffer,
 }
@@ -152,22 +153,35 @@ impl LlamaBatch {
         }
     }
 
-    /// llama_batch_get_one
-    /// Return batch for single sequence of tokens starting at pos_0
+    /// ``llama_batch_get_one``
+    /// Return batch for single sequence of tokens
     ///
     /// NOTE: this is a helper function to facilitate transition to the new batch API
     ///
+    /// # Errors
+    /// If the provided token buffer is empty.
+    ///
+    /// # Panics
+    /// If the number of tokens in ``tokens`` exceeds [`i32::MAX`].
     pub fn get_one(tokens: &[LlamaToken]) -> Result<Self, BatchAddError> {
         if tokens.is_empty() {
             return Err(BatchAddError::EmptyBuffer);
         }
         let batch = unsafe {
             let ptr = tokens.as_ptr() as *mut i32;
-            llama_cpp_sys_2::llama_batch_get_one(ptr, tokens.len() as i32)
+            llama_cpp_sys_2::llama_batch_get_one(
+                ptr,
+                tokens
+                    .len()
+                    .try_into()
+                    .expect("number of tokens exceeds i32::MAX"),
+            )
         };
         let batch = Self {
             allocated: 0,
-            initialized_logits: vec![(tokens.len() - 1) as i32],
+            initialized_logits: vec![(tokens.len() - 1)
+                .try_into()
+                .expect("number of tokens exceeds i32::MAX + 1")],
            llama_batch: batch,
         };
         Ok(batch)
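
Reviewer note (not part of this commit): `get_one` now documents its error and panic contract and replaces the silent `as i32` casts with checked conversions. A sketch of that contract; the token ids are fabricated, and the `LlamaToken` tuple constructor and helper name are assumptions:

```rust
use llama_cpp_2::llama_batch::{BatchAddError, LlamaBatch};
use llama_cpp_2::token::LlamaToken;

/// Sketch only: builds a single-sequence batch and exercises the empty-buffer error.
fn single_sequence_batch() -> Result<LlamaBatch, BatchAddError> {
    // An empty slice is rejected up front rather than handed to llama.cpp.
    match LlamaBatch::get_one(&[]) {
        Err(BatchAddError::EmptyBuffer) => {} // documented error case
        _ => unreachable!("empty input must return EmptyBuffer"),
    }

    // Fabricated token ids; real ids come from the model's tokenizer.
    let tokens = [LlamaToken(1), LlamaToken(2), LlamaToken(3)];
    LlamaBatch::get_one(&tokens)
}
```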
