diff --git a/.github/workflows/python-upload-test.yml b/.github/workflows/python-upload-test.yml index 74113f7c..3786de0f 100644 --- a/.github/workflows/python-upload-test.yml +++ b/.github/workflows/python-upload-test.yml @@ -105,14 +105,19 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest, macOS-latest] - target: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"] + target: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"] include: - os: "ubuntu-latest" target: "sdist" python-version: "3.13" + - os: "ubuntu-latest" + target: "sdist" + python-version: "3.14" exclude: - os: "windows-latest" target: "3.13t" + - os: "windows-latest" + target: "3.14t" runs-on: ${{ matrix.os }} steps: @@ -136,17 +141,17 @@ jobs: # this must be after sudachipy install run: python -m pip install sudachidict_core - name: Install dependencies (test pretokenizer) - # tokenizers for py3.13t is not provided yet - if: ${{ matrix.target != '3.13t' }} + # tokenizers for py3.13t, py3.14, py3.14t are not provided yet + if: ${{ matrix.target != '3.13t' && matrix.target != '3.14' && matrix.target != '3.14t' }} run: python -m pip install tokenizers - name: Run test - if: ${{ matrix.target != '3.13t' }} + if: ${{ matrix.target != '3.13t' && matrix.target != '3.14' && matrix.target != '3.14t' }} working-directory: ./python run: python -m unittest - name: Run test (skip pretokenizer test) - # tokenizers for py3.13t is not provided yet - if: ${{ matrix.target == '3.13t' }} + # tokenizers for py3.13t, py3.14, py3.14t are not provided yet + if: ${{ matrix.target == '3.13t' || matrix.target == '3.14' || matrix.target == '3.14t' }} working-directory: ./python run: ls tests/test_*.py | grep -v pretokenizer | xargs -I{} python -m unittest {} - name: Check that binary works (C mode) diff --git a/Cargo.lock b/Cargo.lock index 6cd80223..71f4e407 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -614,11 +614,10 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.23.5" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872" +checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf" dependencies = [ - "cfg-if", "indoc", "libc", "memoffset", @@ -632,19 +631,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.23.5" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb" +checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.23.5" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d" +checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be" dependencies = [ "libc", "pyo3-build-config", @@ -652,9 +650,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.23.5" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da" +checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -664,9 +662,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.23.5" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028" +checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b" dependencies = [ "heck", "proc-macro2", @@ -910,9 +908,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.16" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" [[package]] name = "tempfile" diff --git a/python/Cargo.toml b/python/Cargo.toml index aff53759..9e6822d5 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -15,7 +15,7 @@ name = "sudachipy" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.23", features = ["extension-module"] } +pyo3 = { version = "0.27", features = ["extension-module"] } scopeguard = "1" # Apache 2.0/MIT thread_local = "1.1" # Apache 2.0/MIT diff --git a/python/py_src/sudachipy/sudachipy.pyi b/python/py_src/sudachipy/sudachipy.pyi index ca39a95c..e3a4e1a7 100644 --- a/python/py_src/sudachipy/sudachipy.pyi +++ b/python/py_src/sudachipy/sudachipy.pyi @@ -252,7 +252,7 @@ class Morpheme: Returns sub-morphemes in the provided split mode. :param mode: mode of new split. - :param out: write results to this MorhpemeList instead of creating new one. + :param out: write results to this MorphemeList instead of creating new one. See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for more information on output parameters. Returned MorphemeList will be invalidated if this MorphemeList is used as an output parameter. diff --git a/python/pyproject.toml b/python/pyproject.toml index b2a60c8e..035642d8 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -2,7 +2,7 @@ requires = ["setuptools", "wheel", "setuptools-rust"] [tool.cibuildwheel] -build = "cp39-* cp310-* cp311-* cp312-* cp313-* cp313t-*" +build = "cp39-* cp310-* cp311-* cp312-* cp313-* cp313t-* cp314-* cp314t-*" skip = "*t-win* *-win32 *-musllinux_*" enable = ["cpython-freethreading"] diff --git a/python/src/build.rs b/python/src/build.rs index fa8265ee..52a3c248 100644 --- a/python/src/build.rs +++ b/python/src/build.rs @@ -58,7 +58,7 @@ fn create_file(p: &Path) -> std::io::Result { /// /// :param matrix: Path to the matrix file. /// :param lex: List of paths to lexicon files. -/// :param output: Path to output built dictionray. +/// :param output: Path to output built dictionary. /// :param description: A description text to embed in the dictionary. /// :return: A build report, list of (part, size, time). /// @@ -107,7 +107,7 @@ fn build_system_dic<'py>( /// /// :param system: Path to the system dictionary. /// :param lex: List of paths to lexicon files. -/// :param output: Path to output built dictionray. +/// :param output: Path to output built dictionary. /// :param description: A description text to embed in the dictionary. /// :return: A build report, list of (part, size, time). /// @@ -168,7 +168,7 @@ fn resolve_as_pypathstr<'py>( data: &Bound<'py, PyAny>, ) -> PyResult>> { let binding = py.import("pathlib")?.getattr("Path")?; - let path = binding.downcast::()?; + let path = binding.cast::()?; if data.is_instance(path)? { Ok(Some(data.call_method0("resolve")?.str()?)) } else if data.is_instance_of::() { @@ -186,9 +186,7 @@ fn as_data_source<'py>( Some(pystr) => Ok(DataSource::File(Path::new(pystr.to_str()?))), None => { if original_obj.is_instance_of::() { - Ok(DataSource::Data( - original_obj.downcast::()?.as_bytes(), - )) + Ok(DataSource::Data(original_obj.cast::()?.as_bytes())) } else { errors::wrap(Err(format!( "data source should be only Path, bytes or str, was {}: {}", diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs index 2403fcec..661aef67 100644 --- a/python/src/dictionary.rs +++ b/python/src/dictionary.rs @@ -47,7 +47,7 @@ pub(crate) struct PyDicData { pub(crate) pos: Vec>, /// Compute default string representation for a morpheme using vtable dispatch. /// None by default (if outputting surface as it is) - /// This is default per-dictionary value, can be overriden when creating tokenizers and pre-tokenizers + /// This is default per-dictionary value, can be overridden when creating tokenizers and pre-tokenizers pub(crate) projection: PyProjector, } @@ -430,7 +430,7 @@ impl PyDictionary { /// /// :type pos_id: int #[pyo3(text_signature = "(self, /, pos_id: int) -> tuple[str, str, str, str, str, str] | None")] - fn pos_of<'py>(&'py self, py: Python<'py>, pos_id: usize) -> Option<&Bound<'py, PyTuple>> { + fn pos_of<'py>(&'py self, py: Python<'py>, pos_id: usize) -> Option<&'py Bound<'py, PyTuple>> { let dic = self.dictionary.as_ref().unwrap(); dic.pos.get(pos_id).map(|x| x.bind(py)) } @@ -516,21 +516,21 @@ fn read_config(config_opt: &Bound) -> PyResult { pub(crate) fn read_default_config(py: Python) -> PyResult { let path = py.import("sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?; - let path = path.downcast::()?.to_str()?; + let path = path.cast::()?.to_str()?; let path = PathBuf::from(path); errors::wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path) } pub(crate) fn get_default_resource_dir(py: Python) -> PyResult { let path = py.import("sudachipy")?.getattr("_DEFAULT_RESOURCEDIR")?; - let path = path.downcast::()?.to_str()?; + let path = path.cast::()?.to_str()?; Ok(PathBuf::from(path)) } fn find_dict_path(py: Python, dict_type: &str) -> PyResult { let pyfunc = py.import("sudachipy")?.getattr("_find_dict_path")?; let path = pyfunc.call1((dict_type,))?; - let path = path.downcast::()?.to_str()?; + let path = path.cast::()?.to_str()?; Ok(PathBuf::from(path)) } diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs index ebd37d72..999dfe87 100644 --- a/python/src/morpheme.rs +++ b/python/src/morpheme.rs @@ -335,7 +335,7 @@ impl PyMorpheme { /// Returns the dictionary form. #[pyo3(text_signature = "(self, /) -> str")] - fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyResult> { + fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyResult> { Ok(self .morph(py) .get_word_info() @@ -345,7 +345,7 @@ impl PyMorpheme { /// Returns the normalized form. #[pyo3(text_signature = "(self, /) -> str")] - fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyResult> { + fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyResult> { Ok(self .morph(py) .get_word_info() @@ -355,7 +355,7 @@ impl PyMorpheme { /// Returns the reading form. #[pyo3(text_signature = "(self, /) -> str")] - fn reading_form<'py>(&'py self, py: Python<'py>) -> PyResult> { + fn reading_form<'py>(&'py self, py: Python<'py>) -> PyResult> { Ok(self .morph(py) .get_word_info() @@ -366,7 +366,7 @@ impl PyMorpheme { /// Returns sub-morphemes in the provided split mode. /// /// :param mode: mode of new split. - /// :param out: write results to this MorhpemeList instead of creating new one. + /// :param out: write results to this MorphemeList instead of creating new one. /// See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for /// more information on output parameters. /// Returned MorphemeList will be invalidated if this MorphemeList is used as an output parameter. @@ -444,7 +444,7 @@ impl PyMorpheme { /// Returns the list of synonym group ids. #[pyo3(text_signature = "(self, /) -> List[int]")] - fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> PyResult> { + fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> PyResult> { let mref = self.morph(py); let ids = mref.get_word_info().synonym_group_ids(); PyList::new(py, ids) diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs index 220d5120..4b86114d 100644 --- a/python/src/pos_matcher.rs +++ b/python/src/pos_matcher.rs @@ -53,7 +53,7 @@ impl PyPosMatcher { fn create_from_fn(dic: &Arc, func: &Bound) -> PyResult { let mut data = Vec::new(); for (pos_id, pos) in dic.pos.iter().enumerate() { - if func.call1((pos,))?.downcast::()?.is_true() { + if func.call1((pos,))?.cast::()?.is_true() { data.push(pos_id as u16); } } @@ -67,7 +67,7 @@ impl PyPosMatcher { let mut result = Vec::new(); for item in data { let item = item?; - let item = item.downcast::()?; + let item = item.cast::()?; Self::match_pos_elements(&mut result, dic.as_ref(), item)?; } Ok(Self { @@ -232,7 +232,7 @@ impl PyPosIter { slf } - fn __next__<'py>(&'py mut self, py: Python<'py>) -> Option<&Bound<'py, PyTuple>> { + fn __next__<'py>(&'py mut self, py: Python<'py>) -> Option<&'py Bound<'py, PyTuple>> { let idx = self.index; self.index += 1; if idx >= self.data.len() { diff --git a/python/src/pretokenizer.rs b/python/src/pretokenizer.rs index 67e19b89..f88a912b 100644 --- a/python/src/pretokenizer.rs +++ b/python/src/pretokenizer.rs @@ -19,7 +19,7 @@ use std::sync::Arc; use pyo3::intern; use pyo3::prelude::*; -use pyo3::sync::GILOnceCell; +use pyo3::sync::PyOnceLock; use pyo3::types::{PyList, PySlice, PyType}; use thread_local::ThreadLocal; @@ -138,7 +138,7 @@ impl PyPretokenizer { let pystr = string.str()?; let input_data = pystr.to_str()?; // tokenization itself should work without GIL, we have thread-local tokenizers here - py.allow_threads(|| self.tokenizer_cell().borrow_mut().tokenize(input_data))?; + py.detach(|| self.tokenizer_cell().borrow_mut().tokenize(input_data))?; // then prepare results with GIL self.tokenizer_cell().borrow_mut().collect_results(py)?; let cell = self.tokenizer_cell().borrow(); @@ -191,10 +191,10 @@ fn make_result_for_projection<'py>( ) -> PyResult> { let result = PyList::empty(py); let nstring = { - static NORMALIZED_STRING: GILOnceCell> = GILOnceCell::new(); + static NORMALIZED_STRING: PyOnceLock> = PyOnceLock::new(); NORMALIZED_STRING.get_or_try_init(py, || -> PyResult> { let ns = py.import("tokenizers")?.getattr("NormalizedString")?; - let tpe = ns.downcast::()?; + let tpe = ns.cast::()?; Ok(tpe.clone().unbind()) })? }; diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs index 37660823..525c674a 100644 --- a/python/src/tokenizer.rs +++ b/python/src/tokenizer.rs @@ -149,9 +149,9 @@ impl PyTokenizer { py: Python<'py>, text: &'py str, mode: Option<&Bound<'py, PyAny>>, - logger: Option, + logger: Option>, out: Option>, - ) -> PyResult> { + ) -> PyResult> { // restore default mode on scope exit let mode = match mode { None => None, @@ -164,7 +164,7 @@ impl PyTokenizer { // analysis can be done without GIL errors::wrap_ctx( - py.allow_threads(|| { + py.detach(|| { tokenizer.reset().push_str(text); tokenizer.do_tokenize() }), diff --git a/sudachi-fuzz/src/main.rs b/sudachi-fuzz/src/main.rs index fd9c2615..1b57ebda 100644 --- a/sudachi-fuzz/src/main.rs +++ b/sudachi-fuzz/src/main.rs @@ -22,7 +22,7 @@ fn consume_mlist<'a, 'b: 'a>( } // mlist.get_internal_cost() as isize; - // use black_box function to forbit optimizing accesses to API functions + // use black_box function to forbid optimizing accesses to API functions // this is important for fuzzing, we want to trigger any possible panics that can happen for i in 0..mlist.len() { let m = mlist.get(i); diff --git a/sudachi/src/dic/lexicon/word_id_table.rs b/sudachi/src/dic/lexicon/word_id_table.rs index bd79cfc5..7d26ba5a 100644 --- a/sudachi/src/dic/lexicon/word_id_table.rs +++ b/sudachi/src/dic/lexicon/word_id_table.rs @@ -24,7 +24,7 @@ pub struct WordIdTable<'a> { } impl<'a> WordIdTable<'a> { - pub fn new(bytes: &'a [u8], size: u32, offset: usize) -> WordIdTable { + pub fn new(bytes: &'a [u8], size: u32, offset: usize) -> WordIdTable<'a> { WordIdTable { bytes, size, diff --git a/sudachi/src/dic/lexicon/word_infos.rs b/sudachi/src/dic/lexicon/word_infos.rs index 4be9cf85..f15e1c7f 100644 --- a/sudachi/src/dic/lexicon/word_infos.rs +++ b/sudachi/src/dic/lexicon/word_infos.rs @@ -36,7 +36,7 @@ impl<'a> WordInfos<'a> { offset: usize, _word_size: u32, has_synonym_group_ids: bool, - ) -> WordInfos { + ) -> WordInfos<'a> { WordInfos { bytes, offset, diff --git a/sudachi/src/dic/lexicon/word_params.rs b/sudachi/src/dic/lexicon/word_params.rs index f7b77c2a..500c9712 100644 --- a/sudachi/src/dic/lexicon/word_params.rs +++ b/sudachi/src/dic/lexicon/word_params.rs @@ -25,7 +25,7 @@ impl<'a> WordParams<'a> { const PARAM_SIZE: usize = 3; const ELEMENT_SIZE: usize = 2 * Self::PARAM_SIZE; - pub fn new(bytes: &'a [u8], size: u32, offset: usize) -> WordParams { + pub fn new(bytes: &'a [u8], size: u32, offset: usize) -> WordParams<'a> { let n_entries = size as usize * Self::PARAM_SIZE; Self { data: CowArray::from_bytes(bytes, offset, n_entries), diff --git a/sudachi/src/dic/mod.rs b/sudachi/src/dic/mod.rs index 95374299..0d9948d9 100644 --- a/sudachi/src/dic/mod.rs +++ b/sudachi/src/dic/mod.rs @@ -92,7 +92,7 @@ impl<'a> LoadedDictionary<'a> { pub(crate) fn merge_dictionary( mut self, other: DictionaryLoader<'a>, - ) -> SudachiResult { + ) -> SudachiResult> { let npos = self.grammar.pos_list.len(); let lexicon = other.lexicon; let grammar = other.grammar; diff --git a/sudachi/src/input_text/buffer/edit.rs b/sudachi/src/input_text/buffer/edit.rs index 300f73ab..0b25e0fb 100644 --- a/sudachi/src/input_text/buffer/edit.rs +++ b/sudachi/src/input_text/buffer/edit.rs @@ -35,7 +35,7 @@ pub struct InputEditor<'a> { } impl<'a> InputEditor<'a> { - pub(super) fn new(replaces: &'a mut Vec>) -> InputEditor { + pub(super) fn new(replaces: &'a mut Vec>) -> InputEditor<'a> { InputEditor { replaces } }