Merged
Changes from 6 commits
25 changes: 16 additions & 9 deletions python/src/build.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,18 +14,21 @@
* limitations under the License.
*/

use crate::dictionary::get_default_resource_dir;
use crate::errors;
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType};
use std::fs::{File, OpenOptions};
use std::io::BufWriter;
use std::path::Path;

use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::Config;
use sudachi::dic::build::{DataSource, DictBuilder};
use sudachi::dic::dictionary::JapaneseDictionary;

use crate::dictionary::get_default_resource_dir;
use crate::errors;

pub fn register_functions(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(build_system_dic, m)?)?;
m.add_function(wrap_pyfunction!(build_user_dic, m)?)?;
@@ -68,12 +71,14 @@ fn build_system_dic<'p>(
description: Option<&str>,
) -> PyResult<&'p PyList> {
let mut builder = DictBuilder::new_system();
description.map(|d| builder.set_description(d));
if let Some(d) = description {
builder.set_description(d)
}

let matrix_src = as_data_source(py, matrix)?;
errors::wrap_ctx(builder.read_conn(matrix_src), matrix)?;
for f in lex.iter() {
let lex_src = as_data_source(py, &f)?;
let lex_src = as_data_source(py, f)?;
errors::wrap_ctx(builder.read_lexicon(lex_src), &f)?;
}
let out_file = match as_data_source(py, output)? {
@@ -110,10 +115,12 @@ fn build_user_dic<'p>(
};

let mut builder = DictBuilder::new_user(&system_dic);
description.map(|d| builder.set_description(d));
if let Some(d) = description {
builder.set_description(d)
}

for f in lex.iter() {
let lex_src = as_data_source(py, &f)?;
let lex_src = as_data_source(py, f)?;
errors::wrap_ctx(builder.read_lexicon(lex_src), &f)?;
}
let out_file = match as_data_source(py, output)? {
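Note (illustration only, not part of the diff): `Option::map` called purely for its side effect discards the resulting `Option<()>`, which Clippy flags as `option_map_unit_fn`; an explicit `if let` states the intent. A minimal standalone sketch with a hypothetical `Builder`:

```rust
struct Builder {
    description: String,
}

impl Builder {
    fn set_description(&mut self, d: &str) {
        self.description = d.to_string();
    }
}

fn main() {
    let description: Option<&str> = Some("my dictionary");
    let mut builder = Builder { description: String::new() };

    // Discouraged: `map` is used only for its side effect and the
    // resulting Option<()> is thrown away (Clippy: option_map_unit_fn).
    // description.map(|d| builder.set_description(d));

    // Preferred: run the call only when a value is present, explicitly.
    if let Some(d) = description {
        builder.set_description(d);
    }

    assert_eq!(builder.description, "my dictionary");
}
```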
22 changes: 10 additions & 12 deletions python/src/dictionary.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,18 +14,18 @@
* limitations under the License.
*/

use pyo3::prelude::*;
use pyo3::types::{PySet, PyString, PyTuple};
use std::convert::TryFrom;
use std::fmt::Write;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use sudachi::analysis::Mode;

use crate::errors::{wrap, wrap_ctx, SudachiError as SudachiErr};
use pyo3::prelude::*;
use pyo3::types::{PySet, PyString, PyTuple};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::analysis::Mode;
use sudachi::config::{Config, ConfigBuilder, SurfaceProjection};
use sudachi::dic::dictionary::JapaneseDictionary;
use sudachi::dic::grammar::Grammar;
@@ -35,6 +35,7 @@ use sudachi::plugin::input_text::InputTextPlugin;
use sudachi::plugin::oov::OovProviderPlugin;
use sudachi::plugin::path_rewrite::PathRewritePlugin;

use crate::errors::{wrap, wrap_ctx, SudachiError as SudachiErr};
use crate::morpheme::{PyMorphemeListWrapper, PyProjector};
use crate::pos_matcher::PyPosMatcher;
use crate::pretokenizer::PyPretokenizer;
@@ -178,10 +179,7 @@ impl PyDictionary {
}

let jdic = JapaneseDictionary::from_cfg(&config).map_err(|e| {
SudachiErr::new_err(format!(
"Error while constructing dictionary: {}",
e.to_string()
))
SudachiErr::new_err(format!("Error while constructing dictionary: {}", e))
})?;

let pos_data = jdic
@@ -414,7 +412,7 @@ fn config_repr(cfg: &Config) -> Result<String, std::fmt::Error> {
pub(crate) fn extract_mode<'py>(py: Python<'py>, mode: &'py PyAny) -> PyResult<Mode> {
if mode.is_instance_of::<PyString>() {
let mode = mode.str()?.to_str()?;
Mode::from_str(mode).map_err(|e| SudachiErr::new_err(e).into())
Mode::from_str(mode).map_err(SudachiErr::new_err)
} else if mode.is_instance_of::<PySplitMode>() {
let mode = mode.extract::<PySplitMode>()?;
Ok(Mode::from(mode))
@@ -431,7 +429,7 @@ fn read_config(config_opt: &PyAny) -> PyResult<ConfigBuilder> {
if config_opt.is_instance_of::<PyString>() {
let config_str = config_opt.str()?.to_str()?.trim();
// looks like json
if config_str.starts_with("{") && config_str.ends_with("}") {
if config_str.starts_with('{') && config_str.ends_with('}') {
let result = ConfigBuilder::from_bytes(config_str.as_bytes());
return wrap(result);
}
@@ -451,7 +449,7 @@ fn read_config(config_opt: &PyAny) -> PyResult<ConfigBuilder> {
return read_config(cfg_as_str);
}
Err(SudachiErr::new_err((
format!("passed config was not a string, json object or sudachipy.config.Config object"),
"passed config was not a string, json object or sudachipy.config.Config object".to_string(),
config_opt.into_py(py),
)))
}
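Note (standalone sketch, not part of the diff): `format!` already invokes `Display`, so `e.to_string()` inside the format arguments only adds an allocation (Clippy: `to_string_in_format_args`), and `format!` with no placeholders is a verbose `to_string()` (Clippy: `useless_format`). A small example using a plain `std::io::Error`:

```rust
use std::io;

fn main() {
    let e = io::Error::new(io::ErrorKind::NotFound, "resources/sudachi.json");

    // Redundant: e is first converted to a String, then formatted again.
    let wasteful = format!("Error while constructing dictionary: {}", e.to_string());

    // Same output, one allocation less: Display is used directly.
    let direct = format!("Error while constructing dictionary: {}", e);
    assert_eq!(wasteful, direct);

    // format! with no placeholders is just a roundabout to_string().
    let msg = "passed config was not a string, json object or sudachipy.config.Config object".to_string();
    assert_eq!(msg, format!("passed config was not a string, json object or sudachipy.config.Config object"));
}
```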
4 changes: 3 additions & 1 deletion python/src/errors.rs
@@ -14,9 +14,11 @@
* limitations under the License.
*/

use pyo3::{import_exception, PyResult};
use std::fmt::{Debug, Display};

use pyo3::prelude::*;
use pyo3::{import_exception, PyResult};

// Sudachi exception class is defined in Python
import_exception!(sudachipy.errors, SudachiError);

10 changes: 4 additions & 6 deletions python/src/morpheme.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -158,7 +158,7 @@ impl PyMorphemeListWrapper {
for (i, m) in list.iter().enumerate() {
result.push_str(m.surface().deref());
if i + 1 != nmorphs {
result.push_str(" ");
result.push(' ');
}
}
PyString::new(py, result.as_str())
@@ -193,7 +193,7 @@ impl PyMorphemeListWrapper {
}

fn __bool__(&self, py: Python) -> bool {
self.internal(py).len() != 0
!self.internal(py).is_empty()
}
}

@@ -387,9 +387,7 @@ impl PyMorpheme {
let splitted = list
.internal(py)
.split_into(mode, self.index, out_ref)
.map_err(|e| {
PyException::new_err(format!("Error while splitting morpheme: {}", e.to_string()))
})?;
.map_err(|e| PyException::new_err(format!("Error while splitting morpheme: {}", e)))?;

if add_single.unwrap_or(true) && !splitted {
list.internal(py)
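Note (sketch only, not part of the diff): for a single character, `String::push` avoids going through a one-byte `&str`, and `!s.is_empty()` reads as a predicate where `s.len() != 0` is the roundabout form (Clippy: `single_char_add_str`, `len_zero`). A standalone example with a hypothetical `join_surfaces` helper:

```rust
fn join_surfaces(surfaces: &[&str]) -> String {
    let mut result = String::new();
    for (i, s) in surfaces.iter().enumerate() {
        result.push_str(s);
        if i + 1 != surfaces.len() {
            // push(' ') instead of push_str(" "): one char, no slice needed
            result.push(' ');
        }
    }
    result
}

fn main() {
    let morphemes = ["国会", "議事堂", "前"];
    let joined = join_surfaces(&morphemes);
    assert_eq!(joined, "国会 議事堂 前");

    // is_empty() states the intent directly; len() != 0 says it indirectly.
    assert!(!joined.is_empty());
}
```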
5 changes: 2 additions & 3 deletions python/src/pos_matcher.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -49,7 +49,7 @@ impl PyPosMatcher {
fn create_from_fn(dic: &Arc<PyDicData>, func: &PyAny, py: Python) -> PyResult<Self> {
let mut data = Vec::new();
for (pos_id, pos) in dic.pos.iter().enumerate() {
let args = PyTuple::new(py, &[pos]);
let args = PyTuple::new(py, [pos]);
if func.call1(args)?.downcast::<PyBool>()?.is_true() {
data.push(pos_id as u16);
}
@@ -178,7 +178,6 @@ impl PyPosMatcher {
let max_id = self.dic.pos.len();
// map -> filter chain is needed to handle exactly u16::MAX POS entries
let values = (0..max_id)
.into_iter()
.map(|x| x as u16)
.filter(|id| !self.matcher.matches_id(*id));
let matcher = PosMatcher::new(values);
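Note (sketch only, not part of the diff): a `Range` such as `0..max_id` already implements `Iterator`, so the dropped `.into_iter()` was a no-op (Clippy: `useless_conversion`); the map → filter chain runs on the range directly. Illustrative values only:

```rust
fn main() {
    let max_id: usize = 5;
    let excluded: Vec<u16> = vec![1, 3];

    // `0..max_id` is already an Iterator; no `.into_iter()` needed.
    let values: Vec<u16> = (0..max_id)
        .map(|x| x as u16)
        .filter(|id| !excluded.contains(id))
        .collect();

    assert_eq!(values, vec![0, 2, 4]);
}
```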
20 changes: 11 additions & 9 deletions python/src/pretokenizer.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,21 +14,23 @@
* limitations under the License.
*/

use crate::dictionary::PyDicData;
use crate::errors::wrap;
use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector};
use std::cell::RefCell;
use std::sync::Arc;

use pyo3::intern;
use pyo3::prelude::*;
use pyo3::sync::GILOnceCell;
use pyo3::types::{PyList, PySlice, PyTuple, PyType};
use std::cell::RefCell;
use std::sync::Arc;
use thread_local::ThreadLocal;

use crate::projection::MorphemeProjection;
use sudachi::analysis::stateful_tokenizer::StatefulTokenizer;
use sudachi::dic::subset::InfoSubset;
use sudachi::prelude::Mode;
use thread_local::ThreadLocal;

use crate::dictionary::PyDicData;
use crate::errors::wrap;
use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector};
use crate::projection::MorphemeProjection;

/// This struct performs actual tokenization
/// There should be at most one instance per thread of execution
@@ -150,7 +152,7 @@ impl PyPretokenizer {
}
Some(h) => {
let mrp: &PyAny = morphs.as_ref(py);
let args = PyTuple::new(py, &[index, string, mrp]);
let args = PyTuple::new(py, [index, string, mrp]);
h.as_ref(py).call1(args)
}
}
Expand Down
19 changes: 11 additions & 8 deletions python/src/projection.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023 Works Applications Co., Ltd.
* Copyright (c) 2023-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,18 +14,22 @@
* limitations under the License.
*/

use crate::dictionary::PyDicData;
use crate::morpheme::PyProjector;
use pyo3::types::PyString;
use pyo3::{PyResult, Python};
use std::convert::TryFrom;
use std::ops::Deref;
use std::sync::Arc;

use pyo3::prelude::*;
use pyo3::types::PyString;
use pyo3::{PyResult, Python};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::SurfaceProjection;
use sudachi::pos::PosMatcher;
use sudachi::prelude::Morpheme;

use crate::dictionary::PyDicData;
use crate::morpheme::PyProjector;

pub(crate) trait MorphemeProjection {
fn project<'py>(&self, m: &Morpheme<Arc<PyDicData>>, py: Python<'py>) -> &'py PyString;
}
@@ -112,9 +116,8 @@ impl MorphemeProjection for NormalizedNouns {
}

fn conjugating_matcher<D: DictionaryAccess>(dic: &D) -> PosMatcher {
make_matcher(dic, |pos| match pos[0].deref() {
"動詞" | "形容詞" | "助動詞" => true,
_ => false,
make_matcher(dic, |pos| {
matches!(pos[0].deref(), "動詞" | "形容詞" | "助動詞")
})
}

Expand Down
9 changes: 4 additions & 5 deletions python/src/tokenizer.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,7 +21,6 @@ use std::sync::Arc;
use pyo3::prelude::*;

use sudachi::analysis::stateful_tokenizer::StatefulTokenizer;

use sudachi::dic::subset::InfoSubset;
use sudachi::prelude::*;

@@ -145,7 +144,7 @@ impl PyTokenizer {
None => None,
Some(m) => Some(extract_mode(py, m)?),
};
let default_mode = mode.map(|m| self.tokenizer.set_mode(m.into()));
let default_mode = mode.map(|m| self.tokenizer.set_mode(m));
let mut tokenizer = scopeguard::guard(&mut self.tokenizer, |t| {
default_mode.map(|m| t.set_mode(m));
});
@@ -156,7 +155,7 @@ impl PyTokenizer {
tokenizer.do_tokenize()
});

err.map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e.to_string())))?;
err.map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e)))?;

let out_list = match out {
None => {
@@ -177,7 +176,7 @@

morphemes
.collect_results(tokenizer.deref_mut())
.map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e.to_string())))?;
.map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e)))?;

Ok(out_list)
}
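Note (sketch only, not part of the diff): `m.into()` where source and target types are the same is an identity conversion (Clippy: `useless_conversion`), so the argument can be passed as-is. A hypothetical `Tokenizer`/`Mode` pair mirroring the set_mode round-trip above:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Mode { A, B }

struct Tokenizer { mode: Mode }

impl Tokenizer {
    // Takes Mode by value and returns the previous mode,
    // mirroring the save/restore pattern in the diff above.
    fn set_mode(&mut self, m: Mode) -> Mode {
        std::mem::replace(&mut self.mode, m)
    }
}

fn main() {
    let mut t = Tokenizer { mode: Mode::B };
    let m = Mode::A;

    // `t.set_mode(m.into())` would compile, but Into<Mode> for Mode is the
    // identity conversion (Clippy: useless_conversion); passing m is enough.
    let previous = t.set_mode(m);

    assert_eq!(previous, Mode::B);
    assert_eq!(t.mode, Mode::A);
}
```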
8 changes: 4 additions & 4 deletions sudachi-cli/src/build.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -172,12 +172,12 @@ fn output_file(p: &Path) -> File {
OpenOptions::new()
.write(true)
.create_new(true)
.open(&p)
.open(p)
.unwrap_or_else(|e| panic!("failed to open {:?} for writing:\n{:?}", p, e))
}

fn dump_part(dict: PathBuf, part: String, output: PathBuf) {
let file = File::open(&dict).expect("open failed");
let file = File::open(dict).expect("open failed");
let data = unsafe { Mmap::map(&file) }.expect("mmap failed");
let loader =
unsafe { DictionaryLoader::read_any_dictionary(&data) }.expect("failed to load dictionary");
@@ -215,7 +215,7 @@ fn dump_matrix<W: Write>(grammar: &Grammar, w: &mut W) {
for left in 0..conn.num_left() {
for right in 0..conn.num_right() {
let cost = conn.cost(left as _, right as _);
write!(w, "{} {} {}\n", left, right, cost).unwrap();
writeln!(w, "{} {} {}", left, right, cost).unwrap();
}
}
}
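Note (sketch only, not part of the diff): `writeln!` appends the newline itself, so the trailing `\n` can leave the format string (Clippy: `write_with_newline`); likewise the dropped `&` before `p` and `dict` removes a needless borrow where the callee takes `AsRef<Path>`. A compact sketch of a matrix dump loop with illustrative values, not the real `Grammar` type:

```rust
use std::io::Write;

// Dump a small connection-cost matrix as "left right cost" rows,
// mirroring the shape of the dump_matrix loop above.
fn dump_matrix<W: Write>(costs: &[Vec<i16>], w: &mut W) {
    for (left, row) in costs.iter().enumerate() {
        for (right, cost) in row.iter().enumerate() {
            // writeln! adds the newline, so "\n" drops out of the format string
            writeln!(w, "{} {} {}", left, right, cost).unwrap();
        }
    }
}

fn main() {
    let costs = vec![vec![0, 10], vec![-3, 5]];
    let mut out = Vec::new();
    dump_matrix(&costs, &mut out);
    assert_eq!(String::from_utf8(out).unwrap(), "0 0 0\n0 1 10\n1 0 -3\n1 1 5\n");
}
```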