19 changes: 13 additions & 6 deletions python/src/build.rs
@@ -14,18 +14,21 @@
* limitations under the License.
*/

use crate::dictionary::get_default_resource_dir;
use crate::errors;
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType};
use std::fs::{File, OpenOptions};
use std::io::BufWriter;
use std::path::Path;

use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::Config;
use sudachi::dic::build::{DataSource, DictBuilder};
use sudachi::dic::dictionary::JapaneseDictionary;

use crate::dictionary::get_default_resource_dir;
use crate::errors;

pub fn register_functions(m: &Bound<PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(build_system_dic, m)?)?;
m.add_function(wrap_pyfunction!(build_user_dic, m)?)?;
@@ -80,7 +83,9 @@ fn build_system_dic<'py>(
description: Option<&str>,
) -> PyResult<Bound<'py, PyList>> {
let mut builder = DictBuilder::new_system();
description.map(|d| builder.set_description(d));
if let Some(d) = description {
builder.set_description(d)
}

let matrix_path = resolve_as_pypathstr(py, matrix)?;
let matrix_src = as_data_source(matrix_path.as_ref(), matrix)?;
@@ -138,7 +143,9 @@ fn build_user_dic<'py>(
};

let mut builder = DictBuilder::new_user(&system_dic);
description.map(|d| builder.set_description(d));
if let Some(d) = description {
builder.set_description(d)
}

for f in lex.iter() {
let lex_path = resolve_as_pypathstr(py, &f)?;
11 changes: 6 additions & 5 deletions python/src/dictionary.rs
@@ -14,17 +14,18 @@
* limitations under the License.
*/

use pyo3::prelude::*;
use pyo3::types::{PySet, PyString, PyTuple};
use std::convert::TryFrom;
use std::fmt::Write;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use sudachi::analysis::Mode;

use pyo3::prelude::*;
use pyo3::types::{PySet, PyString, PyTuple};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::analysis::Mode;
use sudachi::config::{Config, ConfigBuilder, SurfaceProjection};
use sudachi::dic::dictionary::JapaneseDictionary;
use sudachi::dic::grammar::Grammar;
@@ -447,7 +448,7 @@ fn config_repr(cfg: &Config) -> Result<String, std::fmt::Error> {
Ok(result)
}

pub(crate) fn extract_mode<'py>(mode: &Bound<'py, PyAny>) -> PyResult<Mode> {
pub(crate) fn extract_mode(mode: &Bound<'_, PyAny>) -> PyResult<Mode> {
if mode.is_instance_of::<PyString>() {
errors::wrap(Mode::from_str(mode.str()?.to_str()?))
} else if mode.is_instance_of::<PySplitMode>() {
@@ -471,7 +472,7 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {
let config_pystr = config_opt.str()?;
let config_str = config_pystr.to_str()?.trim();
// looks like json
if config_str.starts_with("{") && config_str.ends_with("}") {
if config_str.starts_with('{') && config_str.ends_with('}') {
let result = ConfigBuilder::from_bytes(config_str.as_bytes());
return errors::wrap(result);
}
3 changes: 2 additions & 1 deletion python/src/errors.rs
@@ -14,10 +14,11 @@
* limitations under the License.
*/

use std::fmt::{Debug, Display};

use pyo3::exceptions::PyDeprecationWarning;
use pyo3::prelude::*;
use pyo3::{import_exception, PyResult};
use std::fmt::{Debug, Display};

// Sudachi exception class is defined in Python
import_exception!(sudachipy.errors, SudachiError);
4 changes: 2 additions & 2 deletions python/src/morpheme.rs
@@ -163,7 +163,7 @@ impl PyMorphemeListWrapper {
for (i, m) in list.iter().enumerate() {
result.push_str(m.surface().deref());
if i + 1 != nmorphs {
result.push_str(" ");
result.push(' ');
}
}
PyString::new_bound(py, result.as_str())
@@ -196,7 +196,7 @@ impl PyMorphemeListWrapper {
}

fn __bool__(&self, py: Python) -> bool {
self.internal(py).len() != 0
!self.internal(py).is_empty()
}
}

3 changes: 1 addition & 2 deletions python/src/pos_matcher.rs
@@ -54,7 +54,7 @@ impl PyPosMatcher {
fn create_from_fn(dic: &Arc<PyDicData>, func: &Bound<PyAny>, py: Python) -> PyResult<Self> {
let mut data = Vec::new();
for (pos_id, pos) in dic.pos.iter().enumerate() {
let args = PyTuple::new_bound(py, &[pos]);
let args = PyTuple::new_bound(py, [pos]);
if func.call1(args)?.downcast::<PyBool>()?.is_true() {
data.push(pos_id as u16);
}
@@ -198,7 +198,6 @@ impl PyPosMatcher {
let max_id = self.dic.pos.len();
// map -> filter chain is needed to handle exactly u16::MAX POS entries
let values = (0..max_id)
.into_iter()
.map(|x| x as u16)
.filter(|id| !self.matcher.matches_id(*id));
let matcher = PosMatcher::new(values);
18 changes: 10 additions & 8 deletions python/src/pretokenizer.rs
@@ -14,21 +14,23 @@
* limitations under the License.
*/

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector};
use std::cell::RefCell;
use std::sync::Arc;

use pyo3::intern;
use pyo3::prelude::*;
use pyo3::sync::GILOnceCell;
use pyo3::types::{PyList, PySlice, PyTuple, PyType};
use std::cell::RefCell;
use std::sync::Arc;
use thread_local::ThreadLocal;

use crate::projection::MorphemeProjection;
use sudachi::analysis::stateful_tokenizer::StatefulTokenizer;
use sudachi::dic::subset::InfoSubset;
use sudachi::prelude::Mode;
use thread_local::ThreadLocal;

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector};
use crate::projection::MorphemeProjection;

/// This struct performs the actual tokenization
/// There should be at most one instance per thread of execution
@@ -152,7 +154,7 @@ impl PyPretokenizer {
}
Some(h) => {
let mrp: &Bound<PyAny> = morphs.bind(py);
let args = PyTuple::new_bound(py, &[index, string, mrp]);
let args = PyTuple::new_bound(py, [index, string, mrp]);
h.bind(py).call1(args)
}
}
20 changes: 11 additions & 9 deletions python/src/projection.rs
@@ -14,20 +14,23 @@
* limitations under the License.
*/

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::PyProjector;
use pyo3::prelude::*;
use pyo3::types::PyString;
use pyo3::{PyResult, Python};
use std::convert::TryFrom;
use std::ops::Deref;
use std::sync::Arc;

use pyo3::prelude::*;
use pyo3::types::PyString;
use pyo3::{PyResult, Python};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::SurfaceProjection;
use sudachi::pos::PosMatcher;
use sudachi::prelude::Morpheme;

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::PyProjector;

pub(crate) trait MorphemeProjection {
fn project<'py>(&self, m: &Morpheme<Arc<PyDicData>>, py: Python<'py>) -> Bound<'py, PyString>;
}
@@ -114,9 +117,8 @@ impl MorphemeProjection for NormalizedNouns {
}

fn conjugating_matcher<D: DictionaryAccess>(dic: &D) -> PosMatcher {
make_matcher(dic, |pos| match pos[0].deref() {
"動詞" | "形容詞" | "助動詞" => true,
_ => false,
make_matcher(dic, |pos| {
matches!(pos[0].deref(), "動詞" | "形容詞" | "助動詞")
})
}

3 changes: 1 addition & 2 deletions python/src/tokenizer.rs
@@ -21,7 +21,6 @@ use std::sync::Arc;
use pyo3::prelude::*;

use sudachi::analysis::stateful_tokenizer::StatefulTokenizer;

use sudachi::dic::subset::InfoSubset;
use sudachi::prelude::*;

@@ -157,7 +156,7 @@ impl PyTokenizer {
None => None,
Some(m) => Some(extract_mode(m)?),
};
let default_mode = mode.map(|m| self.tokenizer.set_mode(m.into()));
let default_mode = mode.map(|m| self.tokenizer.set_mode(m));
let mut tokenizer = scopeguard::guard(&mut self.tokenizer, |t| {
default_mode.map(|m| t.set_mode(m));
});
4 changes: 2 additions & 2 deletions sudachi-cli/src/build.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
* limitations under the License.
*/

use memmap2::Mmap;
use std::fs::{File, OpenOptions};
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};

use clap::{Args, Subcommand};
use memmap2::Mmap;

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::Config;
17 changes: 6 additions & 11 deletions sudachi-cli/src/main.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -34,22 +34,17 @@ use sudachi::prelude::*;
#[cfg(feature = "bake_dictionary")]
const BAKED_DICTIONARY_BYTES: &[u8] = include_bytes!(env!("SUDACHI_DICT_PATH"));

#[derive(Clone, Debug, Eq, PartialEq)]
#[derive(Clone, Debug, Eq, PartialEq, Default)]
pub enum SentenceSplitMode {
/// Do both sentence splitting and analysis
#[default]
Default,
/// Do only sentence splitting and not analysis
Only,
/// Do only analysis without sentence splitting
None,
}

impl Default for SentenceSplitMode {
fn default() -> Self {
SentenceSplitMode::Default
}
}

impl FromStr for SentenceSplitMode {
type Err = &'static str;

@@ -156,7 +151,7 @@ fn main() {
// output: stdout or file
let inner_writer: Box<dyn Write> = match &args.output_file {
Some(output_path) => Box::new(
File::create(&output_path)
File::create(output_path)
.unwrap_or_else(|_| panic!("Failed to open output file {:?}", &output_path)),
),
None => Box::new(io::stdout()),
@@ -207,10 +202,10 @@ fn strip_eol(data: &str) -> &str {
let mut bytes = data.as_bytes();
let mut len = bytes.len();
if len > 1 && bytes[len - 1] == b'\n' {
len = len - 1;
len -= 1;
bytes = &bytes[..len];
if len > 1 && bytes[len - 1] == b'\r' {
len = len - 1;
len -= 1;
bytes = &bytes[..len];
}
}
4 changes: 2 additions & 2 deletions sudachi-cli/src/output.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -44,7 +44,7 @@ impl Wakachi {

impl<T: DictionaryAccess> SudachiOutput<T> for Wakachi {
fn write(&self, writer: &mut Writer, morphemes: &MorphemeList<T>) -> SudachiResult<()> {
if morphemes.len() == 0 {
if morphemes.is_empty() {
writer.write_all(b"\n")?;
return Ok(());
}
18 changes: 8 additions & 10 deletions sudachi/src/analysis/created.rs
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ impl CreatedWords {
const MAX_SHIFT: Carrier = CreatedWords::MAX_VALUE - 1;

pub fn empty() -> CreatedWords {
return Default::default();
Default::default()
}

pub fn single<Pos: Into<i64>>(length: Pos) -> CreatedWords {
@@ -55,7 +55,7 @@ impl CreatedWords {
#[must_use]
pub fn add_word<P: Into<i64>>(&self, length: P) -> CreatedWords {
let mask = CreatedWords::single(length);
return self.add(mask);
self.add(mask)
}

#[must_use]
@@ -67,21 +67,19 @@
let mask = CreatedWords::single(length);
if (self.0 & mask.0) == 0 {
HasWord::No
} else if length.into() >= CreatedWords::MAX_VALUE as _ {
HasWord::Maybe
} else {
if length.into() >= CreatedWords::MAX_VALUE as _ {
HasWord::Maybe
} else {
HasWord::Yes
}
HasWord::Yes
}
}

pub fn is_empty(&self) -> bool {
return self.0 == 0;
self.0 == 0
}

pub fn not_empty(&self) -> bool {
return !self.is_empty();
!self.is_empty()
}
}
