Skip to content

Commit

Permalink
mecab neologd 名詞,固有名詞,一般で数字が1文字目のものをスキップ
Browse files Browse the repository at this point in the history
  • Loading branch information
phoepsilonix committed Nov 9, 2024
1 parent 787adb1 commit de68d39
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 3 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.4.4"
version = "0.4.5"
authors = ["Masato TOYOSHIMA", "phoepsilonix <[email protected]>"]
edition = "2021"
rust-version = "1.82"
Expand Down
8 changes: 8 additions & 0 deletions crates/dict-to-mozc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ fn id_expr(clsexpr: &str, _id_def: &mut IdDef, class_map: &mut MyIndexMap<String

//static KANA_CHECK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[\p{Hiragana}\p{Katakana}ーゝゞヽヾ゛゜・]+$").unwrap());
// Pattern for non-empty strings consisting solely of characters from the class below
// (the ぁ-ゖ and ァ-ヺ ranges plus the individually listed characters and marks).
// NOTE(review): inside `[...]` the `(` and `)` are literal characters, not grouping,
// so parentheses are accepted by this pattern as well — confirm that is intended.
static KANA_CHECK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[(ぁ-ゖ)ゐゑゐ゙ゑ゙(ァ-ヺ)ー・゛゜]+$").unwrap());
// Pattern that matches when the input begins with one or more ASCII digits (0-9);
// it is anchored only at the start, so anything may follow the digits.
static START_SUUJI_CHECK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[0-9]+").unwrap());
//static EISUU_CHECK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9' ]+$").unwrap());
// Pattern for non-empty strings consisting solely of ASCII letters, apostrophes and spaces.
static KIGOU_CHECK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z' ]+$").unwrap());
// 地名チェックに用いる日本語判定
Expand All @@ -583,6 +584,10 @@ fn id_expr(clsexpr: &str, _id_def: &mut IdDef, class_map: &mut MyIndexMap<String
/// Returns `true` when `str` is matched by the `KANA_CHECK` pattern.
fn is_kana(str: &str) -> bool {
    // `str` is already a `&str`, so it is passed through without re-borrowing.
    KANA_CHECK.is_match(str)
}

/// Returns `true` when `str` is matched by the `START_SUUJI_CHECK` pattern,
/// i.e. when it begins with at least one ASCII digit.
fn is_start_suuji(str: &str) -> bool {
    // `str` is already a `&str`, so it is passed through without re-borrowing.
    START_SUUJI_CHECK.is_match(str)
}
/*
fn is_eisuu(str: &str) -> bool {
EISUU_CHECK.is_match(&str)
Expand Down Expand Up @@ -686,6 +691,9 @@ fn id_expr(clsexpr: &str, _id_def: &mut IdDef, class_map: &mut MyIndexMap<String
if _parts.len() > 2 {
if (! _args.places) && _parts[2].contains("地域") { return true };
}
if _parts.len() > 2 {
if _parts[0] == "名詞" && _parts[1] == "固有名詞" && _parts[2] == "一般" && is_start_suuji(&_notation) { return true };
}
false
}

Expand Down

0 comments on commit de68d39

Please sign in to comment.