Skip to content

Commit

Permalink
mecab neologd 名詞,固有名詞,一般で第「数字」,¥「数字」,¥「数字」もスキップ
Browse files Browse the repository at this point in the history
  • Loading branch information
phoepsilonix committed Nov 9, 2024
1 parent de68d39 commit e20d115
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 4 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.4.5"
version = "0.4.6"
authors = ["Masato TOYOSHIMA", "phoepsilonix <[email protected]>"]
edition = "2021"
rust-version = "1.82"
Expand Down
3 changes: 2 additions & 1 deletion crates/dict-to-mozc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,8 @@ fn id_expr(clsexpr: &str, _id_def: &mut IdDef, class_map: &mut MyIndexMap<String

//static KANA_CHECK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[\p{Hiragana}\p{Katakana}ーゝゞヽヾ゛゜・]+$").unwrap());
// Accepts only strings made up entirely (one or more characters) of kana:
// the hiragana range ぁ-ゖ, the katakana range ァ-ヺ, the long-vowel mark ー,
// the middle dot ・ and the voiced / semi-voiced marks ゛ ゜. The explicit
// ゐ / ゑ entries and their voiced-mark sequences are kept exactly as written.
//
// Parentheses are literal characters inside a `[...]` class, not grouping, so
// the former "(ぁ-ゖ)" and "(ァ-ヺ)" made '(' and ')' pass as kana. The ranges
// are now listed bare so that only the intended characters match.
static KANA_CHECK: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^[ぁ-ゖゐゑゐ゙ゑ゙ァ-ヺー・゛゜]+$").unwrap());
static START_SUUJI_CHECK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[0-9]+").unwrap());
//static START_SUUJI_CHECK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(?:\d|¥\d|¥\d|第\d)+").unwrap());
// Matches text that begins with one or more "number-like" tokens: a bare
// digit, a digit preceded by a yen sign, or a digit preceded by 第.
//
// Both yen signs are covered: half-width U+00A5 and full-width U+FFE5. They
// are written as code-point escapes because the two characters look alike and
// are easily collapsed into one by Unicode normalization, which would leave a
// duplicated (dead) alternative in the pattern.
//
// The group is non-capturing: it exists only to repeat the alternation.
// Note that `\d` in the `regex` crate is Unicode-aware by default, so
// non-ASCII decimal digits (e.g. full-width digits) match as well.
static START_SUUJI_CHECK: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^(?:\d|[\x{A5}\x{FFE5}]\d|第\d)+").unwrap());
//static EISUU_CHECK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9' ]+$").unwrap());
// Matches text consisting solely of ASCII letters, apostrophes and spaces
// (at least one character, anchored at both ends).
static KIGOU_CHECK: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"^[a-zA-Z' ]+$";
    Regex::new(pattern).unwrap()
});
// 地名チェックに用いる日本語判定
Expand Down

0 comments on commit e20d115

Please sign in to comment.