From e5c2febfbed2b9290c6b0269046ed4eeba4288ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=85=E6=88=8E=E6=B0=8F?= Date: Thu, 16 Jan 2025 10:38:38 +0800 Subject: [PATCH] feat(punctuator): convert punctuation in numbers fixes #972 use ascii punctuation ,.:' after numbers. they are auto-committed if followed by a digit. or commit manually with space key. double strike the key to access the original binding. support half-shape and full-shape forms. opt-out with `punctuator/convert_punct_in_number: false`. --- src/rime/gear/punctuator.cc | 131 ++++++++++++++++++++++++++++++------ src/rime/gear/punctuator.h | 8 ++- 2 files changed, 117 insertions(+), 22 deletions(-) diff --git a/src/rime/gear/punctuator.cc b/src/rime/gear/punctuator.cc index a391b85159..fdb8c7c8e4 100644 --- a/src/rime/gear/punctuator.cc +++ b/src/rime/gear/punctuator.cc @@ -44,20 +44,47 @@ an PunctConfig::GetPunctDefinition(const string key) { Punctuator::Punctuator(const Ticket& ticket) : Processor(ticket) { Config* config = engine_->schema()->config(); if (config) { + config->GetBool("punctuator/convert_punct_in_number", + &convert_punct_in_number_); config->GetBool("punctuator/use_space", &use_space_); } config_.LoadConfig(engine_); } -static bool punctuation_is_translated(Context* ctx) { +static bool punctuation_is_translated(Context* ctx, const string& tag) { Composition& comp = ctx->composition(); - if (comp.empty() || !comp.back().HasTag("punct")) { + if (comp.empty() || !comp.back().HasTag(tag)) { return false; } auto cand = comp.back().GetSelectedCandidate(); return cand && cand->type() == "punct"; } +inline static bool is_digit_separator(char ch) { + return ch == '.' 
|| ch == ':' || ch == ',' || ch == '\''; +} + +inline static bool ends_with_digit(const string& text) { + auto len = text.length(); + return len > 0 && isdigit(text[len - 1]); +} + +// recognizes patterns like 3.14 12:30 1,000 1'000 +static bool is_after_number(Context* ctx) { + const CommitHistory& history = ctx->commit_history(); + if (history.empty()) { + return false; + } + const CommitRecord& cr = history.back(); + return ends_with_digit(cr.text) & (cr.type == "thru" || cr.type == "raw"); +} + +static bool is_after_digit_separator(Context* ctx) { + const auto& comp = ctx->composition(); + return !comp.empty() && comp[0].HasTag("punct_number") && + comp[0].length == ctx->input().length(); +} + ProcessResult Punctuator::ProcessKeyEvent(const KeyEvent& key_event) { if (key_event.release() || key_event.ctrl() || key_event.alt() || key_event.super()) @@ -72,29 +99,70 @@ ProcessResult Punctuator::ProcessKeyEvent(const KeyEvent& key_event) { if (!use_space_ && ch == XK_space && ctx->IsComposing()) { return kNoop; } - if (ch == '.' 
|| ch == ':') { // 3.14, 12:30 - const CommitHistory& history(ctx->commit_history()); - if (!history.empty()) { - const CommitRecord& cr(history.back()); - if (cr.type == "thru" && cr.text.length() == 1 && isdigit(cr.text[0])) { - return kRejected; - } - } + if (convert_punct_in_number_ && isdigit(ch) && + is_after_digit_separator(ctx)) { + ctx->PushInput(ch) && ctx->Commit(); + return kAccepted; + } + if (ConvertPunctInNumber(ch)) { + return kAccepted; } + // sync with full_shape option config_.LoadConfig(engine_); - string punct_key(1, ch); - auto punct_definition = config_.GetPunctDefinition(punct_key); + string key(1, ch); + auto punct_definition = config_.GetPunctDefinition(key); if (!punct_definition) return kNoop; - DLOG(INFO) << "punct key: '" << punct_key << "'"; - if (!AlternatePunct(punct_key, punct_definition)) { - ctx->PushInput(ch) && punctuation_is_translated(ctx) && - (ConfirmUniquePunct(punct_definition) || - AutoCommitPunct(punct_definition) || PairPunct(punct_definition)); + DLOG(INFO) << "punct key: '" << key << "'"; + if (AlternatePunct(key, punct_definition)) { + return kAccepted; + } + if (ReconvertPunct(key) || ctx->PushInput(ch)) { + if (punctuation_is_translated(ctx, "punct")) { + ConfirmUniquePunct(punct_definition) || + AutoCommitPunct(punct_definition) || PairPunct(punct_definition); + } } return kAccepted; } +bool Punctuator::ConvertPunctInNumber(char ch) { + if (!convert_punct_in_number_ || !is_digit_separator(ch)) { + return false; + } + Context* ctx = engine_->context(); + if (ctx->composition().empty() && is_after_number(ctx)) { + DLOG(INFO) << "convert punct in number: " << ch; + ctx->PushInput(ch) && punctuation_is_translated(ctx, "punct_number") && + ctx->composition().Forward(); + return true; + } + return false; +} + +bool Punctuator::ReconvertPunct(const string& key) { + if (!convert_punct_in_number_) { + return false; + } + Context* ctx = engine_->context(); + if (ctx->input() != key) { + return false; + } + Composition& 
comp = ctx->composition(); + if (!comp.empty()) { + Segment& segment = comp[0]; + if (segment.HasTag("punct_number")) { + segment.tags.erase("punct_number"); + segment.tags.insert("punct"); + segment.status = Segment::kVoid; + DLOG(INFO) << "reconvert punct, key = " << key; + ctx->ReopenPreviousSegment(); + return true; + } + } + return false; +} + bool Punctuator::AlternatePunct(const string& key, const an& definition) { if (!As(definition)) @@ -159,6 +227,11 @@ bool Punctuator::PairPunct(const an& definition) { } PunctSegmentor::PunctSegmentor(const Ticket& ticket) : Segmentor(ticket) { + Config* config = engine_->schema()->config(); + if (config) { + config->GetBool("punctuator/convert_punct_in_number", + &convert_punct_in_number_); + } config_.LoadConfig(engine_); } @@ -170,22 +243,29 @@ bool PunctSegmentor::Proceed(Segmentation* segmentation) { char ch = input[k]; if (ch < 0x20 || ch >= 0x7f) return true; + // sync with full_shape option config_.LoadConfig(engine_); - string punct_key(1, ch); - auto punct_definition = config_.GetPunctDefinition(punct_key); + string key(1, ch); + auto punct_definition = config_.GetPunctDefinition(key); if (!punct_definition) return true; { Segment segment(k, k + 1); DLOG(INFO) << "add a punctuation segment [" << segment.start << ", " << segment.end << ")"; - segment.tags.insert("punct"); + if (k == 0 && convert_punct_in_number_ && is_digit_separator(ch) && + is_after_number(engine_->context())) { + segment.tags.insert("punct_number"); + } else { + segment.tags.insert("punct"); + } segmentation->AddSegment(segment); } return false; // exclusive } -PunctTranslator::PunctTranslator(const Ticket& ticket) : Translator(ticket) { +PunctTranslator::PunctTranslator(const Ticket& ticket) + : Translator(ticket), formatter_(ticket) { const bool load_symbols = true; config_.LoadConfig(engine_, load_symbols); } @@ -233,8 +313,17 @@ an CreatePunctCandidate(const string& punct, an PunctTranslator::Query(const string& input, const Segment& 
segment) { + if (segment.HasTag("punct_number")) { + if (input.length() == 1 && is_digit_separator(input[0])) { + string punct = input; + formatter_.Format(&punct); + return New(CreatePunctCandidate(punct, segment)); + } + return nullptr; + } if (!segment.HasTag("punct")) return nullptr; + // sync with full_shape option config_.LoadConfig(engine_); auto definition = config_.GetPunctDefinition(input); if (!definition) diff --git a/src/rime/gear/punctuator.h b/src/rime/gear/punctuator.h index 836ec3aabb..15f279b635 100644 --- a/src/rime/gear/punctuator.h +++ b/src/rime/gear/punctuator.h @@ -13,6 +13,7 @@ #include #include #include +#include namespace rime { @@ -35,12 +36,15 @@ class Punctuator : public Processor { virtual ProcessResult ProcessKeyEvent(const KeyEvent& key_event); protected: - bool ConfirmUniquePunct(const an& definition); + bool ConvertPunctInNumber(char ch); + bool ReconvertPunct(const string& key); bool AlternatePunct(const string& key, const an& definition); + bool ConfirmUniquePunct(const an& definition); bool AutoCommitPunct(const an& definition); bool PairPunct(const an& definition); PunctConfig config_; + bool convert_punct_in_number_ = true; bool use_space_ = false; map, int> oddness_; }; @@ -52,6 +56,7 @@ class PunctSegmentor : public Segmentor { protected: PunctConfig config_; + bool convert_punct_in_number_ = true; }; class PunctTranslator : public Translator { @@ -73,6 +78,7 @@ class PunctTranslator : public Translator { const Segment& segment, const an& definition); + ShapeFormatter formatter_; PunctConfig config_; };