From b531199b8723e8e8f9ab1b8d301c8a3b465358ad Mon Sep 17 00:00:00 2001 From: Yiying Wu <35101503+ChopperMan33@users.noreply.github.com> Date: Wed, 23 Dec 2020 16:37:19 +0800 Subject: [PATCH] =?UTF-8?q?[ZH=20DateTimeV2]=20Fixed=20recognizing=20"?= =?UTF-8?q?=E5=B9=B4=E4=BB=A3"=20year-range/decade=20in=20Python/JavaScrip?= =?UTF-8?q?t=20(#742)=20(#2418)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../chinese/datePeriodConfiguration.ts | 89 +++++++++++++++++++ .../src/dateTime/constants.ts | 3 + .../date_time/chinese/dateperiod_parser.py | 75 ++++++++++++++++ .../date_time/constants.py | 4 + .../DateTime/Chinese/DatePeriodExtractor.json | 7 -- Specs/DateTime/Chinese/DatePeriodParser.json | 7 -- Specs/DateTime/Chinese/DateTimeModel.json | 54 +++++++++++ 7 files changed, 225 insertions(+), 14 deletions(-) diff --git a/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/datePeriodConfiguration.ts b/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/datePeriodConfiguration.ts index 8c802697d3..f0b65e7560 100644 --- a/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/datePeriodConfiguration.ts +++ b/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/datePeriodConfiguration.ts @@ -46,6 +46,7 @@ class ChineseDatePeriodExtractorConfiguration implements IDatePeriodExtractorCon RegExpUtility.getSafeRegExp(ChineseDateTime.WeekOfMonthRegex), RegExpUtility.getSafeRegExp(ChineseDateTime.SeasonWithYear), RegExpUtility.getSafeRegExp(ChineseDateTime.QuarterRegex), + RegExpUtility.getSafeRegExp(ChineseDateTime.DecadeRegex), ]; this.datePointExtractor = new ChineseDateExtractor(dmyDateFormat); this.integerExtractor = new ChineseIntegerExtractor(); @@ -301,6 +302,10 @@ export class ChineseDatePeriodParser extends BaseDatePeriodParser { readonly dynastyStartYear: string; readonly dynastyYearRegex: RegExp; readonly dynastyYearMap: ReadonlyMap; + readonly decadeRegex: RegExp + readonly thisRegex: RegExp + readonly nextRegex: RegExp + readonly lastRegex: RegExp constructor(dmyDateFormat: boolean) { let config = new ChineseDatePeriodParserConfiguration(dmyDateFormat); @@ -319,6 +324,10 @@ export class ChineseDatePeriodParser extends BaseDatePeriodParser { this.dynastyStartYear = ChineseDateTime.DynastyStartYear; this.dynastyYearRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DynastyYearRegex); this.dynastyYearMap = ChineseDateTime.DynastyYearMap; + this.decadeRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DecadeRegex); + this.thisRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DatePeriodThisRegex); + this.nextRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DatePeriodNextRegex); + this.lastRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DatePeriodLastRegex); } parse(extractorResult: ExtractResult, referenceDate?: Date): DateTimeParseResult | null { @@ -359,6 +368,9 @@ export class ChineseDatePeriodParser extends BaseDatePeriodParser { if (!innerResult.success) { innerResult = this.parseQuarter(source, referenceDate); } + if (!innerResult.success){ + innerResult = this.parseDecade(source, referenceDate); + } if (innerResult.success) { if (innerResult.futureValue && innerResult.pastValue) { @@ -713,6 +725,83 @@ export class ChineseDatePeriodParser extends BaseDatePeriodParser { return this.parseCommonDurationWithUnit(beforeStr, sourceUnit, numStr, referenceDate); } + protected parseDecade(source: string, referenceDate: Date): DateTimeResolutionResult { + let result = new DateTimeResolutionResult(); + let century = Math.floor(referenceDate.getFullYear() / 100) + 1; + let decadeLastYear = 10; + let inputCentury = false; + let beginLuisStr; + let endLuisStr; + + let match = RegExpUtility.getMatches(this.decadeRegex, source).pop(); + if (!match) { + return result; + } + + let decadeStr = match.groups(Constants.Decade).value; + let decade = this.convertChineseToNumber(decadeStr); + let centuryStr = match.groups(Constants.Century).value; + if (centuryStr) { + century = this.convertChineseToNumber(centuryStr); + inputCentury = true; + } + else { + centuryStr = match.groups(Constants.RelCentury).value; + if (centuryStr) { + centuryStr = centuryStr.trim(); + let thismatch = RegExpUtility.getMatches(this.thisRegex, centuryStr).pop(); + let nextmatch = RegExpUtility.getMatches(this.nextRegex, centuryStr).pop(); + let lastmatch = RegExpUtility.getMatches(this.lastRegex, centuryStr).pop(); + + if (nextmatch) { + century++; + } + else if (lastmatch) { + century--; + } + + inputCentury = true; + } + } + + let beginYear = ((century - 1) * 100) + decade; + let endYear = beginYear + decadeLastYear; + + if (inputCentury) { + beginLuisStr = DateTimeFormatUtil.luisDate(beginYear, 0, 1); + endLuisStr = DateTimeFormatUtil.luisDate(endYear, 0, 1); + } + else { + let beginYearStr = "XX" + decade; + beginLuisStr = DateTimeFormatUtil.luisDate(-1, 0, 1); + beginLuisStr = beginLuisStr.replace("XXXX", beginYearStr); + + let endYearStr = "XX" + ("0" + endYear % 100).slice(-2); + endLuisStr = DateTimeFormatUtil.luisDate(-1, 0, 1); + endLuisStr = endLuisStr.replace("XXXX", endYearStr); + } + + result.timex = `(${beginLuisStr},${endLuisStr},P10Y)`; + + let futureYear = beginYear + let pastYear = beginYear; + let startDate = DateUtils.safeCreateFromValue(DateUtils.minValue(), beginYear, 0, 1); + + if (!inputCentury && startDate < referenceDate) { + futureYear += 100; + } + + if (!inputCentury && startDate >= referenceDate) { + pastYear -= 100; + } + + result.futureValue = [DateUtils.safeCreateFromValue(DateUtils.minValue(), futureYear, 0, 1), DateUtils.safeCreateFromValue(DateUtils.minValue(), futureYear + decadeLastYear, 0, 1)]; + result.pastValue = [DateUtils.safeCreateFromValue(DateUtils.minValue(), pastYear, 0, 1), DateUtils.safeCreateFromValue(DateUtils.minValue(), pastYear + decadeLastYear, 0, 1)]; + result.success = true; + + return result; + } + private parseCommonDurationWithUnit(beforeStr: string, sourceUnit: string, numStr: string, referenceDate: Date): DateTimeResolutionResult { let result = new DateTimeResolutionResult(); diff --git a/JavaScript/packages/recognizers-date-time/src/dateTime/constants.ts b/JavaScript/packages/recognizers-date-time/src/dateTime/constants.ts index d032f437d6..6aff136a75 100644 --- a/JavaScript/packages/recognizers-date-time/src/dateTime/constants.ts +++ b/JavaScript/packages/recognizers-date-time/src/dateTime/constants.ts @@ -22,6 +22,9 @@ export class Constants { static readonly ResolveKey: string = "resolve"; static readonly ResolveToPastKey: string = "resolveToPast"; static readonly ResolveToFutureKey: string = "resolveToFuture"; + static readonly Decade: string = "decade"; + static readonly Century: string = "century" + static readonly RelCentury: string = "relcentury" static readonly CommentKey: string = "Comment"; static readonly CommentAmPm: string = "ampm"; diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/chinese/dateperiod_parser.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/chinese/dateperiod_parser.py index 2317dd927b..028579bf66 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/chinese/dateperiod_parser.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/chinese/dateperiod_parser.py @@ -40,6 +40,14 @@ def __init__(self): ChineseDateTime.DatePeriodYearInChineseRegex) self.season_with_year_regex = RegExpUtility.get_safe_reg_exp( ChineseDateTime.SeasonWithYear) + self.decade_regex = RegExpUtility.get_safe_reg_exp( + ChineseDateTime.DecadeRegex) + self.date_this_regex = RegExpUtility.get_safe_reg_exp( + ChineseDateTime.DatePeriodThisRegex) + self.date_last_regex = RegExpUtility.get_safe_reg_exp( + ChineseDateTime.DatePeriodLastRegex) + self.date_next_regex = RegExpUtility.get_safe_reg_exp( + ChineseDateTime.DatePeriodNextRegex) def parse(self, source: ExtractResult, reference: datetime = None) -> Optional[DateTimeParseResult]: result_value = None @@ -85,6 +93,9 @@ def parse(self, source: ExtractResult, reference: datetime = None) -> Optional[D if not inner_result.success: inner_result = self._parse_quarter(source_text, reference) + if not inner_result.success: + inner_result = self._parse_decade(source_text, reference) + if inner_result.success: if inner_result.future_value and inner_result.past_value: inner_result.future_resolution = { @@ -596,3 +607,67 @@ def _parse_quarter(self, source: str, reference: datetime) -> DateTimeResolution result.success = True return result + + def _parse_decade(self, source: str, reference: datetime) -> DateTimeResolutionResult: + result = DateTimeResolutionResult() + + century = int(reference.year / 100) + 1 + decade_last_year = 10 + input_century = False + + match = regex.search(self.decade_regex, source) + + if not match or len(match.group()) != len(source): + return result + + decade_str = RegExpUtility.get_group(match, Constants.DECADE) + decade = self.__convert_chinese_to_number(decade_str) + century_str = RegExpUtility.get_group(match, Constants.CENTURY) + if century_str != "": + century = self.__convert_chinese_to_number(century_str) + input_century = True + else: + century_str = RegExpUtility.get_group(match, Constants.REL_CENTURY) + if century_str != "": + century_str = century_str.strip().lower() + + this_match = regex.search(self.date_this_regex, century_str) + next_match = regex.search(self.date_next_regex, century_str) + last_match = regex.search(self.date_last_regex, century_str) + + if next_match: + century += 1 + elif last_match: + century -= 1 + + input_century = True + + begin_year = ((century - 1) * 100) + decade + end_year = begin_year + decade_last_year + + if input_century: + begin_luis_str = DateTimeFormatUtil.luis_date(begin_year, 1, 1) + end_luis_str = DateTimeFormatUtil.luis_date(end_year, 1, 1) + else: + begin_year_str = "XX{:02d}".format(decade) + begin_luis_str = DateTimeFormatUtil.luis_date(-1, 1, 1) + begin_luis_str = begin_luis_str.replace("XXXX", begin_year_str) + + end_year_str = "XX{:02d}".format(end_year % 100) + end_luis_str = DateTimeFormatUtil.luis_date(-1, 1, 1) + end_luis_str = end_luis_str.replace("XXXX", end_year_str) + + result.timex = f"({begin_luis_str},{end_luis_str},P10Y)" + + future_year, past_year = begin_year, begin_year + start_date = DateUtils.safe_create_from_min_value(begin_year, 1, 1) + if not input_century and start_date < reference: + future_year += 100 + if not input_century and start_date >= reference: + past_year -= 100 + + result.future_value = [DateUtils.safe_create_from_min_value(future_year, 1, 1), DateUtils.safe_create_from_min_value(future_year + decade_last_year, 1, 1)] + result.past_value = [DateUtils.safe_create_from_min_value(past_year, 1, 1), DateUtils.safe_create_from_min_value(past_year + decade_last_year, 1, 1)] + result.success = True + + return result diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/constants.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/constants.py index 4343856b6a..56937da5f5 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/constants.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/constants.py @@ -152,6 +152,10 @@ class Constants: CARDINAL = 'cardinal' + DECADE = 'decade' + CENTURY = 'century' + REL_CENTURY = 'relcentury' + SEAS = 'seas' SEASON = 'season' diff --git a/Specs/DateTime/Chinese/DatePeriodExtractor.json b/Specs/DateTime/Chinese/DatePeriodExtractor.json index e62ec166bb..6041e73478 100644 --- a/Specs/DateTime/Chinese/DatePeriodExtractor.json +++ b/Specs/DateTime/Chinese/DatePeriodExtractor.json @@ -210,7 +210,6 @@ }, { "Input": "上世纪90年代", - "NotSupported": "javascript", "Results": [ { "Text": "上世纪90年代", @@ -222,7 +221,6 @@ }, { "Input": "本世纪20年代", - "NotSupported": "javascript", "Results": [ { "Text": "本世纪20年代", @@ -234,7 +232,6 @@ }, { "Input": "20世纪80年代", - "NotSupported": "javascript", "Results": [ { "Text": "20世纪80年代", @@ -246,7 +243,6 @@ }, { "Input": "在50年代的时候", - "NotSupported": "javascript", "Results": [ { "Text": "50年代", @@ -258,7 +254,6 @@ }, { "Input": "19世纪70年代,他出生了", - "NotSupported": "javascript", "Results": [ { "Text": "19世纪70年代", @@ -270,7 +265,6 @@ }, { "Input": "十九世纪七十年代", - "NotSupported": "javascript", "Results": [ { "Text": "十九世纪七十年代", @@ -282,7 +276,6 @@ }, { "Input": "九十年代", - "NotSupported": "javascript", "Results": [ { "Text": "九十年代", diff --git a/Specs/DateTime/Chinese/DatePeriodParser.json b/Specs/DateTime/Chinese/DatePeriodParser.json index 627d8038b7..d8f94af422 100644 --- a/Specs/DateTime/Chinese/DatePeriodParser.json +++ b/Specs/DateTime/Chinese/DatePeriodParser.json @@ -598,7 +598,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", "Results": [ { "Text": "上世纪90年代", @@ -624,7 +623,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", "Results": [ { "Text": "本世纪20年代", @@ -650,7 +648,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", "Results": [ { "Text": "20世纪80年代", @@ -676,7 +673,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", "Results": [ { "Text": "50年代", @@ -702,7 +698,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", "Results": [ { "Text": "19世纪70年代", @@ -728,7 +723,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", "Results": [ { "Text": "十九世纪七十年代", @@ -754,7 +748,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", "Results": [ { "Text": "九十年代", diff --git a/Specs/DateTime/Chinese/DateTimeModel.json b/Specs/DateTime/Chinese/DateTimeModel.json index 8036b6fa22..faf15f97fd 100644 --- a/Specs/DateTime/Chinese/DateTimeModel.json +++ b/Specs/DateTime/Chinese/DateTimeModel.json @@ -6229,5 +6229,59 @@ "End": 8 } ] + }, + { + "Input": "十九世纪七十年代", + "Context": { + "ReferenceDateTime": "2017-03-22T00:00:00" + }, + "Results": [ + { + "Text": "十九世纪七十年代", + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(1870-01-01,1880-01-01,P10Y)", + "type": "daterange", + "start": "1870-01-01", + "end": "1880-01-01" + } + ] + }, + "Start": 0, + "End": 7 + } + ] + }, + { + "Input": "九十年代", + "Context": { + "ReferenceDateTime": "2017-03-22T00:00:00" + }, + "Results": [ + { + "Text": "九十年代", + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XX90-01-01,XX00-01-01,P10Y)", + "type": "daterange", + "start": "1990-01-01", + "end": "2000-01-01" + }, + { + "timex": "(XX90-01-01,XX00-01-01,P10Y)", + "type": "daterange", + "start": "2090-01-01", + "end": "2100-01-01" + } + ] + }, + "Start": 0, + "End": 3 + } + ] } ] \ No newline at end of file