Skip to content

Commit

Permalink
[ZH DateTimeV2] Fixed recognizing "年代" year-range/decade in Python/JavaScript (#742) (#2418)
Browse files Browse the repository at this point in the history
  • Loading branch information
ChopperMan33 authored Dec 23, 2020
1 parent c72db40 commit b531199
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class ChineseDatePeriodExtractorConfiguration implements IDatePeriodExtractorCon
RegExpUtility.getSafeRegExp(ChineseDateTime.WeekOfMonthRegex),
RegExpUtility.getSafeRegExp(ChineseDateTime.SeasonWithYear),
RegExpUtility.getSafeRegExp(ChineseDateTime.QuarterRegex),
RegExpUtility.getSafeRegExp(ChineseDateTime.DecadeRegex),
];
this.datePointExtractor = new ChineseDateExtractor(dmyDateFormat);
this.integerExtractor = new ChineseIntegerExtractor();
Expand Down Expand Up @@ -301,6 +302,10 @@ export class ChineseDatePeriodParser extends BaseDatePeriodParser {
readonly dynastyStartYear: string;
readonly dynastyYearRegex: RegExp;
readonly dynastyYearMap: ReadonlyMap<string, number>;
readonly decadeRegex: RegExp
readonly thisRegex: RegExp
readonly nextRegex: RegExp
readonly lastRegex: RegExp

constructor(dmyDateFormat: boolean) {
let config = new ChineseDatePeriodParserConfiguration(dmyDateFormat);
Expand All @@ -319,6 +324,10 @@ export class ChineseDatePeriodParser extends BaseDatePeriodParser {
this.dynastyStartYear = ChineseDateTime.DynastyStartYear;
this.dynastyYearRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DynastyYearRegex);
this.dynastyYearMap = ChineseDateTime.DynastyYearMap;
this.decadeRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DecadeRegex);
this.thisRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DatePeriodThisRegex);
this.nextRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DatePeriodNextRegex);
this.lastRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.DatePeriodLastRegex);
}

parse(extractorResult: ExtractResult, referenceDate?: Date): DateTimeParseResult | null {
Expand Down Expand Up @@ -359,6 +368,9 @@ export class ChineseDatePeriodParser extends BaseDatePeriodParser {
if (!innerResult.success) {
innerResult = this.parseQuarter(source, referenceDate);
}
if (!innerResult.success){
innerResult = this.parseDecade(source, referenceDate);
}

if (innerResult.success) {
if (innerResult.futureValue && innerResult.pastValue) {
Expand Down Expand Up @@ -713,6 +725,83 @@ export class ChineseDatePeriodParser extends BaseDatePeriodParser {
return this.parseCommonDurationWithUnit(beforeStr, sourceUnit, numStr, referenceDate);
}

/**
 * Resolves a decade expression matched by `decadeRegex` into a ten-year range.
 *
 * The century is taken from, in priority order: an explicit century group,
 * a relative century group (matched against the this/next/last regexes), or
 * the century of `referenceDate`. When no century is given in the text the
 * timex keeps the century unspecified ("XX") and the future/past values
 * resolve to the nearest occurrence after/before the reference date.
 *
 * @param source Text to resolve; it must be matched in full by `decadeRegex`.
 * @param referenceDate Date used to pick the default century and to split
 *   future/past resolutions.
 * @returns A result with `success` set, a `(begin,end,P10Y)` timex and
 *   `[start, end]` future/past values; an unsuccessful (default) result when
 *   the text is not exactly a decade expression.
 */
protected parseDecade(source: string, referenceDate: Date): DateTimeResolutionResult {
    let result = new DateTimeResolutionResult();
    // 1-based century of the reference date, e.g. 2017 -> 21.
    let century = Math.floor(referenceDate.getFullYear() / 100) + 1;
    const decadeLastYear = 10;
    let inputCentury = false;
    let beginLuisStr: string;
    let endLuisStr: string;

    let match = RegExpUtility.getMatches(this.decadeRegex, source).pop();
    // Require the decade expression to span the whole text, mirroring the
    // Python implementation, so a decade embedded in a longer phrase is not
    // resolved here by accident.
    if (!match || match.length !== source.length) {
        return result;
    }

    let decadeStr = match.groups(Constants.Decade).value;
    let decade = this.convertChineseToNumber(decadeStr);
    let centuryStr = match.groups(Constants.Century).value;
    if (centuryStr) {
        century = this.convertChineseToNumber(centuryStr);
        inputCentury = true;
    }
    else {
        centuryStr = match.groups(Constants.RelCentury).value;
        if (centuryStr) {
            centuryStr = centuryStr.trim();
            let nextmatch = RegExpUtility.getMatches(this.nextRegex, centuryStr).pop();
            let lastmatch = RegExpUtility.getMatches(this.lastRegex, centuryStr).pop();

            // Anything that is neither "next" nor "last" leaves the
            // reference century unchanged.
            if (nextmatch) {
                century++;
            }
            else if (lastmatch) {
                century--;
            }

            inputCentury = true;
        }
    }

    let beginYear = ((century - 1) * 100) + decade;
    let endYear = beginYear + decadeLastYear;

    if (inputCentury) {
        // NOTE(review): month argument 0 is presumably a 0-based January — confirm against luisDate.
        beginLuisStr = DateTimeFormatUtil.luisDate(beginYear, 0, 1);
        endLuisStr = DateTimeFormatUtil.luisDate(endYear, 0, 1);
    }
    else {
        // Century unknown: substitute the "XXXX" year placeholder with "XX" + two-digit year.
        // Zero-pad the decade so that decade 0 yields "XX00" rather than "XX0",
        // consistent with how the end year is padded below.
        let beginYearStr = "XX" + ("0" + decade).slice(-2);
        beginLuisStr = DateTimeFormatUtil.luisDate(-1, 0, 1);
        beginLuisStr = beginLuisStr.replace("XXXX", beginYearStr);

        let endYearStr = "XX" + ("0" + endYear % 100).slice(-2);
        endLuisStr = DateTimeFormatUtil.luisDate(-1, 0, 1);
        endLuisStr = endLuisStr.replace("XXXX", endYearStr);
    }

    result.timex = `(${beginLuisStr},${endLuisStr},P10Y)`;

    let futureYear = beginYear;
    let pastYear = beginYear;
    let startDate = DateUtils.safeCreateFromValue(DateUtils.minValue(), beginYear, 0, 1);

    // Without an explicit century the decade is ambiguous: shift by a full
    // century so the future value starts on/after, and the past value starts
    // before, the reference date.
    if (!inputCentury && startDate < referenceDate) {
        futureYear += 100;
    }

    if (!inputCentury && startDate >= referenceDate) {
        pastYear -= 100;
    }

    result.futureValue = [
        DateUtils.safeCreateFromValue(DateUtils.minValue(), futureYear, 0, 1),
        DateUtils.safeCreateFromValue(DateUtils.minValue(), futureYear + decadeLastYear, 0, 1)
    ];
    result.pastValue = [
        DateUtils.safeCreateFromValue(DateUtils.minValue(), pastYear, 0, 1),
        DateUtils.safeCreateFromValue(DateUtils.minValue(), pastYear + decadeLastYear, 0, 1)
    ];
    result.success = true;

    return result;
}

private parseCommonDurationWithUnit(beforeStr: string, sourceUnit: string, numStr: string, referenceDate: Date): DateTimeResolutionResult {
let result = new DateTimeResolutionResult();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ export class Constants {
static readonly ResolveKey: string = "resolve";
static readonly ResolveToPastKey: string = "resolveToPast";
static readonly ResolveToFutureKey: string = "resolveToFuture";
static readonly Decade: string = "decade";
static readonly Century: string = "century"
static readonly RelCentury: string = "relcentury"

static readonly CommentKey: string = "Comment";
static readonly CommentAmPm: string = "ampm";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ def __init__(self):
ChineseDateTime.DatePeriodYearInChineseRegex)
self.season_with_year_regex = RegExpUtility.get_safe_reg_exp(
ChineseDateTime.SeasonWithYear)
self.decade_regex = RegExpUtility.get_safe_reg_exp(
ChineseDateTime.DecadeRegex)
self.date_this_regex = RegExpUtility.get_safe_reg_exp(
ChineseDateTime.DatePeriodThisRegex)
self.date_last_regex = RegExpUtility.get_safe_reg_exp(
ChineseDateTime.DatePeriodLastRegex)
self.date_next_regex = RegExpUtility.get_safe_reg_exp(
ChineseDateTime.DatePeriodNextRegex)

def parse(self, source: ExtractResult, reference: datetime = None) -> Optional[DateTimeParseResult]:
result_value = None
Expand Down Expand Up @@ -85,6 +93,9 @@ def parse(self, source: ExtractResult, reference: datetime = None) -> Optional[D
if not inner_result.success:
inner_result = self._parse_quarter(source_text, reference)

if not inner_result.success:
inner_result = self._parse_decade(source_text, reference)

if inner_result.success:
if inner_result.future_value and inner_result.past_value:
inner_result.future_resolution = {
Expand Down Expand Up @@ -596,3 +607,67 @@ def _parse_quarter(self, source: str, reference: datetime) -> DateTimeResolution

result.success = True
return result

def _parse_decade(self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve a decade expression matched by ``decade_regex`` into a ten-year range.

    The century comes from, in priority order: an explicit century group, a
    relative century group (checked against the next/last regexes), or the
    century of ``reference``. When the text gives no century, the timex keeps
    it unspecified ("XX") and the future/past values are shifted by a
    century around ``reference``.

    :param source: Text to resolve; it must be matched in full by the regex.
    :param reference: Date used for the default century and for splitting
        future/past resolutions.
    :return: A result with ``success`` set, a ``(begin,end,P10Y)`` timex and
        ``[start, end]`` future/past values; a default (unsuccessful) result
        when ``source`` is not exactly a decade expression.
    """
    result = DateTimeResolutionResult()

    # 1-based century of the reference date, e.g. 2017 -> 21.
    # Floor division keeps this in integer arithmetic.
    century = reference.year // 100 + 1
    decade_last_year = 10
    input_century = False

    match = regex.search(self.decade_regex, source)

    # Only accept a match that covers the whole text.
    if not match or len(match.group()) != len(source):
        return result

    decade_str = RegExpUtility.get_group(match, Constants.DECADE)
    decade = self.__convert_chinese_to_number(decade_str)
    century_str = RegExpUtility.get_group(match, Constants.CENTURY)
    if century_str != "":
        century = self.__convert_chinese_to_number(century_str)
        input_century = True
    else:
        century_str = RegExpUtility.get_group(match, Constants.REL_CENTURY)
        if century_str != "":
            century_str = century_str.strip().lower()

            # Anything that is neither "next" nor "last" leaves the
            # reference century unchanged.
            if regex.search(self.date_next_regex, century_str):
                century += 1
            elif regex.search(self.date_last_regex, century_str):
                century -= 1

            input_century = True

    begin_year = ((century - 1) * 100) + decade
    end_year = begin_year + decade_last_year

    if input_century:
        begin_luis_str = DateTimeFormatUtil.luis_date(begin_year, 1, 1)
        end_luis_str = DateTimeFormatUtil.luis_date(end_year, 1, 1)
    else:
        # Century unknown: substitute the "XXXX" year placeholder with
        # "XX" followed by the zero-padded two-digit year.
        begin_year_str = "XX{:02d}".format(decade)
        begin_luis_str = DateTimeFormatUtil.luis_date(-1, 1, 1)
        begin_luis_str = begin_luis_str.replace("XXXX", begin_year_str)

        end_year_str = "XX{:02d}".format(end_year % 100)
        end_luis_str = DateTimeFormatUtil.luis_date(-1, 1, 1)
        end_luis_str = end_luis_str.replace("XXXX", end_year_str)

    result.timex = f"({begin_luis_str},{end_luis_str},P10Y)"

    future_year, past_year = begin_year, begin_year
    start_date = DateUtils.safe_create_from_min_value(begin_year, 1, 1)
    # Without an explicit century the decade is ambiguous: shift by a full
    # century so the future value starts on/after, and the past value starts
    # before, the reference date.
    if not input_century and start_date < reference:
        future_year += 100
    if not input_century and start_date >= reference:
        past_year -= 100

    result.future_value = [
        DateUtils.safe_create_from_min_value(future_year, 1, 1),
        DateUtils.safe_create_from_min_value(future_year + decade_last_year, 1, 1),
    ]
    result.past_value = [
        DateUtils.safe_create_from_min_value(past_year, 1, 1),
        DateUtils.safe_create_from_min_value(past_year + decade_last_year, 1, 1),
    ]
    result.success = True

    return result
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ class Constants:

CARDINAL = 'cardinal'

DECADE = 'decade'
CENTURY = 'century'
REL_CENTURY = 'relcentury'

SEAS = 'seas'
SEASON = 'season'

Expand Down
7 changes: 0 additions & 7 deletions Specs/DateTime/Chinese/DatePeriodExtractor.json
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@
},
{
"Input": "上世纪90年代",
"NotSupported": "javascript",
"Results": [
{
"Text": "上世纪90年代",
Expand All @@ -222,7 +221,6 @@
},
{
"Input": "本世纪20年代",
"NotSupported": "javascript",
"Results": [
{
"Text": "本世纪20年代",
Expand All @@ -234,7 +232,6 @@
},
{
"Input": "20世纪80年代",
"NotSupported": "javascript",
"Results": [
{
"Text": "20世纪80年代",
Expand All @@ -246,7 +243,6 @@
},
{
"Input": "在50年代的时候",
"NotSupported": "javascript",
"Results": [
{
"Text": "50年代",
Expand All @@ -258,7 +254,6 @@
},
{
"Input": "19世纪70年代,他出生了",
"NotSupported": "javascript",
"Results": [
{
"Text": "19世纪70年代",
Expand All @@ -270,7 +265,6 @@
},
{
"Input": "十九世纪七十年代",
"NotSupported": "javascript",
"Results": [
{
"Text": "十九世纪七十年代",
Expand All @@ -282,7 +276,6 @@
},
{
"Input": "九十年代",
"NotSupported": "javascript",
"Results": [
{
"Text": "九十年代",
Expand Down
7 changes: 0 additions & 7 deletions Specs/DateTime/Chinese/DatePeriodParser.json
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,6 @@
"Context": {
"ReferenceDateTime": "2017-03-22T00:00:00"
},
"NotSupported": "javascript, python",
"Results": [
{
"Text": "上世纪90年代",
Expand All @@ -624,7 +623,6 @@
"Context": {
"ReferenceDateTime": "2017-03-22T00:00:00"
},
"NotSupported": "javascript, python",
"Results": [
{
"Text": "本世纪20年代",
Expand All @@ -650,7 +648,6 @@
"Context": {
"ReferenceDateTime": "2017-03-22T00:00:00"
},
"NotSupported": "javascript, python",
"Results": [
{
"Text": "20世纪80年代",
Expand All @@ -676,7 +673,6 @@
"Context": {
"ReferenceDateTime": "2017-03-22T00:00:00"
},
"NotSupported": "javascript, python",
"Results": [
{
"Text": "50年代",
Expand All @@ -702,7 +698,6 @@
"Context": {
"ReferenceDateTime": "2017-03-22T00:00:00"
},
"NotSupported": "javascript, python",
"Results": [
{
"Text": "19世纪70年代",
Expand All @@ -728,7 +723,6 @@
"Context": {
"ReferenceDateTime": "2017-03-22T00:00:00"
},
"NotSupported": "javascript, python",
"Results": [
{
"Text": "十九世纪七十年代",
Expand All @@ -754,7 +748,6 @@
"Context": {
"ReferenceDateTime": "2017-03-22T00:00:00"
},
"NotSupported": "javascript, python",
"Results": [
{
"Text": "九十年代",
Expand Down
Loading

0 comments on commit b531199

Please sign in to comment.