Add support for French decades with century (#3153)
* Draft commit
* Update regexes
* Fix DecadeRegex not defined before use for TypeScript
* Add test case for "not able to recognize French in the 90s"
* Resolved review comments and added DateTimeModel specs

---------

Co-authored-by: Michael Wang (Centific Technologies Inc) <v-michwang@microsoft.com>
This commit is contained in:
Родитель
1b88159e35
Коммит
cb8f16d8a1
|
@ -173,7 +173,9 @@ namespace Microsoft.Recognizers.Definitions.French
|
|||
public static readonly string PeriodTimeOfDayWithDateRegex = $@"\b(({TimeOfDayRegex}))\b";
|
||||
public const string LessThanRegex = @"^\b$";
|
||||
public const string MoreThanRegex = @"^\b$";
|
||||
public const string DurationUnitRegex = @"(?<unit>ann[eé]es?|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b";
|
||||
public const string DecadeRegex = @"(?<decade>(?:dix|vingt|trente|quarante|cinquante|soixante-dix|soixante|quatre-vingt-dix|quatre-vingts|deux\s+mille))";
|
||||
public static readonly string DecadeWithCenturyInnerRegex = $@"(((?<century>\d|1\d|2\d)?((?<decade>\d0)\b)|(?<decade>\d0)(?=s))|(({CenturyRegex}(\s+)(et\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+)(et\s+)?(?<decade>dix|centaines)))";
|
||||
public static readonly string DurationUnitRegex = $@"(?<unit>\bann[eé]es?(?!\s+{DecadeWithCenturyInnerRegex})\b|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b";
|
||||
public const string SuffixAndRegex = @"(?<suffix>\s*(et)\s+(une?\s+)?(?<suffix_num>demi|quart))";
|
||||
public const string PeriodicRegex = @"\b(?<periodic>quotidien(ne)?|journellement|mensuel(le)?|jours?|hebdomadaire|bihebdomadaire|annuel(lement)?)\b";
|
||||
public static readonly string EachUnitRegex = $@"(?<each>(chaque|toutes les|tous les)(?<other>\s+autres)?\s*{DurationUnitRegex})";
|
||||
|
@ -255,8 +257,7 @@ namespace Microsoft.Recognizers.Definitions.French
|
|||
public const string NumberAsTimeRegex = @"^\b$";
|
||||
public const string TimeBeforeAfterRegex = @"^\b$";
|
||||
public const string DateNumberConnectorRegex = @"^\s*(?<connector>\s+[aà])\s*$";
|
||||
public const string DecadeRegex = @"^\b$";
|
||||
public const string DecadeWithCenturyRegex = @"^\b$";
|
||||
public static readonly string DecadeWithCenturyRegex = $@"(les\s+)?(années)\s+{DecadeWithCenturyInnerRegex}";
|
||||
public const string RelativeDecadeRegex = @"^\b$";
|
||||
public static readonly string YearSuffix = $@"(,?(\s*à)?\s*({DateYearRegex}|{FullTextYearRegex}))";
|
||||
public const string SuffixAfterRegex = @"^\b$";
|
||||
|
@ -720,11 +721,19 @@ namespace Microsoft.Recognizers.Definitions.French
|
|||
public const string NightRegex = @"\b(minuit|nuit)\b";
|
||||
public static readonly Dictionary<string, int> WrittenDecades = new Dictionary<string, int>
|
||||
{
|
||||
{ @"", 0 }
|
||||
{ @"dix", 10 },
|
||||
{ @"vingt", 20 },
|
||||
{ @"trente", 30 },
|
||||
{ @"quarante", 40 },
|
||||
{ @"cinquante", 50 },
|
||||
{ @"soixante", 60 },
|
||||
{ @"soixante-dix", 70 },
|
||||
{ @"quatre-vingt", 80 },
|
||||
{ @"quatre-vingt-dix", 90 }
|
||||
};
|
||||
public static readonly Dictionary<string, int> SpecialDecadeCases = new Dictionary<string, int>
|
||||
{
|
||||
{ @"", 0 }
|
||||
{ @"deux mille", 2000 }
|
||||
};
|
||||
public const string DefaultLanguageFallback = @"DMY";
|
||||
public static readonly string[] DurationDateRestrictions = { };
|
||||
|
|
|
@ -104,6 +104,7 @@ namespace Microsoft.Recognizers.Text.DateTime.French
|
|||
SeasonMap = config.SeasonMap;
|
||||
SpecialYearPrefixesMap = config.SpecialYearPrefixesMap;
|
||||
WrittenDecades = config.WrittenDecades;
|
||||
Numbers = config.Numbers;
|
||||
SpecialDecadeCases = config.SpecialDecadeCases;
|
||||
}
|
||||
|
||||
|
|
|
@ -399,8 +399,14 @@ LessThanRegex: !simpleRegex
|
|||
MoreThanRegex: !simpleRegex
|
||||
# TODO: modify below regex according to the counterpart in English
|
||||
def: ^\b$
|
||||
DurationUnitRegex: !simpleRegex
|
||||
def: (?<unit>ann[eé]es?|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b
|
||||
DecadeRegex: !simpleRegex
|
||||
def: (?<decade>(?:dix|vingt|trente|quarante|cinquante|soixante-dix|soixante|quatre-vingt-dix|quatre-vingts|deux\s+mille))
|
||||
DecadeWithCenturyInnerRegex: !nestedRegex
|
||||
def: (((?<century>\d|1\d|2\d)?((?<decade>\d0)\b)|(?<decade>\d0)(?=s))|(({CenturyRegex}(\s+)(et\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+)(et\s+)?(?<decade>dix|centaines)))
|
||||
references: [ CenturyRegex, DecadeRegex ]
|
||||
DurationUnitRegex: !nestedRegex
|
||||
def: (?<unit>\bann[eé]es?(?!\s+{DecadeWithCenturyInnerRegex})\b|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b
|
||||
references: [ DecadeWithCenturyInnerRegex ]
|
||||
SuffixAndRegex: !simpleRegex
|
||||
def: (?<suffix>\s*(et)\s+(une?\s+)?(?<suffix_num>demi|quart))
|
||||
PeriodicRegex: !simpleRegex
|
||||
|
@ -592,12 +598,9 @@ TimeBeforeAfterRegex: !simpleRegex
|
|||
def: ^\b$
|
||||
DateNumberConnectorRegex: !simpleRegex
|
||||
def: ^\s*(?<connector>\s+[aà])\s*$
|
||||
DecadeRegex: !simpleRegex
|
||||
# TODO: modify below regex according to the counterpart in English
|
||||
def: ^\b$
|
||||
DecadeWithCenturyRegex: !simpleRegex
|
||||
# TODO: modify below regex according to the counterpart in English
|
||||
def: ^\b$
|
||||
DecadeWithCenturyRegex: !nestedRegex
|
||||
def: (les\s+)?(années)\s+{DecadeWithCenturyInnerRegex}
|
||||
references: [ DecadeWithCenturyInnerRegex ]
|
||||
RelativeDecadeRegex: !simpleRegex
|
||||
# TODO: modify below regex according to the counterpart in English
|
||||
def: ^\b$
|
||||
|
@ -1079,14 +1082,20 @@ NightRegex: !simpleRegex
|
|||
def: \b(minuit|nuit)\b
|
||||
WrittenDecades: !dictionary
|
||||
types: [ string, int ]
|
||||
# TODO: modify below dictionary according to the counterpart in English
|
||||
entries:
|
||||
'': 0
|
||||
'dix': 10
|
||||
'vingt': 20
|
||||
'trente': 30
|
||||
'quarante': 40
|
||||
'cinquante': 50
|
||||
'soixante': 60
|
||||
'soixante-dix': 70
|
||||
'quatre-vingt': 80
|
||||
'quatre-vingt-dix': 90
|
||||
SpecialDecadeCases: !dictionary
|
||||
types: [ string, int ]
|
||||
# TODO: modify below dictionary there're special cases for written decades
|
||||
entries:
|
||||
'': 0
|
||||
'deux mille': 2000
|
||||
DefaultLanguageFallback: DMY
|
||||
DurationDateRestrictions: []
|
||||
# Cases collected from mined data
|
||||
|
|
|
@ -4678,7 +4678,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 1970",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 1970",
|
||||
|
@ -4690,7 +4690,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 2000, il est né.",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 2000",
|
||||
|
@ -4726,7 +4726,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 70",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 70",
|
||||
|
@ -4737,11 +4737,11 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"Input": "Dans les années 40",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"Input": "Dans les années 20",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 40",
|
||||
"Text": "les années 20",
|
||||
"Type": "daterange",
|
||||
"Start": 5,
|
||||
"Length": 13
|
||||
|
@ -4750,7 +4750,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années soixante-dix",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années soixante-dix",
|
||||
|
@ -4762,7 +4762,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années dix-neuf soixante-dix",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années dix-neuf soixante-dix",
|
||||
|
@ -4772,6 +4772,30 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Dans les années mille quatre cent vingt",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années mille quatre cent vingt",
|
||||
"Type": "daterange",
|
||||
"Start": 5,
|
||||
"Length": 34
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Dans les années deux mille",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années deux mille",
|
||||
"Type": "daterange",
|
||||
"Start": 5,
|
||||
"Length": 21
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Dans les deux mille dix",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
|
@ -4786,7 +4810,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 2010",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 2010",
|
||||
|
@ -4810,7 +4834,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 2000",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 2000",
|
||||
|
|
|
@ -4034,7 +4034,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 2000, il est né.",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 2000",
|
||||
|
@ -4057,7 +4057,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 1970's",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 1970",
|
||||
|
@ -4080,7 +4080,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 70s",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 70",
|
||||
|
@ -4103,7 +4103,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 70's",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 70",
|
||||
|
@ -4149,7 +4149,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 40",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 40",
|
||||
|
@ -4172,7 +4172,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années soixante-dix",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années soixante-dix",
|
||||
|
@ -4193,9 +4193,78 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Dans les années dix-neuf soixante-dix",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années dix-neuf soixante-dix",
|
||||
"Type": "daterange",
|
||||
"Value": {
|
||||
"Timex": "(1970-01-01,1980-01-01,P10Y)",
|
||||
"FutureResolution": {
|
||||
"startDate": "1970-01-01",
|
||||
"endDate": "1980-01-01"
|
||||
},
|
||||
"PastResolution": {
|
||||
"startDate": "1970-01-01",
|
||||
"endDate": "1980-01-01"
|
||||
}
|
||||
},
|
||||
"Start": 5,
|
||||
"Length": 32
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Dans les années mille quatre cent vingt",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années mille quatre cent vingt",
|
||||
"Type": "daterange",
|
||||
"Value": {
|
||||
"Timex": "(1420-01-01,1430-01-01,P10Y)",
|
||||
"FutureResolution": {
|
||||
"startDate": "1420-01-01",
|
||||
"endDate": "1430-01-01"
|
||||
},
|
||||
"PastResolution": {
|
||||
"startDate": "1420-01-01",
|
||||
"endDate": "1430-01-01"
|
||||
}
|
||||
},
|
||||
"Start": 5,
|
||||
"Length": 34
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Dans les années deux mille",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années deux mille",
|
||||
"Type": "daterange",
|
||||
"Value": {
|
||||
"Timex": "(2000-01-01,2010-01-01,P10Y)",
|
||||
"FutureResolution": {
|
||||
"startDate": "2000-01-01",
|
||||
"endDate": "2010-01-01"
|
||||
},
|
||||
"PastResolution": {
|
||||
"startDate": "2000-01-01",
|
||||
"endDate": "2010-01-01"
|
||||
}
|
||||
},
|
||||
"Start": 5,
|
||||
"Length": 21
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Dans les années 1970",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 1970",
|
||||
|
@ -4241,7 +4310,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 2010",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 2010",
|
||||
|
@ -4287,7 +4356,7 @@
|
|||
},
|
||||
{
|
||||
"Input": "Dans les années 2000",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 2000",
|
||||
|
|
|
@ -3756,6 +3756,60 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Qui était notre président dans les années 1990 ?",
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années 1990",
|
||||
"Start": 31,
|
||||
"End": 45,
|
||||
"TypeName": "datetimeV2.daterange",
|
||||
"Resolution": {
|
||||
"values": [
|
||||
{
|
||||
"timex": "(1990-01-01,2000-01-01,P10Y)",
|
||||
"type": "daterange",
|
||||
"start": "1990-01-01",
|
||||
"end": "2000-01-01"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Qui étaient les présidents des États-Unis dans les années quatre-vingt-dix ?",
|
||||
"Debug": true,
|
||||
"Context": {
|
||||
"ReferenceDateTime": "2018-05-29T00:00:00"
|
||||
},
|
||||
"NotSupported": "javascript, python, java",
|
||||
"Results": [
|
||||
{
|
||||
"Text": "les années quatre-vingt-dix",
|
||||
"Start": 47,
|
||||
"End": 73,
|
||||
"TypeName": "datetimeV2.daterange",
|
||||
"Resolution": {
|
||||
"values": [
|
||||
{
|
||||
"timex": "(XX90-01-01,XX00-01-01,P10Y)",
|
||||
"type": "daterange",
|
||||
"start": "1990-01-01",
|
||||
"end": "2000-01-01"
|
||||
},
|
||||
{
|
||||
"timex": "(XX90-01-01,XX00-01-01,P10Y)",
|
||||
"type": "daterange",
|
||||
"start": "2090-01-01",
|
||||
"end": "2100-01-01"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Input": "Je vais rentrer le 02/oct.",
|
||||
"NotSupported": "dotnet, javascript, python, java",
|
||||
|
|
Загрузка…
Ссылка в новой задаче