From 0e3bd3aee1ad96dc35edbe121fa8eb81547c84be Mon Sep 17 00:00:00 2001 From: Martin Battaglino Date: Thu, 15 Apr 2021 12:14:16 -0300 Subject: [PATCH] [SDK][Recognizers-Text] Update temporal folder with latest changes (#1164) * Update Datetime extractors * Update Datetime resources * Update Number resources * Update NumberWithUnit resources * Update NumberWithUnit extractors * Update NumberWithUnit parsers --- .../extractors/BaseDateExtractor.java | 19 ++ .../extractors/BaseDateTimeExtractor.java | 16 + .../extractors/BaseTimeExtractor.java | 16 + .../text/datetime/resources/BaseDateTime.java | 4 +- .../datetime/resources/EnglishDateTime.java | 91 +++--- .../datetime/resources/EnglishTimeZone.java | 2 +- .../datetime/resources/FrenchDateTime.java | 65 +++-- .../resources/PortugueseDateTime.java | 92 +++--- .../datetime/resources/SpanishDateTime.java | 70 +++-- .../text/number/resources/ChineseNumeric.java | 14 +- .../text/number/resources/EnglishNumeric.java | 2 +- .../text/number/resources/FrenchNumeric.java | 103 ++++++- ...nNumberWithUnitExtractorConfiguration.java | 2 +- ...rmanNumberWithUnitParserConfiguration.java | 3 +- .../resources/BaseCurrency.java | 4 + .../resources/ChineseNumericWithUnit.java | 2 + .../resources/EnglishNumericWithUnit.java | 70 +++-- .../resources/FrenchNumericWithUnit.java | 4 + .../resources/GermanNumericWithUnit.java | 5 + .../resources/PortugueseNumericWithUnit.java | 272 ++++++++++++++++- .../resources/SpanishNumericWithUnit.java | 273 +++++++++++++++++- 21 files changed, 959 insertions(+), 170 deletions(-) diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java index 76b57dab..532d7947 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java @@ -108,6 +108,25 @@ public class BaseDateExtractor extends AbstractYearExtractor implements IDateTim isValidMatch = startsWithBasicDate(subText); } } + + // Expressions with mixed separators are not considered valid dates e.g. "30/4.85" (unless one is a comma "30/4, 2016") + MatchGroup dayGroup = match.getGroup("day"); + MatchGroup monthGroup = match.getGroup("month"); + if (!StringUtility.isNullOrEmpty(dayGroup.value) && !StringUtility.isNullOrEmpty(monthGroup.value)) { + String noDateText = match.value.replace(yearGroup.value, "") + .replace(monthGroup.value, "").replace(dayGroup.value, ""); + String[] separators = {"/", "\\", "-", "."}; + int separatorCount = 0; + for (String separator : separators) { + if (noDateText.contains(separator)) { + separatorCount++; + } + if (separatorCount > 1) { + isValidMatch = false; + break; + } + } + } } return isValidMatch; diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateTimeExtractor.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateTimeExtractor.java index 8fbdd8b8..f54bbbcb 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateTimeExtractor.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateTimeExtractor.java @@ -128,11 +128,27 @@ public class BaseDateTimeExtractor implements IDateTimeExtractor { Match[] matches = RegExpUtility.getMatches(this.config.getSimpleTimeOfTodayAfterRegex(), input); for (Match match : matches) { + // @TODO Remove when lookbehinds are handled correctly + if (isDecimal(match, input)) { + continue; + } + ret.add(new Token(match.index, match.index + match.length)); } return ret; } + + // Check if the match is part of a decimal number (e.g. 123.24) + private boolean isDecimal(Match match, String text) { + boolean isDecimal = false; + if (match.index > 1 && (text.charAt(match.index - 1) == ',' || + text.charAt(match.index - 1) == '.') && Character.isDigit(text.charAt(match.index - 2)) && Character.isDigit(match.value.charAt(0))) { + isDecimal = true; + } + + return isDecimal; + } public List timeOfTodayBefore(String input, LocalDateTime reference) { List ret = new ArrayList<>(); diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseTimeExtractor.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseTimeExtractor.java index 3bdcbf87..75b9547a 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseTimeExtractor.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseTimeExtractor.java @@ -88,6 +88,11 @@ public class BaseTimeExtractor implements IDateTimeExtractor { Match[] matches = RegExpUtility.getMatches(regex, text); for (Match match : matches) { + + // @TODO Remove when lookbehinds are handled correctly + if (isDecimal(match, text)) { + continue; + } // @TODO Workaround to avoid incorrect partial-only matches. Remove after time regex reviews across languages. String lth = match.getGroup("lth").value; @@ -102,6 +107,17 @@ public class BaseTimeExtractor implements IDateTimeExtractor { return ret; } + + // Check if the match is part of a decimal number (e.g. 123.24) + private boolean isDecimal(Match match, String text) { + boolean isDecimal = false; + if (match.index > 1 && (text.charAt(match.index - 1) == ',' || + text.charAt(match.index - 1) == '.') && Character.isDigit(text.charAt(match.index - 2)) && Character.isDigit(match.value.charAt(0))) { + isDecimal = true; + } + + return isDecimal; + } private List atRegexMatch(String text) { List ret = new ArrayList<>(); diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/BaseDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/BaseDateTime.java index 3add7bfa..bd492560 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/BaseDateTime.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/BaseDateTime.java @@ -17,7 +17,7 @@ import com.google.common.collect.ImmutableMap; public class BaseDateTime { - public static final String HourRegex = "(?2[0-4]|[0-1]?\\d)(h)?"; + public static final String HourRegex = "(?2[0-4]|[0-1]?\\d)(h)?"; public static final String TwoDigitHourRegex = "(?[0-1]\\d|2[0-4])(h)?"; @@ -36,6 +36,8 @@ public class BaseDateTime { public static final String IllegalYearRegex = "([-])({FourDigitYearRegex})([-])" .replace("{FourDigitYearRegex}", FourDigitYearRegex); + public static final String CheckDecimalRegex = "(?![,.]\\d)"; + public static final String RangeConnectorSymbolRegex = "(--|-|—|——|~|–)"; public static final String BaseAmDescRegex = "(am\\b|a\\s*\\.\\s*m\\s*\\.|a[\\.]?\\s*m\\b)"; diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java index f91dc7b5..c86b81be 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java @@ -119,7 +119,7 @@ public class EnglishDateTime { public static final String OfPrepositionRegex = "(\\bof\\b)"; - public static final String TwoDigitYearRegex = "\\b(?([0-24-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b" + public static final String TwoDigitYearRegex = "\\b(?([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b" .replace("{AmDescRegex}", AmDescRegex) .replace("{PmDescRegex}", PmDescRegex); @@ -134,7 +134,10 @@ public class EnglishDateTime { public static final String RelativeMonthRegex = "(?((day\\s+)?of\\s+)?{RelativeRegex}\\s+month)\\b" .replace("{RelativeRegex}", RelativeRegex); - public static final String WrittenMonthRegex = "(((the\\s+)?month of\\s+)?(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?))"; + public static final String MonthRegex = "\\b(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sep)(?!\\p{L})"; + + public static final String WrittenMonthRegex = "(((the\\s+)?month of\\s+)?{MonthRegex})" + .replace("{MonthRegex}", MonthRegex); public static final String MonthSuffixRegex = "(?(?:(in|of|on)\\s+)?({RelativeMonthRegex}|{WrittenMonthRegex}))" .replace("{RelativeMonthRegex}", RelativeMonthRegex) @@ -186,13 +189,14 @@ public class EnglishDateTime { public static final String SpecialYearPrefixes = "(calendar|(?fiscal|school))"; - public static final String OneWordPeriodRegex = "\\b((((the\\s+)?month of\\s+)?({StrictRelativeRegex}\\s+)?(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?))|(month|year) to date|(?((un)?till?|to)\\s+date)|({RelativeRegex}\\s+)?(my\\s+)?((?working\\s+week|workweek)|week(end)?|month|(({SpecialYearPrefixes}\\s+)?year))(?!((\\s+of)?\\s+\\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\\s+to\\s+date))(\\s+{AfterNextSuffixRegex})?)\\b" + public static final String OneWordPeriodRegex = "\\b((((the\\s+)?month of\\s+)?({StrictRelativeRegex}\\s+)?{MonthRegex})|(month|year) to date|(?((un)?till?|to)\\s+date)|({RelativeRegex}\\s+)?(my\\s+)?((?working\\s+week|workweek)|week(end)?|month|(({SpecialYearPrefixes}\\s+)?year))(?!((\\s+of)?\\s+\\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\\s+to\\s+date))(\\s+{AfterNextSuffixRegex})?)\\b" .replace("{StrictRelativeRegex}", StrictRelativeRegex) .replace("{RelativeRegex}", RelativeRegex) .replace("{AfterNextSuffixRegex}", AfterNextSuffixRegex) .replace("{SpecialYearPrefixes}", SpecialYearPrefixes) .replace("{BaseDateTime.BaseAmDescRegex}", BaseDateTime.BaseAmDescRegex) - .replace("{BaseDateTime.BasePmDescRegex}", BaseDateTime.BasePmDescRegex); + .replace("{BaseDateTime.BasePmDescRegex}", BaseDateTime.BasePmDescRegex) + .replace("{MonthRegex}", MonthRegex); public static final String MonthNumWithYear = "\\b(({BaseDateTime.FourDigitYearRegex}(\\s*)[/\\-\\.](\\s*){MonthNumRegex})|({MonthNumRegex}(\\s*)[/\\-](\\s*){BaseDateTime.FourDigitYearRegex}))\\b" .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex) @@ -274,8 +278,6 @@ public class EnglishDateTime { public static final String MonthOfRegex = "(month)(\\s*)(of)"; - public static final String MonthRegex = "(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?)"; - public static final String DateYearRegex = "(?{BaseDateTime.FourDigitYearRegex}|(?(?:3[0-1]|[1-2]\\d|0?[1-9])(?:th|nd|rd|st)?)[\\.]?(\\s+|\\s*[-,/]\\s*|\\s+of\\s+){MonthRegex}[\\.]?)\\b" + .replace("{DayPrefix}", DayPrefix) .replace("{DayRegex}", DayRegex) .replace("{MonthRegex}", MonthRegex) - .replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex); + .replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex) + .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex); public static final String DateExtractor4 = "\\b{MonthNumRegex}\\s*[/\\\\\\-]\\s*{DayRegex}[\\.]?\\s*[/\\\\\\-]\\s*{DateYearRegex}" .replace("{MonthNumRegex}", MonthNumRegex) .replace("{DayRegex}", DayRegex) .replace("{DateYearRegex}", DateYearRegex); - public static final String DateExtractor5 = "\\b{DayRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DateYearRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)" + public static final String DateExtractor5 = "\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DateYearRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)" + .replace("{DayPrefix}", DayPrefix) .replace("{DayRegex}", DayRegex) .replace("{MonthNumRegex}", MonthNumRegex) .replace("{MonthRegex}", MonthRegex) .replace("{DateYearRegex}", DateYearRegex); - public static final String DateExtractor6 = "(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({WeekDayRegex}\\s+)?{MonthNumRegex}[\\-\\.]{DayRegex}(?![%])\\b" + public static final String DateExtractor6 = "(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({DayPrefix}\\s+)?{MonthNumRegex}[\\-\\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b" .replace("{MonthNumRegex}", MonthNumRegex) .replace("{DayRegex}", DayRegex) - .replace("{WeekDayRegex}", WeekDayRegex) + .replace("{DayPrefix}", DayPrefix) .replace("{DatePreposition}", DatePreposition) - .replace("{StrictRelativeRegex}", StrictRelativeRegex); + .replace("{StrictRelativeRegex}", StrictRelativeRegex) + .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex); - public static final String DateExtractor7L = "\\b({WeekDayRegex}\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b" + public static final String DateExtractor7L = "\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b" .replace("{MonthNumRegex}", MonthNumRegex) .replace("{DayRegex}", DayRegex) - .replace("{WeekDayRegex}", WeekDayRegex) + .replace("{DayPrefix}", DayPrefix) .replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex); - public static final String DateExtractor7S = "\\b({WeekDayRegex}\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%])\\b" + public static final String DateExtractor7S = "\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b" .replace("{MonthNumRegex}", MonthNumRegex) .replace("{DayRegex}", DayRegex) - .replace("{WeekDayRegex}", WeekDayRegex); + .replace("{DayPrefix}", DayPrefix) + .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex); - public static final String DateExtractor8 = "(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({WeekDayRegex}\\s+)?{DayRegex}[\\\\\\-]{MonthNumRegex}(?![%])\\b" + public static final String DateExtractor8 = "(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({DayPrefix}\\s+)?{DayRegex}[\\\\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b" .replace("{DayRegex}", DayRegex) .replace("{MonthNumRegex}", MonthNumRegex) - .replace("{WeekDayRegex}", WeekDayRegex) + .replace("{DayPrefix}", DayPrefix) .replace("{DatePreposition}", DatePreposition) - .replace("{StrictRelativeRegex}", StrictRelativeRegex); + .replace("{StrictRelativeRegex}", StrictRelativeRegex) + .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex); - public static final String DateExtractor9L = "\\b({WeekDayRegex}\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\\b" + public static final String DateExtractor9L = "\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\\b" .replace("{DayRegex}", DayRegex) .replace("{MonthNumRegex}", MonthNumRegex) - .replace("{WeekDayRegex}", WeekDayRegex) + .replace("{DayPrefix}", DayPrefix) .replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex); - public static final String DateExtractor9S = "\\b({WeekDayRegex}\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}(?![%])\\b" + public static final String DateExtractor9S = "\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\\b" .replace("{DayRegex}", DayRegex) .replace("{MonthNumRegex}", MonthNumRegex) - .replace("{WeekDayRegex}", WeekDayRegex); + .replace("{DayPrefix}", DayPrefix) + .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex); - public static final String DateExtractorA = "\\b({WeekDayRegex}\\s+)?{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DayRegex}" + public static final String DateExtractorA = "\\b({DayPrefix}(\\s*,)?\\s+)?(({BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DayRegex})|({MonthRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*(the\\s+)?(?(?:3[0-1]|[1-2]\\d|0?[1-9])(?:th|nd|rd|st)?))|({DayRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*{MonthRegex}))" .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex) .replace("{MonthNumRegex}", MonthNumRegex) .replace("{MonthRegex}", MonthRegex) .replace("{DayRegex}", DayRegex) - .replace("{WeekDayRegex}", WeekDayRegex); + .replace("{DayPrefix}", DayPrefix); public static final String OfMonth = "^\\s*(day\\s+)?of\\s*{MonthRegex}" .replace("{MonthRegex}", MonthRegex); @@ -417,11 +429,11 @@ public class EnglishDateTime { public static final String DeltaMinuteNumRegex = "(?ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)"; - public static final String PmRegex = "(?(((?:at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\\s+(the\\s+)?night))"; + public static final String PmRegex = "(?(((?:at|in|around|circa|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\\s+(the\\s+)?night))"; - public static final String PmRegexFull = "(?((?:at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|(mid)?night|lunchtime))"; + public static final String PmRegexFull = "(?((?:at|in|around|circa|on|for)\\s+(the\\s+)?)?(afternoon|evening|(mid)?night|lunchtime))"; - public static final String AmRegex = "(?((?:at|in|around|on|for)\\s+(the\\s+)?)?(morning))"; + public static final String AmRegex = "(?((?:at|in|around|circa|on|for)\\s+(the\\s+)?)?(morning))"; public static final String LunchRegex = "\\blunchtime\\b"; @@ -471,7 +483,7 @@ public class EnglishDateTime { .replace("{MidafternoonRegex}", MidafternoonRegex) .replace("{MiddayRegex}", MiddayRegex); - public static final String AtRegex = "\\b(?:(?:(?<=\\bat\\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\\.\\d)(\\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\\b" + public static final String AtRegex = "\\b(?:(?:(?<=\\b(at|(at)?\\s*around|circa)\\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\\.\\d)(\\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\\b" .replace("{WrittenTimeRegex}", WrittenTimeRegex) .replace("{HourNumRegex}", HourNumRegex) .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) @@ -530,7 +542,7 @@ public class EnglishDateTime { .replace("{BasicTime}", BasicTime) .replace("{DescRegex}", DescRegex); - public static final String TimeRegex6 = "{BasicTime}(\\s*{DescRegex})?\\s+{TimeSuffix}\\b" + public static final String TimeRegex6 = "({BasicTime})(\\s*{DescRegex})?\\s+{TimeSuffix}\\b" .replace("{BasicTime}", BasicTime) .replace("{DescRegex}", DescRegex) .replace("{TimeSuffix}", TimeSuffix); @@ -609,7 +621,7 @@ public class EnglishDateTime { public static final String SuffixAfterRegex = "\\b(((at)\\s)?(or|and)\\s+(above|after|later|greater)(?!\\s+than))\\b"; - public static final String PrepositionRegex = "(?^(at|on|of)(\\s+the)?$)"; + public static final String PrepositionRegex = "(?^(,\\s*)?(at|on|of)(\\s+the)?$)"; public static final String LaterEarlyRegex = "((?early(\\s+|-))|(?late(r?\\s+|-)))"; @@ -652,7 +664,7 @@ public class EnglishDateTime { public static final String TimeOfTodayAfterRegex = "^\\s*(,\\s*)?(in\\s+)?{DateTimeSpecificTimeOfDayRegex}" .replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex); - public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+(at|around|in|on))?\\s*$" + public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+(at|around|circa|in|on))?\\s*$" .replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex); public static final String SimpleTimeOfTodayAfterRegex = "(?week|month|year|decade|weekend)\\b" .replace("{ReferencePrefixRegex}", ReferencePrefixRegex); - public static final String ConnectorRegex = "^(-|,|for|t|around|@)$"; + public static final String ConnectorRegex = "^(-|,|for|t|around|circa|@)$"; public static final String FromToRegex = "(\\b(from).+(to|and|or)\\b.+)"; @@ -830,7 +842,7 @@ public class EnglishDateTime { public static final String UnspecificDatePeriodRegex = "^(week|month|year)$"; - public static final String PrepositionSuffixRegex = "\\b(on|in|at|around|from|to)$"; + public static final String PrepositionSuffixRegex = "\\b(on|in|at|around|circa|from|to)$"; public static final String FlexibleDayRegex = "(?([A-Za-z]+\\s)?[A-Za-z\\d]+)"; @@ -901,7 +913,7 @@ public class EnglishDateTime { public static final String DateNumberConnectorRegex = "^\\s*(?\\s+at)\\s*$"; - public static final String DecadeRegex = "(?(?:nough|twen|thir|fou?r|fif|six|seven|eight|nine)ties|two\\s+thousands)"; + public static final String DecadeRegex = "(?(?:nough|twen|thir|fou?r|fif|six|seven|eigh|nine)ties|two\\s+thousands)"; public static final String DecadeWithCenturyRegex = "(the\\s+)?(((?\\d|1\\d|2\\d)?(')?(?\\d0)(')?(\\s)?s\\b)|(({CenturyRegex}(\\s+|-)(and\\s+)?)?{DecadeRegex})|({CenturyRegex}(\\s+|-)(and\\s+)?(?tens|hundreds)))" .replace("{CenturyRegex}", CenturyRegex) @@ -1386,6 +1398,7 @@ public class EnglishDateTime { .put("\\b(a|one) second\\b", "\\b(? MorningTermList = Arrays.asList("morning"); diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java index 89b29c01..dd5e8a47 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java @@ -88,7 +88,7 @@ public class EnglishTimeZone { .put("esat", -180) .put("est", -300) .put("estm", -300) - .put("et", -240) + .put("et", -300) .put("fjst", 780) .put("fjt", 720) .put("get", 240) diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java index 0ab5214d..0cee2693 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java @@ -27,9 +27,9 @@ public class FrenchDateTime { public static final String RangeConnectorRegex = "(?de la|au|[aà]|et(\\s*la)?|--|-|—|——)"; - public static final String RelativeRegex = "(?prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))"; + public static final String RelativeRegex = "(?prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))"; - public static final String StrictRelativeRegex = "(?prochaine?|derni[eè]re|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))"; + public static final String StrictRelativeRegex = "(?prochaine?|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))"; public static final String NextSuffixRegex = "(?prochaines?|prochain|suivante)\\b"; @@ -39,9 +39,9 @@ public class FrenchDateTime { public static final String RangePrefixRegex = "(du|depuis|des?|entre)"; - public static final String DayRegex = "(?01|02|03|04|05|06|07|08|09|10|11e?|12e?|13e?|14e?|15e?|16e?|17e?|18e?|19e?|1er|1|21e?|20e?|22e?|23e?|24e?|25e?|26e?|27e?|28e?|29e?|2e?|30e?|31e?|3e?|4e?|5e?|6e?|7e?|8e?|9e?)(?=\\b|t)"; + public static final String DayRegex = "(?(?:3[0-1]|[1-2]\\d|0?[1-9])(e(r)?)?)(?=\\b|t)"; - public static final String MonthNumRegex = "(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\\b"; + public static final String MonthNumRegex = "(?1[0-2]|(0)?[1-9])\\b"; public static final String SpecialDescRegex = "(p\\b)"; @@ -60,7 +60,7 @@ public class FrenchDateTime { .replace("{AmPmDescRegex}", AmPmDescRegex) .replace("{SpecialDescRegex}", SpecialDescRegex); - public static final String TwoDigitYearRegex = "\\b(?([0-24-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b" + public static final String TwoDigitYearRegex = "\\b(?([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b" .replace("{AmDescRegex}", AmDescRegex) .replace("{PmDescRegex}", PmDescRegex); @@ -212,10 +212,11 @@ public class FrenchDateTime { .replace("{YearRegex}", YearRegex) .replace("{TwoDigitYearRegex}", TwoDigitYearRegex); - public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{MonthRegex}\\s*[/\\\\\\.\\-]?\\s*{DayRegex}\\b" + public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{MonthRegex}\\s*[/\\\\\\.\\-]?\\s*{DayRegex}(\\s*[/\\\\\\.\\-]?\\s*{BaseDateTime.FourDigitYearRegex})?\\b" .replace("{WeekDayRegex}", WeekDayRegex) .replace("{MonthRegex}", MonthRegex) - .replace("{DayRegex}", DayRegex); + .replace("{DayRegex}", DayRegex) + .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex); public static final String DateExtractor2 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}(\\s+|\\s*,\\s*|\\s+){MonthRegex}\\s*[\\.\\-]?\\s*{DateYearRegex}\\b" .replace("{WeekDayRegex}", WeekDayRegex) @@ -223,12 +224,13 @@ public class FrenchDateTime { .replace("{DayRegex}", DayRegex) .replace("{DateYearRegex}", DateYearRegex); - public static final String DateExtractor3 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})" + public static final String BasicTime = "(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(:|\\s*h\\s*){BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})" .replace("{WrittenTimeRegex}", WrittenTimeRegex) .replace("{HourNumRegex}", HourNumRegex) .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) @@ -348,7 +356,7 @@ public class FrenchDateTime { public static final String RestrictedTimeUnitRegex = "(?huere|minute)\\b"; - public static final String ConnectNumRegex = "{BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\\s*{DescRegex}" + public static final String ConnectNumRegex = "{BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex}" .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) .replace("{DescRegex}", DescRegex); @@ -446,7 +454,7 @@ public class FrenchDateTime { public static final String TimeOfDayRegex = "\\b(?((((dans\\s+(l[ea])?\\s+)?((?d[eé]but(\\s+|-)|t[oô]t(\\s+|-)(l[ea]\\s*)?)|(?fin\\s*|fin de(\\s+(la)?)|tard\\s*))?(matin([ée]e)?|((d|l)?'?)apr[eè]s[-|\\s*]midi|nuit|soir([eé]e)?)))|(((\\s+(l[ea])?\\s+)?)jour(n[eé]e)?))s?)\\b"; - public static final String SpecificTimeOfDayRegex = "\\b(({RelativeRegex}\\s+{TimeOfDayRegex})|({TimeOfDayRegex}\\s*({NextSuffixRegex}))\\b|\\bsoir|\\bdu soir)s?\\b" + public static final String SpecificTimeOfDayRegex = "\\b(({RelativeRegex}\\s+{TimeOfDayRegex})|({TimeOfDayRegex}\\s*({NextSuffixRegex}))\\b|\\b(du )?soir)s?\\b" .replace("{TimeOfDayRegex}", TimeOfDayRegex) .replace("{RelativeRegex}", RelativeRegex) .replace("{NextSuffixRegex}", NextSuffixRegex); @@ -470,7 +478,7 @@ public class FrenchDateTime { public static final String TimeOfTodayAfterRegex = "^\\s*(,\\s*)?(en|dans|du\\s+)?{DateTimeSpecificTimeOfDayRegex}" .replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex); - public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|pour))?\\s*$" + public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|vers|pour))?\\s*$" .replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex); public static final String SimpleTimeOfTodayAfterRegex = "({HourNumRegex}|{BaseDateTime.HourRegex})\\s*(,\\s*)?(en|[àa]\\s+)?{DateTimeSpecificTimeOfDayRegex}" @@ -478,7 +486,7 @@ public class FrenchDateTime { .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) .replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex); - public static final String SimpleTimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|vers))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex})" + public static final String SimpleTimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|vers|pour))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex})" .replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex) .replace("{HourNumRegex}", HourNumRegex) .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex); @@ -581,7 +589,7 @@ public class FrenchDateTime { public static final String SinceRegex = "\\b(depuis)\\b"; - public static final String AroundRegex = "^[.]"; + public static final String AroundRegex = "\\b(vers)\\b"; public static final String AgoPrefixRegex = "\\b(y a)\\b"; @@ -640,7 +648,7 @@ public class FrenchDateTime { public static final String RelativeDayRegex = "\\b(((la\\s+)?{RelativeRegex}\\s+journ[ée]e))\\b" .replace("{RelativeRegex}", RelativeRegex); - public static final String ConnectorRegex = "^(,|pour|t|vers)$"; + public static final String ConnectorRegex = "^(,|pour|t|vers|le)$"; public static final String ConnectorAndRegex = "\\b(et\\s*(le|las?)?)\\b.+"; @@ -1188,6 +1196,7 @@ public class FrenchDateTime { public static final ImmutableMap AmbiguityFiltersDict = ImmutableMap.builder() .put("^([eé]t[eé])$", "(? AmbiguityTimeFiltersDict = ImmutableMap.builder() diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java index 11119064..7f5fbdbf 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java @@ -28,9 +28,9 @@ public class PortugueseDateTime { public static final String RangeConnectorRegex = "(?(e\\s*(([àa]s?)|o)?)|{BaseDateTime.RangeConnectorSymbolRegex})" .replace("{BaseDateTime.RangeConnectorSymbolRegex}", BaseDateTime.RangeConnectorSymbolRegex); - public static final String DayRegex = "(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)(?=\\b|t)"; + public static final String DayRegex = "(?(?:3[0-1]|[1-2]\\d|0?[1-9]))(?=\\b|t)"; - public static final String MonthNumRegex = "(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\\b"; + public static final String MonthNumRegex = "(?1[0-2]|(0)?[1-9])\\b"; public static final String AmDescRegex = "({BaseDateTime.BaseAmDescRegex})" .replace("{BaseDateTime.BaseAmDescRegex}", BaseDateTime.BaseAmDescRegex); @@ -45,13 +45,15 @@ public class PortugueseDateTime { .replace("{AmDescRegex}", AmDescRegex) .replace("{PmDescRegex}", PmDescRegex); + public static final String OclockRegex = "(?em\\s+ponto)"; + public static final String OfPrepositionRegex = "(\\bd(o|a|e)s?\\b)"; public static final String AfterNextSuffixRegex = "\\b(que\\s+vem|passad[oa])\\b"; public static final String RangePrefixRegex = "((de(sde)?|das?|entre)\\s+(a(s)?\\s+)?)"; - public static final String TwoDigitYearRegex = "\\b(?([0-24-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b" + public static final String TwoDigitYearRegex = "\\b(?([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b" .replace("{AmDescRegex}", AmDescRegex) .replace("{PmDescRegex}", PmDescRegex); @@ -176,7 +178,9 @@ public class PortugueseDateTime { public static final String OnRegex = "(?<=\\b(em|no)\\s+)({DayRegex}s?)\\b" .replace("{DayRegex}", DayRegex); - public static final String RelaxedOnRegex = "(?<=\\b(em|n[oa]|d[oa])\\s+)(dia\\s+)?((?10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)s?)\\b"; + public static final String RelaxedOnRegex = "(?<=\\b(em|n[oa]|d[oa])\\s+)(dia\\s+)?({DayRegex}s?)\\b(?!\\s*[/\\\\\\-\\.,:\\s]\\s*(\\d|{MonthRegex}))" + .replace("{DayRegex}", DayRegex) + .replace("{MonthRegex}", MonthRegex); public static final String ThisRegex = "\\b(([nd]?es[st][ea]\\s*){WeekDayRegex})|({WeekDayRegex}\\s*([nd]?es[st]a\\s+semana))\\b" .replace("{WeekDayRegex}", WeekDayRegex); @@ -195,7 +199,12 @@ public class PortugueseDateTime { public static final String WeekDayAndDayOfMonthRegex = ".^"; - public static final String WeekDayAndDayRegex = ".^"; + public static final String WeekDayAndDayRegex = "\\b{WeekDayRegex}\\s+({DayRegex})(?!([-:/]|\\.\\d|(\\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\\b" + .replace("{WeekDayRegex}", WeekDayRegex) + .replace("{DayRegex}", DayRegex) + .replace("{AmDescRegex}", AmDescRegex) + .replace("{PmDescRegex}", PmDescRegex) + .replace("{OclockRegex}", OclockRegex); public static final String WeekDayOfMonthRegex = "(?(n?[ao]\\s+)?(?primeir[ao]|1[ao]|segund[ao]|2[ao]|terceir[ao]|3[ao]|[qc]uart[ao]|4[ao]|quint[ao]|5[ao]|[uú]ltim[ao])\\s+{WeekDayRegex}\\s+{MonthSuffixRegex})" .replace("{WeekDayRegex}", WeekDayRegex) @@ -225,18 +234,19 @@ public class PortugueseDateTime { .replace("{YearRegex}", YearRegex) .replace("{TwoDigitYearRegex}", TwoDigitYearRegex); - public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}?((\\s*(de)|[/\\\\\\.\\-])\\s*)?{MonthRegex}\\b" + public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}((\\s*(de)|[/\\\\\\.\\- ])\\s*)?{MonthRegex}\\b" .replace("{WeekDayRegex}", WeekDayRegex) .replace("{DayRegex}", DayRegex) .replace("{MonthRegex}", MonthRegex); - public static final String DateExtractor2 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}\\s*([\\.\\-]|de)?\\s*{MonthRegex}(\\s*(,|de)\\s*){DateYearRegex}\\b" + public static final String DateExtractor2 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?({DayRegex}\\s*([/\\.\\-]|de)?\\s*{MonthRegex}(\\s*([,./-]|de)\\s*){DateYearRegex}|{BaseDateTime.FourDigitYearRegex}\\s*[/\\.\\- ]\\s*{DayRegex}\\s*[/\\.\\- ]\\s*{MonthRegex})\\b" .replace("{MonthRegex}", MonthRegex) .replace("{DayRegex}", DayRegex) .replace("{DateYearRegex}", DateYearRegex) - .replace("{WeekDayRegex}", WeekDayRegex); + .replace("{WeekDayRegex}", WeekDayRegex) + .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex); - public static final String DateExtractor3 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}(\\s+|\\s*,\\s*|\\s+de\\s+|\\s*-\\s*){MonthRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?\\b" + public static final String DateExtractor3 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{MonthRegex}(\\s*[/\\.\\- ]\\s*|\\s+de\\s+){DayRegex}((\\s*[/\\.\\- ]\\s*|\\s+de\\s+){DateYearRegex})?\\b" .replace("{DayRegex}", DayRegex) .replace("{MonthRegex}", MonthRegex) .replace("{WeekDayRegex}", WeekDayRegex) @@ -253,28 +263,34 @@ public class PortugueseDateTime { .replace("{DayRegex}", DayRegex) .replace("{DateYearRegex}", DateYearRegex); - public static final String DateExtractor6 = "(?<=\\b(em|no|o)\\s+){MonthNumRegex}[\\-\\.]{DayRegex}\\b" - .replace("{MonthNumRegex}", MonthNumRegex) - .replace("{DayRegex}", DayRegex); - - public static final String DateExtractor7 = "\\b{MonthNumRegex}\\s*/\\s*{DayRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?\\b" + public static final String DateExtractor6 = "(?<=\\b(em|no|o)\\s+){MonthNumRegex}[\\-\\.]{DayRegex}{BaseDateTime.CheckDecimalRegex}\\b" .replace("{MonthNumRegex}", MonthNumRegex) .replace("{DayRegex}", DayRegex) - .replace("{DateYearRegex}", DateYearRegex); + .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex); - public static final String DateExtractor8 = "(?<=\\b(em|no|o)\\s+){DayRegex}[\\\\\\-]{MonthNumRegex}\\b" + public static final String DateExtractor7 = "\\b{MonthNumRegex}\\s*/\\s*{DayRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?{BaseDateTime.CheckDecimalRegex}\\b" .replace("{MonthNumRegex}", MonthNumRegex) - .replace("{DayRegex}", DayRegex); + .replace("{DayRegex}", DayRegex) + .replace("{DateYearRegex}", DateYearRegex) + .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex); - public static final String DateExtractor9 = "\\b{DayRegex}\\s*/\\s*{MonthNumRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?\\b" + public static final String DateExtractor8 = "(?<=\\b(em|no|o)\\s+){DayRegex}[\\\\\\-]{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}\\b" + .replace("{MonthNumRegex}", MonthNumRegex) + .replace("{DayRegex}", DayRegex) + .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex); + + public static final String DateExtractor9 = "\\b{DayRegex}\\s*/\\s*{MonthNumRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?{BaseDateTime.CheckDecimalRegex}\\b" .replace("{DayRegex}", DayRegex) .replace("{MonthNumRegex}", MonthNumRegex) - .replace("{DateYearRegex}", DateYearRegex); + .replace("{DateYearRegex}", DateYearRegex) + .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex); - public static final String DateExtractor10 = "\\b{YearRegex}\\s*[/\\\\\\-\\.]\\s*{MonthNumRegex}\\s*[/\\\\\\-\\.]\\s*{DayRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)" + public static final String DateExtractor10 = "\\b({YearRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DayRegex}|{MonthRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*{DayRegex}|{DayRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*{MonthRegex})(?!\\s*[/\\\\\\-\\.:]\\s*\\d+)" .replace("{YearRegex}", YearRegex) .replace("{MonthNumRegex}", MonthNumRegex) - .replace("{DayRegex}", DayRegex); + .replace("{MonthRegex}", MonthRegex) + .replace("{DayRegex}", DayRegex) + .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex); public static final String DateExtractor11 = "(?<=\\b(dia)\\s+){DayRegex}" .replace("{DayRegex}", DayRegex); @@ -285,8 +301,6 @@ public class PortugueseDateTime { public static final String DeltaMinuteNumRegex = "(?um|dois|tr[êe]s|[qc]uatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|catorze|quatorze|quinze|dez[ea]sseis|dez[ea]sete|dezoito|dez[ea]nove|vinte|trinta|[qc]uarenta|cin[qc]uenta)"; - public static final String OclockRegex = "(?em\\s+ponto)"; - public static final String PmRegex = "(?((pela|de|da|\\b[àa]\\b|na)\\s+(tarde|noite)))|((depois\\s+do|ap[óo]s\\s+o)\\s+(almo[çc]o|meio dia|meio-dia))"; public static final String AmRegex = "(?(pela|de|da|na)\\s+(manh[ãa]|madrugada))"; @@ -329,7 +343,7 @@ public class PortugueseDateTime { .replace("{WrittenTimeRegex}", WrittenTimeRegex) .replace("{OclockRegex}", OclockRegex); - public static final String ConnectNumRegex = "({BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\\s*{DescRegex})" + public static final String ConnectNumRegex = "({BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex})" .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) .replace("{DescRegex}", DescRegex); @@ -401,29 +415,29 @@ public class PortugueseDateTime { .replace("{BaseDateTime.MinuteRegex}", BaseDateTime.MinuteRegex) .replace("{DescRegex}", DescRegex); - public static final String PrepositionRegex = "(?([àa]s?|em|por|pelo|pela|no|na|de|d[oa]?)?$)"; + public static final String PrepositionRegex = "(?([àa]s?|em|por|pel[ao]|n[ao]|de|d[ao]?)?$)"; public static final String NowRegex = "\\b(?((logo|exatamente)\\s+)?agora(\\s+mesmo)?|neste\\s+momento|(assim\\s+que|t[ãa]o\\s+cedo\\s+quanto)\\s+(poss[ií]vel|possas?|possamos)|o\\s+mais\\s+(cedo|r[aá]pido)\\s+poss[íi]vel|recentemente|previamente)\\b"; - public static final String SuffixRegex = "^\\s*((e|a|em|por|pelo|pela|no|na|de)\\s+)?(manh[ãa]|madrugada|meio\\s*dia|tarde|noite)\\b"; + public static final String SuffixRegex = "^\\s*((e|a|em|por|pel[ao]|n[ao]|de)\\s+)?(manh[ãa]|madrugada|meio\\s*dia|tarde|noite)\\b"; public static final String TimeOfDayRegex = "\\b(?manh[ãa]|madrugada|tarde|noite|((depois\\s+do|ap[óo]s\\s+o)\\s+(almo[çc]o|meio dia|meio-dia)))\\b"; public static final String SpecificTimeOfDayRegex = "\\b(((((a)?\\s+|[nd]?es[st]a|seguinte|pr[oó]xim[oa]|[uú]ltim[oa])\\s+)?{TimeOfDayRegex}))\\b" .replace("{TimeOfDayRegex}", TimeOfDayRegex); - public static final String TimeOfTodayAfterRegex = "^\\s*(,\\s*)?([àa]|em|por|pelo|pela|de|no|na?\\s+)?{SpecificTimeOfDayRegex}" + public static final String TimeOfTodayAfterRegex = "^\\s*(,\\s*)?([àa]|em|por|pel[ao]|de|no|na?\\s+)?{SpecificTimeOfDayRegex}" .replace("{SpecificTimeOfDayRegex}", SpecificTimeOfDayRegex); - public static final String TimeOfTodayBeforeRegex = "({SpecificTimeOfDayRegex}(\\s*,)?(\\s+(a\\s+la(s)?|para))?\\s*)" + public static final String TimeOfTodayBeforeRegex = "({SpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]s|para))?\\s*)" .replace("{SpecificTimeOfDayRegex}", SpecificTimeOfDayRegex); - public static final String SimpleTimeOfTodayAfterRegex = "({HourNumRegex}|{BaseDateTime.HourRegex})\\s*(,\\s*)?((en|de(l)?)?\\s+)?{SpecificTimeOfDayRegex}" + public static final String SimpleTimeOfTodayAfterRegex = "({HourNumRegex}|{BaseDateTime.HourRegex})\\s*(,\\s*)?{SpecificTimeOfDayRegex}" .replace("{HourNumRegex}", HourNumRegex) .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) .replace("{SpecificTimeOfDayRegex}", SpecificTimeOfDayRegex); - public static final String SimpleTimeOfTodayBeforeRegex = "({SpecificTimeOfDayRegex}(\\s*,)?(\\s+(a\\s+la|para))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex}))" + public static final String SimpleTimeOfTodayBeforeRegex = "({SpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]s|((cerca|perto|ao\\s+redor|por\\s+volta)\\s+(de|das))))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex}))" .replace("{SpecificTimeOfDayRegex}", SpecificTimeOfDayRegex) .replace("{HourNumRegex}", HourNumRegex) .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex); @@ -434,13 +448,13 @@ public class PortugueseDateTime { public static final String UnspecificEndOfRangeRegex = "^[.]"; - public static final String UnitRegex = "(?anos|ano|meses|m[êe]s|semanas|semana|dias|dia|horas|hora|h|hr|hrs|hs|minutos|minuto|mins|min|segundos|segundo|segs|seg)\\b"; + public static final String UnitRegex = "(?anos?|meses|m[êe]s|semanas?|dias?|horas?|hrs?|hs?|minutos?|mins?|segundos?|segs?)\\b"; - public static final String ConnectorRegex = "^(,|t|para [ao]|para as|pras|cerca de|cerca das|perto de|perto das|quase)$"; + public static final String ConnectorRegex = "^(,|t|para [ao]|para as|pras|(cerca|perto|ao\\s+redor|por\\s+volta)\\s+(de|das)|quase)$"; - public static final String TimeHourNumRegex = "(?vinte e um|vinte e dois|vinte e tr[êe]s|vinte e quatro|zero|um|uma|dois|duas|tr[êe]s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|quatorze|catorze|quinze|dez[ea]sseis|dez[ea]ssete|dezoito|dez[ea]nove|vinte)"; + public static final String TimeHourNumRegex = "(?vinte( e (um|dois|tr[êe]s|quatro))?|zero|uma?|dois|duas|tr[êe]s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|quatorze|catorze|quinze|dez([ea]sseis|[ea]ssete|oito|[ea]nove))"; - public static final String PureNumFromTo = "((desde|de|da|das)\\s+(a(s)?\\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\\s*(?{DescRegex}))?\\s*{TillRegex}\\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\\s*(?{PmRegex}|{AmRegex}|{DescRegex})?" + public static final String PureNumFromTo = "(((desde|de|da|das)\\s+(a(s)?\\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\\s*(?{DescRegex}))?\\s*{TillRegex}(?{DescRegex}))?\\s*{TillRegex})\\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\\s*(?{PmRegex}|{AmRegex}|{DescRegex})?" .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) .replace("{TimeHourNumRegex}", TimeHourNumRegex) .replace("{DescRegex}", DescRegex) @@ -459,7 +473,7 @@ public class PortugueseDateTime { public static final String SpecificTimeBetweenAnd = "^[.]"; - public static final String TimeUnitRegex = "(?horas|hora|h|minutos|minuto|mins|min|segundos|segundo|secs|sec)\\b"; + public static final String TimeUnitRegex = "(?horas?|h|minutos?|mins?|segundos?|se[cg]s?)\\b"; public static final String TimeFollowedUnit = "^\\s*{TimeUnitRegex}" .replace("{TimeUnitRegex}", TimeUnitRegex); @@ -505,7 +519,7 @@ public class PortugueseDateTime { public static final String InexactNumberUnitRegex = "\\b(poucos|pouco|algum|alguns|v[áa]rios)\\s+{UnitRegex}" .replace("{UnitRegex}", UnitRegex); - public static final String HolidayRegex1 = "\\b(?sexta-feira santa|sexta-feira da paix[ãa]o|quarta-feira de cinzas|carnaval|dia (de|de los) presidentes?|ano novo chin[eê]s|ano novo|v[ée]spera de ano novo|natal|v[ée]spera de natal|dia de a[cç][ãa]o de gra[çc]as|a[cç][ãa]o de gra[çc]as|yuandan|halloween|dia das bruxas|p[áa]scoa)(\\s+(d[eo]?\\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\\s+ano))?\\b" + public static final String HolidayRegex1 = "\\b(?sexta-feira santa|sexta-feira da paix[ãa]o|quarta-feira de cinzas|carnaval|dia dos? presidentes?|ano novo chin[eê]s|ano novo|v[ée]spera de ano novo|natal|v[ée]spera de natal|dia de a[cç][ãa]o de gra[çc]as|a[cç][ãa]o de gra[çc]as|yuandan|halloween|dia das bruxas|p[áa]scoa)(\\s+(d[eo]?\\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\\s+ano))?\\b" .replace("{YearRegex}", YearRegex); public static final String HolidayRegex2 = "\\b(?(dia\\s+(d[eoa]s?\\s+)?)?(martin luther king|todos os santos|s[ãa]o (patr[íi]cio|francisco|jorge|jo[ãa]o)|independ[êe]ncia))(\\s+(d[eo]?\\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\\s+ano))?\\b" @@ -520,7 +534,7 @@ public class PortugueseDateTime { public static final String SinceRegex = "(desde(\\s+(as?|o))?)"; - public static final String AroundRegex = "^[.]"; + public static final String AroundRegex = "(?:\\b(?:cerca|perto|ao\\s+redor|por\\s+volta)\\s*?\\b)(\\s+(de|das))?"; public static final String PeriodicRegex = "\\b(?di[áa]ri[ao]|diariamente|mensalmente|semanalmente|quinzenalmente|anualmente)\\b"; @@ -566,7 +580,7 @@ public class PortugueseDateTime { public static final String AgoRegex = "\\b(antes|atr[áa]s|no passado)\\b"; - public static final String LaterRegex = "\\b(depois d[eoa]s?|ap[óo]s (as)?|desde (as|o)|desde|no futuro|mais tarde)\\b"; + public static final String LaterRegex = "\\b(depois d[eoa]s?|ap[óo]s (as)?|desde( (as|o))?|no futuro|mais tarde)\\b"; public static final String Tomorrow = "amanh[ãa]"; @@ -932,7 +946,7 @@ public class PortugueseDateTime { public static final List DurationDateRestrictions = Arrays.asList(); public static final ImmutableMap AmbiguityFiltersDict = ImmutableMap.builder() - .put("null", "null") + .put("^(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)$", "([$%£&!?@#])(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)|(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)([$%£&@#])") .build(); public static final List EarlyMorningTermList = Arrays.asList("madrugada"); diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java index 019ed94c..d8330a66 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java @@ -36,7 +36,7 @@ public class SpanishDateTime { public static final String DayRegex = "\\b(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)(?:\\.[º°])?(?=\\b|t)"; - public static final String MonthNumRegex = "(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\\b"; + public static final String MonthNumRegex = "(?1[0-2]|(0)?[1-9])\\b"; public static final String OclockRegex = "(?en\\s+punto)"; @@ -68,7 +68,7 @@ public class SpanishDateTime { public static final String RangePrefixRegex = "((de(l|sde)?|entre)(\\s+la(s)?)?)"; - public static final String TwoDigitYearRegex = "\\b(?([0-24-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d))|\\.?[º°ª])\\b" + public static final String TwoDigitYearRegex = "\\b(?([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d))|\\.?[º°ª])\\b" .replace("{AmDescRegex}", AmDescRegex) .replace("{PmDescRegex}", PmDescRegex); @@ -308,18 +308,19 @@ public class SpanishDateTime { .replace("{DayRegex}", DayRegex) .replace("{MonthRegex}", MonthRegex); - public static final String DateExtractor2 = "\\b((el\\s+d[ií]a|{WeekDayRegex})(\\s+|\\s*,\\s*))?(?cero|una|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce)\\b"; @@ -399,6 +406,11 @@ public class SpanishDateTime { .replace("{PmRegex}", PmRegex) .replace("{OclockRegex}", OclockRegex); + public static final String GeneralDescRegex = "({DescRegex}|(?{AmRegex}|{PmRegex}))" + .replace("{DescRegex}", DescRegex) + .replace("{AmRegex}", AmRegex) + .replace("{PmRegex}", PmRegex); + public static final String BasicTime = "(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})" .replace("{WrittenTimeRegex}", WrittenTimeRegex) .replace("{HourNumRegex}", HourNumRegex) @@ -415,7 +427,7 @@ public class SpanishDateTime { .replace("{DescRegex}", DescRegex) .replace("{MidTimeRegex}", MidTimeRegex); - public static final String ConnectNumRegex = "({BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\\s*{DescRegex})" + public static final String ConnectNumRegex = "({BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex})" .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) .replace("{DescRegex}", DescRegex); @@ -444,8 +456,9 @@ public class SpanishDateTime { .replace("{TimeTokenPrefix}", TimeTokenPrefix) .replace("{TimeSuffix}", TimeSuffix); - public static final String TimeRegex4 = "\\b(({DescRegex}?)|({BasicTime}?)({DescRegex}?)){TimePrefix}(\\s*({HourNumRegex}|{BaseDateTime.HourRegex}))?(\\s+{TensTimeRegex}(\\s*(y\\s+)?{MinuteNumRegex})?)?(\\s*({OclockRegex}|{DescRegex})|\\b)" + public static final String TimeRegex4 = "\\b(({DescRegex}?)|({BasicTime}\\s*)?({GeneralDescRegex}?)){TimePrefix}(\\s*({HourNumRegex}|{BaseDateTime.HourRegex}))?(\\s+{TensTimeRegex}(\\s*(y\\s+)?{MinuteNumRegex})?)?(\\s*({OclockRegex}|{DescRegex})|\\b)" .replace("{DescRegex}", DescRegex) + .replace("{GeneralDescRegex}", GeneralDescRegex) .replace("{BasicTime}", BasicTime) .replace("{TimePrefix}", TimePrefix) .replace("{HourNumRegex}", HourNumRegex) @@ -500,7 +513,7 @@ public class SpanishDateTime { public static final String SuffixRegex = "^\\s*(((y|a|en|por)\\s+la|al)\\s+)?(mañana|madrugada|medio\\s*d[ií]a|(?(({LaterEarlyRegex}\\s+)((del?|en|por)(\\s+(el|los?|las?))?\\s+)?)?(mañana|madrugada|pasado\\s+(el\\s+)?medio\\s?d[ií]a|(?(({LaterEarlyRegex}\\s+)((del?|en|por)(\\s+(el|los?|las?))?\\s+)?)?(mañana|madrugada|pasado\\s+(el\\s+)?medio\\s?d[ií]a|(?veintiuno|veintidos|veintitres|veinticuatro|cero|uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|diecis([eé])is|diecisiete|dieciocho|diecinueve|veinte)"; + public static final String TimeHourNumRegex = "(?veint(i(uno|dos|tres|cuatro)|e)|cero|uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|dieci(s([eé])is|siete|ocho|nueve))"; public static final String PureNumFromTo = "((\\b(desde|de)\\s+(la(s)?\\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(?!\\s+al?\\b)(\\s*(?{DescRegex}))?|(\\b(desde|de)\\s+(la(s)?\\s+)?)({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\\s*(?{DescRegex}))?)\\s*{TillRegex}\\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\\s*(?{PmRegex}|{AmRegex}|{DescRegex})?" .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) @@ -1120,7 +1133,7 @@ public class SpanishDateTime { public static final String UnspecificDatePeriodRegex = "^[\\.]"; - public static final String PrepositionSuffixRegex = "\\b(en|el|la|cerca|desde|durante|hasta|hacia)$"; + public static final String PrepositionSuffixRegex = "\\b(en|el|la|cerca|alrededor|desde|durante|hasta|hacia)$"; public static final String RestOfDateTimeRegex = "\\bresto\\s+((del?)\\s+)?((la|el|est[ae])\\s+)?(?(día|jornada))(\\s+de\\s+hoy)?\\b"; @@ -1156,6 +1169,7 @@ public class SpanishDateTime { .put("^a[nñ]o$", "(? EarlyMorningTermList = Arrays.asList("madrugada"); diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java index bf648924..187fef6f 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java @@ -162,7 +162,7 @@ public class ChineseNumeric { public static final String DigitalNumberRegex = "((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))" .replace("{BaseNumbers.MultiplierLookupRegex}", BaseNumbers.MultiplierLookupRegex); - public static final String ZeroToNineFullHalfRegex = "[\\d1234567890]"; + public static final String ZeroToNineFullHalfRegex = "[\\d]"; public static final String DigitNumRegex = "{ZeroToNineFullHalfRegex}+" .replace("{ZeroToNineFullHalfRegex}", ZeroToNineFullHalfRegex); @@ -311,11 +311,11 @@ public class ChineseNumeric { .replace("{AllFloatRegex}", AllFloatRegex) .replace("{ZeroToNineIntegerRegex}", ZeroToNineIntegerRegex); - public static final String DoubleExponentialNotationRegex = "(?)"; + public static final String MoreRegex = "((大于|多于|高于|超过|大於|多於|高於|超過|超过)了?|过|>)"; public static final String LessRegex = "(小于|少于|低于|小於|少於|低於|不到|不足|<)"; @@ -447,7 +447,7 @@ public class ChineseNumeric { .replace("{LessRegex}", LessRegex) .replace("{SpeicalCharBeforeNumber}", SpeicalCharBeforeNumber); - public static final String MoreOrEqualSuffix = "(或|或者)\\s*(以上|之上|更[大多高])"; + public static final String MoreOrEqualSuffix = "(或|或者)\\s*(次?以上|之上|更[大多高])"; public static final String LessOrEqual = "(({LessRegex}\\s*(或|或者)?\\s*{EqualRegex})|(至多|最多){SpeicalCharBeforeNumber}?|不{MoreRegex}|≤)" .replace("{LessRegex}", LessRegex) @@ -463,7 +463,7 @@ public class ChineseNumeric { public static final String OneNumberRangeMoreRegex2 = "比\\s*(?((?!(([,,](?!\\d+))|。)).)+)\\s*更?[大多高]"; - public static final String OneNumberRangeMoreRegex3 = "(?((?!(([,,](?!\\d+))|。|[或者])).)+)\\s*(或|或者)?\\s*([多几余幾餘]|以上|之上|更[大多高])([万亿萬億]{0,2})"; + public static final String OneNumberRangeMoreRegex3 = "(?((?!(([,,](?!\\d+))|。|[或者])).)+)\\s*(或|或者)?\\s*([多几余幾餘]|次?以上|之上|更[大多高])([万亿萬億]{0,2})"; public static final String OneNumberRangeLessRegex1 = "({LessOrEqual}|{LessRegex})\\s*(?((?!([并且而並的同時时]|([,,](?!\\d+))|。)).)+)" .replace("{LessOrEqual}", LessOrEqual) diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java index 9f46b23b..ae82d423 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java @@ -198,7 +198,7 @@ public class EnglishNumeric { public static final String TillRegex = "((?)"; + public static final String MoreRegex = "(?:(bigger|greater|more|higher|larger)(\\s+than)?|above|over|beyond|exceed(ed|ing)?|surpass(ed|ing)?|(?)"; public static final String LessRegex = "(?:(less|lower|smaller|fewer)(\\s+than)?|below|under|(?|=)<)"; diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java index bbc5ae53..98441a99 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java @@ -191,12 +191,113 @@ public class FrenchNumeric { public static final String DoubleCaretExponentialNotationRegex = "(((?)"; + + public static final String LessRegex = "(?:(less|plus\\s+(bas|petit|jeune)|moins|inf[ée]rieure?s?)(\\s+([àa]|d[e'’]|que))?|((en )?dessous)\\s+de|under|(?|=)<)"; + + public static final String EqualRegex = "(([ée]ga(l(es)?|ux)|au\\s+nombre)(\\s+([àa]|d[e'’]))?|(?)=)"; + + public static final String MoreOrEqualPrefix = "((pas\\s+{LessRegex})|(au\\s+moins|[àa] partir d[e'’]))" + .replace("{LessRegex}", LessRegex); + + public static final String MoreOrEqual = "(?:({MoreRegex}\\s+(ou)?\\s+{EqualRegex})|({EqualRegex}\\s+(ou)?\\s+{MoreRegex})|{MoreOrEqualPrefix}(\\s+(ou)?\\s+{EqualRegex})?|({EqualRegex}\\s+(ou)?\\s+)?{MoreOrEqualPrefix}|>\\s*=|≥)" + .replace("{MoreRegex}", MoreRegex) + .replace("{EqualRegex}", EqualRegex) + .replace("{LessRegex}", LessRegex) + .replace("{MoreOrEqualPrefix}", MoreOrEqualPrefix); + + public static final String MoreOrEqualSuffix = "((et|ou)\\s+(((more|greater|higher|plus(\\s+grand)?|sup[ée]rieure?s?)((?!\\s+([àa]|que))|(\\s+([àa]|que)(?!((\\s+ou\\s+[èe]ga(l(es)?|ux)\\s+[àa])?\\s*\\d+)))))|((a plus|au-dessus)\\s+d[e'’](?!\\s+than))))"; + + public static final String LessOrEqualPrefix = "((pas\\s+{MoreRegex})|(au\\s+plus)|(jusqu'[àa]))" + .replace("{MoreRegex}", MoreRegex); + + public static final String LessOrEqual = "(({LessRegex}\\s+(ou)?\\s+{EqualRegex})|({EqualRegex}\\s+(ou)?\\s+{LessRegex})|{LessOrEqualPrefix}(\\s+(ou)?\\s+{EqualRegex})?|({EqualRegex}\\s+(ou)?\\s+)?{LessOrEqualPrefix}|<\\s*=|≤)" + .replace("{LessRegex}", LessRegex) + .replace("{EqualRegex}", EqualRegex) + .replace("{MoreRegex}", MoreRegex) + .replace("{LessOrEqualPrefix}", LessOrEqualPrefix); + + public static final String LessOrEqualSuffix = "((et|ou)\\s+(less|lower|plus petit|moins|inf[ée]rieure?s?)((?!\\s+([àa]|de|que))|(\\s+([àa]|d[e'’]|que)(?!(\\s*\\d+)))))"; + + public static final String NumberSplitMark = "(?![,.](?!\\d+))(?!\\s*\\b(et\\s+({LessRegex}|{MoreRegex})|mais|ou|to)\\b)" + .replace("{MoreRegex}", MoreRegex) + .replace("{LessRegex}", LessRegex); + + public static final String MoreRegexNoNumberSucceed = "((bigger|greater|more|plus(\\s+grand)?|sup[ée]rieure?s?)((?!\\s+([àa]|que))|\\s+(([àa]|que)(?!(\\s*\\d+))))|((au-dessus|a plus)\\s+d[e'’])(?!(\\s*\\d+)))"; + + public static final String LessRegexNoNumberSucceed = "((less|lower|plus petit|moins|inf[ée]rieure?s?)((?!\\s+([àa]|d[e'’]|que))|\\s+(([àa]|d[e'’]|que)(?!(\\s*\\d+))))|(((en )?dessous)\\s+d[e'’]|under)(?!(\\s*\\d+)))"; + + public static final String EqualRegexNoNumberSucceed = "([èe]ga(l(es)?|ux)((?!\\s+([àa]))|(\\s+([àa]|que)(?!(\\s*\\d+)))))"; + + public static final String OneNumberRangeMoreRegex1 = "({MoreOrEqual}|{MoreRegex})\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)" + .replace("{MoreOrEqual}", MoreOrEqual) + .replace("{MoreRegex}", MoreRegex) + .replace("{NumberSplitMark}", NumberSplitMark); + + public static final String OneNumberRangeMoreRegex1LB = "(?({NumberSplitMark}.)+)\\s*{MoreOrEqualSuffix}" + .replace("{MoreOrEqualSuffix}", MoreOrEqualSuffix) + .replace("{NumberSplitMark}", NumberSplitMark); + + public static final String OneNumberRangeMoreSeparateRegex = "({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+ou\\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\\s+(?({NumberSplitMark}.)+)(\\s+ou\\s+){EqualRegexNoNumberSucceed})" + .replace("{EqualRegex}", EqualRegex) + .replace("{MoreRegex}", MoreRegex) + .replace("{EqualRegexNoNumberSucceed}", EqualRegexNoNumberSucceed) + .replace("{MoreRegexNoNumberSucceed}", MoreRegexNoNumberSucceed) + .replace("{NumberSplitMark}", NumberSplitMark); + + public static final String OneNumberRangeLessRegex1 = "({LessOrEqual}|{LessRegex})\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)" + .replace("{LessOrEqual}", LessOrEqual) + .replace("{LessRegex}", LessRegex) + .replace("{NumberSplitMark}", NumberSplitMark); + + public static final String OneNumberRangeLessRegex1LB = "(?({NumberSplitMark}.)+)\\s*{LessOrEqualSuffix}" + .replace("{LessOrEqualSuffix}", LessOrEqualSuffix) + .replace("{NumberSplitMark}", NumberSplitMark); + + public static final String OneNumberRangeLessSeparateRegex = "({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+ou\\s+){LessRegexNoNumberSucceed})|({LessRegex}\\s+(?({NumberSplitMark}.)+)(\\s+ou\\s+){EqualRegexNoNumberSucceed})" + .replace("{EqualRegex}", EqualRegex) + .replace("{LessRegex}", LessRegex) + .replace("{EqualRegexNoNumberSucceed}", EqualRegexNoNumberSucceed) + .replace("{LessRegexNoNumberSucceed}", LessRegexNoNumberSucceed) + .replace("{NumberSplitMark}", NumberSplitMark); + + public static final String OneNumberRangeEqualRegex = "(?({NumberSplitMark}.)+)" + .replace("{EqualRegex}", EqualRegex) + .replace("{NumberSplitMark}", NumberSplitMark); + + public static final String TwoNumberRangeRegex1 = "entre\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)\\s*et\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)" + .replace("{NumberSplitMark}", NumberSplitMark); + + public static final String TwoNumberRangeRegex2 = "({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(et|mais|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})" + .replace("{OneNumberRangeMoreRegex1}", OneNumberRangeMoreRegex1) + .replace("{OneNumberRangeMoreRegex2}", OneNumberRangeMoreRegex2) + .replace("{OneNumberRangeLessRegex1}", OneNumberRangeLessRegex1) + .replace("{OneNumberRangeLessRegex2}", OneNumberRangeLessRegex2); + + public static final String TwoNumberRangeRegex3 = "({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\\s*(et|mais|,)\\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})" + .replace("{OneNumberRangeMoreRegex1}", OneNumberRangeMoreRegex1) + .replace("{OneNumberRangeMoreRegex2}", OneNumberRangeMoreRegex2) + .replace("{OneNumberRangeLessRegex1}", OneNumberRangeLessRegex1) + .replace("{OneNumberRangeLessRegex2}", OneNumberRangeLessRegex2); + + public static final String TwoNumberRangeRegex4 = "(de\\s+)?(?({NumberSplitMark}(?!\\bde\\b).)+)\\s*{TillRegex}\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)" + .replace("{TillRegex}", TillRegex) + .replace("{NumberSplitMark}", NumberSplitMark); + public static final Character DecimalSeparatorChar = ','; public static final String FractionMarkerToken = "sur"; diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/GermanNumberWithUnitExtractorConfiguration.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/GermanNumberWithUnitExtractorConfiguration.java index d066d80b..401d9485 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/GermanNumberWithUnitExtractorConfiguration.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/GermanNumberWithUnitExtractorConfiguration.java @@ -46,7 +46,7 @@ public abstract class GermanNumberWithUnitExtractorConfiguration implements INum } public String getConnectorToken() { - return ""; + return GermanNumericWithUnit.ConnectorToken; } public Pattern getCompoundUnitConnectorRegex() { diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/parsers/GermanNumberWithUnitParserConfiguration.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/parsers/GermanNumberWithUnitParserConfiguration.java index 203f8019..f8bd445b 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/parsers/GermanNumberWithUnitParserConfiguration.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/parsers/GermanNumberWithUnitParserConfiguration.java @@ -8,6 +8,7 @@ import com.microsoft.recognizers.text.number.german.parsers.GermanNumberParserCo import com.microsoft.recognizers.text.number.parsers.AgnosticNumberParserFactory; import com.microsoft.recognizers.text.number.parsers.AgnosticNumberParserType; import com.microsoft.recognizers.text.numberwithunit.parsers.BaseNumberWithUnitParserConfiguration; +import com.microsoft.recognizers.text.numberwithunit.resources.GermanNumericWithUnit; public abstract class GermanNumberWithUnitParserConfiguration extends BaseNumberWithUnitParserConfiguration { @@ -26,7 +27,7 @@ public abstract class GermanNumberWithUnitParserConfiguration extends BaseNumber @Override public String getConnectorToken() { - return ""; + return GermanNumericWithUnit.ConnectorToken; } public GermanNumberWithUnitParserConfiguration(CultureInfo ci) { diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/BaseCurrency.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/BaseCurrency.java index 55a95d4a..799acd2c 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/BaseCurrency.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/BaseCurrency.java @@ -188,6 +188,7 @@ public class BaseCurrency { .put("VEF", "CENTIMO") .put("YER", "FILS") .put("ZMW", "NGWEE") + .put("_XBT", "MILLIBITCOIN|SATOSHI") .build(); public static final ImmutableMap CurrencyFractionalRatios = ImmutableMap.builder() @@ -267,6 +268,8 @@ public class BaseCurrency { .put("Kwartje", 4L) .put("Dubbeltje", 10L) .put("Stuiver", 20L) + .put("Millibitcoin", 1000L) + .put("Satoshi", 100000000L) .build(); public static final ImmutableMap NonStandardFractionalSubunits = ImmutableMap.builder() @@ -277,5 +280,6 @@ public class BaseCurrency { .put("YDD", 1000L) .put("TND", 1000L) .put("MRO", 5L) + .put("_XBT", 1000L) .build(); } diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/ChineseNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/ChineseNumericWithUnit.java index d4086e07..edd40e83 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/ChineseNumericWithUnit.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/ChineseNumericWithUnit.java @@ -245,6 +245,7 @@ public class ChineseNumericWithUnit { .put("Jiao", "毛钱|毛|角钱|角") .put("Finnish markka", "芬兰马克") .put("Penni", "盆尼") + .put("Bitcoin", "₿|btc|xbt|个比特币|比特币") .build(); public static final ImmutableMap CurrencyNameToIsoCodeMap = ImmutableMap.builder() @@ -536,6 +537,7 @@ public class ChineseNumericWithUnit { .put("Euro", "€") .put("Pound", "£") .put("Costa Rican colón", "₡") + .put("Bitcoin", "₿|btc|xbt") .build(); public static final List CurrencyAmbiguousValues = Arrays.asList("元", "仙", "分", "圆", "块", "毛", "盾", "箍", "蚊", "角"); diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/EnglishNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/EnglishNumericWithUnit.java index 3ce10a86..04ed6344 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/EnglishNumericWithUnit.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/EnglishNumericWithUnit.java @@ -47,6 +47,8 @@ public class EnglishNumericWithUnit { .put("Acre", "-acre|acre|acres") .build(); + public static final List AmbiguousAreaUnitList = Arrays.asList("n/a"); + public static final ImmutableMap CurrencySuffixList = ImmutableMap.builder() .put("Abkhazian apsar", "abkhazian apsar|apsars") .put("Afghan afghani", "afghan afghani|؋|afn|afghanis|afghani") @@ -296,6 +298,9 @@ public class EnglishNumericWithUnit { .put("Jiao", "jiao|mao") .put("Finnish markka", "suomen markka|finnish markka|finsk mark|fim|markkaa|markka") .put("Penni", "penniä|penni") + .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿") + .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins") + .put("Satoshi", "satoshi|satoshis") .build(); public static final ImmutableMap CurrencyNameToIsoCodeMap = ImmutableMap.builder() @@ -484,6 +489,7 @@ public class EnglishNumericWithUnit { .put("Ascension pound", "_AP") .put("Alderney pound", "_ALP") .put("Abkhazian apsar", "_AA") + .put("Bitcoin", "_XBT") .build(); public static final ImmutableMap FractionalUnitNameToCodeMap = ImmutableMap.builder() @@ -560,6 +566,8 @@ public class EnglishNumericWithUnit { .put("Tiyin", "TIYIN") .put("Hào", "HAO") .put("Ngwee", "NGWEE") + .put("Millibitcoin", "MILLIBITCOIN") + .put("Satoshi", "SATOSHI") .build(); public static final String CompoundUnitConnectorRegex = "(?and)"; @@ -608,9 +616,10 @@ public class EnglishNumericWithUnit { .put("Pound", "£") .put("Costa Rican colón", "₡") .put("Turkish lira", "₺") + .put("Bitcoin", "₿|btc|xbt") .build(); - public static final List AmbiguousCurrencyUnitList = Arrays.asList("din.", "kiwi", "kina", "kobo", "lari", "lipa", "napa", "para", "sfr.", "taka", "tala", "toea", "vatu", "yuan", "all", "ang", "ban", "bob", "btn", "byr", "cad", "cop", "cup", "dop", "gip", "jod", "kgs", "lak", "lei", "mga", "mop", "nad", "omr", "pul", "sar", "sbd", "scr", "sdg", "sek", "sen", "sol", "sos", "std", "try", "yer", "yen", "db"); + public static final List AmbiguousCurrencyUnitList = Arrays.asList("din.", "kiwi", "kina", "kobo", "lari", "lipa", "napa", "para", "sfr.", "taka", "tala", "toea", "vatu", "yuan", "all", "ang", "ban", "bob", "btn", "byr", "cad", "cop", "cup", "dop", "gip", "jod", "kgs", "lak", "lei", "mga", "mop", "nad", "omr", "pul", "sar", "sbd", "scr", "sdg", "sek", "sen", "sol", "sos", "std", "try", "yer", "yen", "db", "satoshi", "satoshis"); public static final ImmutableMap InformationSuffixList = ImmutableMap.builder() .put("Bit", "-bit|bit|bits") @@ -627,7 +636,7 @@ public class EnglishNumericWithUnit { .put("Petabyte", "-petabyte|-petabytes|petabyte|pB|PB|petabytes|peta byte|peta bytes|pbyte") .build(); - public static final List AmbiguousDimensionUnitList = Arrays.asList("barrel", "barrels", "grain", "pound", "stone", "yards", "yard", "cord", "dram", "feet", "foot", "gill", "knot", "peck", "cup", "fps", "pts", "in", "dm", "\""); + public static final List AmbiguousDimensionUnitList = Arrays.asList("barrel", "barrels", "grain", "grains", "pound", "stone", "stones", "yards", "yard", "cord", "cords", "dram", "drachm", "drachma", "feet", "foot", "gill", "knot", "knots", "peck", "pecks", "cup", "cups", "fps", "pts", "in", "dm", "\"", "pinch", "pinches"); public static final String BuildPrefix = "(?<=(\\s|^))"; @@ -655,24 +664,29 @@ public class EnglishNumericWithUnit { public static final List AmbiguousLengthUnitList = Arrays.asList("m", "yard", "yards", "pm", "pt", "pts"); public static final ImmutableMap SpeedSuffixList = ImmutableMap.builder() - .put("Meter per second", "meters / second|m/s|meters per second|metres per second|meter per second|metre per second") - .put("Kilometer per hour", "km/h|kilometres per hour|kilometers per hour|kilometer per hour|kilometre per hour") + .put("Meter per second", "meter/second|meters/second|meters / second|m/s|meters per second|metres per second|meter per second|metre per second") + .put("Kilometer per hour", "km/h|kilometres per hour|kilometers per hour|kilometer per hour|kilometre per hour|kph|kmph|km/hr") .put("Kilometer per minute", "km/min|kilometers per minute|kilometres per minute|kilometer per minute|kilometre per minute") .put("Kilometer per second", "km/s|kilometers per second|kilometres per second|kilometer per second|kilometre per second") - .put("Mile per hour", "mph|mile per hour|miles per hour|mi/h|mile / hour|miles / hour|miles an hour") - .put("Knot", "kt|knot|kn") - .put("Foot per second", "ft/s|foot/s|foot per second|feet per second|fps") - .put("Foot per minute", "ft/min|foot/min|foot per minute|feet per minute") + .put("Mile per hour", "mph|mile per hour|miles per hour|mi/h|mile / hour|miles / hour|miles an hour|mi/hr") + .put("Knot", "kt|knot|knots|kn") + .put("Foot per second", "ft/s|foot/s|feet/s|foot per second|feet per second|fps") + .put("Foot per minute", "ft/min|foot/min|feet/min|foot per minute|feet per minute") .put("Yard per minute", "yards per minute|yard per minute|yards / minute|yards/min|yard/min") .put("Yard per second", "yards per second|yard per second|yards / second|yards/s|yard/s") + .put("Meter per millisecond", "meter/millisecond|meters/millisecond|meter / millisecond|meters / millisecond|meter per millisecond|meters per millisecond|m/ms") + .put("Centimeter per millisecond", "centimeter/millisecond|centimeters/millisecond|centimeter / millisecond|centimeters / millisecond|centimeter per millisecond|centimeters per millisecond|cm/ms") + .put("Kilometer per millisecond", "kilometer/millisecond|kilometers/millisecond|kilometer / millisecond|kilometers / millisecond|kilometer per millisecond|kilometers per millisecond|km/ms") .build(); + public static final List AmbiguousSpeedUnitList = Arrays.asList("knot", "knots", "fps"); + public static final ImmutableMap TemperatureSuffixList = ImmutableMap.builder() - .put("F", "degrees fahrenheit|degree fahrenheit|deg fahrenheit|degs fahrenheit|fahrenheit|°f|degrees farenheit|degree farenheit|deg farenheit|degs farenheit|degrees f|degree f|deg f|degs f|farenheit|f") + .put("F", "degrees fahrenheit|degree fahrenheit|deg fahrenheit|degs fahrenheit|fahrenheit|°f|° f|degrees farenheit|degree farenheit|deg farenheit|degs farenheit|degrees f|degree f|deg f|degs f|farenheit|f") .put("K", "k|K|kelvin") .put("R", "rankine|°r") .put("D", "delisle|°de") - .put("C", "degrees celsius|degree celsius|deg celsius|degs celsius|celsius|degrees celcius|degree celcius|celcius|deg celcius|degs celcius|degrees centigrade|degree centigrade|centigrade|degrees centigrate|degree centigrate|degs centigrate|deg centigrate|centigrate|degrees c|degree c|deg c|degs c|°c|c") + .put("C", "degrees celsius|degree celsius|deg celsius|degs celsius|celsius|degrees celcius|degree celcius|celcius|deg celcius|degs celcius|degrees centigrade|degree centigrade|centigrade|degrees centigrate|degree centigrate|degs centigrate|deg centigrate|centigrate|degrees c|degree c|deg c|degs c|°c|° c|c") .put("Degree", "degree|degrees|deg.|deg|°") .build(); @@ -687,33 +701,49 @@ public class EnglishNumericWithUnit { .put("Liter", "l|litre|liter|liters|litres") .put("Deciliter", "dl|deciliter|decilitre|deciliters|decilitres") .put("Centiliter", "cl|centiliter|centilitre|centiliters|centilitres") - .put("Milliliter", "ml|mls|millilitre|milliliter|millilitres|milliliters") + .put("Milliliter", "ml|mls|millilitre|milliliter|millilitres|milliliters|cc") .put("Cubic yard", "cubic yard|cubic yards") .put("Cubic inch", "cubic inch|cubic inches") .put("Cubic foot", "cubic foot|cubic feet") .put("Cubic mile", "cubic mile|cubic miles") .put("Fluid ounce", "fl oz|fluid ounce|fluid ounces") - .put("Teaspoon", "teaspoon|teaspoons") - .put("Tablespoon", "tablespoon|tablespoons") - .put("Pint", "pint|pints") - .put("Volume unit", "fluid dram|gill|quart|minim|cord|peck|bushel|hogshead|barrels|barrel|bbl") + .put("Teaspoon", "teaspoon|teaspoons|teaspoonful|teaspoonfuls|tsp|tsp.|tspn|tspn.|tea spoon|tea spoons|t.|ts.") + .put("Tablespoon", "tablespoon|tablespoons|tablespoonful|tablespoonfuls|tbl|tbl.|tbs|tbs.|tbsp|tbsp.|table spoon|table spoons|T.|Tb.|tbls.|tbls") + .put("Pint", "pint|pints|fl pt| fluid pint") + .put("Quart", "quart|quarts|fl qt") + .put("Cup", "cup|cups") + .put("Gill", "gill|gills") + .put("Pinch", "pinch|pinches") + .put("Fluid Dram", "fluid dram|fluid drachm|fluid drachma|fluidram|fluidrams") + .put("Barrel", "barrel|bbl|barrels") + .put("Minim", "minim") + .put("Cord", "cord|cords") + .put("Peck", "peck|pecks") + .put("Bushel", "bushel") + .put("Hogshead", "hogshead") .build(); - public static final List AmbiguousVolumeUnitList = Arrays.asList("l", "ounce", "oz", "cup", "peck", "cord", "gill"); + public static final List AmbiguousVolumeUnitList = Arrays.asList("l", "ounce", "oz", "cup", "cups", "peck", "pecks", "cord", "cords", "gill", "gills", "barrel", "barrels", "tbl", "quart", "quarts", "pinch", "t.", "T.", "Tb.", "ts."); public static final ImmutableMap WeightSuffixList = ImmutableMap.builder() .put("Kilogram", "kg|kilogram|kilograms|kilo|kilos") - .put("Gram", "g|gram|grams") + .put("Gram", "g|gram|grams|gm") .put("Milligram", "mg|milligram|milligrams") - .put("Gallon", "-gallon|gallons|gallon") + .put("Gallon", "-gallon|gallons|gallon|gal") .put("Metric ton", "metric tons|metric ton") .put("Ton", "-ton|ton|tons|tonne|tonnes") .put("Pound", "pound|pounds|lb|lbs") .put("Ounce", "-ounce|ounce|oz|ounces") - .put("Weight unit", "pennyweight|grain|british long ton|us short hundredweight|stone|dram") + .put("Grain", "grain|grains|gr") + .put("Pennyweight", "pennyweight") + .put("Long ton (British)", "british long ton|long ton (british)") + .put("Short ton (US)", "us short ton|short ton (us)") + .put("Short hundredweight (US)", "us short hundredweight|short hundredweight (us)") + .put("Stone", "stone") + .put("Dram", "dram|drachm|drachma|roman drachma|greek drachma") .build(); - public static final List AmbiguousWeightUnitList = Arrays.asList("g", "oz", "stone", "dram", "lbs"); + public static final List AmbiguousWeightUnitList = Arrays.asList("g", "oz", "stone", "dram", "lbs", "gal", "grain", "grains"); public static final ImmutableMap AmbiguityFiltersDict = ImmutableMap.builder() .put("\\bm\\b", "((('|’)\\s*m)|(m\\s*('|’)))") diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/FrenchNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/FrenchNumericWithUnit.java index c7ae6bda..d2abdf03 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/FrenchNumericWithUnit.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/FrenchNumericWithUnit.java @@ -267,6 +267,9 @@ public class FrenchNumericWithUnit { .put("Fen", "fen") .put("Jiao", "jiao") .put("Mark Finlandais", "marks finlandais|mark finlandais|fim|mark") + .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿") + .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins") + .put("Satoshi", "satoshi|satoshis") .build(); public static final String CompoundUnitConnectorRegex = "(?[^.])"; @@ -300,6 +303,7 @@ public class FrenchNumericWithUnit { .put("Yen Japonais", "¥|jpy") .put("Euro", "€|eur") .put("Livre", "£") + .put("Bitcoin", "₿|btc|xbt") .build(); public static final List AmbiguousCurrencyUnitList = Arrays.asList("din.", "kina", "lari", "taka", "tala", "vatu", "yuan", "bob", "btn", "cop", "cup", "dop", "gip", "jod", "kgs", "lak", "mga", "mop", "nad", "omr", "sar", "sbd", "scr", "sdg", "sek", "sos", "std", "try", "yer"); diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java index 5d4717a9..a772e88d 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java @@ -292,6 +292,8 @@ public class GermanNumericWithUnit { .put("Jiao", "jiao") .put("Finnish markka", "suomen markka|finnish markka|finsk mark|fim|markkaa|markka|finnische mark|finnischen mark") .put("Penni", "penniä|penni") + .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿") + .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins") .build(); public static final String CompoundUnitConnectorRegex = "(?[^.])"; @@ -337,6 +339,7 @@ public class GermanNumericWithUnit { .put("Pound", "£") .put("Costa Rican colón", "₡") .put("Turkish lira", "₺") + .put("Bitcoin", "₿|btc|xbt") .build(); public static final List AmbiguousCurrencyUnitList = Arrays.asList("din.", "kiwi", "kina", "kobo", "lari", "lipa", "napa", "para", "sfr.", "taka", "tala", "toea", "vatu", "yuan", "ang", "ban", "bob", "btn", "byr", "cad", "cop", "cup", "dop", "gip", "jod", "kgs", "lak", "lei", "mga", "mop", "nad", "omr", "pul", "sar", "sbd", "scr", "sdg", "sek", "sen", "sol", "sos", "std", "try", "yer", "yen"); @@ -362,6 +365,8 @@ public class GermanNumericWithUnit { public static final String BuildSuffix = "(?=(\\s|\\W|$))"; + public static final String ConnectorToken = "-"; + public static final ImmutableMap LengthSuffixList = ImmutableMap.builder() .put("Kilometer", "km|kilometer|kilometern") .put("Hectometer", "hm|hektometer|hektometern") diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java index 5e186321..a2965662 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java @@ -349,9 +349,278 @@ public class PortugueseNumericWithUnit { .put("Dinar iugoslavo", "dinar iugoslavo|dinares iugoslavos|dinar jugoslavo|dinares jugoslavos|yud") .put("Kwacha zambiano", "kwacha zambiano|kwacha zambianos|kwachas zambianos|zmw") .put("Ngwee zambiano", "ngwee zambiano|ngwee zambianos|ngwees zambianos") + .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿") + .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins") + .put("Satoshi", "satoshi|satoshis") .build(); - public static final String CompoundUnitConnectorRegex = "(?[^.])"; + public static final ImmutableMap CurrencyNameToIsoCodeMap = ImmutableMap.builder() + .put("Afegani afegão", "AFN") + .put("Euro", "EUR") + .put("Lek albanês", "ALL") + .put("Kwanza angolano", "AOA") + .put("Dram armênio", "AMD") + .put("Florim arubano", "AWG") + .put("Taka bengali", "BDT") + .put("Ngultrum butanês", "BTN") + .put("Boliviano", "BOB") + .put("Marco da Bósnia e Herzegovina", "BAM") + .put("Pula", "BWP") + .put("Real brasileiro", "BRL") + .put("Lev búlgaro", "BGN") + .put("Riel cambojano", "KHR") + .put("Escudo cabo-verdiano", "CVE") + .put("Colón costarriquenho", "CRC") + .put("Kuna croata", "HRK") + .put("Coroa checa", "CZK") + .put("Nakfa", "ERN") + .put("Birr etíope", "ETB") + .put("Dalasi gambiano", "GMD") + .put("Lari georgiano", "GEL") + .put("Cedi", "GHS") + .put("Quetzal guatemalteco", "GTQ") + .put("Gurde haitiano", "HTG") + .put("Lempira hondurenha", "HNL") + .put("Florim húngaro", "HUF") + .put("Rial iraniano", "IRR") + .put("Rial iemenita", "YER") + .put("Novo shekel israelense", "ILS") + .put("Yen", "JPY") + .put("Tengue cazaque", "KZT") + .put("Xelim queniano", "KES") + .put("Won norte-coreano", "KPW") + .put("Won sul-coreano", "KRW") + .put("Som quirguiz", "KGS") + .put("Quipe laosiano", "LAK") + .put("Loti do Lesoto", "LSL") + .put("Rand sul-africano", "ZAR") + .put("Pataca macaense", "MOP") + .put("Dinar macedônio", "MKD") + .put("Ariary malgaxe", "MGA") + .put("Kwacha do Malawi", "MWK") + .put("Ringuite malaio", "MYR") + .put("Uguia", "MRO") + .put("Tugrik mongol", "MNT") + .put("Metical moçambicao", "MZN") + .put("Quiate mianmarense", "MMK") + .put("Córdova nicaraguense", "NIO") + .put("Naira", "NGN") + .put("Lira turca", "TRY") + .put("Rial omanense", "OMR") + .put("Balboa panamenho", "PAB") + .put("Kina", "PGK") + .put("Guarani", "PYG") + .put("Novo Sol", "PEN") + .put("Złoty", "PLN") + .put("Rial catariano", "QAR") + .put("Rial saudita", "SAR") + .put("Tala samoano", "WST") + .put("São Tomé and Príncipe dobra", "STN") + .put("Leone serra-leonino", "SLL") + .put("Lilangeni", "SZL") + .put("Somoni tajique", "TJS") + .put("Baht tailandês", "THB") + .put("Grívnia", "UAH") + .put("Vatu", "VUV") + .put("Bolívar forte venezuelano", "VEF") + .put("Kwacha zambiano", "ZMW") + .put("Dirame marroquino", "MAD") + .put("Dirame dos Emirados Árabes Unidos", "AED") + .put("Manat azeri", "AZN") + .put("Manat turcomeno", "TMT") + .put("Xelim somali", "SOS") + .put("Xelim tanzaniano", "TZS") + .put("Xelim ugandês", "UGX") + .put("Leu romeno", "RON") + .put("Leu moldávio", "MDL") + .put("Rupia nepalesa", "NPR") + .put("Rupia paquistanesa", "PKR") + .put("Rupia indiana", "INR") + .put("Rupia seichelense", "SCR") + .put("Rupia maurícia", "MUR") + .put("Rupia maldiva", "MVR") + .put("Rupia do Sri Lanka", "LKR") + .put("Rupia indonésia", "IDR") + .put("Coroa dinamarquesa", "DKK") + .put("Coroa norueguesa", "NOK") + .put("Coroa islandesa", "ISK") + .put("Coroa sueca", "SEK") + .put("Franco CFA da África Ocidental", "XOF") + .put("Franco CFA da África Central", "XAF") + .put("Franco comorense", "KMF") + .put("Franco congolês", "CDF") + .put("Burundian franc", "BIF") + .put("Franco djibutiano", "DJF") + .put("Franco CFP", "XPF") + .put("Franco da Guiné", "GNF") + .put("Franco suíço", "CHF") + .put("Franco ruandês", "RWF") + .put("Rublo russo", "RUB") + .put("Transnistrian ruble", "PRB") + .put("New Belarusian ruble", "BYN") + .put("Dinar argelino", "DZD") + .put("Dinar bareinita", "BHD") + .put("Dinar iraquiano", "IQD") + .put("Dinar jordaniano", "JOD") + .put("Dinar kuwaitiano", "KWD") + .put("Dinar libio", "LYD") + .put("Dinar sérvio", "RSD") + .put("Dinar tunisiano", "TND") + .put("Peso argentino", "ARS") + .put("Chilean peso", "CLP") + .put("Peso colombiano", "COP") + .put("Peso cubano convertível", "CUC") + .put("Peso cubano", "CUP") + .put("Peso dominicano", "DOP") + .put("Peso mexicano", "MXN") + .put("Peso uruguaio", "UYU") + .put("Libra esterlina", "GBP") + .put("Libra de Santa Helena", "SHP") + .put("Libra egípcia", "EGP") + .put("Libra das Malvinas", "FKP") + .put("Libra de Gibraltar", "GIP") + .put("Libra manesa", "IMP") + .put("Libra de Jersey", "JEP") + .put("Libra libanesa", "LBP") + .put("Libra sul-sudanesa", "SSP") + .put("Libra sudanesa", "SDG") + .put("Libra síria", "SYP") + .put("Dólar estadunidense", "USD") + .put("Dólar australiano", "AUD") + .put("Dólar bahamense", "BSD") + .put("Dólar de Barbados", "BBD") + .put("Dólar de Belize", "BZD") + .put("Dólar bermudense", "BMD") + .put("Dólar de Brunei", "BND") + .put("Singapore dólar", "SGD") + .put("Dólar canadense", "CAD") + .put("Dólar das Ilhas Cayman", "KYD") + .put("Dólar neozelandês", "NZD") + .put("Dólar fijiano", "FJD") + .put("Dólar guianense", "GYD") + .put("Dólar de Hong Kong", "HKD") + .put("Dólar jamaicano", "JMD") + .put("Dólar liberiano", "LRD") + .put("Dólar namibiano", "NAD") + .put("Dólar das Ilhas Salomão", "SBD") + .put("Dólar surinamês", "SRD") + .put("Novo dólar taiwanês", "TWD") + .put("Dólar de Trinidade e Tobago", "TTD") + .put("Tuvaluan dólar", "TVD") + .put("Yuan chinês", "CNY") + .put("Rial", "__RI") + .put("Xelim", "__S") + .put("Som", "__SO") + .put("Dirame", "__DR") + .put("Dinar", "_DN") + .put("Dólar", "__D") + .put("Manat", "__MA") + .put("Rupia", "__R") + .put("Coroa", "__K") + .put("Krona", "__K") + .put("Franco", "__F") + .put("Marco", "__M") + .put("Rublo", "__RB") + .put("Peso", "__PE") + .put("Libra", "__P") + .put("Tristan da Cunha libra", "_TP") + .put("South Georgia and the South Sandwich Islands libra", "_SP") + .put("Somaliland xelim", "_SS") + .put("Pitcairn Islands dólar", "_PND") + .put("Palauan dólar", "_PD") + .put("Niue dólar", "_NID") + .put("Nauruan dólar", "_ND") + .put("Micronesian dólar", "_MD") + .put("Kiribati dólar", "_KID") + .put("Guernsey libra", "_GGP") + .put("Faroese króna", "_FOK") + .put("Cook Islands dólar", "_CKD") + .put("British Virgin Islands dólar", "_BD") + .put("Ascension libra", "_AP") + .put("Alderney libra", "_ALP") + .put("Abkhazian apsar", "_AA") + .put("Bitcoin", "_XBT") + .build(); + + public static final ImmutableMap FractionalUnitNameToCodeMap = ImmutableMap.builder() + .put("Jiao", "JIAO") + .put("Kopek", "KOPEK") + .put("Pul", "PUL") + .put("Cent", "CENT") + .put("Qindarke", "QINDARKE") + .put("Peni", "PENNY") + .put("Santeem", "SANTEEM") + .put("Cêntimo", "CENTIMO") + .put("Centavo", "CENT") + .put("Luma", "LUMA") + .put("Qəpik", "QƏPIK") + .put("Fils", "FILS") + .put("Poisha", "POISHA") + .put("Kapyeyka", "KAPYEYKA") + .put("Centime", "CENTIME") + .put("Chetrum", "CHETRUM") + .put("Paisa", "PAISA") + .put("Fening", "FENING") + .put("Thebe", "THEBE") + .put("Sen", "SEN") + .put("Stotinka", "STOTINKA") + .put("Fen", "FEN") + .put("Céntimo", "CENT") + .put("Lipa", "LIPA") + .put("Haléř", "HALER") + .put("Øre", "ØRE") + .put("Piastre", "PIASTRE") + .put("Santim", "SANTIM") + .put("Oyra", "OYRA") + .put("Butut", "BUTUT") + .put("Tetri", "TETRI") + .put("Pesewa", "PESEWA") + .put("Fillér", "FILLER") + .put("Eyrir", "EYRIR") + .put("Dinar", "DINAR") + .put("Agora", "AGORA") + .put("Tïın", "TIIN") + .put("Chon", "CHON") + .put("Jeon", "JEON") + .put("Tyiyn", "TYIYN") + .put("Att", "ATT") + .put("Sente", "SENTE") + .put("Dirham", "DIRHAM") + .put("Rappen", "RAPPEN") + .put("Avo", "AVO") + .put("Deni", "DENI") + .put("Iraimbilanja", "IRAIMBILANJA") + .put("Tambala", "TAMBALA") + .put("Laari", "LAARI") + .put("Khoums", "KHOUMS") + .put("Ban", "BAN") + .put("Möngö", "MONGO") + .put("Pya", "PYA") + .put("Kobo", "KOBO") + .put("Kuruş", "KURUS") + .put("Baisa", "BAISA") + .put("Centésimo", "CENTESIMO") + .put("Toea", "TOEA") + .put("Sentimo", "SENTIMO") + .put("Grosz", "GROSZ") + .put("Sene", "SENE") + .put("Halala", "HALALA") + .put("Para", "PARA") + .put("Öre", "ORE") + .put("Diram", "DIRAM") + .put("Satang", "SATANG") + .put("Seniti", "SENITI") + .put("Millime", "MILLIME") + .put("Tennesi", "TENNESI") + .put("Kopiyka", "KOPIYKA") + .put("Tiyin", "TIYIN") + .put("Hào", "HAO") + .put("Ngwee", "NGWEE") + .put("Millibitcoin", "MILLIBITCOIN") + .put("Satoshi", "SATOSHI") + .build(); + + public static final String CompoundUnitConnectorRegex = "(?e)"; public static final ImmutableMap CurrencyPrefixList = ImmutableMap.builder() .put("Dólar", "$") @@ -385,6 +654,7 @@ public class PortugueseNumericWithUnit { .put("Libra", "£") .put("Colón costarriquenho", "₡") .put("Lira turca", "₺") + .put("Bitcoin", "₿|btc|xbt") .build(); public static final List AmbiguousCurrencyUnitList = Arrays.asList("le", "agora"); diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java index 198cdea8..24aae9db 100644 --- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java +++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java @@ -45,7 +45,7 @@ public class SpanishNumericWithUnit { public static final List AreaAmbiguousValues = Arrays.asList("área", "áreas"); public static final ImmutableMap CurrencySuffixList = ImmutableMap.builder() - .put("Dólar", "dólar|dólares") + .put("Dólar", "dólar|dólares|dolar|dolares") .put("Peso", "peso|pesos") .put("Rublo", "rublo|rublos") .put("Libra", "libra|libras") @@ -353,9 +353,277 @@ public class SpanishNumericWithUnit { .put("Dinar yugoslavo", "dinar yugoslavo|dinares yugoslavos|yud") .put("Kwacha zambiano", "kwacha zambiano|kwacha zambianos|zmw") .put("Ngwee zambiano", "ngwee zambiano|ngwee zambianos") + .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿") + .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins") + .put("Satoshi", "satoshi|satoshis") .build(); - public static final String CompoundUnitConnectorRegex = "(?[^.])"; + public static final ImmutableMap CurrencyNameToIsoCodeMap = ImmutableMap.builder() + .put("Afgani afgano", "AFN") + .put("Euro", "EUR") + .put("Lek albanés", "ALL") + .put("Kwanza angoleño", "AOA") + .put("Dram armenio", "AMD") + .put("Florín arubeño", "AWG") + .put("Taka bangladeshí", "BDT") + .put("Ngultrum butanés", "BTN") + .put("Boliviano", "BOB") + .put("Marco bosnioherzegovino", "BAM") + .put("Pula", "BWP") + .put("Real brasileño", "BRL") + .put("Lev búlgaro", "BGN") + .put("Riel camboyano", "KHR") + .put("Escudo caboverdiano", "CVE") + .put("Colón costarricense", "CRC") + .put("Kuna croata", "HRK") + .put("Corona checa", "CZK") + .put("Nakfa", "ERN") + .put("Birr etíope", "ETB") + .put("Dalasi", "GMD") + .put("Lari georgiano", "GEL") + .put("Cedi", "GHS") + .put("Quetzal guatemalteco", "GTQ") + .put("Gourde haitiano", "HTG") + .put("Lempira hondureño", "HNL") + .put("Forinto húngaro", "HUF") + .put("Rial iraní", "IRR") + .put("Rial yemení", "YER") + .put("Nuevo shéquel", "ILS") + .put("Yen", "JPY") + .put("Tenge kazajo", "KZT") + .put("Chelín keniano", "KES") + .put("Won norcoreano", "KPW") + .put("Won surcoreano", "KRW") + .put("Som kirguís", "KGS") + .put("Kip laosiano", "LAK") + .put("Loti", "LSL") + .put("Rand sudafricano", "ZAR") + .put("Pataca macaense", "MOP") + .put("Denar macedonio", "MKD") + .put("Ariary malgache", "MGA") + .put("Kwacha malauí", "MWK") + .put("Ringgit malayo", "MYR") + .put("Uguiya", "MRO") + .put("Tugrik mongol", "MNT") + .put("Metical mozambiqueño", "MZN") + .put("Kyat birmano", "MMK") + .put("Córdoba nicaragüense", "NIO") + .put("Naira", "NGN") + .put("Lira turca", "TRY") + .put("Rial omaní", "OMR") + .put("Balboa panameño", "PAB") + .put("Kina", "PGK") + .put("Guaraní", "PYG") + .put("Sol", "PEN") + .put("Złoty", "PLN") + .put("Riyal qatarí", "QAR") + .put("Riyal saudí", "SAR") + .put("Tala", "WST") + .put("São Tomé and Príncipe dobra", "STN") + .put("Leone", "SLL") + .put("Lilangeni", "SZL") + .put("Somoni tayiko", "TJS") + .put("Baht tailandés", "THB") + .put("Grivna", "UAH") + .put("Vatu", "VUV") + .put("Bolívar fuerte", "VEF") + .put("Kwacha zambiano", "ZMW") + .put("Dirham marroquí", "MAD") + .put("Dirham de los Emiratos Árabes Unidos", "AED") + .put("Manat azerí", "AZN") + .put("Manat turkmeno", "TMT") + .put("Chelín somalí", "SOS") + .put("Chelín tanzano", "TZS") + .put("Chelín ugandés", "UGX") + .put("Leu rumano", "RON") + .put("Leu moldavo", "MDL") + .put("Rupia nepalí", "NPR") + .put("Rupia pakistaní", "PKR") + .put("Rupia india", "INR") + .put("Rupia de Seychelles", "SCR") + .put("Rupia de Mauricio", "MUR") + .put("Rupia de Maldivas", "MVR") + .put("Rupia de Sri Lanka", "LKR") + .put("Rupia indonesia", "IDR") + .put("Corona danesa", "DKK") + .put("Corona noruega", "NOK") + .put("Corona islandesa", "ISK") + .put("Corona sueca", "SEK") + .put("Franco CFA de África Occidental", "XOF") + .put("Franco CFA de África Central", "XAF") + .put("Franco comorano", "KMF") + .put("Franco congoleño", "CDF") + .put("Burundian franc", "BIF") + .put("Franco yibutiano", "DJF") + .put("Franco CFP", "XPF") + .put("Franco guineano", "GNF") + .put("Franco suizo", "CHF") + .put("Franco ruandés", "RWF") + .put("Rublo ruso", "RUB") + .put("Transnistrian ruble", "PRB") + .put("New Belarusian ruble", "BYN") + .put("Dinar argelino", "DZD") + .put("Dinar bahreiní", "BHD") + .put("Dinar iraquí", "IQD") + .put("Dinar jordano", "JOD") + .put("Kuwaiti dinar", "KWD") + .put("Dinar libio", "LYD") + .put("Dinar serbio", "RSD") + .put("Dinar tunecino", "TND") + .put("Peso argentino", "ARS") + .put("Chilean peso", "CLP") + .put("Peso colombiano", "COP") + .put("Peso cubano convertible", "CUC") + .put("Peso cubano", "CUP") + .put("Peso dominicano", "DOP") + .put("Peso mexicano", "MXN") + .put("Peso uruguayo", "UYU") + .put("Libra esterlina", "GBP") + .put("Libra de Santa Helena", "SHP") + .put("Libra egipcia", "EGP") + .put("Libra malvinense", "FKP") + .put("Libra gibraltareña", "GIP") + .put("Libra manesa", "IMP") + .put("Libra de Jersey", "JEP") + .put("Libra libanesa", "LBP") + .put("Libra sursudanesa", "SSP") + .put("Libra sudanesa", "SDG") + .put("Libra siria", "SYP") + .put("Dólar estadounidense", "USD") + .put("Dólar australiano", "AUD") + .put("Dólar bahameño", "BSD") + .put("Dólar de Barbados", "BBD") + .put("Dólar beliceño", "BZD") + .put("Dólar bermudeño", "BMD") + .put("Dólar de Brunéi", "BND") + .put("Singapore dollar", "SGD") + .put("Dólar canadiense", "CAD") + .put("Dólar de las Islas Caimán", "KYD") + .put("Dólar neozelandés", "NZD") + .put("Dólar fiyiano", "FJD") + .put("Dólar guyanés", "GYD") + .put("Dólar de Hong Kong", "HKD") + .put("Dólar jamaiquino", "JMD") + .put("Dólar liberiano", "LRD") + .put("Dólar namibio", "NAD") + .put("Dólar de las Islas Salomón", "SBD") + .put("Dólar surinamés", "SRD") + .put("Nuevo dólar taiwanés", "TWD") + .put("Dólar trinitense", "TTD") + .put("Tuvaluan dollar", "TVD") + .put("Yuan chino", "CNY") + .put("Rial", "__RI") + .put("Chelín", "__S") + .put("Som", "__SO") + .put("Dirham", "__DR") + .put("Dinar", "_DN") + .put("Dólar", "__D") + .put("Manat", "__MA") + .put("Rupia", "__R") + .put("Corona", "__K") + .put("Franco", "__F") + .put("Marco", "__M") + .put("Rublo", "__RB") + .put("Peso", "__PE") + .put("Libra", "__P") + .put("Tristan da Cunha libra", "_TP") + .put("South Georgia and the South Sandwich Islands libra", "_SP") + .put("Somaliland chelín", "_SS") + .put("Pitcairn Islands dólar", "_PND") + .put("Palauan dólar", "_PD") + .put("Niue dólar", "_NID") + .put("Nauruan dólar", "_ND") + .put("Micronesian dólar", "_MD") + .put("Kiribati dólar", "_KID") + .put("Guernsey libra", "_GGP") + .put("Faroese corona", "_FOK") + .put("Cook Islands dólar", "_CKD") + .put("British Virgin Islands dólar", "_BD") + .put("Ascension libra", "_AP") + .put("Alderney libra", "_ALP") + .put("Abkhazian apsar", "_AA") + .put("Bitcoin", "_XBT") + .build(); + + public static final ImmutableMap FractionalUnitNameToCodeMap = ImmutableMap.builder() + .put("Jiao", "JIAO") + .put("Kópek", "KOPEK") + .put("Kopek", "KOPEK") + .put("Pul", "PUL") + .put("Cent", "CENT") + .put("Qindarka", "QINDARKE") + .put("Penique", "PENNY") + .put("Santeem", "SANTEEM") + .put("Cêntimo", "CENT") + .put("Centavo", "CENT") + .put("Luma", "LUMA") + .put("Qəpik", "QƏPIK") + .put("Fils", "FILS") + .put("Poisha", "POISHA") + .put("Centime", "CENTIME") + .put("Chetrum", "CHETRUM") + .put("Paisa", "PAISA") + .put("Feningas", "FENING") + .put("Thebe", "THEBE") + .put("Sen", "SEN") + .put("Stotinka", "STOTINKA") + .put("Fen", "FEN") + .put("Céntimo", "CENT") + .put("Lipa", "LIPA") + .put("Haléř", "HALER") + .put("Øre", "ØRE") + .put("Piastre", "PIASTRE") + .put("Santim", "SANTIM") + .put("Oyra", "OYRA") + .put("Butut", "BUTUT") + .put("Tetri", "TETRI") + .put("Pesewa", "PESEWA") + .put("Fillér", "FILLER") + .put("Eyrir", "EYRIR") + .put("Dinar", "DINAR") + .put("Agora", "AGORA") + .put("Tïın", "TIIN") + .put("Chon", "CHON") + .put("Jeon", "JEON") + .put("Tyiyn", "TYIYN") + .put("Att", "ATT") + .put("Sente", "SENTE") + .put("Dirham", "DIRHAM") + .put("Rappen", "RAPPEN") + .put("Avo", "AVO") + .put("Deni", "DENI") + .put("Iraimbilanja", "IRAIMBILANJA") + .put("Tambala", "TAMBALA") + .put("Laari", "LAARI") + .put("Khoums", "KHOUMS") + .put("Ban", "BAN") + .put("Möngö", "MONGO") + .put("Pya", "PYA") + .put("Kobo", "KOBO") + .put("Kuruş", "KURUS") + .put("Baisa", "BAISA") + .put("Centésimo", "CENTESIMO") + .put("Toea", "TOEA") + .put("Sentimo", "SENTIMO") + .put("Grosz", "GROSZ") + .put("Sene", "SENE") + .put("Halala", "HALALA") + .put("Para", "PARA") + .put("Öre", "ORE") + .put("Diram", "DIRAM") + .put("Satang", "SATANG") + .put("Seniti", "SENITI") + .put("Millime", "MILLIME") + .put("Tennesi", "TENNESI") + .put("Kopiyka", "KOPIYKA") + .put("Tiyin", "TIYIN") + .put("Hào", "HAO") + .put("Ngwee", "NGWEE") + .put("Millibitcoin", "MILLIBITCOIN") + .put("Satoshi", "SATOSHI") + .build(); + + public static final String CompoundUnitConnectorRegex = "(?y)"; public static final ImmutableMap CurrencyPrefixList = ImmutableMap.builder() .put("Dobra", "db|std") @@ -390,6 +658,7 @@ public class SpanishNumericWithUnit { .put("Libra", "£") .put("Colón costarricense", "₡") .put("Lira turca", "₺") + .put("Bitcoin", "₿|btc|xbt") .build(); public static final List AmbiguousCurrencyUnitList = Arrays.asList("le", "db", "std");