[SDK][Bot-Dialogs] Update Recognizers-Text internal library (#1021)
* Update Recognizers-Text Choice library * Update Recognizers-Text Datetime library * Update Recognizers-Text Number library * Update Recognizers-Text NumberWithUnit library * Update Recognizers-Text Main library * Add Recognizers-Text Expression library * Add Recognizers-Text Sequence library * Remove deprecated workaround as it's fixed in RT
This commit is contained in:
Родитель
0a7c6c1f90
Коммит
349ab351af
|
@ -131,7 +131,7 @@ public final class ChoiceRecognizers {
|
|||
return result.stream().map(r ->
|
||||
new ModelResult<FoundChoice>() {{
|
||||
setStart(r.start);
|
||||
setEnd(r.end - 1); // bug in 1.0-SNAPSHOT, should not have to decrement
|
||||
setEnd(r.end);
|
||||
setText(r.text);
|
||||
setResolution(new FoundChoice() {{
|
||||
setValue(r.resolution.get("value").toString());
|
||||
|
|
|
@ -17,7 +17,11 @@ public class ChineseChoice {
|
|||
|
||||
public static final String TokenizerRegex = "[^\\u3040-\\u30ff\\u3400-\\u4dbf\\u4e00-\\u9fff\\uf900-\\ufaff\\uff66-\\uff9f]";
|
||||
|
||||
public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
|
||||
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
|
||||
|
||||
public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
|
||||
public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
|
||||
public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,11 @@ public class EnglishChoice {
|
|||
|
||||
public static final String TokenizerRegex = "[^\\w\\d]";
|
||||
|
||||
public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c)";
|
||||
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
|
||||
|
||||
public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590)";
|
||||
public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
|
||||
public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,11 @@ public class FrenchChoice {
|
|||
|
||||
public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";
|
||||
|
||||
public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
|
||||
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
|
||||
|
||||
public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
|
||||
public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
|
||||
public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,11 @@ public class PortugueseChoice {
|
|||
|
||||
public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";
|
||||
|
||||
public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
|
||||
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
|
||||
|
||||
public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
|
||||
public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
|
||||
public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,11 @@ public class SpanishChoice {
|
|||
|
||||
public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";
|
||||
|
||||
public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
|
||||
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
|
||||
|
||||
public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
|
||||
public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
|
||||
public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
|
||||
.replace("{SkinToneRegex}", SkinToneRegex);
|
||||
}
|
||||
|
|
|
@ -1231,8 +1231,11 @@ public class EnglishDateTime {
|
|||
|
||||
public static final ImmutableMap<String, Integer> DayOfMonth = ImmutableMap.<String, Integer>builder()
|
||||
.put("1st", 1)
|
||||
.put("1th", 1)
|
||||
.put("2nd", 2)
|
||||
.put("2th", 2)
|
||||
.put("3rd", 3)
|
||||
.put("3th", 3)
|
||||
.put("4th", 4)
|
||||
.put("5th", 5)
|
||||
.put("6th", 6)
|
||||
|
@ -1268,8 +1271,11 @@ public class EnglishDateTime {
|
|||
.put("30th", 30)
|
||||
.put("31st", 31)
|
||||
.put("01st", 1)
|
||||
.put("01th", 1)
|
||||
.put("02nd", 2)
|
||||
.put("02th", 2)
|
||||
.put("03rd", 3)
|
||||
.put("03th", 3)
|
||||
.put("04th", 4)
|
||||
.put("05th", 5)
|
||||
.put("06th", 6)
|
||||
|
|
|
@ -369,6 +369,16 @@ public class SpanishDateTime {
|
|||
|
||||
public static final String PmTimeRegex = "(?<pm>(esta|(por|de|a|en)\\s+la)\\s+(tarde|noche))";
|
||||
|
||||
public static final String NightTimeRegex = "(noche)";
|
||||
|
||||
public static final String LastNightTimeRegex = "(anoche)";
|
||||
|
||||
public static final String NowTimeRegex = "(ahora|mismo|momento)";
|
||||
|
||||
public static final String RecentlyTimeRegex = "(mente)";
|
||||
|
||||
public static final String AsapTimeRegex = "(posible|pueda[ns]?|podamos)";
|
||||
|
||||
public static final String LessThanOneHour = "(?<lth>((\\s+y\\s+)?cuarto|(\\s*)menos cuarto|(\\s+y\\s+)media|{BaseDateTime.DeltaMinuteRegex}(\\s+(minutos?|mins?))|{DeltaMinuteNumRegex}(\\s+(minutos?|mins?))))"
|
||||
.replace("{BaseDateTime.DeltaMinuteRegex}", BaseDateTime.DeltaMinuteRegex)
|
||||
.replace("{DeltaMinuteNumRegex}", DeltaMinuteNumRegex);
|
||||
|
|
|
@ -40,6 +40,7 @@ public class SpanishDateTimeParserConfiguration extends BaseOptionsConfiguration
|
|||
public final Pattern nowRegex;
|
||||
public final Pattern amTimeRegex;
|
||||
public final Pattern pmTimeRegex;
|
||||
public final Pattern lastNightTimeRegex;
|
||||
public final Pattern simpleTimeOfTodayAfterRegex;
|
||||
public final Pattern simpleTimeOfTodayBeforeRegex;
|
||||
public final Pattern specificTimeOfDayRegex;
|
||||
|
@ -80,6 +81,7 @@ public class SpanishDateTimeParserConfiguration extends BaseOptionsConfiguration
|
|||
|
||||
pmTimeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.PmRegex);
|
||||
amTimeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.AmTimeRegex);
|
||||
lastNightTimeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.LastNightTimeRegex);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -122,12 +124,18 @@ public class SpanishDateTimeParserConfiguration extends BaseOptionsConfiguration
|
|||
Matcher regexMatcher = SpanishDatePeriodParserConfiguration.previousPrefixRegex.matcher(trimmedText);
|
||||
|
||||
int swift = 0;
|
||||
|
||||
if (regexMatcher.find()) {
|
||||
swift = 1;
|
||||
swift = -1;
|
||||
} else {
|
||||
regexMatcher = SpanishDatePeriodParserConfiguration.nextPrefixRegex.matcher(trimmedText);
|
||||
regexMatcher = this.lastNightTimeRegex.matcher(trimmedText);
|
||||
if (regexMatcher.find()) {
|
||||
swift = -1;
|
||||
} else {
|
||||
regexMatcher = SpanishDatePeriodParserConfiguration.nextPrefixRegex.matcher(trimmedText);
|
||||
if (regexMatcher.find()) {
|
||||
swift = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -332,7 +332,7 @@ public class SpanishDateTimePeriodParserConfiguration extends BaseOptionsConfigu
|
|||
Matcher regexMatcher = regex.matcher(trimmedText);
|
||||
|
||||
int swift = 0;
|
||||
if (regexMatcher.find() || trimmedText.equals("anoche")) {
|
||||
if (regexMatcher.find() || trimmedText.startsWith("anoche")) {
|
||||
swift = -1;
|
||||
} else {
|
||||
regex = Pattern.compile(SpanishDateTime.NextPrefixRegex);
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package com.microsoft.recognizers.text.datetime.utilities;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.TimexHelpers;
|
||||
import com.microsoft.recognizers.text.datetime.Constants;
|
||||
import com.microsoft.recognizers.text.datetime.DatePeriodTimexType;
|
||||
import com.microsoft.recognizers.text.datetime.DateTimeResolutionKey;
|
||||
|
@ -14,6 +15,7 @@ import java.util.ArrayList;
|
|||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class TimexUtility {
|
||||
private static final HashMap<DatePeriodTimexType, String> DatePeriodTimexTypeToTimexSuffix = new HashMap<DatePeriodTimexType, String>() {
|
||||
|
@ -28,33 +30,14 @@ public class TimexUtility {
|
|||
public static String generateCompoundDurationTimex(Map<String, String> unitToTimexComponents, ImmutableMap<String, Long> unitValueMap) {
|
||||
List<String> unitList = new ArrayList<>(unitToTimexComponents.keySet());
|
||||
unitList.sort((x, y) -> unitValueMap.get(x) < unitValueMap.get(y) ? 1 : -1);
|
||||
boolean isTimeDurationAlreadyExist = false;
|
||||
StringBuilder timexBuilder = new StringBuilder(Constants.GeneralPeriodPrefix);
|
||||
|
||||
for (String unitKey : unitList) {
|
||||
String timexComponent = unitToTimexComponents.get(unitKey);
|
||||
|
||||
// The Time Duration component occurs first time
|
||||
if (!isTimeDurationAlreadyExist && isTimeDurationTimex(timexComponent)) {
|
||||
timexBuilder.append(Constants.TimeTimexPrefix);
|
||||
timexBuilder.append(getDurationTimexWithoutPrefix(timexComponent));
|
||||
isTimeDurationAlreadyExist = true;
|
||||
} else {
|
||||
timexBuilder.append(getDurationTimexWithoutPrefix(timexComponent));
|
||||
}
|
||||
}
|
||||
return timexBuilder.toString();
|
||||
unitList = unitList.stream().map(t -> unitToTimexComponents.get(t)).collect(Collectors.toList());
|
||||
return TimexHelpers.generateCompoundDurationTimex(unitList);
|
||||
}
|
||||
|
||||
private static boolean isTimeDurationTimex(String timex) {
|
||||
private static Boolean isTimeDurationTimex(String timex) {
|
||||
return timex.startsWith(Constants.GeneralPeriodPrefix + Constants.TimeTimexPrefix);
|
||||
}
|
||||
|
||||
private static String getDurationTimexWithoutPrefix(String timex) {
|
||||
// Remove "PT" prefix for TimeDuration, Remove "P" prefix for DateDuration
|
||||
return timex.substring(isTimeDurationTimex(timex) ? 2 : 1);
|
||||
}
|
||||
|
||||
public static String getDatePeriodTimexUnitCount(LocalDateTime begin, LocalDateTime end,
|
||||
DatePeriodTimexType timexType, Boolean equalDurationLength) {
|
||||
String unitCount = "XX";
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
/**
 * Literal values shared by the TIMEX expression data types: unit designators,
 * fuzzy placeholders, separators, unit names and the names of the TIMEX types.
 */
public class Constants {

    // ---- TIMEX unit designators ----
    // Note: TIMEX_MONTH and TIMEX_MINUTE intentionally carry the same letter "M".
    public static final String TIMEX_YEAR = "Y";
    public static final String TIMEX_MONTH = "M";
    public static final String TIMEX_MONTH_FULL = "MON";
    public static final String TIMEX_WEEK = "W";
    public static final String TIMEX_DAY = "D";
    public static final String TIMEX_BUSINESS_DAY = "BD";
    public static final String TIMEX_WEEKEND = "WE";
    public static final String TIMEX_HOUR = "H";
    public static final String TIMEX_MINUTE = "M";
    public static final String TIMEX_SECOND = "S";
    public static final String TIMEX_NIGHT = "NI";

    // ---- Placeholders used when part of a TIMEX is unspecified ----
    public static final Character TIMEX_FUZZY = 'X';
    public static final String TIMEX_FUZZY_YEAR = "XXXX";
    public static final String TIMEX_FUZZY_MONTH = "XX";
    public static final String TIMEX_FUZZY_WEEK = "WXX";
    public static final String TIMEX_FUZZY_DAY = "XX";

    // ---- Separators and prefixes ----
    public static final String DATE_TIMEX_CONNECTOR = "-";
    public static final String TIME_TIMEX_CONNECTOR = ":";
    public static final String GENERAL_PERIOD_PREFIX = "P";
    public static final String TIME_TIMEX_PREFIX = "T";

    // ---- Unit names ----
    public static final String YEAR_UNIT = "year";
    public static final String MONTH_UNIT = "month";
    public static final String WEEK_UNIT = "week";
    public static final String DAY_UNIT = "day";
    public static final String HOUR_UNIT = "hour";
    public static final String MINUTE_UNIT = "minute";
    public static final String SECOND_UNIT = "second";
    public static final String TIME_DURATION_UNIT = "s";

    // ---- Day-half markers ----
    public static final String AM = "AM";
    public static final String PM = "PM";

    /** Sentinel for "no valid value". */
    public static final int INVALID_VALUE = -1;

    /** Names of the types a TIMEX value can be classified as. */
    public static class TimexTypes {
        public static final String PRESENT = "present";
        public static final String DEFINITE = "definite";
        public static final String DATE = "date";
        public static final String DATE_TIME = "datetime";
        public static final String DATE_RANGE = "daterange";
        public static final String DURATION = "duration";
        public static final String TIME = "time";
        public static final String TIME_RANGE = "timerange";
        public static final String DATE_TIME_RANGE = "datetimerange";
    }
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
public class DateRange {
|
||||
private LocalDateTime start;
|
||||
private LocalDateTime end;
|
||||
|
||||
public LocalDateTime getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public void setStart(LocalDateTime withStart) {
|
||||
this.start = withStart;
|
||||
}
|
||||
|
||||
public LocalDateTime getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
public void setEnd(LocalDateTime withEnd) {
|
||||
this.end = withEnd;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Result container holding a list of resolved {@link Entry} values.
 * The list starts empty and is exposed directly, so callers add entries through
 * {@link #getValues()}.
 */
public class Resolution {
    private List<Entry> values = new ArrayList<>();

    /** Creates a resolution with an empty value list. */
    public Resolution() {
    }

    /** @return the live, mutable list of entries held by this resolution */
    public List<Entry> getValues() {
        return values;
    }

    /**
     * One resolved value. Every field is a plain string and is {@code null}
     * until its setter is called.
     */
    public static class Entry {
        private String timex;
        private String type;
        private String value;
        private String start;
        private String end;

        public String getTimex() {
            return this.timex;
        }

        public void setTimex(String withTimex) {
            timex = withTimex;
        }

        public String getType() {
            return this.type;
        }

        public void setType(String withType) {
            type = withType;
        }

        public String getValue() {
            return this.value;
        }

        public void setValue(String withValue) {
            value = withValue;
        }

        public String getStart() {
            return this.start;
        }

        public void setStart(String withStart) {
            start = withStart;
        }

        public String getEnd() {
            return this.end;
        }

        public void setEnd(String withEnd) {
            end = withEnd;
        }
    }
}
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
/**
 * A time of day stored as separate hour, minute and second components.
 *
 * <p>The single-value form of a time used by this class ({@link #getTime()} and
 * {@link #Time(Integer)}) is a count of <em>milliseconds</em>, not seconds.
 */
public class Time {
    private static final int MILLIS_PER_SECOND = 1000;
    private static final int MILLIS_PER_MINUTE = 60000;
    private static final int MILLIS_PER_HOUR = 3600000;

    private Integer hour;

    private Integer minute;

    private Integer second;

    /**
     * Builds a time by splitting a millisecond count into hours, minutes and seconds.
     * Any remainder below one second is discarded.
     *
     * @param withMilliseconds total milliseconds to decompose (the inverse of {@link #getTime()})
     */
    public Time(Integer withMilliseconds) {
        int total = withMilliseconds;
        // floorDiv matches the floor-based rounding of the hour/minute split.
        int wholeHours = Math.floorDiv(total, MILLIS_PER_HOUR);
        int afterHours = total - (wholeHours * MILLIS_PER_HOUR);
        int wholeMinutes = Math.floorDiv(afterHours, MILLIS_PER_MINUTE);

        this.hour = wholeHours;
        this.minute = wholeMinutes;
        this.second = (afterHours - (wholeMinutes * MILLIS_PER_MINUTE)) / MILLIS_PER_SECOND;
    }

    /**
     * Builds a time from explicit components; the values are stored as given.
     *
     * @param withHour   hour component
     * @param withMinute minute component
     * @param withSecond second component
     */
    public Time(Integer withHour, Integer withMinute, Integer withSecond) {
        this.hour = withHour;
        this.minute = withMinute;
        this.second = withSecond;
    }

    /**
     * @return this time expressed as a single millisecond count
     *         ({@code second * 1000 + minute * 60000 + hour * 3600000})
     */
    public Integer getTime() {
        return (this.second * MILLIS_PER_SECOND) + (this.minute * MILLIS_PER_MINUTE) + (this.hour * MILLIS_PER_HOUR);
    }

    public Integer getHour() {
        return hour;
    }

    public void setHour(Integer withHour) {
        this.hour = withHour;
    }

    public Integer getMinute() {
        return minute;
    }

    public void setMinute(Integer withMinute) {
        this.minute = withMinute;
    }

    public Integer getSecond() {
        return second;
    }

    public void setSecond(Integer withSecond) {
        this.second = withSecond;
    }
}
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
public class TimeRange {
|
||||
private Time start;
|
||||
|
||||
private Time end;
|
||||
|
||||
public Time getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public void setStart(Time withStart) {
|
||||
this.start = withStart;
|
||||
}
|
||||
|
||||
public Time getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
public void setEnd(Time withEnd) {
|
||||
this.end = withEnd;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class TimexConstraintsHelper {
|
||||
public static List<TimeRange> collapseTimeRanges(List<TimeRange> ranges) {
|
||||
List<TimeRange> r = ranges;
|
||||
|
||||
while (TimexConstraintsHelper.innerCollapseTimeRanges(r)) {
|
||||
|
||||
}
|
||||
|
||||
r.sort((a, b) -> a.getStart().getTime() - b.getStart().getTime());
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
public static List<DateRange> collapseDateRanges(List<DateRange> ranges) {
|
||||
List<DateRange> r = ranges;
|
||||
|
||||
while (TimexConstraintsHelper.innerCollapseDateRanges(r)) {
|
||||
|
||||
}
|
||||
|
||||
r.sort((a, b) -> a.getStart().compareTo(b.getStart()));
|
||||
return r;
|
||||
}
|
||||
|
||||
public static Boolean isOverlapping(TimeRange r1, TimeRange r2) {
|
||||
return (r1.getEnd().getTime() > r2.getStart().getTime() && r1.getStart().getTime() <= r2.getStart().getTime()) ||
|
||||
(r1.getStart().getTime() < r2.getEnd().getTime() &&
|
||||
r1.getStart().getTime() >= r2.getStart().getTime());
|
||||
}
|
||||
|
||||
private static Boolean isOverlapping(DateRange r1, DateRange r2) {
|
||||
return (r1.getEnd().isAfter(r2.getStart()) && (r1.getStart().isBefore(r2.getStart()) || r1.getStart().isEqual(r2.getStart()))) ||
|
||||
(r1.getStart().isBefore(r2.getEnd()) && (r1.getStart().isAfter(r2.getStart()) || r1.getStart().isEqual(r2.getStart())));
|
||||
}
|
||||
|
||||
private static TimeRange collapseOverlapping(TimeRange r1, TimeRange r2) {
|
||||
return new TimeRange() {
|
||||
{
|
||||
setStart(new Time(Math.max(r1.getStart().getTime(), r2.getStart().getTime())));
|
||||
setEnd(new Time(Math.min(r1.getEnd().getTime(), r2.getEnd().getTime())));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static DateRange collapseOverlapping(DateRange r1, DateRange r2) {
|
||||
return new DateRange() {
|
||||
{
|
||||
setStart(r1.getStart().compareTo(r2.getStart()) > 0 ? r1.getStart() : r2.getStart());
|
||||
setEnd(r1.getEnd().compareTo(r2.getEnd()) < 0 ? r1.getEnd() : r2.getEnd());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static Boolean innerCollapseTimeRanges(List<TimeRange> ranges) {
|
||||
if (ranges.size() == 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ranges.size(); i++) {
|
||||
TimeRange r1 = ranges.get(i);
|
||||
for (int j = i + 1; j < ranges.size(); j++) {
|
||||
TimeRange r2 = ranges.get(j);
|
||||
if (TimexConstraintsHelper.isOverlapping(r1, r2)) {
|
||||
ranges.subList(i, 1).clear();
|
||||
ranges.subList(j - 1, 1).clear();
|
||||
ranges.add(TimexConstraintsHelper.collapseOverlapping(r1, r2));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private static Boolean innerCollapseDateRanges(List<DateRange> ranges) {
|
||||
if (ranges.size() == 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ranges.size(); i++) {
|
||||
DateRange r1 = ranges.get(i);
|
||||
for (int j = i + 1; j < ranges.size(); j++) {
|
||||
DateRange r2 = ranges.get(j);
|
||||
if (TimexConstraintsHelper.isOverlapping(r1, r2)) {
|
||||
ranges.subList(i, 1).clear();
|
||||
ranges.subList(j - 1, 1).clear();
|
||||
ranges.add(TimexConstraintsHelper.collapseOverlapping(r1, r2));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.english.TimexConvertEnglish;
|
||||
|
||||
public class TimexConvert {
|
||||
public static String convertTimexToString(TimexProperty timex) {
|
||||
return TimexConvertEnglish.convertTimexToString(timex);
|
||||
}
|
||||
|
||||
public static String convertTimexSetToString(TimexSet timexSet) {
|
||||
return TimexConvertEnglish.convertTimexSetToString(timexSet);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.DayOfWeek;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
public class TimexCreator {
|
||||
// The following constants are consistent with the Recognizer results
|
||||
public static final String MONDAY = "XXXX-WXX-1";
|
||||
public static final String TUESDAY = "XXXX-WXX-2";
|
||||
public static final String WEDNESDAY = "XXXX-WXX-3";
|
||||
public static final String THURSDAY = "XXXX-WXX-4";
|
||||
public static final String FRIDAY = "XXXX-WXX-5";
|
||||
public static final String SATURDAY = "XXXX-WXX-6";
|
||||
public static final String SUNDAY = "XXXX-WXX-7";
|
||||
public static final String MORNING = "(T08,T12,PT4H)";
|
||||
public static final String AFTERNOON = "(T12,T16,PT4H)";
|
||||
public static final String EVENING = "(T16,T20,PT4H)";
|
||||
public static final String DAYTIME = "(T08,T18,PT10H)";
|
||||
public static final String NIGHT = "(T20,T24,PT10H)";
|
||||
|
||||
public static String today(LocalDateTime date) {
|
||||
return TimexProperty.fromDate(date == null ? LocalDateTime.now() : date).getTimexValue();
|
||||
}
|
||||
|
||||
public static String tomorrow(LocalDateTime date) {
|
||||
LocalDateTime d = (date == null) ? LocalDateTime.now() : date;
|
||||
d = d.plusDays(1);
|
||||
return TimexProperty.fromDate(d).getTimexValue();
|
||||
}
|
||||
|
||||
public static String yesterday(LocalDateTime date) {
|
||||
LocalDateTime d = (date == null) ? LocalDateTime.now() : date;
|
||||
d = d.plusDays(-1);
|
||||
return TimexProperty.fromDate(d).getTimexValue();
|
||||
}
|
||||
|
||||
public static String weekFromToday(LocalDateTime date) {
|
||||
LocalDateTime d = (date == null) ? LocalDateTime.now() : date;
|
||||
TimexProperty t = TimexProperty.fromDate(d);
|
||||
t.setDays(new BigDecimal(7));
|
||||
return t.getTimexValue();
|
||||
}
|
||||
|
||||
public static String weekBackFromToday(LocalDateTime date) {
|
||||
LocalDateTime d = (date == null) ? LocalDateTime.now() : date;
|
||||
d = d.plusDays(-7);
|
||||
TimexProperty t = TimexProperty.fromDate(d);
|
||||
t.setDays(new BigDecimal(7));
|
||||
return t.getTimexValue();
|
||||
}
|
||||
|
||||
public static String thisWeek(LocalDateTime date) {
|
||||
LocalDateTime d = (date == null) ? LocalDateTime.now() : date;
|
||||
d = d.plusDays(-7);
|
||||
LocalDateTime start = TimexDateHelpers.dateOfNextDay(DayOfWeek.MONDAY, d);
|
||||
TimexProperty t = TimexProperty.fromDate(start);
|
||||
t.setDays(new BigDecimal(7));
|
||||
return t.getTimexValue();
|
||||
}
|
||||
|
||||
public static String nextWeek(LocalDateTime date) {
|
||||
LocalDateTime d = (date == null) ? LocalDateTime.now() : date;
|
||||
LocalDateTime start = TimexDateHelpers.dateOfNextDay(DayOfWeek.MONDAY, d);
|
||||
TimexProperty t = TimexProperty.fromDate(start);
|
||||
t.setDays(new BigDecimal(7));
|
||||
return t.getTimexValue();
|
||||
}
|
||||
|
||||
public static String lastWeek(LocalDateTime date) {
|
||||
LocalDateTime d = (date == null) ? LocalDateTime.now() : date;
|
||||
LocalDateTime start = TimexDateHelpers.dateOfLastDay(DayOfWeek.MONDAY, d);
|
||||
start = start.plusDays(-7);
|
||||
TimexProperty t = TimexProperty.fromDate(start);
|
||||
t.setDays(new BigDecimal(7));
|
||||
return t.getTimexValue();
|
||||
}
|
||||
|
||||
public static String nextWeeksFromToday(Integer n, LocalDateTime date) {
|
||||
LocalDateTime d = (date == null) ? LocalDateTime.now() : date;
|
||||
TimexProperty t = TimexProperty.fromDate(d);
|
||||
t.setDays(new BigDecimal(n * 7));
|
||||
return t.getTimexValue();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,126 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.time.DayOfWeek;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Calendar helpers for TIMEX processing: day shifting, week membership tests,
 * weekday lookups and fixed-width number formatting.
 */
public class TimexDateHelpers {

    /** @return {@code date} moved forward by one day */
    public static LocalDateTime tomorrow(LocalDateTime date) {
        return date.plusDays(1);
    }

    /** @return {@code date} moved back by one day */
    public static LocalDateTime yesterday(LocalDateTime date) {
        return date.plusDays(-1);
    }

    /** @return {@code true} when both values share year, month and day-of-month (time of day is ignored) */
    public static Boolean datePartEquals(LocalDateTime dateX, LocalDateTime dateY) {
        return (dateX.getYear() == dateY.getYear()) &&
            (dateX.getMonthValue() == dateY.getMonthValue()) &&
            (dateX.getDayOfMonth() == dateY.getDayOfMonth());
    }

    /**
     * @return {@code true} when {@code date} falls on one of the seven consecutive days
     *         beginning at {@code startOfWeek}
     */
    public static boolean isDateInWeek(LocalDateTime date, LocalDateTime startOfWeek) {
        LocalDateTime d = startOfWeek;
        for (int i = 0; i < 7; i++) {
            if (TimexDateHelpers.datePartEquals(date, d)) {
                return true;
            }

            d = d.plusDays(1);
        }

        return false;
    }

    /**
     * Tests whether {@code date} lies in the week containing {@code referenceDate}.
     *
     * <p>The week start is found by stepping back from {@code referenceDate} while its
     * US day index is greater than Monday's. A Sunday (index 0) therefore is not stepped
     * back and is used as the week start itself.
     */
    public static Boolean isThisWeek(LocalDateTime date, LocalDateTime referenceDate) {
        // Note ISO 8601 week starts on a Monday
        LocalDateTime startOfWeek = referenceDate;
        while (TimexDateHelpers.getUSDayOfWeek(startOfWeek.getDayOfWeek()) > TimexDateHelpers.getUSDayOfWeek(DayOfWeek.MONDAY)) {
            startOfWeek = startOfWeek.plusDays(-1);
        }

        return TimexDateHelpers.isDateInWeek(date, startOfWeek);
    }

    /** @return {@link #isThisWeek} evaluated against {@code referenceDate} plus seven days */
    public static Boolean isNextWeek(LocalDateTime date, LocalDateTime referenceDate) {
        return TimexDateHelpers.isThisWeek(date, referenceDate.plusDays(7));
    }

    /** @return {@link #isThisWeek} evaluated against {@code referenceDate} minus seven days */
    public static Boolean isLastWeek(LocalDateTime date, LocalDateTime referenceDate) {
        return TimexDateHelpers.isThisWeek(date, referenceDate.plusDays(-7));
    }

    /**
     * @return one plus the number of Sundays from January 1st of {@code date}'s year up to,
     *         but not including, {@code date}'s day
     */
    public static Integer weekOfYear(LocalDateTime date) {
        LocalDateTime ds = LocalDateTime.of(date.getYear(), 1, 1, 0, 0);
        LocalDateTime de = LocalDateTime.of(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), 0, 0);
        int weeks = 1;

        while (ds.compareTo(de) < 0) {
            // US index 0 is Sunday, i.e. ISO day 7: each one passed starts a new week.
            if (TimexDateHelpers.getUSDayOfWeek(ds.getDayOfWeek()) == 0) {
                weeks++;
            }

            ds = ds.plusDays(1);
        }

        return weeks;
    }

    /**
     * @param n    number to format
     * @param size minimum width of the result
     * @return {@code n} as text, left-padded with {@code '0'} to at least {@code size} characters
     */
    public static String fixedFormatNumber(Integer n, Integer size) {
        return String.format("%1$" + size + "s", n.toString()).replace(' ', '0');
    }

    /** @return the latest date strictly before {@code referenceDate} that falls on {@code day} */
    public static LocalDateTime dateOfLastDay(DayOfWeek day, LocalDateTime referenceDate) {
        LocalDateTime result = referenceDate.plusDays(-1);

        while (result.getDayOfWeek() != day) {
            result = result.plusDays(-1);
        }

        return result;
    }

    /** @return the earliest date strictly after {@code referenceDate} that falls on {@code day} */
    public static LocalDateTime dateOfNextDay(DayOfWeek day, LocalDateTime referenceDate) {
        LocalDateTime result = referenceDate.plusDays(1);

        while (result.getDayOfWeek() != day) {
            result = result.plusDays(1);
        }

        return result;
    }

    /**
     * Lists every date from {@code start} (inclusive) up to {@code end}'s day (exclusive)
     * that falls on {@code day}. Each result keeps {@code start}'s time of day.
     *
     * @return the matching dates in ascending order; empty when {@code end}'s day is the
     *         same as or earlier than {@code start}'s day
     */
    public static List<LocalDateTime> datesMatchingDay(DayOfWeek day, LocalDateTime start, LocalDateTime end) {
        List<LocalDateTime> result = new ArrayList<LocalDateTime>();
        LocalDateTime d = start;

        // Compare with "before" rather than "not equal": with an end that precedes the
        // start, an inequality test is never satisfied and the walk would not stop.
        while (d.toLocalDate().isBefore(end.toLocalDate())) {
            if (d.getDayOfWeek() == day) {
                result.add(d);
            }

            d = d.plusDays(1);
        }

        return result;
    }

    /** @return the US-style day index: Sunday is 0, Monday is 1, through Saturday as 6 */
    public static Integer getUSDayOfWeek(DayOfWeek dayOfWeek) {
        return dayOfWeek.getValue() % 7;
    }
}
|
|
@ -0,0 +1,195 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.text.NumberFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
public class TimexFormat {
|
||||
public static String format(TimexProperty timex) {
|
||||
HashSet<String> types = timex.getTypes().size() != 0 ? timex.getTypes() : TimexInference.infer(timex);
|
||||
|
||||
if (types.contains(Constants.TimexTypes.PRESENT)) {
|
||||
return "PRESENT_REF";
|
||||
}
|
||||
|
||||
if ((types.contains(Constants.TimexTypes.DATE_TIME_RANGE) || types.contains(Constants.TimexTypes.DATE_RANGE) ||
|
||||
types.contains(Constants.TimexTypes.TIME_RANGE)) && types.contains(Constants.TimexTypes.DURATION)) {
|
||||
TimexRange range = TimexHelpers.expandDateTimeRange(timex);
|
||||
return String.format("(%1$s,%2$s,%3$s)", TimexFormat.format(range.getStart()),
|
||||
TimexFormat.format(range.getEnd()), TimexFormat.format(range.getDuration()));
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_TIME_RANGE)) {
|
||||
return String.format("%1$s%2$s", TimexFormat.formatDate(timex), TimexFormat.formatTimeRange(timex));
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_RANGE)) {
|
||||
return TimexFormat.formatDateRange(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.TIME_RANGE)) {
|
||||
return TimexFormat.formatTimeRange(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_TIME)) {
|
||||
return String.format("%1$s%2$s", TimexFormat.formatDate(timex), TimexFormat.formatTime(timex));
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DURATION)) {
|
||||
return TimexFormat.formatDuration(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE)) {
|
||||
return TimexFormat.formatDate(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.TIME)) {
|
||||
return TimexFormat.formatTime(timex);
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
|
||||
private static String formatDuration(TimexProperty timex) {
|
||||
List<String> timexList = new ArrayList<String>();
|
||||
NumberFormat nf = NumberFormat.getInstance(Locale.getDefault());
|
||||
|
||||
if (timex.getYears() != null) {
|
||||
nf.setMaximumFractionDigits(timex.getYears().scale());
|
||||
timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Year,
|
||||
timex.getYears() != null ? timex.getYears() : BigDecimal.valueOf(Constants.INVALID_VALUE)));
|
||||
}
|
||||
|
||||
if (timex.getMonths() != null) {
|
||||
nf.setMaximumFractionDigits(timex.getMonths().scale());
|
||||
timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Month,
|
||||
timex.getMonths() != null ? timex.getMonths() : BigDecimal.valueOf(Constants.INVALID_VALUE)));
|
||||
}
|
||||
|
||||
if (timex.getWeeks() != null) {
|
||||
nf.setMaximumFractionDigits(timex.getWeeks().scale());
|
||||
timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Week,
|
||||
timex.getWeeks() != null ? timex.getWeeks() : BigDecimal.valueOf(Constants.INVALID_VALUE)));
|
||||
}
|
||||
|
||||
if (timex.getDays() != null) {
|
||||
nf.setMaximumFractionDigits(timex.getDays().scale());
|
||||
timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Day,
|
||||
timex.getDays() != null ? timex.getDays() : BigDecimal.valueOf(Constants.INVALID_VALUE)));
|
||||
}
|
||||
|
||||
if (timex.getHours() != null) {
|
||||
nf.setMaximumFractionDigits(timex.getHours().scale());
|
||||
timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Hour,
|
||||
timex.getHours() != null ? timex.getHours() : BigDecimal.valueOf(Constants.INVALID_VALUE)));
|
||||
}
|
||||
|
||||
if (timex.getMinutes() != null) {
|
||||
nf.setMaximumFractionDigits(timex.getMinutes().scale());
|
||||
timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Minute,
|
||||
timex.getMinutes() != null ? timex.getMinutes() : BigDecimal.valueOf(Constants.INVALID_VALUE)));
|
||||
}
|
||||
|
||||
if (timex.getSeconds() != null) {
|
||||
nf.setMaximumFractionDigits(timex.getSeconds().scale());
|
||||
timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Second,
|
||||
timex.getSeconds() != null ? timex.getSeconds() : BigDecimal.valueOf(Constants.INVALID_VALUE)));
|
||||
}
|
||||
|
||||
return TimexHelpers.generateCompoundDurationTimex(timexList);
|
||||
}
|
||||
|
||||
private static String formatTime(TimexProperty timex) {
|
||||
if (timex.getMinute() == 0 && timex.getSecond() == 0) {
|
||||
return String.format("T%s", TimexDateHelpers.fixedFormatNumber(timex.getHour(), 2));
|
||||
}
|
||||
|
||||
if (timex.getSecond() == 0) {
|
||||
return String.format("T%1$s:%2$s", TimexDateHelpers.fixedFormatNumber(timex.getHour(), 2),
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getMinute(), 2));
|
||||
}
|
||||
|
||||
return String.format("T%1$s:%2$s:%3$s", TimexDateHelpers.fixedFormatNumber(timex.getHour(), 2),
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getMinute(), 2),
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getSecond(), 2));
|
||||
}
|
||||
|
||||
private static String formatDate(TimexProperty timex) {
|
||||
Integer year = timex.getYear() != null ? timex.getYear() : Constants.INVALID_VALUE;
|
||||
Integer month = timex.getWeekOfYear() != null ? timex.getWeekOfYear()
|
||||
: (timex.getMonth() != null ? timex.getMonth() : Constants.INVALID_VALUE);
|
||||
Integer day = timex.getDayOfWeek() != null ? timex.getDayOfWeek()
|
||||
: timex.getDayOfMonth() != null ? timex.getDayOfMonth() : Constants.INVALID_VALUE;
|
||||
Integer weekOfMonth = timex.getWeekOfMonth() != null ? timex.getWeekOfMonth() : Constants.INVALID_VALUE;
|
||||
|
||||
return TimexHelpers.generateDateTimex(year, month, day, weekOfMonth, timex.getDayOfWeek() != null);
|
||||
}
|
||||
|
||||
private static String formatDateRange(TimexProperty timex) {
|
||||
if (timex.getYear() != null && timex.getWeekOfYear() != null && timex.getWeekend() != null) {
|
||||
return String.format("%1$s-W%2$s-WE", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4),
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getWeekOfYear(), 2));
|
||||
}
|
||||
|
||||
if (timex.getYear() != null && timex.getWeekOfYear() != null) {
|
||||
return String.format("%1$s-W%2$s", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4),
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getWeekOfYear(), 2));
|
||||
}
|
||||
|
||||
if (timex.getYear() != null && timex.getMonth() != null && timex.getWeekOfMonth() != null) {
|
||||
return String.format("%1$s-%2$s-W%3$s", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4),
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2),
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getWeekOfMonth(), 2));
|
||||
}
|
||||
|
||||
if (timex.getYear() != null && timex.getSeason() != null) {
|
||||
return String.format("%1$s-%2$s", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4),
|
||||
timex.getSeason());
|
||||
}
|
||||
|
||||
if (timex.getSeason() != null) {
|
||||
return timex.getSeason();
|
||||
}
|
||||
|
||||
if (timex.getYear() != null && timex.getMonth() != null) {
|
||||
return String.format("%1$s-%2$s", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4),
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2));
|
||||
}
|
||||
|
||||
if (timex.getYear() != null) {
|
||||
return TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4);
|
||||
}
|
||||
|
||||
if (timex.getMonth() != null && timex.getWeekOfMonth() != null && timex.getDayOfWeek() != null) {
|
||||
return String.format("%1$s-%2$s-%3$s-%4$s-%5$s", Constants.TIMEX_FUZZY_YEAR,
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2), Constants.TIMEX_FUZZY_WEEK,
|
||||
timex.getWeekOfMonth(), timex.getDayOfWeek());
|
||||
}
|
||||
|
||||
if (timex.getMonth() != null && timex.getWeekOfMonth() != null) {
|
||||
return String.format("%1$s-%2$s-W%3$02d", Constants.TIMEX_FUZZY_YEAR,
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2), timex.getWeekOfMonth());
|
||||
}
|
||||
|
||||
if (timex.getMonth() != null) {
|
||||
return String.format("%1$s-%2$s", Constants.TIMEX_FUZZY_YEAR,
|
||||
TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2));
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
|
||||
private static String formatTimeRange(TimexProperty timex) {
|
||||
if (timex.getPartOfDay() != null) {
|
||||
return String.format("T%s", timex.getPartOfDay());
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,515 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.DayOfWeek;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.temporal.TemporalField;
|
||||
import java.time.temporal.WeekFields;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
public class TimexHelpers {
|
||||
// Suffix written after the amount for each duration unit, taken from the Constants.TIMEX_* values.
public static final HashMap<TimexUnit, String> TIMEX_UNIT_TO_STRING_MAP = new HashMap<TimexUnit, String>();

static {
    TIMEX_UNIT_TO_STRING_MAP.put(TimexUnit.Year, Constants.TIMEX_YEAR);
    TIMEX_UNIT_TO_STRING_MAP.put(TimexUnit.Month, Constants.TIMEX_MONTH);
    TIMEX_UNIT_TO_STRING_MAP.put(TimexUnit.Week, Constants.TIMEX_WEEK);
    TIMEX_UNIT_TO_STRING_MAP.put(TimexUnit.Day, Constants.TIMEX_DAY);
    TIMEX_UNIT_TO_STRING_MAP.put(TimexUnit.Hour, Constants.TIMEX_HOUR);
    TIMEX_UNIT_TO_STRING_MAP.put(TimexUnit.Minute, Constants.TIMEX_MINUTE);
    TIMEX_UNIT_TO_STRING_MAP.put(TimexUnit.Second, Constants.TIMEX_SECOND);
}

// Units for which generateDurationTimex inserts the time prefix before the amount.
public static final List<TimexUnit> TimeTimexUnitList = Arrays.asList(
    TimexUnit.Hour,
    TimexUnit.Minute,
    TimexUnit.Second);
|
||||
|
||||
public static TimexRange expandDateTimeRange(TimexProperty timex) {
|
||||
HashSet<String> types = timex.getTypes().size() != 0 ? timex.getTypes() : TimexInference.infer(timex);
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DURATION)) {
|
||||
TimexProperty start = TimexHelpers.cloneDateTime(timex);
|
||||
TimexProperty duration = TimexHelpers.cloneDuration(timex);
|
||||
return new TimexRange() {
|
||||
{
|
||||
setStart(start);
|
||||
setEnd(TimexHelpers.timexDateTimeAdd(start, duration));
|
||||
setDuration(duration);
|
||||
}
|
||||
};
|
||||
} else {
|
||||
if (timex.getYear() != null) {
|
||||
Pair<TimexProperty, TimexProperty> dateRange;
|
||||
if (timex.getMonth() != null && timex.getWeekOfMonth() != null) {
|
||||
dateRange = TimexHelpers.monthWeekDateRange(timex.getYear(), timex.getMonth(),
|
||||
timex.getWeekOfMonth());
|
||||
} else if (timex.getMonth() != null) {
|
||||
dateRange = TimexHelpers.monthDateRange(timex.getYear(), timex.getMonth());
|
||||
} else if (timex.getWeekOfYear() != null) {
|
||||
dateRange = TimexHelpers.yearWeekDateRange(timex.getYear(), timex.getWeekOfYear(),
|
||||
timex.getWeekend());
|
||||
} else {
|
||||
dateRange = TimexHelpers.yearDateRange(timex.getYear());
|
||||
}
|
||||
return new TimexRange() {
|
||||
{
|
||||
setStart(dateRange.getLeft());
|
||||
setEnd(dateRange.getRight());
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return new TimexRange() {
|
||||
{
|
||||
setStart(new TimexProperty());
|
||||
setEnd(new TimexProperty());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static TimexRange expandTimeRange(TimexProperty timex) {
|
||||
if (!timex.getTypes().contains(Constants.TimexTypes.TIME_RANGE)) {
|
||||
throw new IllegalArgumentException("argument must be a timerange: timex");
|
||||
}
|
||||
|
||||
if (timex.getPartOfDay() != null) {
|
||||
switch (timex.getPartOfDay()) {
|
||||
case "DT":
|
||||
timex = new TimexProperty(TimexCreator.DAYTIME);
|
||||
break;
|
||||
case "MO":
|
||||
timex = new TimexProperty(TimexCreator.MORNING);
|
||||
break;
|
||||
case "AF":
|
||||
timex = new TimexProperty(TimexCreator.AFTERNOON);
|
||||
break;
|
||||
case "EV":
|
||||
timex = new TimexProperty(TimexCreator.EVENING);
|
||||
break;
|
||||
case "NI":
|
||||
timex = new TimexProperty(TimexCreator.NIGHT);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("unrecognized part of day timerange: timex");
|
||||
}
|
||||
}
|
||||
|
||||
Integer hour = timex.getHour();
|
||||
Integer minute = timex.getMinute();
|
||||
Integer second = timex.getSecond();
|
||||
TimexProperty start = new TimexProperty() {
|
||||
{
|
||||
setHour(hour);
|
||||
setMinute(minute);
|
||||
setSecond(second);
|
||||
}
|
||||
};
|
||||
TimexProperty duration = TimexHelpers.cloneDuration(timex);
|
||||
|
||||
return new TimexRange() {
|
||||
{
|
||||
setStart(start);
|
||||
setEnd(TimexHelpers.timeAdd(start, duration));
|
||||
setDuration(duration);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static TimexProperty timexDateAdd(TimexProperty start, TimexProperty duration) {
|
||||
if (start.getDayOfWeek() != null) {
|
||||
TimexProperty end = start.clone();
|
||||
if (duration.getDays() != null) {
|
||||
Integer newDayOfWeek = end.getDayOfWeek() + (int)Math.round(duration.getDays().doubleValue());
|
||||
end.setDayOfWeek(newDayOfWeek);
|
||||
}
|
||||
|
||||
return end;
|
||||
}
|
||||
|
||||
if (start.getMonth() != null && start.getDayOfMonth() != null) {
|
||||
Double durationDays = null;
|
||||
if (duration.getDays() != null) {
|
||||
durationDays = duration.getDays().doubleValue();
|
||||
}
|
||||
|
||||
if (durationDays == null && duration.getWeeks() != null) {
|
||||
durationDays = 7 * duration.getWeeks().doubleValue();
|
||||
}
|
||||
|
||||
if (durationDays != null) {
|
||||
if (start.getYear() != null) {
|
||||
LocalDateTime d = LocalDateTime.of(start.getYear(), start.getMonth(), start.getDayOfMonth(), 0, 0,
|
||||
0);
|
||||
LocalDateTime d2 = d.plusDays(durationDays.longValue());
|
||||
return new TimexProperty() {
|
||||
{
|
||||
setYear(d2.getYear());
|
||||
setMonth(d2.getMonthValue());
|
||||
setDayOfMonth(d2.getDayOfMonth());
|
||||
}
|
||||
};
|
||||
} else {
|
||||
LocalDateTime d = LocalDateTime.of(2001, start.getMonth(), start.getDayOfMonth(), 0, 0, 0);
|
||||
LocalDateTime d2 = d.plusDays(durationDays.longValue());
|
||||
return new TimexProperty() {
|
||||
{
|
||||
setMonth(d2.getMonthValue());
|
||||
setDayOfMonth(d2.getDayOfMonth());
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (duration.getYears() != null) {
|
||||
if (start.getYear() != null) {
|
||||
return new TimexProperty() {
|
||||
{
|
||||
setYear(start.getYear() + (int)Math.round(duration.getYears().doubleValue()));
|
||||
setMonth(start.getMonth());
|
||||
setDayOfMonth(start.getDayOfMonth());
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (duration.getMonths() != null) {
|
||||
if (start.getMonth() != null) {
|
||||
return new TimexProperty() {
|
||||
{
|
||||
setYear(start.getYear());
|
||||
setMonth(start.getMonth() + (int)Math.round(duration.getMonths().doubleValue()));
|
||||
setDayOfMonth(start.getDayOfMonth());
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
public static String generateCompoundDurationTimex(List<String> timexList) {
|
||||
Boolean isTimeDurationAlreadyExist = false;
|
||||
StringBuilder timexBuilder = new StringBuilder(Constants.GENERAL_PERIOD_PREFIX);
|
||||
|
||||
for (String timexComponent : timexList) {
|
||||
// The Time Duration component occurs first time
|
||||
if (!isTimeDurationAlreadyExist && isTimeDurationTimex(timexComponent)) {
|
||||
timexBuilder.append(Constants.TIME_TIMEX_PREFIX.concat(getDurationTimexWithoutPrefix(timexComponent)));
|
||||
isTimeDurationAlreadyExist = true;
|
||||
} else {
|
||||
timexBuilder.append(getDurationTimexWithoutPrefix(timexComponent));
|
||||
}
|
||||
}
|
||||
|
||||
return timexBuilder.toString();
|
||||
}
|
||||
|
||||
public static String generateDateTimex(Integer year, Integer monthOrWeekOfYear, Integer day, Integer weekOfMonth,
|
||||
boolean byWeek) {
|
||||
String yearString = year == Constants.INVALID_VALUE ? Constants.TIMEX_FUZZY_YEAR
|
||||
: TimexDateHelpers.fixedFormatNumber(year, 4);
|
||||
String monthWeekString = monthOrWeekOfYear == Constants.INVALID_VALUE ? Constants.TIMEX_FUZZY_MONTH
|
||||
: TimexDateHelpers.fixedFormatNumber(monthOrWeekOfYear, 2);
|
||||
String dayString;
|
||||
if (byWeek) {
|
||||
dayString = day.toString();
|
||||
if (weekOfMonth != Constants.INVALID_VALUE) {
|
||||
monthWeekString = monthWeekString + String.format("-%s-", Constants.TIMEX_FUZZY_WEEK)
|
||||
+ weekOfMonth.toString();
|
||||
} else {
|
||||
monthWeekString = Constants.TIMEX_WEEK + monthWeekString;
|
||||
}
|
||||
} else {
|
||||
dayString = day == Constants.INVALID_VALUE ? Constants.TIMEX_FUZZY_DAY
|
||||
: TimexDateHelpers.fixedFormatNumber(day, 2);
|
||||
}
|
||||
|
||||
return String.join("-", yearString, monthWeekString, dayString);
|
||||
}
|
||||
|
||||
public static String generateDurationTimex(TimexUnit unit, BigDecimal value) {
|
||||
if (value.intValue() == Constants.INVALID_VALUE) {
|
||||
return new String();
|
||||
}
|
||||
|
||||
StringBuilder timexBuilder = new StringBuilder(Constants.GENERAL_PERIOD_PREFIX);
|
||||
if (TimeTimexUnitList.contains(unit)) {
|
||||
timexBuilder.append(Constants.TIME_TIMEX_PREFIX);
|
||||
}
|
||||
|
||||
timexBuilder.append(value.toString());
|
||||
timexBuilder.append(TIMEX_UNIT_TO_STRING_MAP.get(unit));
|
||||
return timexBuilder.toString();
|
||||
}
|
||||
|
||||
public static TimexProperty timexTimeAdd(TimexProperty start, TimexProperty duration) {
|
||||
|
||||
TimexProperty result = start.clone();
|
||||
if (duration.getMinutes() != null) {
|
||||
result.setMinute(result.getMinute() + (int)Math.round(duration.getMinutes().doubleValue()));
|
||||
|
||||
if (result.getMinute() > 59) {
|
||||
result.setHour(((result.getHour() != null) ? result.getHour() : 0) + 1);
|
||||
result.setMinute(result.getMinute() % 60);
|
||||
}
|
||||
}
|
||||
|
||||
if (duration.getHours() != null) {
|
||||
result.setHour(result.getHour() + (int)Math.round(duration.getHours().doubleValue()));
|
||||
}
|
||||
|
||||
if (result.getHour() != null && result.getHour() > 23) {
|
||||
Double days = Math.floor(result.getHour() / 24d);
|
||||
Integer hour = result.getHour() % 24;
|
||||
result.setHour(hour);
|
||||
|
||||
if (result.getYear() != null && result.getMonth() != null && result.getDayOfMonth() != null) {
|
||||
LocalDateTime d = LocalDateTime.of(result.getYear(), result.getMonth(), result.getDayOfMonth(), 0, 0,
|
||||
0);
|
||||
d = d.plusDays(days.longValue());
|
||||
|
||||
result.setYear(d.getYear());
|
||||
result.setMonth(d.getMonthValue());
|
||||
result.setDayOfMonth(d.getDayOfMonth());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
if (result.getDayOfWeek() != null) {
|
||||
result.setDayOfWeek(result.getDayOfWeek() + (int)Math.round(days));
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static TimexProperty timexDateTimeAdd(TimexProperty start, TimexProperty duration) {
|
||||
return TimexHelpers.timexTimeAdd(TimexHelpers.timexDateAdd(start, duration), duration);
|
||||
}
|
||||
|
||||
public static LocalDateTime dateFromTimex(TimexProperty timex) {
|
||||
Integer year = timex.getYear() != null ? timex.getYear() : 2001;
|
||||
Integer month = timex.getMonth() != null ? timex.getMonth() : 1;
|
||||
Integer day = timex.getDayOfMonth() != null ? timex.getDayOfMonth() : 1;
|
||||
Integer hour = timex.getHour() != null ? timex.getHour() : 0;
|
||||
Integer minute = timex.getMinute() != null ? timex.getMinute() : 0;
|
||||
Integer second = timex.getSecond() != null ? timex.getSecond() : 0;
|
||||
LocalDateTime date = LocalDateTime.of(year, month, day, hour, minute, second);
|
||||
|
||||
return date;
|
||||
}
|
||||
|
||||
public static Time timeFromTimex(TimexProperty timex) {
|
||||
Integer hour = timex.getHour() != null ? timex.getHour() : 0;
|
||||
Integer minute = timex.getMinute() != null ? timex.getMinute() : 0;
|
||||
Integer second = timex.getSecond() != null ? timex.getSecond() : 0;
|
||||
return new Time(hour, minute, second);
|
||||
}
|
||||
|
||||
public static DateRange dateRangeFromTimex(TimexProperty timex) {
|
||||
TimexRange expanded = TimexHelpers.expandDateTimeRange(timex);
|
||||
return new DateRange() {
|
||||
{
|
||||
setStart(TimexHelpers.dateFromTimex(expanded.getStart()));
|
||||
setEnd(TimexHelpers.dateFromTimex(expanded.getEnd()));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static TimeRange timeRangeFromTimex(TimexProperty timex) {
|
||||
TimexRange expanded = TimexHelpers.expandTimeRange(timex);
|
||||
return new TimeRange() {
|
||||
{
|
||||
setStart(TimexHelpers.timeFromTimex(expanded.getStart()));
|
||||
setEnd(TimexHelpers.timeFromTimex(expanded.getEnd()));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static String formatResolvedDateValue(String dateValue, String timeValue) {
|
||||
return String.format("%1$s %2$s", dateValue, timeValue);
|
||||
}
|
||||
|
||||
public static Pair<TimexProperty, TimexProperty> monthWeekDateRange(Integer year, Integer month,
|
||||
Integer weekOfMonth) {
|
||||
LocalDateTime start = TimexHelpers.generateMonthWeekDateStart(year, month, weekOfMonth);
|
||||
LocalDateTime end = start.plusDays(7);
|
||||
TimexProperty value1 = new TimexProperty() {
|
||||
{
|
||||
setYear(start.getYear());
|
||||
setMonth(start.getMonth().getValue());
|
||||
setDayOfMonth(start.getDayOfMonth());
|
||||
}
|
||||
};
|
||||
TimexProperty value2 = new TimexProperty() {
|
||||
{
|
||||
setYear(end.getYear());
|
||||
setMonth(end.getMonth().getValue());
|
||||
setDayOfMonth(end.getDayOfMonth());
|
||||
}
|
||||
};
|
||||
return Pair.of(value1, value2);
|
||||
}
|
||||
|
||||
public static Pair<TimexProperty, TimexProperty> monthDateRange(Integer year, Integer month) {
|
||||
TimexProperty value1 = new TimexProperty() {
|
||||
{
|
||||
setYear(year);
|
||||
setMonth(month);
|
||||
setDayOfMonth(1);
|
||||
}
|
||||
};
|
||||
TimexProperty value2 = new TimexProperty() {
|
||||
{
|
||||
setYear(month == 12 ? year + 1 : year);
|
||||
setMonth(month == 12 ? 1 : month + 1);
|
||||
setDayOfMonth(1);
|
||||
}
|
||||
};
|
||||
return Pair.of(value1, value2);
|
||||
}
|
||||
|
||||
public static Pair<TimexProperty, TimexProperty> yearDateRange(Integer year) {
|
||||
TimexProperty value1 = new TimexProperty() {
|
||||
{
|
||||
setYear(year);
|
||||
setMonth(1);
|
||||
setDayOfMonth(1);
|
||||
}
|
||||
};
|
||||
TimexProperty value2 = new TimexProperty() {
|
||||
{
|
||||
setYear(year + 1);
|
||||
setMonth(1);
|
||||
setDayOfMonth(1);
|
||||
}
|
||||
};
|
||||
return Pair.of(value1, value2);
|
||||
}
|
||||
|
||||
public static Pair<TimexProperty, TimexProperty> yearWeekDateRange(Integer year, Integer weekOfYear,
|
||||
Boolean isWeekend) {
|
||||
LocalDateTime firstMondayInWeek = TimexHelpers.firstDateOfWeek(year, weekOfYear, null);
|
||||
|
||||
LocalDateTime start = (isWeekend == null || !isWeekend) ? firstMondayInWeek
|
||||
: TimexDateHelpers.dateOfNextDay(DayOfWeek.SATURDAY, firstMondayInWeek);
|
||||
LocalDateTime end = firstMondayInWeek.plusDays(7);
|
||||
TimexProperty value1 = new TimexProperty() {
|
||||
{
|
||||
setYear(start.getYear());
|
||||
setMonth(start.getMonth().getValue());
|
||||
setDayOfMonth(start.getDayOfMonth());
|
||||
}
|
||||
};
|
||||
TimexProperty value2 = new TimexProperty() {
|
||||
{
|
||||
setYear(end.getYear());
|
||||
setMonth(end.getMonth().getValue());
|
||||
setDayOfMonth(end.getDayOfMonth());
|
||||
}
|
||||
};
|
||||
return Pair.of(value1, value2);
|
||||
}
|
||||
|
||||
// this is based on
|
||||
// https://stackoverflow.com/questions/19901666/get-date-of-first-and-last-day-of-week-knowing-week-number/34727270
|
||||
public static LocalDateTime firstDateOfWeek(Integer year, Integer weekOfYear, Locale cultureInfo) {
|
||||
// ISO uses FirstFourDayWeek, and Monday as first day of week, according to
|
||||
// https://en.wikipedia.org/wiki/ISO_8601
|
||||
LocalDateTime jan1 = LocalDateTime.of(year, 1, 1, 0, 0);
|
||||
Integer daysOffset = DayOfWeek.MONDAY.getValue() - TimexDateHelpers.getUSDayOfWeek(jan1.getDayOfWeek());
|
||||
LocalDateTime firstWeekDay = jan1;
|
||||
firstWeekDay = firstWeekDay.plusDays(daysOffset);
|
||||
|
||||
TemporalField woy = WeekFields.ISO.weekOfYear();
|
||||
Integer firstWeek = jan1.get(woy);
|
||||
|
||||
if ((firstWeek <= 1 || firstWeek >= 52) && daysOffset >= -3) {
|
||||
weekOfYear -= 1;
|
||||
}
|
||||
|
||||
firstWeekDay = firstWeekDay.plusDays(weekOfYear * 7);
|
||||
|
||||
return firstWeekDay;
|
||||
}
|
||||
|
||||
public static LocalDateTime generateMonthWeekDateStart(Integer year, Integer month, Integer weekOfMonth) {
|
||||
LocalDateTime dateInWeek = LocalDateTime.of(year, month, 1 + ((weekOfMonth - 1) * 7), 0, 0);
|
||||
|
||||
// Align the date of the week according to Thursday, base on ISO 8601,
|
||||
// https://en.wikipedia.org/wiki/ISO_8601
|
||||
if (dateInWeek.getDayOfWeek().getValue() > DayOfWeek.THURSDAY.getValue()) {
|
||||
dateInWeek = dateInWeek.plusDays(7 - dateInWeek.getDayOfWeek().getValue() + 1);
|
||||
} else {
|
||||
dateInWeek = dateInWeek.plusDays(1 - dateInWeek.getDayOfWeek().getValue());
|
||||
}
|
||||
|
||||
return dateInWeek;
|
||||
}
|
||||
|
||||
private static TimexProperty timeAdd(TimexProperty start, TimexProperty duration) {
|
||||
Integer second = start.getSecond()
|
||||
+ (int)(duration.getSeconds() != null ? duration.getSeconds().intValue() : 0);
|
||||
Integer minute = start.getMinute() + second / 60
|
||||
+ (duration.getMinutes() != null ? duration.getMinutes().intValue() : 0);
|
||||
Integer hour = start.getHour() + (minute / 60)
|
||||
+ (duration.getHours() != null ? duration.getHours().intValue() : 0);
|
||||
|
||||
return new TimexProperty() {
|
||||
{
|
||||
setHour((hour == 24 && minute % 60 == 0 && second % 60 == 0) ? hour : hour % 24);
|
||||
setMinute(minute % 60);
|
||||
setSecond(second % 60);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static TimexProperty cloneDateTime(TimexProperty timex) {
|
||||
TimexProperty result = timex.clone();
|
||||
result.setYears(null);
|
||||
result.setMonths(null);
|
||||
result.setWeeks(null);
|
||||
result.setDays(null);
|
||||
result.setHours(null);
|
||||
result.setMinutes(null);
|
||||
result.setSeconds(null);
|
||||
return result;
|
||||
}
|
||||
|
||||
private static TimexProperty cloneDuration(TimexProperty timex) {
|
||||
TimexProperty result = timex.clone();
|
||||
result.setYear(null);
|
||||
result.setMonth(null);
|
||||
result.setDayOfMonth(null);
|
||||
result.setDayOfWeek(null);
|
||||
result.setWeekOfYear(null);
|
||||
result.setWeekOfMonth(null);
|
||||
result.setSeason(null);
|
||||
result.setHour(null);
|
||||
result.setMinute(null);
|
||||
result.setSecond(null);
|
||||
result.setWeekend(null);
|
||||
result.setPartOfDay(null);
|
||||
return result;
|
||||
}
|
||||
|
||||
private static Boolean isTimeDurationTimex(String timex) {
|
||||
return timex.startsWith(Constants.GENERAL_PERIOD_PREFIX.concat(Constants.TIME_TIMEX_PREFIX));
|
||||
}
|
||||
|
||||
private static String getDurationTimexWithoutPrefix(String timex) {
|
||||
// Remove "PT" prefix for TimeDuration, Remove "P" prefix for DateDuration
|
||||
return timex.substring(isTimeDurationTimex(timex) ? 2 : 1);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.util.HashSet;
|
||||
|
||||
public class TimexInference {
|
||||
public static HashSet<String> infer(TimexProperty timexProperty) {
|
||||
HashSet<String> types = new HashSet<String>();
|
||||
|
||||
if (TimexInference.isPresent(timexProperty)) {
|
||||
types.add(Constants.TimexTypes.PRESENT);
|
||||
}
|
||||
|
||||
if (TimexInference.isDefinite(timexProperty)) {
|
||||
types.add(Constants.TimexTypes.DEFINITE);
|
||||
}
|
||||
|
||||
if (TimexInference.isDate(timexProperty)) {
|
||||
types.add(Constants.TimexTypes.DATE);
|
||||
}
|
||||
|
||||
if (TimexInference.isDateRange(timexProperty)) {
|
||||
types.add(Constants.TimexTypes.DATE_RANGE);
|
||||
}
|
||||
|
||||
if (TimexInference.isDuration(timexProperty)) {
|
||||
types.add(Constants.TimexTypes.DURATION);
|
||||
}
|
||||
|
||||
if (TimexInference.isTime(timexProperty)) {
|
||||
types.add(Constants.TimexTypes.TIME);
|
||||
}
|
||||
|
||||
if (TimexInference.isTimeRange(timexProperty)) {
|
||||
types.add(Constants.TimexTypes.TIME_RANGE);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.PRESENT)) {
|
||||
types.add(Constants.TimexTypes.DATE);
|
||||
types.add(Constants.TimexTypes.TIME);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.TIME) && types.contains(Constants.TimexTypes.DURATION)) {
|
||||
types.add(Constants.TimexTypes.TIME_RANGE);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE) && types.contains(Constants.TimexTypes.TIME)) {
|
||||
types.add(Constants.TimexTypes.DATE_TIME);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE) && types.contains(Constants.TimexTypes.DURATION)) {
|
||||
types.add(Constants.TimexTypes.DATE_RANGE);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_TIME) && types.contains(Constants.TimexTypes.DURATION)) {
|
||||
types.add((Constants.TimexTypes.DATE_TIME_RANGE));
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE) && types.contains(Constants.TimexTypes.TIME_RANGE)) {
|
||||
types.add(Constants.TimexTypes.DATE_TIME_RANGE);
|
||||
}
|
||||
|
||||
return types;
|
||||
}
|
||||
|
||||
private static Boolean isPresent(TimexProperty timexProperty) {
|
||||
return timexProperty.getNow() != null && timexProperty.getNow() == true;
|
||||
}
|
||||
|
||||
private static Boolean isDuration(TimexProperty timexProperty) {
|
||||
return timexProperty.getYears() != null || timexProperty.getMonths() != null || timexProperty.getWeeks() != null ||
|
||||
timexProperty.getDays() != null | timexProperty.getHours() != null ||
|
||||
timexProperty.getMinutes() != null || timexProperty.getSeconds() != null;
|
||||
}
|
||||
|
||||
private static Boolean isTime(TimexProperty timexProperty) {
|
||||
return timexProperty.getHour() != null && timexProperty.getMinute() != null && timexProperty.getSecond() != null;
|
||||
}
|
||||
|
||||
private static Boolean isDate(TimexProperty timexProperty) {
|
||||
return timexProperty.getDayOfMonth() != null || timexProperty.getDayOfWeek() != null;
|
||||
}
|
||||
|
||||
private static Boolean isTimeRange(TimexProperty timexProperty) {
|
||||
return timexProperty.getPartOfDay() != null;
|
||||
}
|
||||
|
||||
private static Boolean isDateRange(TimexProperty timexProperty) {
|
||||
return (timexProperty.getDayOfMonth() == null && timexProperty.getDayOfWeek() == null) &&
|
||||
(timexProperty.getYear() != null || timexProperty.getMonth() != null ||
|
||||
timexProperty.getSeason() != null || timexProperty.getWeekOfYear() != null ||
|
||||
timexProperty.getWeekOfMonth() != null);
|
||||
}
|
||||
|
||||
private static Boolean isDefinite(TimexProperty timexProperty) {
|
||||
return timexProperty.getYear() != null & timexProperty.getMonth() != null && timexProperty.getDayOfMonth() != null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class TimexParsing {
|
||||
public static void parseString(String timex, TimexProperty timexProperty) {
|
||||
// a reference to the present
|
||||
if (timex == "PRESENT_REF") {
|
||||
timexProperty.setNow(true);
|
||||
} else if (timex.startsWith("P")) {
|
||||
// duration
|
||||
TimexParsing.extractDuration(timex, timexProperty);
|
||||
} else if (timex.startsWith("(") && timex.endsWith(")")) {
|
||||
// range indicated with start and end dates and a duration
|
||||
TimexParsing.extractStartEndRange(timex, timexProperty);
|
||||
} else {
|
||||
// date andt ime and their respective ranges
|
||||
TimexParsing.extractDateTime(timex, timexProperty);
|
||||
}
|
||||
}
|
||||
|
||||
private static void extractDuration(String s, TimexProperty timexProperty) {
|
||||
Map<String, String> extracted = new HashMap<String, String>();
|
||||
TimexRegex.extract("period", s, extracted);
|
||||
timexProperty.assignProperties(extracted);
|
||||
}
|
||||
|
||||
private static void extractStartEndRange(String s, TimexProperty timexProperty) {
|
||||
String[] parts = s.substring(1, s.length() - 1).split(",");
|
||||
|
||||
if (parts.length == 3) {
|
||||
TimexParsing.extractDateTime(parts[0], timexProperty);
|
||||
TimexParsing.extractDuration(parts[2], timexProperty);
|
||||
}
|
||||
}
|
||||
|
||||
private static void extractDateTime(String s, TimexProperty timexProperty) {
|
||||
Integer indexOfT = s.indexOf("T");
|
||||
|
||||
if (indexOfT == -1) {
|
||||
Map<String, String> extracted = new HashMap<String, String>();
|
||||
TimexRegex.extract("date", s, extracted);
|
||||
timexProperty.assignProperties(extracted);
|
||||
|
||||
} else {
|
||||
Map<String, String> extracted = new HashMap<String, String>();
|
||||
TimexRegex.extract("date", s.substring(0, indexOfT), extracted);
|
||||
TimexRegex.extract("time", s.substring(indexOfT), extracted);
|
||||
timexProperty.assignProperties(extracted);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,445 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
public class TimexProperty {
|
||||
private Time time;
|
||||
|
||||
private String timexValue;
|
||||
|
||||
private HashSet<String> types;
|
||||
|
||||
private Boolean now;
|
||||
|
||||
private BigDecimal years;
|
||||
|
||||
private BigDecimal months;
|
||||
|
||||
private BigDecimal weeks;
|
||||
|
||||
private BigDecimal days;
|
||||
|
||||
private BigDecimal hours;
|
||||
|
||||
private BigDecimal minutes;
|
||||
|
||||
private BigDecimal seconds;
|
||||
|
||||
private Integer year;
|
||||
|
||||
private Integer month;
|
||||
|
||||
private Integer dayOfMonth;
|
||||
|
||||
private Integer dayOfWeek;
|
||||
|
||||
private String season;
|
||||
|
||||
private Integer weekOfYear;
|
||||
|
||||
private Boolean weekend;
|
||||
|
||||
public Integer weekOfMonth;
|
||||
|
||||
private Integer hour;
|
||||
|
||||
private Integer minute;
|
||||
|
||||
private Integer second;
|
||||
|
||||
private String partOfDay;
|
||||
|
||||
public TimexProperty() {
|
||||
|
||||
}
|
||||
|
||||
public TimexProperty(String timex) {
|
||||
TimexParsing.parseString(timex, this);
|
||||
}
|
||||
|
||||
public String getTimexValue() {
|
||||
return TimexFormat.format(this);
|
||||
}
|
||||
|
||||
public void setTimexValue(String withTimexValue) {
|
||||
this.timexValue = withTimexValue;
|
||||
}
|
||||
|
||||
public HashSet<String> getTypes() {
|
||||
return TimexInference.infer(this);
|
||||
}
|
||||
|
||||
public void setTypes(HashSet<String> withTypes) {
|
||||
this.types = withTypes;
|
||||
}
|
||||
|
||||
public Boolean getNow() {
|
||||
return now;
|
||||
}
|
||||
|
||||
public void setNow(Boolean withNow) {
|
||||
this.now = withNow;
|
||||
}
|
||||
|
||||
public BigDecimal getYears() {
|
||||
return years;
|
||||
}
|
||||
|
||||
public void setYears(BigDecimal withYears) {
|
||||
this.years = withYears;
|
||||
}
|
||||
|
||||
public BigDecimal getMonths() {
|
||||
return months;
|
||||
}
|
||||
|
||||
public void setMonths(BigDecimal withMonths) {
|
||||
this.months = withMonths;
|
||||
}
|
||||
|
||||
public BigDecimal getWeeks() {
|
||||
return weeks;
|
||||
}
|
||||
|
||||
public void setWeeks(BigDecimal withWeeks) {
|
||||
this.weeks = withWeeks;
|
||||
}
|
||||
|
||||
public BigDecimal getDays() {
|
||||
return days;
|
||||
}
|
||||
|
||||
public void setDays(BigDecimal withDays) {
|
||||
this.days = withDays;
|
||||
}
|
||||
|
||||
public BigDecimal getHours() {
|
||||
return hours;
|
||||
}
|
||||
|
||||
public void setHours(BigDecimal withHours) {
|
||||
this.hours = withHours;
|
||||
}
|
||||
|
||||
public BigDecimal getMinutes() {
|
||||
return minutes;
|
||||
}
|
||||
|
||||
public void setMinutes(BigDecimal withMinutes) {
|
||||
this.minutes = withMinutes;
|
||||
}
|
||||
|
||||
public BigDecimal getSeconds() {
|
||||
return seconds;
|
||||
}
|
||||
|
||||
public void setSeconds(BigDecimal withSeconds) {
|
||||
this.seconds = withSeconds;
|
||||
}
|
||||
|
||||
public Integer getYear() {
|
||||
return year;
|
||||
}
|
||||
|
||||
public void setYear(Integer withYear) {
|
||||
this.year = withYear;
|
||||
}
|
||||
|
||||
public Integer getMonth() {
|
||||
return month;
|
||||
}
|
||||
|
||||
public void setMonth(Integer withMonth) {
|
||||
this.month = withMonth;
|
||||
}
|
||||
|
||||
public Integer getDayOfMonth() {
|
||||
return dayOfMonth;
|
||||
}
|
||||
|
||||
public void setDayOfMonth(Integer withDayOfMonth) {
|
||||
this.dayOfMonth = withDayOfMonth;
|
||||
}
|
||||
|
||||
public Integer getDayOfWeek() {
|
||||
return dayOfWeek;
|
||||
}
|
||||
|
||||
public void setDayOfWeek(Integer withDayOfWeek) {
|
||||
this.dayOfWeek = withDayOfWeek;
|
||||
}
|
||||
|
||||
public String getSeason() {
|
||||
return season;
|
||||
}
|
||||
|
||||
public void setSeason(String withSeason) {
|
||||
this.season = withSeason;
|
||||
}
|
||||
|
||||
public Integer getWeekOfYear() {
|
||||
return weekOfYear;
|
||||
}
|
||||
|
||||
public void setWeekOfYear(Integer withWeekOfYear) {
|
||||
this.weekOfYear = withWeekOfYear;
|
||||
}
|
||||
|
||||
public Boolean getWeekend() {
|
||||
return weekend;
|
||||
}
|
||||
|
||||
public void setWeekend(Boolean withWeekend) {
|
||||
this.weekend = withWeekend;
|
||||
}
|
||||
|
||||
public Integer getWeekOfMonth() {
|
||||
return weekOfMonth;
|
||||
}
|
||||
|
||||
public void setWeekOfMonth(Integer withWeekOfMonth) {
|
||||
this.weekOfMonth = withWeekOfMonth;
|
||||
}
|
||||
|
||||
public Integer getHour() {
|
||||
if (this.time != null) {
|
||||
return this.time.getHour();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public void setHour(Integer withHour) {
|
||||
if (withHour != null) {
|
||||
if (this.time == null) {
|
||||
this.time = new Time(withHour, 0, 0);
|
||||
} else {
|
||||
this.time.setHour(withHour);
|
||||
}
|
||||
} else {
|
||||
this.time = null;
|
||||
}
|
||||
}
|
||||
|
||||
public Integer getMinute() {
|
||||
if (this.time != null) {
|
||||
return this.time.getMinute();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public void setMinute(Integer withMinute) {
|
||||
if (withMinute != null) {
|
||||
if (this.time == null) {
|
||||
time = new Time(0, withMinute, 0);
|
||||
} else {
|
||||
time.setMinute(withMinute);
|
||||
}
|
||||
} else {
|
||||
this.time = null;
|
||||
}
|
||||
}
|
||||
|
||||
public Integer getSecond() {
|
||||
if (this.time != null) {
|
||||
return this.time.getSecond();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public void setSecond(Integer withSecond) {
|
||||
if (withSecond != null) {
|
||||
if (this.time == null) {
|
||||
this.time = new Time(0, 0, withSecond);
|
||||
} else {
|
||||
this.time.setSecond(withSecond);
|
||||
}
|
||||
} else {
|
||||
this.time = null;
|
||||
}
|
||||
}
|
||||
|
||||
public String getPartOfDay() {
|
||||
return partOfDay;
|
||||
}
|
||||
|
||||
public void setPartOfDay(String wthPartOfDay) {
|
||||
this.partOfDay = wthPartOfDay;
|
||||
}
|
||||
|
||||
public static TimexProperty fromDate(LocalDateTime date) {
|
||||
TimexProperty timex = new TimexProperty() {
|
||||
{
|
||||
setYear(date.getYear());
|
||||
setMonth(date.getMonthValue());
|
||||
setDayOfMonth(date.getDayOfMonth());
|
||||
}
|
||||
};
|
||||
return timex;
|
||||
}
|
||||
|
||||
public static TimexProperty fromDateTime(LocalDateTime datetime) {
|
||||
TimexProperty timex = TimexProperty.fromDate(datetime);
|
||||
timex.setHour(datetime.getHour());
|
||||
timex.setMinute(datetime.getMinute());
|
||||
timex.setSecond(datetime.getSecond());
|
||||
return timex;
|
||||
}
|
||||
|
||||
public static TimexProperty fromTime(Time time) {
|
||||
return new TimexProperty() {
|
||||
{
|
||||
setHour(time.getHour());
|
||||
setMinute(time.getMinute());
|
||||
setSecond(time.getSecond());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return TimexConvert.convertTimexToString(this);
|
||||
}
|
||||
|
||||
public String toNaturalLanguage(LocalDateTime referenceDate) {
|
||||
return TimexRelativeConvert.convertTimexToStringRelative(this, referenceDate);
|
||||
}
|
||||
|
||||
public TimexProperty clone() {
|
||||
Boolean now = this.getNow();
|
||||
BigDecimal years = this.getYears();
|
||||
BigDecimal months = this.getMonths();
|
||||
BigDecimal weeks = this.getWeeks();
|
||||
BigDecimal days = this.getDays();
|
||||
BigDecimal hours = this.getHours();
|
||||
BigDecimal minutes = this.getMinutes();
|
||||
BigDecimal seconds = this.getSeconds();
|
||||
Integer year = this.getYear();
|
||||
Integer month = this.getMonth();
|
||||
Integer dayOfMonth = this.getDayOfMonth();
|
||||
Integer dayOfWeek = this.getDayOfWeek();
|
||||
String season = this.getSeason();
|
||||
Integer weekOfYear = this.getWeekOfYear();
|
||||
Boolean weekend = this.getWeekend();
|
||||
Integer innerWeekOfMonth = this.getWeekOfMonth();
|
||||
Integer hour = this.getHour();
|
||||
Integer minute = this.getMinute();
|
||||
Integer second = this.getSecond();
|
||||
String partOfDay = this.getPartOfDay();
|
||||
|
||||
return new TimexProperty() {
|
||||
{
|
||||
setNow(now);
|
||||
setYears(years);
|
||||
setMonths(months);
|
||||
setWeeks(weeks);
|
||||
setDays(days);
|
||||
setHours(hours);
|
||||
setMinutes(minutes);
|
||||
setSeconds(seconds);
|
||||
setYear(year);
|
||||
setMonth(month);
|
||||
setDayOfMonth(dayOfMonth);
|
||||
setDayOfWeek(dayOfWeek);
|
||||
setSeason(season);
|
||||
setWeekOfYear(weekOfYear);
|
||||
setWeekend(weekend);
|
||||
setWeekOfMonth(innerWeekOfMonth);
|
||||
setHour(hour);
|
||||
setMinute(minute);
|
||||
setSecond(second);
|
||||
setPartOfDay(partOfDay);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public void assignProperties(Map<String, String> source) {
|
||||
for (Entry<String, String> item : source.entrySet()) {
|
||||
|
||||
if (StringUtils.isBlank(item.getValue())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (item.getKey()) {
|
||||
case "year":
|
||||
setYear(Integer.parseInt(item.getValue()));
|
||||
break;
|
||||
case "month":
|
||||
setMonth(Integer.parseInt(item.getValue()));
|
||||
break;
|
||||
case "dayOfMonth":
|
||||
setDayOfMonth(Integer.parseInt(item.getValue()));
|
||||
break;
|
||||
case "dayOfWeek":
|
||||
setDayOfWeek(Integer.parseInt(item.getValue()));
|
||||
break;
|
||||
case "season":
|
||||
setSeason(item.getValue());
|
||||
break;
|
||||
case "weekOfYear":
|
||||
setWeekOfYear(Integer.parseInt(item.getValue()));
|
||||
break;
|
||||
case "weekend":
|
||||
setWeekend(true);
|
||||
break;
|
||||
case "weekOfMonth":
|
||||
setWeekOfMonth(Integer.parseInt(item.getValue()));
|
||||
break;
|
||||
case "hour":
|
||||
setHour(Integer.parseInt(item.getValue()));
|
||||
break;
|
||||
case "minute":
|
||||
setMinute(Integer.parseInt(item.getValue()));
|
||||
break;
|
||||
case "second":
|
||||
setSecond(Integer.parseInt(item.getValue()));
|
||||
break;
|
||||
case "partOfDay":
|
||||
setPartOfDay(item.getValue());
|
||||
break;
|
||||
case "dateUnit":
|
||||
this.assignDateDuration(source);
|
||||
break;
|
||||
case "hourAmount":
|
||||
setHours(new BigDecimal(item.getValue()));
|
||||
break;
|
||||
case "minuteAmount":
|
||||
setMinutes(new BigDecimal(item.getValue()));
|
||||
break;
|
||||
case "secondAmount":
|
||||
setSeconds(new BigDecimal(item.getValue()));
|
||||
break;
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void assignDateDuration(Map<String, String> source) {
|
||||
switch (source.get("dateUnit")) {
|
||||
case "Y":
|
||||
this.years = new BigDecimal(source.get("amount"));
|
||||
break;
|
||||
case "M":
|
||||
this.months = new BigDecimal(source.get("amount"));
|
||||
break;
|
||||
case "W":
|
||||
this.weeks = new BigDecimal(source.get("amount"));
|
||||
break;
|
||||
case "D":
|
||||
this.days = new BigDecimal(source.get("amount"));
|
||||
break;
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
public class TimexRange {
|
||||
private TimexProperty start;
|
||||
|
||||
private TimexProperty end;
|
||||
|
||||
private TimexProperty duration;
|
||||
|
||||
public TimexProperty getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public void setStart(TimexProperty withStart) {
|
||||
this.start = withStart;
|
||||
}
|
||||
|
||||
public TimexProperty getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
public void setEnd(TimexProperty withEnd) {
|
||||
this.end = withEnd;
|
||||
}
|
||||
|
||||
public TimexProperty getDuration() {
|
||||
return duration;
|
||||
}
|
||||
|
||||
public void setDuration(TimexProperty withDuration) {
|
||||
this.duration = withDuration;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,266 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.time.DayOfWeek;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class TimexRangeResolver {
|
||||
public static List<TimexProperty> evaluate(Set<String> candidates, List<String> constraints) {
|
||||
List<TimexProperty> timexConstraints = constraints.stream().map(x -> {
|
||||
return new TimexProperty(x);
|
||||
}).collect(Collectors.toList());
|
||||
Set<String> candidatesWithDurationsResolved = TimexRangeResolver.resolveDurations(candidates, timexConstraints);
|
||||
Set<String> candidatesAccordingToDate = TimexRangeResolver
|
||||
.resolveByDateRangeConstraints(candidatesWithDurationsResolved, timexConstraints);
|
||||
Set<String> candidatesWithAddedTime = TimexRangeResolver.resolveByTimeConstraints(candidatesAccordingToDate,
|
||||
timexConstraints);
|
||||
Set<String> candidatesFilteredByTime = TimexRangeResolver.resolveByTimeRangeConstraints(candidatesWithAddedTime,
|
||||
timexConstraints);
|
||||
|
||||
List<TimexProperty> timexResults = candidatesFilteredByTime.stream().map(x -> {
|
||||
return new TimexProperty(x);
|
||||
}).collect(Collectors.toList());
|
||||
|
||||
return timexResults;
|
||||
}
|
||||
|
||||
public static Set<String> resolveDurations(Set<String> candidates, List<TimexProperty> constraints) {
|
||||
Set<String> results = new HashSet<String>();
|
||||
for (String candidate : candidates) {
|
||||
TimexProperty timex = new TimexProperty(candidate);
|
||||
if (timex.getTypes().contains(Constants.TimexTypes.DURATION)) {
|
||||
List<TimexProperty> r = TimexRangeResolver.resolveDuration(timex, constraints);
|
||||
for (TimexProperty resolved : r) {
|
||||
results.add(resolved.getTimexValue());
|
||||
}
|
||||
} else {
|
||||
results.add(candidate);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private static List<TimexProperty> resolveDuration(TimexProperty candidate, List<TimexProperty> constraints) {
|
||||
List<TimexProperty> results = new ArrayList<TimexProperty>();
|
||||
for (TimexProperty constraint : constraints) {
|
||||
if (constraint.getTypes().contains(Constants.TimexTypes.DATE_TIME)) {
|
||||
results.add(TimexHelpers.timexDateTimeAdd(constraint, candidate));
|
||||
} else if (constraint.getTypes().contains(Constants.TimexTypes.TIME)) {
|
||||
results.add(TimexHelpers.timexTimeAdd(constraint, candidate));
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private static Set<String> resolveByDateRangeConstraints(Set<String> candidates,
|
||||
List<TimexProperty> timexConstraints) {
|
||||
List<DateRange> dateRangeconstraints = timexConstraints.stream().filter(timex -> {
|
||||
return timex.getTypes().contains(Constants.TimexTypes.DATE_RANGE);
|
||||
}).map(timex -> {
|
||||
return TimexHelpers.dateRangeFromTimex(timex);
|
||||
}).collect(Collectors.toList());
|
||||
|
||||
List<DateRange> collapseDateRanges = TimexConstraintsHelper.collapseDateRanges(dateRangeconstraints);
|
||||
|
||||
if (collapseDateRanges.isEmpty()) {
|
||||
return candidates;
|
||||
}
|
||||
|
||||
List<String> resolution = new ArrayList<String>();
|
||||
for (String timex : candidates) {
|
||||
List<String> r = TimexRangeResolver.resolveDate(new TimexProperty(timex), collapseDateRanges);
|
||||
resolution.addAll(r);
|
||||
}
|
||||
|
||||
return TimexRangeResolver.removeDuplicates(resolution);
|
||||
}
|
||||
|
||||
private static List<String> resolveDate(TimexProperty timex, List<DateRange> constraints) {
|
||||
List<String> result = new ArrayList<String>();
|
||||
for (DateRange constraint : constraints) {
|
||||
result.addAll(TimexRangeResolver.resolveDateAgainstConstraint(timex, constraint));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static Set<String> resolveByTimeRangeConstraints(Set<String> candidates,
|
||||
List<TimexProperty> timexConstrainst) {
|
||||
List<TimeRange> timeRangeConstraints = timexConstrainst.stream().filter(timex -> {
|
||||
return timex.getTypes().contains(Constants.TimexTypes.TIME_RANGE);
|
||||
}).map(timex -> {
|
||||
return TimexHelpers.timeRangeFromTimex(timex);
|
||||
}).collect(Collectors.toList());
|
||||
|
||||
List<TimeRange> collapsedTimeRanges = TimexConstraintsHelper.collapseTimeRanges(timeRangeConstraints);
|
||||
|
||||
if (collapsedTimeRanges.isEmpty()) {
|
||||
return candidates;
|
||||
}
|
||||
|
||||
List<String> resolution = new ArrayList<String>();
|
||||
for (String timex : candidates) {
|
||||
TimexProperty t = new TimexProperty(timex);
|
||||
if (t.getTypes().contains(Constants.TimexTypes.TIME_RANGE)) {
|
||||
List<String> r = TimexRangeResolver.resolveTimeRange(t, collapsedTimeRanges);
|
||||
resolution.addAll(r);
|
||||
} else if (t.getTypes().contains(Constants.TimexTypes.TIME)) {
|
||||
List<String> r = TimexRangeResolver.resolveTime(t, collapsedTimeRanges);
|
||||
resolution.addAll(r);
|
||||
}
|
||||
}
|
||||
|
||||
return TimexRangeResolver.removeDuplicates(resolution);
|
||||
}
|
||||
|
||||
private static List<String> resolveTimeRange(TimexProperty timex, List<TimeRange> constraints) {
|
||||
TimeRange candidate = TimexHelpers.timeRangeFromTimex(timex);
|
||||
|
||||
List<String> result = new ArrayList<String>();
|
||||
for (TimeRange constraint : constraints) {
|
||||
if (TimexConstraintsHelper.isOverlapping(candidate, constraint)) {
|
||||
Integer start = Math.max(candidate.getStart().getTime(), constraint.getStart().getTime());
|
||||
Time time = new Time(start);
|
||||
|
||||
// TODO: consider a method on TimexProperty to do this clone/overwrite pattern
|
||||
TimexProperty resolved = timex.clone();
|
||||
resolved.setPartOfDay(null);
|
||||
resolved.setSeconds(null);
|
||||
resolved.setMinutes(null);
|
||||
resolved.setHours(null);
|
||||
resolved.setSecond(time.getSecond());
|
||||
resolved.setMinute(time.getMinute());
|
||||
resolved.setHour(time.getHour());
|
||||
|
||||
result.add(resolved.getTimexValue());
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static List<String> resolveTime(TimexProperty timex, List<TimeRange> constraints) {
|
||||
List<String> result = new ArrayList<String>();
|
||||
for (TimeRange constraint : constraints) {
|
||||
result.addAll(TimexRangeResolver.resolveTimeAgainstConstraint(timex, constraint));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static List<String> resolveTimeAgainstConstraint(TimexProperty timex, TimeRange constraint) {
|
||||
Time t = new Time(timex.getHour(), timex.getMinute(), timex.getSecond());
|
||||
if (t.getTime() >= constraint.getStart().getTime() && t.getTime() < constraint.getEnd().getTime()) {
|
||||
return new ArrayList<String>() {
|
||||
{
|
||||
add(timex.getTimexValue());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return new ArrayList<String>();
|
||||
}
|
||||
|
||||
private static Set<String> removeDuplicates(List<String> original) {
|
||||
return new HashSet<String>(original);
|
||||
}
|
||||
|
||||
private static List<String> resolveDefiniteAgainstConstraint(TimexProperty timex, DateRange constraint) {
|
||||
LocalDateTime timexDate = TimexHelpers.dateFromTimex(timex);
|
||||
if (timexDate.compareTo(constraint.getStart()) >= 0 && timexDate.compareTo(constraint.getEnd()) < 0) {
|
||||
return new ArrayList<String>() {
|
||||
{
|
||||
add(timex.getTimexValue());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return new ArrayList<String>();
|
||||
}
|
||||
|
||||
private static List<String> resolveDateAgainstConstraint(TimexProperty timex, DateRange constraint) {
|
||||
if (timex.getMonth() != null && timex.getDayOfMonth() != null) {
|
||||
List<String> result = new ArrayList<String>();
|
||||
for (int year = constraint.getStart().getYear(); year <= constraint.getEnd()
|
||||
.getYear(); year++) {
|
||||
TimexProperty t = timex.clone();
|
||||
t.setYear(year);
|
||||
result.addAll(TimexRangeResolver.resolveDefiniteAgainstConstraint(t, constraint));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
if (timex.getDayOfWeek() != null) {
|
||||
// convert between ISO day of week and .NET day of week
|
||||
DayOfWeek day = timex.getDayOfWeek() == 7 ? DayOfWeek.SUNDAY : DayOfWeek.of(timex.getDayOfWeek());
|
||||
List<LocalDateTime> dates = TimexDateHelpers.datesMatchingDay(day, constraint.getStart(), constraint.getEnd());
|
||||
List<String> result = new ArrayList<String>();
|
||||
|
||||
for (LocalDateTime d : dates) {
|
||||
TimexProperty t = timex.clone();
|
||||
t.setDayOfWeek(null);
|
||||
t.setYear(d.getYear());
|
||||
t.setMonth(d.getMonthValue());
|
||||
t.setDayOfMonth(d.getDayOfMonth());
|
||||
result.add(t.getTimexValue());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
if (timex.getHour() != null) {
|
||||
List<String> result = new ArrayList<String>();
|
||||
LocalDateTime day = constraint.getStart();
|
||||
while (day.compareTo(constraint.getEnd()) <= 0) {
|
||||
TimexProperty t = timex.clone();
|
||||
t.setYear(day.getYear());
|
||||
t.setMonth(day.getMonthValue());
|
||||
t.setDayOfMonth(day.getDayOfMonth());
|
||||
result.addAll(TimexRangeResolver.resolveDefiniteAgainstConstraint(t, constraint));
|
||||
day = day.plusDays(1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
return new ArrayList<String>();
|
||||
}
|
||||
|
||||
private static Set<String> resolveByTimeConstraints(Set<String> candidates, List<TimexProperty> timexConstrainst) {
|
||||
List<Time> times = timexConstrainst.stream().filter(timex -> {
|
||||
return timex.getTypes().contains(Constants.TimexTypes.TIME);
|
||||
}).map(timex -> {
|
||||
return TimexHelpers.timeFromTimex(timex);
|
||||
}).collect(Collectors.toList());
|
||||
|
||||
if (times.isEmpty()) {
|
||||
return candidates;
|
||||
}
|
||||
|
||||
List<String> resolution = new ArrayList<String>();
|
||||
for (TimexProperty timex : candidates.stream().map(t -> new TimexProperty(t)).collect(Collectors.toList())) {
|
||||
if (timex.getTypes().contains(Constants.TimexTypes.DATE) && !timex.getTypes().contains(Constants.TimexTypes.TIME)) {
|
||||
for (Time time : times) {
|
||||
timex.setHour(time.getHour());
|
||||
timex.setMinute(time.getMinute());
|
||||
timex.setSecond(time.getSecond());
|
||||
resolution.add(timex.getTimexValue());
|
||||
}
|
||||
} else {
|
||||
resolution.add(timex.getTimexValue());
|
||||
}
|
||||
}
|
||||
|
||||
return TimexRangeResolver.removeDuplicates(resolution);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import com.microsoft.recognizers.text.utilities.RegExpUtility;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class TimexRegex {
|
||||
private static final String DATE_TIME_COLLECTION_NAME = "datetime";
|
||||
private static final String DATE_COLLECTION_NAME = "date";
|
||||
private static final String TIME_COLLECTION_NAME = "time";
|
||||
private static final String PERIOD_COLLECTION_NAME = "period";
|
||||
|
||||
private static Pattern[] DATE_COLLECTION_NAME_PATTERNS = {
|
||||
// date
|
||||
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-(?<month>\\d\\d)(-(?<dayOfMonth>\\d\\d))?"),
|
||||
Pattern.compile("^XXXX-WXX-(?<dayOfWeek>\\d)"),
|
||||
Pattern.compile("^XXXX-XX-(?<dayOfMonth>\\d\\d)"),
|
||||
|
||||
// daterange
|
||||
Pattern.compile("^(?<year>\\d\\d\\d\\d)"),
|
||||
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-(?<month>\\d\\d)-W(?<weekOfMonth>\\d\\d)"),
|
||||
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-(?<month>\\d\\d)-WXX-(?<weekOfMonth>\\d{1,2})(-(?<dayOfWeek>\\d))?"),
|
||||
Pattern.compile("^(?<season>SP|SU|FA|WI)"),
|
||||
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-(?<season>SP|SU|FA|WI)"),
|
||||
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-W(?<weekOfYear>\\d\\d)(-(?<dayOfWeek>\\d)|-(?<weekend>WE))?"), };
|
||||
|
||||
private static Pattern[] TIME_COLLECTION_NAME_PATTERNS = {
|
||||
// time
|
||||
Pattern.compile("T(?<hour>\\d\\d)Z?$"), Pattern.compile("T(?<hour>\\d\\d):(?<minute>\\d\\d)Z?$"),
|
||||
Pattern.compile("T(?<hour>\\d\\d):(?<minute>\\d\\d):(?<second>\\d\\d)Z?$"),
|
||||
|
||||
// timerange
|
||||
Pattern.compile("^T(?<partOfDay>DT|NI|MO|AF|EV)$") };
|
||||
|
||||
private static Pattern[] PERIOD_COLLECTION_NAME_PATTERNS = {
|
||||
Pattern.compile("^P(?<amount>\\d*\\.?\\d+)(?<dateUnit>Y|M|W|D)$"),
|
||||
Pattern.compile("^PT(?<hourAmount>\\d*\\.?\\d+)H(\\d*\\.?\\d+(M|S)){0,2}$"),
|
||||
Pattern.compile("^PT(\\d*\\.?\\d+H)?(?<minuteAmount>\\d*\\.?\\d+)M(\\d*\\.?\\d+S)?$"),
|
||||
Pattern.compile("^PT(\\d*\\.?\\d+(H|M)){0,2}(?<secondAmount>\\d*\\.?\\d+)S$"), };
|
||||
|
||||
private static Map<String, Pattern[]> TIMEX_REGEX = new HashMap<String, Pattern[]>() {
|
||||
{
|
||||
put(DATE_COLLECTION_NAME, DATE_COLLECTION_NAME_PATTERNS);
|
||||
put(TIME_COLLECTION_NAME, TIME_COLLECTION_NAME_PATTERNS);
|
||||
put(PERIOD_COLLECTION_NAME, PERIOD_COLLECTION_NAME_PATTERNS);
|
||||
}
|
||||
};
|
||||
|
||||
public static Boolean extract(String name, String timex, Map<String, String> result) {
|
||||
String lowerName = name.toLowerCase();
|
||||
String[] nameGroup = new String[lowerName == DATE_TIME_COLLECTION_NAME ? 2 : 1];
|
||||
|
||||
if (lowerName == DATE_TIME_COLLECTION_NAME) {
|
||||
nameGroup[0] = DATE_COLLECTION_NAME;
|
||||
nameGroup[1] = TIME_COLLECTION_NAME;
|
||||
} else {
|
||||
nameGroup[0] = lowerName;
|
||||
}
|
||||
|
||||
Boolean anyTrue = false;
|
||||
for (String nameItem : nameGroup) {
|
||||
for (Pattern entry : TIMEX_REGEX.get(nameItem)) {
|
||||
if (TimexRegex.tryExtract(entry, timex, result)) {
|
||||
anyTrue = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return anyTrue;
|
||||
}
|
||||
|
||||
private static Boolean tryExtract(Pattern regex, String timex, Map<String, String> result) {
|
||||
Matcher regexResult = regex.matcher(timex);
|
||||
if (!regexResult.find()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Map<String, String> regexGroupNames = RegExpUtility.getNamedGroups(regexResult, true);
|
||||
|
||||
for (Entry<String, String> entry : regexGroupNames.entrySet()) {
|
||||
result.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.english.TimexRelativeConvertEnglish;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
public class TimexRelativeConvert {
|
||||
public static String convertTimexToStringRelative(TimexProperty timex, LocalDateTime referenceDate) {
|
||||
return TimexRelativeConvertEnglish.convertTimexToStringRelative(timex, referenceDate);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,572 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import com.google.common.collect.Streams;
|
||||
|
||||
import java.time.DayOfWeek;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.LocalTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
public class TimexResolver {
|
||||
public static Resolution resolve(String[] timexArray, LocalDateTime date) {
|
||||
date = date != null ? date : LocalDateTime.now();
|
||||
Resolution resolution = new Resolution();
|
||||
for (String timex : timexArray) {
|
||||
TimexProperty t = new TimexProperty(timex);
|
||||
List<Resolution.Entry> r = TimexResolver.resolveTimex(t, date);
|
||||
resolution.getValues().addAll(r);
|
||||
}
|
||||
|
||||
return resolution;
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveTimex(TimexProperty timex, LocalDateTime date) {
|
||||
HashSet<String> types = timex.getTypes().size() != 0 ? timex.getTypes() : TimexInference.infer(timex);
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_TIME_RANGE)) {
|
||||
return TimexResolver.resolveDateTimeRange(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DEFINITE) && types.contains(Constants.TimexTypes.TIME)) {
|
||||
return TimexResolver.resolveDefiniteTime(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DEFINITE) && types.contains(Constants.TimexTypes.DATE_RANGE)) {
|
||||
return TimexResolver.resolveDefiniteDateRange(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_RANGE)) {
|
||||
return TimexResolver.resolveDateRange(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DEFINITE)) {
|
||||
return TimexResolver.resolveDefinite(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.TIME_RANGE)) {
|
||||
return TimexResolver.resolveTimeRange(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_TIME)) {
|
||||
return TimexResolver.resolveDateTime(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DURATION)) {
|
||||
return TimexResolver.resolveDuration(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE)) {
|
||||
return TimexResolver.resolveDate(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.TIME)) {
|
||||
return TimexResolver.resolveTime(timex, date);
|
||||
}
|
||||
|
||||
return new ArrayList<Resolution.Entry>();
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveDefiniteTime(TimexProperty timex, LocalDateTime date) {
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("datetime");
|
||||
setValue(String.format("%1$s %2$s", TimexValue.dateValue(timex),
|
||||
TimexValue.timeValue(timex, date)));
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveDefinite(TimexProperty timex) {
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("date");
|
||||
setValue(TimexValue.dateValue(timex));
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveDefiniteDateRange(TimexProperty timex, LocalDateTime date) {
|
||||
TimexRange range = TimexHelpers.expandDateTimeRange(timex);
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("daterange");
|
||||
setStart(TimexValue.dateValue(range.getStart()));
|
||||
setEnd(TimexValue.dateValue(range.getEnd()));
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveDate(TimexProperty timex, LocalDateTime date) {
|
||||
List<String> dateValueList = TimexResolver.getDateValues(timex, date);
|
||||
List<Resolution.Entry> result = new ArrayList<Resolution.Entry>();
|
||||
for (String dateValue : dateValueList) {
|
||||
result.add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("date");
|
||||
setValue(dateValue);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static String lastDateValue(TimexProperty timex, LocalDateTime date) {
|
||||
if (timex.getDayOfMonth() != null) {
|
||||
Integer year = date.getYear();
|
||||
Integer month = date.getMonth().getValue();
|
||||
if (timex.getMonth() != null) {
|
||||
month = timex.getMonth();
|
||||
if (date.getMonthValue() <= month || (date.getMonth().getValue() == month && TimexDateHelpers.getUSDayOfWeek(date.getDayOfWeek()) <= timex.getDayOfMonth())) {
|
||||
year--;
|
||||
}
|
||||
} else {
|
||||
if (date.getDayOfMonth() <= timex.getDayOfMonth()) {
|
||||
month--;
|
||||
if (month < 1) {
|
||||
month = (month + 12) % 12;
|
||||
year--;
|
||||
}
|
||||
}
|
||||
}
|
||||
Integer finalYear = year;
|
||||
Integer finalMonth = month;
|
||||
return TimexValue.dateValue(new TimexProperty() {
|
||||
{
|
||||
setYear(finalYear);
|
||||
setMonth(finalMonth);
|
||||
setDayOfMonth(timex.getDayOfMonth());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (timex.getDayOfWeek() != null) {
|
||||
LocalDateTime start = generateWeekDate(timex, date, true);
|
||||
return TimexValue.dateValue(new TimexProperty() {
|
||||
{
|
||||
setYear(start.getYear());
|
||||
setMonth(start.getMonthValue());
|
||||
setDayOfMonth(start.getDayOfMonth());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
|
||||
private static String nextDateValue(TimexProperty timex, LocalDateTime date) {
|
||||
if (timex.getDayOfMonth() != null) {
|
||||
Integer year = date.getYear();
|
||||
Integer month = date.getMonth().getValue();
|
||||
if (timex.getMonth() != null) {
|
||||
month = timex.getMonth();
|
||||
if (date.getMonthValue() > month ||
|
||||
(date.getMonthValue() == month && date.getDayOfMonth() > timex.getDayOfMonth())) {
|
||||
year++;
|
||||
}
|
||||
} else {
|
||||
if (date.getDayOfMonth() > timex.getDayOfMonth()) {
|
||||
month++;
|
||||
if (month > 12) {
|
||||
month = month % 12;
|
||||
year--;
|
||||
}
|
||||
}
|
||||
}
|
||||
Integer finalYear = year;
|
||||
Integer finalMonth = month;
|
||||
return TimexValue.dateValue(new TimexProperty() {
|
||||
{
|
||||
setYear(finalYear);
|
||||
setMonth(finalMonth);
|
||||
setDayOfMonth(timex.getDayOfMonth());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (timex.getDayOfWeek() != null) {
|
||||
LocalDateTime start = generateWeekDate(timex, date, false);
|
||||
return TimexValue.dateValue(new TimexProperty() {
|
||||
{
|
||||
setYear(start.getYear());
|
||||
setMonth(start.getMonthValue());
|
||||
setDayOfMonth(start.getDayOfMonth());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveTime(TimexProperty timex, LocalDateTime date) {
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("time");
|
||||
setValue(TimexValue.timeValue(timex, date));
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveDuration(TimexProperty timex) {
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("duration");
|
||||
setValue(TimexValue.durationValue(timex));
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static Pair<String, String> yearDateRange(Integer year) {
|
||||
Pair<TimexProperty, TimexProperty> yearDateRange = TimexHelpers.yearDateRange(year);
|
||||
|
||||
return Pair.of(TimexValue.dateValue(yearDateRange.getLeft()), TimexValue.dateValue(yearDateRange.getRight()));
|
||||
}
|
||||
|
||||
private static Pair<String, String> monthDateRange(Integer year, Integer month) {
|
||||
Pair<TimexProperty, TimexProperty> monthDateRange = TimexHelpers.monthDateRange(year, month);
|
||||
|
||||
return Pair.of(TimexValue.dateValue(monthDateRange.getLeft()), TimexValue.dateValue(monthDateRange.getRight()));
|
||||
}
|
||||
|
||||
private static Pair<String, String> yearWeekDateRange(Integer year, Integer weekOfYear, Boolean isWeekend) {
|
||||
Pair<TimexProperty, TimexProperty> yearWeekDateRange = TimexHelpers.yearWeekDateRange(year, weekOfYear, isWeekend);
|
||||
|
||||
return Pair.of(TimexValue.dateValue(yearWeekDateRange.getLeft()), TimexValue.dateValue(yearWeekDateRange.getRight()));
|
||||
}
|
||||
|
||||
private static Pair<String, String> monthWeekDateRange(Integer year, Integer month, Integer weekOfMonth) {
|
||||
Pair<TimexProperty, TimexProperty> monthWeekDateRange = TimexHelpers.monthWeekDateRange(year, month, weekOfMonth);
|
||||
|
||||
return Pair.of(TimexValue.dateValue(monthWeekDateRange.getLeft()), TimexValue.dateValue(monthWeekDateRange.getRight()));
|
||||
}
|
||||
|
||||
private static LocalDateTime generateWeekDate(TimexProperty timex, LocalDateTime date, boolean isBefore) {
|
||||
LocalDateTime start;
|
||||
if (timex.getWeekOfMonth() == null && timex.getWeekOfYear() == null) {
|
||||
DayOfWeek day = timex.getDayOfWeek() == 7 ? DayOfWeek.SUNDAY : DayOfWeek.of(timex.getDayOfWeek());
|
||||
if (isBefore) {
|
||||
start = TimexDateHelpers.dateOfLastDay(day, date);
|
||||
} else {
|
||||
start = TimexDateHelpers.dateOfNextDay(day, date);
|
||||
}
|
||||
} else {
|
||||
Integer dayOfWeek = timex.getDayOfWeek() - 1;
|
||||
Integer year = timex.getYear() != null ? timex.getYear() : date.getYear();
|
||||
if (timex.getWeekOfYear() != null) {
|
||||
Integer weekOfYear = timex.getWeekOfYear();
|
||||
start = TimexHelpers.firstDateOfWeek(year, weekOfYear, Locale.getDefault()).plusDays(dayOfWeek);
|
||||
if (timex.getYear() == null) {
|
||||
if (isBefore && start.isAfter(date)) {
|
||||
start = TimexHelpers.firstDateOfWeek(year - 1, weekOfYear, Locale.getDefault()).plusDays(dayOfWeek);
|
||||
} else if (!isBefore && start.isBefore(date)) {
|
||||
start = TimexHelpers.firstDateOfWeek(year + 1, weekOfYear, Locale.getDefault()).plusDays(dayOfWeek);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Integer month = timex.getMonth() != null ? timex.getMonth() : date.getMonthValue();
|
||||
Integer weekOfMonth = timex.getWeekOfMonth();
|
||||
start = TimexHelpers.generateMonthWeekDateStart(year, month, weekOfMonth).plusDays(dayOfWeek);
|
||||
if (timex.getYear() == null || timex.getMonth() == null) {
|
||||
if (isBefore && start.isAfter(date)) {
|
||||
start = TimexHelpers.generateMonthWeekDateStart(timex.getMonth() != null ? year - 1 : year,
|
||||
timex.getMonth() == null ? month - 1 : month, weekOfMonth).plusDays(dayOfWeek);
|
||||
} else if (!isBefore && start.isBefore(date)) {
|
||||
start = TimexHelpers.generateMonthWeekDateStart(timex.getMonth() != null ? year + 1 : year,
|
||||
timex.getMonth() == null ? month + 1 : month, weekOfMonth).plusDays(dayOfWeek);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveDateRange(TimexProperty timex, LocalDateTime date) {
|
||||
if (timex.getSeason() != null) {
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("daterange");
|
||||
setValue("not resolved");
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
} else {
|
||||
if (timex.getMonth() != null && timex.getWeekOfMonth() != null) {
|
||||
List<Pair<String, String>> yearDateRangeList = getMonthWeekDateRange(
|
||||
timex.getYear() != null ? timex.getYear() : Constants.INVALID_VALUE,
|
||||
timex.getMonth(), timex.getWeekOfMonth(), date.getYear());
|
||||
List<Resolution.Entry> result = new ArrayList<Resolution.Entry>();
|
||||
for (Pair<String, String> yearDateRange : yearDateRangeList) {
|
||||
result.add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("daterange");
|
||||
setStart(yearDateRange.getLeft());
|
||||
setEnd(yearDateRange.getRight());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
if (timex.getYear() != null && timex.getMonth() != null) {
|
||||
Pair<String, String> dateRange = TimexResolver.monthDateRange(timex.getYear(), timex.getMonth());
|
||||
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("daterange");
|
||||
setStart(dateRange.getLeft());
|
||||
setEnd(dateRange.getRight());
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if (timex.getYear() != null && timex.getWeekOfYear() != null) {
|
||||
Pair<String, String> dateRange = TimexResolver.yearWeekDateRange(date.getYear(), timex.getWeekOfYear(),
|
||||
timex.getWeekend());
|
||||
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("daterange");
|
||||
setStart(dateRange.getLeft());
|
||||
setEnd(dateRange.getRight());
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if (timex.getMonth() != null) {
|
||||
Integer y = date.getYear();
|
||||
Pair<String, String> lastYearDateRange = TimexResolver.monthDateRange(y - 1, timex.getMonth());
|
||||
Pair<String, String> thisYearDateRange = TimexResolver.monthDateRange(y, timex.getMonth());
|
||||
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("daterange");
|
||||
setStart(lastYearDateRange.getLeft());
|
||||
setEnd(lastYearDateRange.getRight());
|
||||
}
|
||||
});
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("daterange");
|
||||
setStart(thisYearDateRange.getLeft());
|
||||
setEnd(thisYearDateRange.getRight());
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if (timex.getYear() != null) {
|
||||
Pair<String, String> dateRange = TimexResolver.yearDateRange(timex.getYear());
|
||||
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("daterange");
|
||||
setStart(dateRange.getLeft());
|
||||
setEnd(dateRange.getRight());
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return new ArrayList<Resolution.Entry>();
|
||||
}
|
||||
}
|
||||
|
||||
private static Pair<String, String> partOfDayTimeRange(TimexProperty timex) {
|
||||
switch (timex.getPartOfDay()) {
|
||||
case "MO":
|
||||
return Pair.of("08:00:00", "12:00:00");
|
||||
case "AF":
|
||||
return Pair.of("12:00:00", "16:00:00");
|
||||
case "EV":
|
||||
return Pair.of("16:00:00", "20:00:00");
|
||||
case "NI":
|
||||
return Pair.of("20:00:00", "24:00:00");
|
||||
default:
|
||||
}
|
||||
|
||||
return Pair.of("not resolved", "not resolved");
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveTimeRange(TimexProperty timex, LocalDateTime date) {
|
||||
if (timex.getPartOfDay() != null) {
|
||||
Pair<String, String> range = TimexResolver.partOfDayTimeRange(timex);
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("timerange");
|
||||
setStart(range.getLeft());
|
||||
setEnd(range.getRight());
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
} else {
|
||||
TimexRange range = TimexHelpers.expandTimeRange(timex);
|
||||
return new ArrayList<Resolution.Entry>() {
|
||||
{
|
||||
add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("timerange");
|
||||
setStart(TimexValue.timeValue(range.getStart(), date));
|
||||
setEnd(TimexValue.timeValue(range.getEnd(), date));
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveDateTime(TimexProperty timex, LocalDateTime date) {
|
||||
List<Resolution.Entry> resolvedDates = TimexResolver.resolveDate(timex, date);
|
||||
for (Resolution.Entry resolved : resolvedDates) {
|
||||
resolved.setType("datetime");
|
||||
resolved.setValue(String.format("%1$s %2$s", resolved.getValue(), TimexValue.timeValue(timex, date)));
|
||||
}
|
||||
|
||||
return resolvedDates;
|
||||
}
|
||||
|
||||
private static List<String> getDateValues(TimexProperty timex, LocalDateTime date) {
|
||||
ArrayList<String> result = new ArrayList<String>();
|
||||
if (timex.getYear() != null && timex.getMonth() != null && timex.getDayOfMonth() != null) {
|
||||
result.add(TimexValue.dateValue(timex));
|
||||
} else {
|
||||
result.add(lastDateValue(timex, date));
|
||||
if (timex.getYear() == null) {
|
||||
result.add(nextDateValue(timex, date));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static List<Pair<String, String>> getMonthWeekDateRange(Integer year, Integer month, Integer weekOfMonth,
|
||||
Integer referYear) {
|
||||
List<Pair<String, String>> result = new ArrayList<Pair<String, String>>();
|
||||
if (year == Constants.INVALID_VALUE) {
|
||||
result.add(monthWeekDateRange(referYear - 1, month, weekOfMonth));
|
||||
result.add(monthWeekDateRange(referYear, month, weekOfMonth));
|
||||
} else {
|
||||
result.add(monthWeekDateRange(year, month, weekOfMonth));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static List<Resolution.Entry> resolveDateTimeRange(TimexProperty timex, LocalDateTime date) {
|
||||
if (timex.getPartOfDay() != null) {
|
||||
List<String> dateValues = getDateValues(timex, date);
|
||||
Pair<String, String> timeRange = partOfDayTimeRange(timex);
|
||||
ArrayList<Resolution.Entry> result = new ArrayList<Resolution.Entry>();
|
||||
for (String dateValue : dateValues) {
|
||||
result.add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("datetimerange");
|
||||
setStart(TimexHelpers.formatResolvedDateValue(dateValue, timeRange.getLeft()));
|
||||
setEnd(TimexHelpers.formatResolvedDateValue(dateValue, timeRange.getRight()));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
} else {
|
||||
TimexRange range = TimexHelpers.expandDateTimeRange(timex);
|
||||
List<String> startDateValues = getDateValues(range.getStart(), date);
|
||||
List<String> endDateValues = getDateValues(range.getEnd(), date);
|
||||
List<Resolution.Entry> result = new ArrayList<Resolution.Entry>();
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
|
||||
LocalTime defaultTime = LocalDateTime.MIN.toLocalTime();
|
||||
List<DateRange> dateRanges = Streams
|
||||
.zip(startDateValues.stream(), endDateValues.stream(), (n, w) -> new DateRange() {
|
||||
{
|
||||
setStart(LocalDateTime.of(LocalDate.parse(n, formatter), defaultTime));
|
||||
setEnd(LocalDateTime.of(LocalDate.parse(w, formatter), defaultTime));
|
||||
}
|
||||
}).collect(Collectors.toList());
|
||||
for (DateRange dateRange : dateRanges) {
|
||||
{
|
||||
result.add(new Resolution.Entry() {
|
||||
{
|
||||
setTimex(timex.getTimexValue());
|
||||
setType("datetimerange");
|
||||
setStart(TimexHelpers.formatResolvedDateValue(dateRange.getStart().toLocalDate().toString(),
|
||||
TimexValue.timeValue(range.getStart(), date)));
|
||||
setEnd(TimexHelpers.formatResolvedDateValue(dateRange.getEnd().toLocalDate().toString(),
|
||||
TimexValue.timeValue(range.getEnd(), date)));
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
public class TimexSet {
|
||||
private TimexProperty timex;
|
||||
|
||||
public TimexSet(String timex) {
|
||||
this.timex = new TimexProperty(timex);
|
||||
}
|
||||
|
||||
public TimexProperty getTimex() {
|
||||
return timex;
|
||||
}
|
||||
|
||||
public void setTimex(TimexProperty withTimex) {
|
||||
this.timex = withTimex;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
/**
 * Time units, declared from the largest ({@code Year}) down to the smallest ({@code Second}).
 */
public enum TimexUnit {
    /** Year. */
    Year,

    /** Month. */
    Month,

    /** Week. */
    Week,

    /** Day. */
    Day,

    /** Hour. */
    Hour,

    /** Minute. */
    Minute,

    /** Second. */
    Second
}
|
|
@ -0,0 +1,75 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
public class TimexValue {
|
||||
public static String dateValue(TimexProperty timexProperty) {
|
||||
if (timexProperty.getYear() != null && timexProperty.getMonth() != null && timexProperty.getDayOfMonth() != null) {
|
||||
return String.format("%1$s-%2$s-%3$s", TimexDateHelpers.fixedFormatNumber(timexProperty.getYear(), 4),
|
||||
TimexDateHelpers.fixedFormatNumber(timexProperty.getMonth(), 2),
|
||||
TimexDateHelpers.fixedFormatNumber(timexProperty.getDayOfMonth(), 2));
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
|
||||
public static String timeValue(TimexProperty timexProperty, LocalDateTime date) {
|
||||
if (timexProperty.getHour() != null && timexProperty.getMinute() != null && timexProperty.getSecond() != null) {
|
||||
return String.format("%1$s:%2$s:%3$s", TimexDateHelpers.fixedFormatNumber(timexProperty.getHour(), 2),
|
||||
TimexDateHelpers.fixedFormatNumber(timexProperty.getMinute(), 2),
|
||||
TimexDateHelpers.fixedFormatNumber(timexProperty.getSecond(), 2));
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
|
||||
public static String datetimeValue(TimexProperty timexProperty, LocalDateTime date) {
|
||||
return String.format("%1$s %2$s", TimexValue.dateValue(timexProperty),
|
||||
TimexValue.timeValue(timexProperty, date));
|
||||
}
|
||||
|
||||
public static String durationValue(TimexProperty timexProperty) {
|
||||
BigDecimal duration = new BigDecimal(0);
|
||||
if (timexProperty.getYears() != null) {
|
||||
double value = 31536000 * ((timexProperty.getYears() != null) ? timexProperty.getYears().doubleValue() : 0);
|
||||
duration = duration.add(BigDecimal.valueOf(value));
|
||||
}
|
||||
|
||||
if (timexProperty.getMonths() != null) {
|
||||
double value = 2592000
|
||||
* ((timexProperty.getMonths() != null) ? timexProperty.getMonths().doubleValue() : 0);
|
||||
duration = duration.add(BigDecimal.valueOf(value));
|
||||
}
|
||||
|
||||
if (timexProperty.getWeeks() != null) {
|
||||
double value = 604800 * ((timexProperty.getWeeks() != null) ? timexProperty.getWeeks().doubleValue() : 0);
|
||||
duration = duration.add(BigDecimal.valueOf(value));
|
||||
}
|
||||
|
||||
if (timexProperty.getDays() != null) {
|
||||
double value = 86400 * ((timexProperty.getDays() != null) ? timexProperty.getDays().doubleValue() : 0);
|
||||
duration = duration.add(BigDecimal.valueOf(value));
|
||||
}
|
||||
|
||||
if (timexProperty.getHours() != null) {
|
||||
double value = 3600 * ((timexProperty.getHours() != null) ? timexProperty.getHours().doubleValue() : 0);
|
||||
duration = duration.add(BigDecimal.valueOf(value));
|
||||
}
|
||||
|
||||
if (timexProperty.getMinutes() != null) {
|
||||
double value = 60 * ((timexProperty.getMinutes() != null) ? timexProperty.getMinutes().doubleValue() : 0);
|
||||
duration = duration.add(BigDecimal.valueOf(value));
|
||||
}
|
||||
|
||||
if (timexProperty.getSeconds() != null) {
|
||||
duration = duration.add(BigDecimal.valueOf((timexProperty.getSeconds() != null) ? timexProperty.getSeconds().doubleValue() : 0));
|
||||
}
|
||||
|
||||
duration = BigDecimal.valueOf(duration.intValue());
|
||||
return duration.toPlainString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression.english;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * English words and lookup tables used when rendering TIMEX expressions as text.
 */
public class TimexConstantsEnglish {
    public static final String EVERY = "every";
    public static final String NOW = "now";
    public static final String MIDNIGHT = "midnight";
    public static final String MIDDAY = "midday";
    public static final String TODAY = "today";
    public static final String TOMORROW = "tomorrow";
    public static final String YESTERDAY = "yesterday";
    public static final String WEEKEND = "weekend";
    public static final String TONIGHT = "tonight";
    public static final String THIS = "this";
    public static final String LAST = "last";
    public static final String NEXT = "next";

    /** Day names, Monday first. */
    public static final String[] DAYS = { "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
        "Sunday" };

    /** Month names, January first. */
    public static final String[] MONTHS = { "January", "February", "March", "April", "May", "June", "July", "August",
        "September", "October", "November", "December", };

    /** Ordinal suffixes indexed by the last digit (0-9) of a day number. */
    public static final String[] DATE_ABBREVIATION = { "th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th", };

    /** Spoken hour names indexed by hour of day (0-23). */
    public static final String[] HOURS = { "midnight", "1AM", "2AM", "3AM", "4AM", "5AM", "6AM", "7AM", "8AM", "9AM",
        "10AM", "11AM", "midday", "1PM", "2PM", "3PM", "4PM", "5PM", "6PM", "7PM", "8PM", "9PM", "10PM", "11PM", };

    /** Season names keyed by their two-letter code. */
    public static final Map<String, String> SEASONS = buildSeasons();

    /** Ordinal words for the first four weeks; the last entry previously read "forth". */
    public static final String[] WEEKS = { "first", "second", "third", "fourth", };

    /** Part-of-day names keyed by their two-letter code. */
    public static final Map<String, String> DAY_PARTS = buildDayParts();

    /** Builds the season table as a plain HashMap rather than an anonymous subclass. */
    private static Map<String, String> buildSeasons() {
        Map<String, String> seasons = new HashMap<String, String>();
        seasons.put("SP", "spring");
        seasons.put("SU", "summer");
        seasons.put("FA", "fall");
        seasons.put("WI", "winter");
        return seasons;
    }

    /** Builds the part-of-day table as a plain HashMap rather than an anonymous subclass. */
    private static Map<String, String> buildDayParts() {
        Map<String, String> dayParts = new HashMap<String, String>();
        dayParts.put("DT", "daytime");
        dayParts.put("NI", "night");
        dayParts.put("MO", "morning");
        dayParts.put("AF", "afternoon");
        dayParts.put("EV", "evening");
        return dayParts;
    }
}
|
|
@ -0,0 +1,206 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression.english;
|
||||
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.Constants;
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.TimexInference;
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.TimexProperty;
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.TimexSet;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.HashSet;
|
||||
|
||||
public class TimexConvertEnglish {
|
||||
public static String convertTimexToString(TimexProperty timex) {
|
||||
HashSet<String> types = timex.getTypes().size() != 0 ? timex.getTypes() : TimexInference.infer(timex);
|
||||
|
||||
if (types.contains(Constants.TimexTypes.PRESENT)) {
|
||||
return TimexConstantsEnglish.NOW;
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_TIME_RANGE)) {
|
||||
return TimexConvertEnglish.convertDateTimeRange(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_RANGE)) {
|
||||
return TimexConvertEnglish.convertDateRange(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DURATION)) {
|
||||
return TimexConvertEnglish.convertDuration(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.TIME_RANGE)) {
|
||||
return TimexConvertEnglish.convertTimeRange(timex);
|
||||
}
|
||||
|
||||
// TODO: where appropriate delegate most the formatting delegate to
|
||||
// Date.toLocaleString(options)
|
||||
if (types.contains(Constants.TimexTypes.DATE_TIME)) {
|
||||
return TimexConvertEnglish.convertDateTime(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE)) {
|
||||
return TimexConvertEnglish.convertDate(timex);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.TIME)) {
|
||||
return TimexConvertEnglish.convertTime(timex);
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
|
||||
public static String convertTimexSetToString(TimexSet timexSet) {
|
||||
TimexProperty timex = timexSet.getTimex();
|
||||
if (timex.getTypes().contains(Constants.TimexTypes.DURATION)) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.EVERY,
|
||||
TimexConvertEnglish.convertTimexDurationToString(timex, false));
|
||||
} else {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.EVERY,
|
||||
TimexConvertEnglish.convertTimexToString(timex));
|
||||
}
|
||||
}
|
||||
|
||||
public static String convertTime(TimexProperty timex) {
|
||||
if (timex.getHour() == 0 && timex.getMinute() == 0 && timex.getSecond() == 0) {
|
||||
return TimexConstantsEnglish.MIDNIGHT;
|
||||
}
|
||||
|
||||
if (timex.getHour() == 12 && timex.getMinute() == 0 && timex.getSecond() == 0) {
|
||||
return TimexConstantsEnglish.MIDDAY;
|
||||
}
|
||||
|
||||
String hour = (timex.getHour() == 0) ? "12"
|
||||
: (timex.getHour() > 12) ? String.valueOf(timex.getHour() - 12) : String.valueOf(timex.getHour());
|
||||
String minute = (timex.getMinute() == 0 && timex.getSecond() == 0) ? new String()
|
||||
: Constants.TIME_TIMEX_CONNECTOR
|
||||
+ String.format("%1$2s", String.valueOf(timex.getMinute())).replace(' ', '0');
|
||||
String second = (timex.getSecond() == 0) ? new String()
|
||||
: Constants.TIME_TIMEX_CONNECTOR
|
||||
+ String.format("%1$2s", String.valueOf(timex.getSecond())).replace(' ', '0');
|
||||
String period = timex.getHour() < 12 ? Constants.AM : Constants.PM;
|
||||
|
||||
return String.format("%1$s%2$s%3$s%4$s", hour, minute, second, period);
|
||||
}
|
||||
|
||||
public static String convertDate(TimexProperty timex) {
|
||||
if (timex.getDayOfWeek() != null) {
|
||||
return TimexConstantsEnglish.DAYS[timex.getDayOfWeek() - 1];
|
||||
}
|
||||
|
||||
String date = String.valueOf(timex.getDayOfMonth());
|
||||
|
||||
String abbreviation = TimexConstantsEnglish.DATE_ABBREVIATION[Integer
|
||||
.parseInt(String.valueOf(date.charAt(date.length() - 1)))];
|
||||
|
||||
if (timex.getMonth() != null) {
|
||||
String month = TimexConstantsEnglish.MONTHS[timex.getMonth() - 1];
|
||||
if (timex.getYear() != null) {
|
||||
return String.format("%1$s%2$s %3$s %4$s", date, abbreviation, month, timex.getYear()).trim();
|
||||
}
|
||||
return String.format("%1$s%2$s %3$s", date, abbreviation, month);
|
||||
}
|
||||
return date.concat(abbreviation);
|
||||
}
|
||||
|
||||
private static String convertDurationPropertyToString(BigDecimal value, String property,
|
||||
Boolean includeSingleCount) {
|
||||
if (value.intValue() == 1) {
|
||||
return includeSingleCount ? "1 " + property : property;
|
||||
} else {
|
||||
return String.format("%1$s %2$s%3$s", value, property, Constants.TIME_DURATION_UNIT);
|
||||
}
|
||||
}
|
||||
|
||||
private static String convertTimexDurationToString(TimexProperty timex, Boolean includeSingleCount) {
|
||||
String result = new String();
|
||||
if (timex.getYears() != null) {
|
||||
result += TimexConvertEnglish.convertDurationPropertyToString(timex.getYears(), Constants.YEAR_UNIT,
|
||||
includeSingleCount);
|
||||
}
|
||||
|
||||
if (timex.getMonths() != null) {
|
||||
result += TimexConvertEnglish.convertDurationPropertyToString(timex.getMonths(), Constants.MONTH_UNIT,
|
||||
includeSingleCount);
|
||||
}
|
||||
|
||||
if (timex.getWeeks() != null) {
|
||||
result += TimexConvertEnglish.convertDurationPropertyToString(timex.getWeeks(), Constants.WEEK_UNIT,
|
||||
includeSingleCount);
|
||||
}
|
||||
|
||||
if (timex.getDays() != null) {
|
||||
result += TimexConvertEnglish.convertDurationPropertyToString(timex.getDays(), Constants.DAY_UNIT,
|
||||
includeSingleCount);
|
||||
}
|
||||
|
||||
if (timex.getHours() != null) {
|
||||
result += TimexConvertEnglish.convertDurationPropertyToString(timex.getHours(), Constants.HOUR_UNIT,
|
||||
includeSingleCount);
|
||||
}
|
||||
|
||||
if (timex.getMinutes() != null) {
|
||||
result += TimexConvertEnglish.convertDurationPropertyToString(timex.getMinutes(), Constants.MINUTE_UNIT,
|
||||
includeSingleCount);
|
||||
}
|
||||
|
||||
if (timex.getSeconds() != null) {
|
||||
result += TimexConvertEnglish.convertDurationPropertyToString(timex.getSeconds(), Constants.SECOND_UNIT,
|
||||
includeSingleCount);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static String convertDuration(TimexProperty timex) {
|
||||
return TimexConvertEnglish.convertTimexDurationToString(timex, true);
|
||||
}
|
||||
|
||||
private static String convertDateRange(TimexProperty timex) {
|
||||
String season = (timex.getSeason() != null) ? TimexConstantsEnglish.SEASONS.get(timex.getSeason())
|
||||
: new String();
|
||||
|
||||
String year = (timex.getYear() != null) ? timex.getYear().toString() : new String();
|
||||
|
||||
if (timex.getWeekOfYear() != null) {
|
||||
if (timex.getWeekend() != null) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
if (timex.getMonth() != null) {
|
||||
String month = TimexConstantsEnglish.MONTHS[timex.getMonth() - 1];
|
||||
if (timex.getWeekOfMonth() != null) {
|
||||
return String.format("%1$s week of %2$s", TimexConstantsEnglish.WEEKS[timex.getWeekOfMonth() - 1],
|
||||
month);
|
||||
} else {
|
||||
return String.format("%1$s %2$s", month, year).trim();
|
||||
}
|
||||
}
|
||||
|
||||
return String.format("%1$s %2$s", season, year).trim();
|
||||
}
|
||||
|
||||
private static String convertTimeRange(TimexProperty timex) {
|
||||
return TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay());
|
||||
}
|
||||
|
||||
private static String convertDateTime(TimexProperty timex) {
|
||||
return String.format("%1$s %2$s", TimexConvertEnglish.convertTime(timex),
|
||||
TimexConvertEnglish.convertDate(timex));
|
||||
}
|
||||
|
||||
private static String convertDateTimeRange(TimexProperty timex) {
|
||||
if (timex.getTypes().contains(Constants.TimexTypes.TIME_RANGE)) {
|
||||
return String.format("%1$s %2$s", TimexConvertEnglish.convertDate(timex),
|
||||
TimexConvertEnglish.convertTimeRange(timex));
|
||||
}
|
||||
|
||||
// date + time + duration
|
||||
// - OR -
|
||||
// date + duration
|
||||
return new String();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,184 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.datatypes.timex.expression.english;
|
||||
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.Constants;
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.TimexConvert;
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.TimexDateHelpers;
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.TimexInference;
|
||||
import com.microsoft.recognizers.datatypes.timex.expression.TimexProperty;
|
||||
|
||||
import java.time.DayOfWeek;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.HashSet;
|
||||
|
||||
public class TimexRelativeConvertEnglish {
|
||||
public static String convertTimexToStringRelative(TimexProperty timex, LocalDateTime date) {
|
||||
HashSet<String> types = timex.getTypes().size() != 0 ? timex.getTypes() : TimexInference.infer(timex);
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_TIME_RANGE)) {
|
||||
return TimexRelativeConvertEnglish.convertDateTimeRange(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_RANGE)) {
|
||||
return TimexRelativeConvertEnglish.convertDateRange(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE_TIME)) {
|
||||
return TimexRelativeConvertEnglish.convertDateTime(timex, date);
|
||||
}
|
||||
|
||||
if (types.contains(Constants.TimexTypes.DATE)) {
|
||||
return TimexRelativeConvertEnglish.convertDate(timex, date);
|
||||
}
|
||||
|
||||
return TimexConvert.convertTimexToString(timex);
|
||||
}
|
||||
|
||||
private static String getDateDay(DayOfWeek day) {
|
||||
Integer index = (day.getValue() == 0) ? 6 : day.getValue() - 1;
|
||||
return TimexConstantsEnglish.DAYS[index];
|
||||
}
|
||||
|
||||
private static String convertDate(TimexProperty timex, LocalDateTime date) {
|
||||
if (timex.getYear() != null && timex.getMonth() != null && timex.getDayOfMonth() != null) {
|
||||
LocalDateTime timexDate = LocalDateTime.of(timex.getYear(), timex.getMonth(), timex.getDayOfMonth(), 0, 0);
|
||||
if (TimexDateHelpers.datePartEquals(timexDate, date)) {
|
||||
return TimexConstantsEnglish.TODAY;
|
||||
}
|
||||
|
||||
LocalDateTime tomorrow = TimexDateHelpers.tomorrow(date);
|
||||
if (TimexDateHelpers.datePartEquals(timexDate, tomorrow)) {
|
||||
return TimexConstantsEnglish.TOMORROW;
|
||||
}
|
||||
|
||||
LocalDateTime yesterday = TimexDateHelpers.yesterday(date);
|
||||
if (TimexDateHelpers.datePartEquals(timexDate, yesterday)) {
|
||||
return TimexConstantsEnglish.YESTERDAY;
|
||||
}
|
||||
|
||||
if (TimexDateHelpers.isThisWeek(timexDate, date)) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.THIS,
|
||||
TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()));
|
||||
}
|
||||
|
||||
if (TimexDateHelpers.isNextWeek(timexDate, date)) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.NEXT,
|
||||
TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()));
|
||||
}
|
||||
|
||||
if (TimexDateHelpers.isLastWeek(timexDate, date)) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.LAST,
|
||||
TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()));
|
||||
}
|
||||
}
|
||||
|
||||
return TimexConvertEnglish.convertDate(timex);
|
||||
}
|
||||
|
||||
private static String convertDateTime(TimexProperty timex, LocalDateTime date) {
|
||||
return String.format("%1$s %2$s", TimexRelativeConvertEnglish.convertDate(timex, date),
|
||||
TimexConvertEnglish.convertTime(timex));
|
||||
}
|
||||
|
||||
private static String convertDateRange(TimexProperty timex, LocalDateTime date) {
|
||||
if (timex.getYear() != null) {
|
||||
int year = date.getYear();
|
||||
if (timex.getYear() == year) {
|
||||
if (timex.getWeekOfYear() != null) {
|
||||
Integer thisWeek = TimexDateHelpers.weekOfYear(date);
|
||||
if (thisWeek == timex.getWeekOfYear()) {
|
||||
return timex.getWeekend() != null ? String.format("%1$s %2$s", TimexConstantsEnglish.THIS, TimexConstantsEnglish.WEEKEND)
|
||||
: String.format("%1$s %2$s", TimexConstantsEnglish.THIS, Constants.WEEK_UNIT);
|
||||
}
|
||||
|
||||
if (thisWeek == timex.getWeekOfYear() + 1) {
|
||||
return timex.getWeekend() != null ? String.format("%1$s %2$s", TimexConstantsEnglish.LAST, TimexConstantsEnglish.WEEKEND)
|
||||
: String.format("%1$s %2$s", TimexConstantsEnglish.LAST, Constants.WEEK_UNIT);
|
||||
}
|
||||
|
||||
if (thisWeek == timex.getWeekOfYear() - 1) {
|
||||
return timex.getWeekend() != null ? String.format("%1$s %2$s", TimexConstantsEnglish.NEXT, TimexConstantsEnglish.WEEKEND)
|
||||
: String.format("%1$s %2$s", TimexConstantsEnglish.NEXT, Constants.WEEK_UNIT);
|
||||
}
|
||||
}
|
||||
|
||||
if (timex.getMonth() != null) {
|
||||
if (timex.getMonth() == date.getMonthValue()) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.THIS, Constants.MONTH_UNIT);
|
||||
}
|
||||
|
||||
if (timex.getMonth() == date.getMonthValue() + 1) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.NEXT, Constants.MONTH_UNIT);
|
||||
}
|
||||
|
||||
if (timex.getMonth() == date.getMonthValue() - 1) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.LAST, Constants.MONTH_UNIT);
|
||||
}
|
||||
}
|
||||
|
||||
return (timex.getSeason() != null) ? String.format("%1$s %2$s", TimexConstantsEnglish.THIS,
|
||||
TimexConstantsEnglish.SEASONS.get(timex.getSeason()))
|
||||
: String.format("%1$s %2$s", TimexConstantsEnglish.THIS, Constants.YEAR_UNIT);
|
||||
}
|
||||
|
||||
if (timex.getYear() == year + 1) {
|
||||
return (timex.getSeason() != null) ? String.format("%1$s %2$s", TimexConstantsEnglish.NEXT,
|
||||
TimexConstantsEnglish.SEASONS.get(timex.getSeason()))
|
||||
: String.format("%1$s %2$s", TimexConstantsEnglish.NEXT, Constants.YEAR_UNIT);
|
||||
}
|
||||
|
||||
if (timex.getYear() == year - 1) {
|
||||
return (timex.getSeason() != null) ? String.format("%1$s %2$s", TimexConstantsEnglish.LAST,
|
||||
TimexConstantsEnglish.SEASONS.get(timex.getSeason()))
|
||||
: String.format("%1$s %2$s", TimexConstantsEnglish.LAST, Constants.YEAR_UNIT);
|
||||
}
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
|
||||
private static String convertDateTimeRange(TimexProperty timex, LocalDateTime date) {
|
||||
if (timex.getYear() != null && timex.getMonth() != null && timex.getDayOfMonth() != null) {
|
||||
LocalDateTime timexDate = LocalDateTime.of(timex.getYear(), timex.getMonth(), timex.getDayOfMonth(), 0, 0);
|
||||
|
||||
if (timex.getPartOfDay() != null) {
|
||||
if (TimexDateHelpers.datePartEquals(timexDate, date)) {
|
||||
if (timex.getPartOfDay().equals(Constants.TIMEX_NIGHT)) {
|
||||
return TimexConstantsEnglish.TONIGHT;
|
||||
} else {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.THIS,
|
||||
TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
|
||||
}
|
||||
}
|
||||
|
||||
LocalDateTime tomorrow = TimexDateHelpers.tomorrow(date);
|
||||
if (TimexDateHelpers.datePartEquals(timexDate, tomorrow)) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.TOMORROW,
|
||||
TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
|
||||
}
|
||||
|
||||
LocalDateTime yesterday = TimexDateHelpers.yesterday(date);
|
||||
if (TimexDateHelpers.datePartEquals(timexDate, yesterday)) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.YESTERDAY,
|
||||
TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
|
||||
}
|
||||
|
||||
if (TimexDateHelpers.isNextWeek(timexDate, date)) {
|
||||
return String.format("%1$s %2$s %3$s", TimexConstantsEnglish.NEXT,
|
||||
TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()),
|
||||
TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
|
||||
}
|
||||
|
||||
if (TimexDateHelpers.isLastWeek(timexDate, date)) {
|
||||
return String.format("%1$s %2$s", TimexConstantsEnglish.LAST,
|
||||
TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()),
|
||||
TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new String();
|
||||
}
|
||||
}
|
|
@ -28,7 +28,7 @@ public class NumberRangeExtractor extends BaseNumberRangeExtractor {
|
|||
// less than ... more than ...
|
||||
builder.put(RegExpUtility.getSafeRegExp(EnglishNumeric.TwoNumberRangeRegex3, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.TWONUM);
|
||||
// from ... to/~/- ...
|
||||
builder.put(Pattern.compile(EnglishNumeric.TwoNumberRangeRegex4, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.TWONUMTILL);
|
||||
builder.put(RegExpUtility.getSafeLookbehindRegExp(EnglishNumeric.TwoNumberRangeRegex4, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.TWONUMTILL);
|
||||
// more/greater/higher than ...
|
||||
builder.put(Pattern.compile(EnglishNumeric.OneNumberRangeMoreRegex1, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.MORE);
|
||||
// 30 and/or greater/higher
|
||||
|
@ -38,13 +38,13 @@ public class NumberRangeExtractor extends BaseNumberRangeExtractor {
|
|||
// 30 and/or less/smaller/lower
|
||||
builder.put(Pattern.compile(EnglishNumeric.OneNumberRangeLessRegex2, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.LESS);
|
||||
// equal to ...
|
||||
builder.put(Pattern.compile(EnglishNumeric.OneNumberRangeEqualRegex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.EQUAL);
|
||||
builder.put(RegExpUtility.getSafeLookbehindRegExp(EnglishNumeric.OneNumberRangeEqualRegex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.EQUAL);
|
||||
// equal to 30 or more than, larger than 30 or equal to ...
|
||||
builder.put(RegExpUtility.getSafeRegExp(EnglishNumeric.OneNumberRangeMoreSeparateRegex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS),
|
||||
NumberRangeConstants.MORE);
|
||||
NumberRangeConstants.MORE);
|
||||
// equal to 30 or less, smaller than 30 or equal ...
|
||||
builder.put(RegExpUtility.getSafeRegExp(EnglishNumeric.OneNumberRangeLessSeparateRegex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS),
|
||||
NumberRangeConstants.LESS);
|
||||
NumberRangeConstants.LESS);
|
||||
|
||||
this.regexes = Collections.unmodifiableMap(builder);
|
||||
}
|
||||
|
|
|
@ -51,10 +51,11 @@ public abstract class AbstractNumberModel implements IModel {
|
|||
SortedMap<String, Object> sortedMap = new TreeMap<String, Object>();
|
||||
sortedMap.put(ResolutionKey.Value, o.getResolutionStr());
|
||||
|
||||
// We decreased the end property by 1 in order to keep parity with other platforms (C#/JS).
|
||||
return new ModelResult(
|
||||
o.getText(),
|
||||
o.getStart(),
|
||||
o.getStart() + o.getLength(),
|
||||
o.getStart() + o.getLength() - 1,
|
||||
getModelTypeName(),
|
||||
sortedMap
|
||||
);
|
||||
|
|
|
@ -37,11 +37,15 @@ public final class NumberFormatUtility {
|
|||
|
||||
// EXPONENTIAL_AT: [-5, 15] });
|
||||
// For small positive decimal places. E.g.: 0,000015 or 0,0000015 -> 1.5E-05 or 1.5E-06
|
||||
if (doubleValue > 0 && doubleValue != Math.round(doubleValue) && doubleValue < 1E-4) {
|
||||
result = doubleValue.toString();
|
||||
} else {
|
||||
BigDecimal bc = new BigDecimal(doubleValue, new MathContext(15, RoundingMode.HALF_EVEN));
|
||||
result = bc.toString();
|
||||
try {
|
||||
if (doubleValue > 0 && doubleValue != Math.round(doubleValue) && doubleValue < 1E-4) {
|
||||
result = doubleValue.toString();
|
||||
} else {
|
||||
BigDecimal bc = new BigDecimal(doubleValue, new MathContext(15, RoundingMode.HALF_EVEN));
|
||||
result = bc.toString();
|
||||
}
|
||||
} catch (NumberFormatException ex) {
|
||||
return value.toString();
|
||||
}
|
||||
|
||||
result = result.replace('e', 'E');
|
||||
|
|
|
@ -25,7 +25,7 @@ public class EnglishNumeric {
|
|||
|
||||
public static final Boolean MultiDecimalSeparatorCulture = true;
|
||||
|
||||
public static final String RoundNumberIntegerRegex = "(?:hundred|thousand|million|billion|trillion|lakh|crore)";
|
||||
public static final String RoundNumberIntegerRegex = "(?:hundred|thousand|million|mln|billion|bln|trillion|tln|lakh|crore)s?";
|
||||
|
||||
public static final String ZeroToNineIntegerRegex = "(?:three|seven|eight|four|five|zero|nine|one|two|six)";
|
||||
|
||||
|
@ -319,7 +319,7 @@ public class EnglishNumeric {
|
|||
|
||||
public static final String HalfADozenRegex = "half\\s+a\\s+dozen";
|
||||
|
||||
public static final String DigitalNumberRegex = "((?<=\\b)(hundred|thousand|[mb]illion|trillion|lakh|crore|dozen(s)?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))"
|
||||
public static final String DigitalNumberRegex = "((?<=\\b)(hundred|thousand|[mb]illion|trillion|[mbt]ln|lakh|crore|dozen(s)?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))"
|
||||
.replace("{BaseNumbers.MultiplierLookupRegex}", BaseNumbers.MultiplierLookupRegex);
|
||||
|
||||
public static final ImmutableMap<String, Long> CardinalNumberMap = ImmutableMap.<String, Long>builder()
|
||||
|
@ -358,8 +358,11 @@ public class EnglishNumeric {
|
|||
.put("hundred", 100L)
|
||||
.put("thousand", 1000L)
|
||||
.put("million", 1000000L)
|
||||
.put("mln", 1000000L)
|
||||
.put("billion", 1000000000L)
|
||||
.put("bln", 1000000000L)
|
||||
.put("trillion", 1000000000000L)
|
||||
.put("tln", 1000000000000L)
|
||||
.put("lakh", 100000L)
|
||||
.put("crore", 10000000L)
|
||||
.build();
|
||||
|
@ -439,8 +442,11 @@ public class EnglishNumeric {
|
|||
.put("hundred", 100L)
|
||||
.put("thousand", 1000L)
|
||||
.put("million", 1000000L)
|
||||
.put("mln", 1000000L)
|
||||
.put("billion", 1000000000L)
|
||||
.put("bln", 1000000000L)
|
||||
.put("trillion", 1000000000000L)
|
||||
.put("tln", 1000000000000L)
|
||||
.put("lakh", 100000L)
|
||||
.put("crore", 10000000L)
|
||||
.put("hundredth", 100L)
|
||||
|
|
|
@ -25,11 +25,11 @@ public class GermanNumeric {
|
|||
|
||||
public static final Boolean MultiDecimalSeparatorCulture = false;
|
||||
|
||||
public static final String ZeroToNineIntegerRegex = "(drei|sieben|acht|vier|fuenf|fünf|null|neun|eins|(ein(?!($|\\.|,|!|\\?)))|eine|einer|einen|zwei|zwo|sechs)";
|
||||
public static final String ZeroToNineIntegerRegex = "(drei|sieben|acht|vier|fuenf|fünf|null|neun|eins|(ein(?!($|\\.|,|!|\\?)))|eine[rn]?|zwei|zwo|sechs)";
|
||||
|
||||
public static final String RoundNumberIntegerRegex = "(hundert|einhundert|tausend|(\\s*million\\s*)|(\\s*millionen\\s*)|(\\s*mio\\s*)|(\\s*milliarde\\s*)|(\\s*milliarden\\s*)|(\\s*mrd\\s*)|(\\s*billion\\s*)|(\\s*billionen\\s*))";
|
||||
public static final String RoundNumberIntegerRegex = "((ein)?hundert|tausend|(\\s*(million(en)?|mio|milliarden?|mrd|billion(en)?)\\s*))";
|
||||
|
||||
public static final String AnIntRegex = "(eine|ein)(?=\\s)";
|
||||
public static final String AnIntRegex = "(eine?)(?=\\s)";
|
||||
|
||||
public static final String TenToNineteenIntegerRegex = "(siebzehn|dreizehn|vierzehn|achtzehn|neunzehn|fuenfzehn|sechzehn|elf|zwoelf|zwölf|zehn)";
|
||||
|
||||
|
@ -60,7 +60,7 @@ public class GermanNumeric {
|
|||
public static final String PlaceHolderDefault = "\\D|\\b";
|
||||
|
||||
public static String NumbersWithPlaceHolder(String placeholder) {
|
||||
return "(((?<!\\d+\\s*)-\\s*)|(?<=\\b))\\d+(?!(\\,\\d+[a-zA-Z]))(?={placeholder})"
|
||||
return "(((?<!\\d+(\\s*(K|k|MM?|mil|G|T|B|b))?\\s*)-\\s*)|(?<=\\b))\\d+(?!(,\\d+[a-zA-Z]))(?={placeholder})"
|
||||
.replace("{placeholder}", placeholder);
|
||||
}
|
||||
|
||||
|
@ -80,6 +80,8 @@ public class GermanNumeric {
|
|||
|
||||
public static final String RoundNumberOrdinalRegex = "(hundertst(er|es|en|el|e)?|tausendst(er|es|en|el|e)?|millionst(er|es|en|el|e)?|milliardst(er|es|en|el|e)?|billionst(er|es|en|el|e)?)";
|
||||
|
||||
public static final String RelativeOrdinalRegex = "(?<relativeOrdinal>(ante)?penultimate|letzter|nächster|vorheriger|aktueller)";
|
||||
|
||||
public static final String BasicOrdinalRegex = "(zuerst|erst(er|es|en|e)|zweit(er|es|en|e)?|dritt(er|es|en|el|e)?|viert(er|es|en|el|e)?|fünft(er|es|en|el|e)?|fuenft(er|es|en|el|e)?|sechst(er|es|en|el|e)?|siebt(er|es|en|el|e)?|acht(er|es|en|el|e)?|neunt(er|es|en|el|e)?|zehnt(er|es|en|el|e)?|elft(er|es|en|el|e)?|zwölft(er|es|en|el|e)?|zwoelft(er|es|en|el|e)?|dreizehnt(er|es|en|el|e)?|vierzehnt(er|es|en|el|e)?|fünfzehnt(er|es|en|el|e)?|fuenfzehnt(er|es|en|el|e)?|sechzehnt(er|es|en|el|e)?|siebzehnt(er|es|en|el|e)?|achtzehnt(er|es|en|el|e)?|neunzehnt(er|es|en|el|e)?|zwanzigst(er|es|en|el|e)?|dreißigst(er|es|en|el|e)?|vierziegt(er|es|en|el|e)?|fünfzigst(er|es|en|el|e)?|fuenfzigst(er|es|en|el|e)?|sechzigst(er|es|en|el|e)?|siebzigst(er|es|en|el|e)?|achtzigst(er|es|en|el|e)?|neunzigst(er|es|en|el|e)?)";
|
||||
|
||||
public static final String SuffixBasicOrdinalRegex = "({BasicOrdinalRegex}|({ZeroToNineIntegerRegex}(und|\\s){BasicOrdinalRegex}))"
|
||||
|
@ -166,12 +168,107 @@ public class GermanNumeric {
|
|||
public static final String DoubleAllFloatRegex = "((?<=\\b){AllFloatRegex}(?=\\b))"
|
||||
.replace("{AllFloatRegex}", AllFloatRegex);
|
||||
|
||||
public static final String ConnectorRegex = "(?<spacer>und)";
|
||||
|
||||
public static final String NumberWithSuffixPercentage = "(?<!%)({BaseNumbers.NumberReplaceToken})(\\s*)(%(?!{BaseNumbers.NumberReplaceToken})|prozent(punkte)?\\b)"
|
||||
.replace("{BaseNumbers.NumberReplaceToken}", BaseNumbers.NumberReplaceToken);
|
||||
|
||||
public static final String NumberWithPrefixPercentage = "(Prozent)(\\s*)({BaseNumbers.NumberReplaceToken})"
|
||||
public static final String NumberWithPrefixPercentage = "(prozent)(\\s*)({BaseNumbers.NumberReplaceToken})"
|
||||
.replace("{BaseNumbers.NumberReplaceToken}", BaseNumbers.NumberReplaceToken);
|
||||
|
||||
public static final String TillRegex = "(bis(\\s+zu)?|--|-|—|——|~|–)";
|
||||
|
||||
public static final String MoreRegex = "(?:(größer|höher|mehr)(\\s+als)?|über|darüber(hinaus)?|(?<!<|=)>)";
|
||||
|
||||
public static final String LessRegex = "(?:(weniger|winziger|kleiner|wenig)(\\s+als)?|darunter|unter|(?<!>|=)<)";
|
||||
|
||||
public static final String EqualRegex = "(gleich(\\s+(als|zu))?|(?<!<|>)=)";
|
||||
|
||||
public static final String MoreOrEqualPrefix = "((nicht\\s+{LessRegex})|(als\\s+letzte(r)?))"
|
||||
.replace("{LessRegex}", LessRegex);
|
||||
|
||||
public static final String MoreOrEqual = "(?:({MoreRegex}\\s+(oder)?\\s+{EqualRegex})|({EqualRegex}\\s+(oder)?\\s+{MoreRegex})|{MoreOrEqualPrefix}(\\s+(oder)?\\s+{EqualRegex})?|({EqualRegex}\\s+(oder)?\\s+)?{MoreOrEqualPrefix}|>\\s*=|≥)"
|
||||
.replace("{MoreRegex}", MoreRegex)
|
||||
.replace("{EqualRegex}", EqualRegex)
|
||||
.replace("{LessRegex}", LessRegex)
|
||||
.replace("{MoreOrEqualPrefix}", MoreOrEqualPrefix);
|
||||
|
||||
public static final String MoreOrEqualSuffix = "((und|oder)\\s+(((mehr|größer|höher)((?!\\s+als)|(\\s+als(?!(\\s*\\d+)))))|((über|darüber)(?!\\s+als))))";
|
||||
|
||||
public static final String LessOrEqualPrefix = "((nicht\\s+{MoreRegex})|(at\\s+viele)|(bis\\s+zu))"
|
||||
.replace("{MoreRegex}", MoreRegex);
|
||||
|
||||
public static final String LessOrEqual = "(({LessRegex}\\s+(oder)?\\s+{EqualRegex})|({EqualRegex}\\s+(oder)?\\s+{LessRegex})|{LessOrEqualPrefix}(\\s+(oder)?\\s+{EqualRegex})?|({EqualRegex}\\s+(oder)?\\s+)?{LessOrEqualPrefix}|<\\s*=|≤)"
|
||||
.replace("{LessRegex}", LessRegex)
|
||||
.replace("{EqualRegex}", EqualRegex)
|
||||
.replace("{MoreRegex}", MoreRegex)
|
||||
.replace("{LessOrEqualPrefix}", LessOrEqualPrefix);
|
||||
|
||||
public static final String LessOrEqualSuffix = "((und|oder)\\s+(weniger|geringer|kleiner|winziger)((?!\\s+als)|(\\s+als(?!(\\s*\\d+)))))";
|
||||
|
||||
public static final String NumberSplitMark = "(?![,.](?!\\d+))";
|
||||
|
||||
public static final String MoreRegexNoNumberSucceed = "((größer|mehr|höhrer|breiter)((?!\\s+als)|\\s+(als(?!(\\s*\\d+))))|((dar)?über)(?!(\\s*\\d+)))";
|
||||
|
||||
public static final String LessRegexNoNumberSucceed = "((kleiner|weniger|winziger)((?!\\s+als)|\\s+(als(?!(\\s*\\d+))))|((dar)?unter)(?!(\\s*\\d+)))";
|
||||
|
||||
public static final String EqualRegexNoNumberSucceed = "(gleich(s|ing)?((?!\\s+(zu|als))|(\\s+(zu|als)(?!(\\s*\\d+)))))";
|
||||
|
||||
public static final String OneNumberRangeMoreRegex1 = "({MoreOrEqual}|{MoreRegex})\\s*(der\\s+)?(?<number1>({NumberSplitMark}.)+)"
|
||||
.replace("{MoreOrEqual}", MoreOrEqual)
|
||||
.replace("{MoreRegex}", MoreRegex)
|
||||
.replace("{NumberSplitMark}", NumberSplitMark);
|
||||
|
||||
public static final String OneNumberRangeMoreRegex2 = "(?<number1>({NumberSplitMark}.)+)\\s*{MoreOrEqualSuffix}"
|
||||
.replace("{MoreOrEqualSuffix}", MoreOrEqualSuffix)
|
||||
.replace("{NumberSplitMark}", NumberSplitMark);
|
||||
|
||||
public static final String OneNumberRangeMoreSeparateRegex = "({EqualRegex}\\s+(?<number1>({NumberSplitMark}.)+)(\\s+or\\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\\s+(?<number1>({NumberSplitMark}.)+)(\\s+oder\\s+){EqualRegexNoNumberSucceed})"
|
||||
.replace("{EqualRegex}", EqualRegex)
|
||||
.replace("{MoreRegex}", MoreRegex)
|
||||
.replace("{EqualRegexNoNumberSucceed}", EqualRegexNoNumberSucceed)
|
||||
.replace("{MoreRegexNoNumberSucceed}", MoreRegexNoNumberSucceed)
|
||||
.replace("{NumberSplitMark}", NumberSplitMark);
|
||||
|
||||
public static final String OneNumberRangeLessRegex1 = "({LessOrEqual}|{LessRegex})\\s*(the\\s+)?(?<number2>({NumberSplitMark}.)+)"
|
||||
.replace("{LessOrEqual}", LessOrEqual)
|
||||
.replace("{LessRegex}", LessRegex)
|
||||
.replace("{NumberSplitMark}", NumberSplitMark);
|
||||
|
||||
public static final String OneNumberRangeLessRegex2 = "(?<number2>({NumberSplitMark}.)+)\\s*{LessOrEqualSuffix}"
|
||||
.replace("{LessOrEqualSuffix}", LessOrEqualSuffix)
|
||||
.replace("{NumberSplitMark}", NumberSplitMark);
|
||||
|
||||
public static final String OneNumberRangeLessSeparateRegex = "({EqualRegex}\\s+(?<number1>({NumberSplitMark}.)+)(\\s+or\\s+){LessRegexNoNumberSucceed})|({LessRegex}\\s+(?<number1>({NumberSplitMark}.)+)(\\s+oder\\s+){EqualRegexNoNumberSucceed})"
|
||||
.replace("{EqualRegex}", EqualRegex)
|
||||
.replace("{LessRegex}", LessRegex)
|
||||
.replace("{EqualRegexNoNumberSucceed}", EqualRegexNoNumberSucceed)
|
||||
.replace("{LessRegexNoNumberSucceed}", LessRegexNoNumberSucceed)
|
||||
.replace("{NumberSplitMark}", NumberSplitMark);
|
||||
|
||||
public static final String OneNumberRangeEqualRegex = "{EqualRegex}\\s*(the\\s+)?(?<number1>({NumberSplitMark}.)+)"
|
||||
.replace("{EqualRegex}", EqualRegex)
|
||||
.replace("{NumberSplitMark}", NumberSplitMark);
|
||||
|
||||
public static final String TwoNumberRangeRegex1 = "zwischen\\s*(der\\s+)?(?<number1>({NumberSplitMark}.)+)\\s*und\\s*(der\\s+)?(?<number2>({NumberSplitMark}.)+)"
|
||||
.replace("{NumberSplitMark}", NumberSplitMark);
|
||||
|
||||
public static final String TwoNumberRangeRegex2 = "({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(und|aber|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"
|
||||
.replace("{OneNumberRangeMoreRegex1}", OneNumberRangeMoreRegex1)
|
||||
.replace("{OneNumberRangeMoreRegex2}", OneNumberRangeMoreRegex2)
|
||||
.replace("{OneNumberRangeLessRegex1}", OneNumberRangeLessRegex1)
|
||||
.replace("{OneNumberRangeLessRegex2}", OneNumberRangeLessRegex2);
|
||||
|
||||
public static final String TwoNumberRangeRegex3 = "({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(und|aber|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"
|
||||
.replace("{OneNumberRangeMoreRegex1}", OneNumberRangeMoreRegex1)
|
||||
.replace("{OneNumberRangeMoreRegex2}", OneNumberRangeMoreRegex2)
|
||||
.replace("{OneNumberRangeLessRegex1}", OneNumberRangeLessRegex1)
|
||||
.replace("{OneNumberRangeLessRegex2}", OneNumberRangeLessRegex2);
|
||||
|
||||
public static final String TwoNumberRangeRegex4 = "(von\\s+)?(?<number1>({NumberSplitMark}(?!\\bvon\\b).)+)\\s*{TillRegex}\\s*(der\\s+)?(?<number2>({NumberSplitMark}.)+)"
|
||||
.replace("{TillRegex}", TillRegex)
|
||||
.replace("{NumberSplitMark}", NumberSplitMark);
|
||||
|
||||
public static final String AmbiguousFractionConnectorsRegex = "^[.]";
|
||||
|
||||
public static final Character DecimalSeparatorChar = ',';
|
||||
|
|
|
@ -162,13 +162,13 @@ public class FrenchNumericWithUnit {
|
|||
.put("Ban", "bani|-ban|ban")
|
||||
.put("Roupie népalaise", "roupie népalaise|roupie nepalaise|npr")
|
||||
.put("Roupie pakistanaise", "roupie pakistanaise|pkr")
|
||||
.put("Roupie indienne", "roupie indienne|inr|roupie indien|inr|₹")
|
||||
.put("Roupie indienne", "roupie indienne|roupies indiennes|inr|roupie indien|inr|₹")
|
||||
.put("Roupie seychelloise", "roupie seychelloise|scr|sr|sre")
|
||||
.put("Roupie mauricienne", "roupie mauricienne|mur")
|
||||
.put("Rufiyaa maldives", "rufiyaa maldives|mvr|.ރ|rf")
|
||||
.put("Roupie srilankaise", "roupie srilankaise|lrk|රු|ரூ")
|
||||
.put("Rupiah Indonésie", "rupia indonésie|rupia indonesie|rupiah|rp|idr")
|
||||
.put("Roupie", "roupie")
|
||||
.put("Roupie", "roupie|roupies")
|
||||
.put("Couronne danoise", "couronne danoise|dkk|couronnes danoise|couronne danemark|couronnes danemark")
|
||||
.put("Couronne norvégienne", "couronne norvégienne|couronne norvegienne|couronnes norvégienne|couronnes norvegienne|nok")
|
||||
.put("Couronne féroïenne", "couronne féroïenne|couronne feroienne")
|
||||
|
|
|
@ -161,7 +161,7 @@ public class GermanNumericWithUnit {
|
|||
.put("United Arab Emirates dirham", "vae dirham|vae-dirham|dirham der vereinigten arabischen emirate|د.إ|aed")
|
||||
.put("Azerbaijani manat", "aserbaidschan-manat|azn")
|
||||
.put("Turkmenistan manat", "turkmenistan-manat|tmt")
|
||||
.put("Manat", "manat")
|
||||
.put("Manat", "manat|manats")
|
||||
.put("Qəpik", "qəpik")
|
||||
.put("Somali shilling", "somalia-schilling|sh.so.|sos")
|
||||
.put("Somaliland shilling", "somaliland-schilling")
|
||||
|
@ -179,7 +179,7 @@ public class GermanNumericWithUnit {
|
|||
.put("Maldivian rufiyaa", "maledivischer rufiyaa|maledivische rufiyaa|maledivischen rufiyaa|mvr|.ރ")
|
||||
.put("Sri Lankan rupee", "sri-lanka-rupie|sri-lanka-rupien|lkr|රු|ரூ")
|
||||
.put("Indonesian rupiah", "indonesischer rupiah|indonesische rupiah|indonesischen rupiah|rupiah|perak|rp|idr")
|
||||
.put("Rupee", "rupie|rs")
|
||||
.put("Rupee", "rupie|rupien|rs")
|
||||
.put("Danish krone", "dänische krone|dänischen krone|dänischer kronen|dänische kronen|dänischen kronen|daenische krone|daenischen krone|daenischer kronen|daenische kronen|daenischen kronen|dkk")
|
||||
.put("Norwegian krone", "norwegische krone|norwegischen krone|norwegischer kronen|norwegische kronen|norwegischen kronen|nok")
|
||||
.put("Faroese króna", "färöische króna|färöische krone|färöischen krone|färöischer kronen|färöische kronen|färöischen kronen")
|
||||
|
@ -232,7 +232,7 @@ public class GermanNumericWithUnit {
|
|||
.put("Mexican peso", "mexikanischer peso|mexikanische peso|mexikanischen peso|mxn")
|
||||
.put("Philippine peso", "piso|philippinischer peso|philippinische peso|philippinischen peso|₱|php")
|
||||
.put("Uruguayan peso", "uruguayischer peso|uruguayische peso|uruguayischen peso|uyu")
|
||||
.put("Peso", "peso")
|
||||
.put("Peso", "peso|pesos")
|
||||
.put("Centavo", "centavos|centavo")
|
||||
.put("Alderney pound", "alderney pfund|alderney £")
|
||||
.put("British pound", "britischer pfund|britische pfund|britischen pfund|british £|gbp|pfund sterling")
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence;
|
||||
|
||||
/**
 * String constants shared by the sequence recognizers: extractor type names, model type names
 * and the keys naming individual regular expressions.
 */
public class Constants {

    // Extractor ("builtin.*") type names.
    public static final String SYS_PHONE_NUMBER = "builtin.phonenumber";
    public static final String SYS_IP = "builtin.ip";
    public static final String SYS_MENTION = "builtin.mention";
    public static final String SYS_HASHTAG = "builtin.hashtag";
    public static final String SYS_EMAIL = "builtin.email";
    public static final String SYS_URL = "builtin.url";
    public static final String SYS_GUID = "builtin.guid";

    // Model type names.
    public static final String MODEL_PHONE_NUMBER = "phonenumber";
    public static final String MODEL_IP = "ip";
    public static final String MODEL_MENTION = "mention";
    public static final String MODEL_HASHTAG = "hashtag";
    public static final String MODEL_EMAIL = "email";
    public static final String MODEL_URL = "url";
    public static final String MODEL_GUID = "guid";

    // IP address regex keys and the IPv6 "::" shorthand.
    public static final String IP_REGEX_IPV4 = "ipv4";
    public static final String IP_REGEX_IPV6 = "ipv6";
    public static final String IPV6_ELLIPSIS = "::";

    // Phone number regex keys.
    public static final String PHONE_NUMBER_REGEX_GENERAL = "GeneralPhoneNumber";
    public static final String PHONE_NUMBER_REGEX_BR = "BRPhoneNumber";
    public static final String PHONE_NUMBER_REGEX_UK = "UKPhoneNumber";
    public static final String PHONE_NUMBER_REGEX_DE = "DEPhoneNumber";
    public static final String PHONE_NUMBER_REGEX_US = "USPhoneNumber";
    public static final String PHONE_NUMBER_REGEX_CN = "CNPhoneNumber";
    public static final String PHONE_NUMBER_REGEX_DK = "DKPhoneNumber";
    public static final String PHONE_NUMBER_REGEX_IT = "ITPhoneNumber";
    public static final String PHONE_NUMBER_REGEX_NL = "NLPhoneNumber";
    public static final String PHONE_NUMBER_REGEX_SPECIAL = "SpecialPhoneNumber";

    // Regex keys for the remaining sequence types.
    public static final String MENTION_REGEX = "Mention";
    public static final String HASHTAG_REGEX = "Hashtag";
    public static final String EMAIL_REGEX = "Email";
    public static final String URL_REGEX = "Url";
    public static final String GUID_REGEX = "Guid";
}
|
|
@ -0,0 +1,11 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence;
|
||||
|
||||
/**
 * Options accepted by the sequence recognizer.
 */
public enum SequenceOptions {
    /** None. */
    None,

    /** Relaxed. Likely match, don't perform extra validation. */
    Relaxed
}
|
|
@ -0,0 +1,238 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence;
|
||||
|
||||
import com.microsoft.recognizers.text.Culture;
|
||||
import com.microsoft.recognizers.text.IModel;
|
||||
import com.microsoft.recognizers.text.ModelResult;
|
||||
import com.microsoft.recognizers.text.Recognizer;
|
||||
import com.microsoft.recognizers.text.sequence.config.BaseSequenceConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.english.extractors.EmailExtractor;
|
||||
import com.microsoft.recognizers.text.sequence.english.extractors.EnglishIpExtractorConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.english.extractors.EnglishPhoneNumberExtractorConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.english.extractors.EnglishURLExtractorConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.english.extractors.GUIDExtractor;
|
||||
import com.microsoft.recognizers.text.sequence.english.extractors.HashTagExtractor;
|
||||
import com.microsoft.recognizers.text.sequence.english.extractors.MentionExtractor;
|
||||
import com.microsoft.recognizers.text.sequence.english.parsers.EmailParser;
|
||||
import com.microsoft.recognizers.text.sequence.english.parsers.GUIDParser;
|
||||
import com.microsoft.recognizers.text.sequence.english.parsers.HashTagParser;
|
||||
import com.microsoft.recognizers.text.sequence.english.parsers.IpParser;
|
||||
import com.microsoft.recognizers.text.sequence.english.parsers.MentionParser;
|
||||
import com.microsoft.recognizers.text.sequence.english.parsers.PhoneNumberParser;
|
||||
import com.microsoft.recognizers.text.sequence.english.parsers.URLParser;
|
||||
import com.microsoft.recognizers.text.sequence.extractors.BaseIpExtractor;
|
||||
import com.microsoft.recognizers.text.sequence.extractors.BasePhoneNumberExtractor;
|
||||
import com.microsoft.recognizers.text.sequence.extractors.BasePhoneNumberExtractorConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.extractors.BaseURLExtractor;
|
||||
import com.microsoft.recognizers.text.sequence.models.EmailModel;
|
||||
import com.microsoft.recognizers.text.sequence.models.GUIDModel;
|
||||
import com.microsoft.recognizers.text.sequence.models.HashTagModel;
|
||||
import com.microsoft.recognizers.text.sequence.models.IpAddressModel;
|
||||
import com.microsoft.recognizers.text.sequence.models.MentionModel;
|
||||
import com.microsoft.recognizers.text.sequence.models.PhoneNumberModel;
|
||||
import com.microsoft.recognizers.text.sequence.models.URLModel;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class SequenceRecognizer extends Recognizer<SequenceOptions> {
|
||||
public SequenceRecognizer() {
|
||||
this(null, SequenceOptions.None, true);
|
||||
}
|
||||
|
||||
public SequenceRecognizer(String culture) {
|
||||
this(culture, SequenceOptions.None, false);
|
||||
}
|
||||
|
||||
public SequenceRecognizer(String targetCulture, SequenceOptions options, boolean lazyInitialization) {
|
||||
super(targetCulture, options, lazyInitialization);
|
||||
}
|
||||
|
||||
public SequenceRecognizer(String targetCulture, int options, boolean lazyInitialization) {
|
||||
this(targetCulture, SequenceOptions.values()[options], lazyInitialization);
|
||||
}
|
||||
|
||||
public SequenceRecognizer(int options, boolean lazyInitialization) {
|
||||
this(null, SequenceOptions.values()[options], lazyInitialization);
|
||||
}
|
||||
|
||||
public SequenceRecognizer(SequenceOptions options, boolean lazyInitialization) {
|
||||
this(null, options, lazyInitialization);
|
||||
}
|
||||
|
||||
public SequenceRecognizer(boolean lazyInitialization) {
|
||||
this(null, SequenceOptions.None, lazyInitialization);
|
||||
}
|
||||
|
||||
public SequenceRecognizer(int options) {
|
||||
this(null, SequenceOptions.values()[options], true);
|
||||
}
|
||||
|
||||
public SequenceRecognizer(SequenceOptions options) {
|
||||
this(null, options, true);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizePhoneNumber(String query, String culture) {
|
||||
return recognizePhoneNumber(query, culture, SequenceOptions.None, true);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizePhoneNumber(String query, String culture, SequenceOptions options,
|
||||
Boolean fallbackToDefaultCulture) {
|
||||
options = options != null ? options : SequenceOptions.None;
|
||||
return SequenceRecognizer.recognizeByModel(recognizer -> ((SequenceRecognizer)recognizer).getPhoneNumberModel(culture, fallbackToDefaultCulture),
|
||||
query, options);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeIpAddress(String query, String culture) {
|
||||
return recognizeIpAddress(query, culture, SequenceOptions.None, true);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeIpAddress(String query, String culture, SequenceOptions options,
|
||||
Boolean fallbackToDefaultCulture) {
|
||||
options = options != null ? options : SequenceOptions.None;
|
||||
return SequenceRecognizer.recognizeByModel(recognizer -> ((SequenceRecognizer)recognizer).getIpAddressModel(culture, fallbackToDefaultCulture),
|
||||
query, options);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeMention(String query, String culture) {
|
||||
return recognizeMention(query, culture, SequenceOptions.None, true);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeMention(String query, String culture, SequenceOptions options,
|
||||
Boolean fallbackToDefaultCulture) {
|
||||
options = options != null ? options : SequenceOptions.None;
|
||||
return SequenceRecognizer.recognizeByModel(recognizer -> ((SequenceRecognizer)recognizer).getMentionModel(culture, fallbackToDefaultCulture),
|
||||
query, options);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeHashtag(String query, String culture) {
|
||||
return recognizeHashtag(query, culture, SequenceOptions.None, true);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeHashtag(String query, String culture, SequenceOptions options,
|
||||
Boolean fallbackToDefaultCulture) {
|
||||
options = options != null ? options : SequenceOptions.None;
|
||||
return SequenceRecognizer.recognizeByModel(recognizer -> ((SequenceRecognizer)recognizer).getHashtagModel(culture, fallbackToDefaultCulture),
|
||||
query, options);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeEmail(String query, String culture) {
|
||||
return recognizeEmail(query, culture, SequenceOptions.None, true);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeEmail(String query, String culture, SequenceOptions options,
|
||||
Boolean fallbackToDefaultCulture) {
|
||||
options = options != null ? options : SequenceOptions.None;
|
||||
return SequenceRecognizer.recognizeByModel(recognizer -> ((SequenceRecognizer)recognizer).getEmailModel(culture, fallbackToDefaultCulture), query,
|
||||
options);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeURL(String query, String culture) {
|
||||
return recognizeURL(query, culture, SequenceOptions.None, true);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeURL(String query, String culture, SequenceOptions options,
|
||||
Boolean fallbackToDefaultCulture) {
|
||||
options = options != null ? options : SequenceOptions.None;
|
||||
return SequenceRecognizer.recognizeByModel(recognizer -> ((SequenceRecognizer)recognizer).getURLModel(culture, fallbackToDefaultCulture), query,
|
||||
options);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeGUID(String query, String culture) {
|
||||
return recognizeGUID(query, culture, SequenceOptions.None, true);
|
||||
}
|
||||
|
||||
public static List<ModelResult> recognizeGUID(String query, String culture, SequenceOptions options,
|
||||
Boolean fallbackToDefaultCulture) {
|
||||
options = options != null ? options : SequenceOptions.None;
|
||||
return SequenceRecognizer.recognizeByModel(recognizer -> ((SequenceRecognizer)recognizer).getGUIDModel(culture, fallbackToDefaultCulture), query,
|
||||
options);
|
||||
}
|
||||
|
||||
//region Helper methods for less verbosity
|
||||
public IModel getPhoneNumberModel() {
|
||||
return getPhoneNumberModel(null, true);
|
||||
}
|
||||
|
||||
public IModel getPhoneNumberModel(String culture, Boolean fallbackToDefaultCulture) {
|
||||
fallbackToDefaultCulture = fallbackToDefaultCulture != null ? fallbackToDefaultCulture : true;
|
||||
if (culture != null && (culture.toLowerCase(Locale.ROOT).startsWith("zh-") ||
|
||||
culture.toLowerCase(Locale.ROOT).startsWith("ja-"))) {
|
||||
return this.getModel(PhoneNumberModel.class, Culture.Chinese, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
return this.getModel(PhoneNumberModel.class, culture, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
public IModel getIpAddressModel(String culture, Boolean fallbackToDefaultCulture) {
|
||||
fallbackToDefaultCulture = fallbackToDefaultCulture != null ? fallbackToDefaultCulture : true;
|
||||
if (culture != null && (culture.toLowerCase(Locale.ROOT).startsWith("zh-") || culture.toLowerCase(Locale.ROOT).startsWith("ja-"))) {
|
||||
return this.getModel(IpAddressModel.class, Culture.Chinese, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
return this.getModel(IpAddressModel.class, Culture.English, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
public IModel getMentionModel(String culture, Boolean fallbackToDefaultCulture) {
|
||||
fallbackToDefaultCulture = fallbackToDefaultCulture != null ? fallbackToDefaultCulture : true;
|
||||
return this.getModel(MentionModel.class, Culture.English, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
public IModel getHashtagModel(String culture, Boolean fallbackToDefaultCulture) {
|
||||
fallbackToDefaultCulture = fallbackToDefaultCulture != null ? fallbackToDefaultCulture : true;
|
||||
return this.getModel(HashTagModel.class, Culture.English, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
public IModel getEmailModel(String culture, Boolean fallbackToDefaultCulture) {
|
||||
fallbackToDefaultCulture = fallbackToDefaultCulture != null ? fallbackToDefaultCulture : true;
|
||||
return this.getModel(EmailModel.class, Culture.English, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
public IModel getURLModel(String culture, Boolean fallbackToDefaultCulture) {
|
||||
fallbackToDefaultCulture = fallbackToDefaultCulture != null ? fallbackToDefaultCulture : true;
|
||||
if (culture != null && (culture.toLowerCase(Locale.ROOT).startsWith("zh-") ||
|
||||
culture.toLowerCase(Locale.ROOT).startsWith("ja-"))) {
|
||||
return this.getModel(URLModel.class, Culture.Chinese, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
return this.getModel(URLModel.class, Culture.English, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
public IModel getGUIDModel(String culture, Boolean fallbackToDefaultCulture) {
|
||||
fallbackToDefaultCulture = fallbackToDefaultCulture != null ? fallbackToDefaultCulture : true;
|
||||
return this.getModel(GUIDModel.class, Culture.English, fallbackToDefaultCulture);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void initializeConfiguration() {
|
||||
this.registerModel(PhoneNumberModel.class, Culture.English, (options) -> new PhoneNumberModel(new PhoneNumberParser(),
|
||||
new BasePhoneNumberExtractor(new EnglishPhoneNumberExtractorConfiguration(options))));
|
||||
|
||||
this.registerModel(PhoneNumberModel.class, Culture.Spanish, (options) -> new PhoneNumberModel(
|
||||
new PhoneNumberParser(), new BasePhoneNumberExtractor(new BasePhoneNumberExtractorConfiguration(options))));
|
||||
|
||||
this.registerModel(IpAddressModel.class, Culture.English, (options) -> new IpAddressModel(new IpParser(),
|
||||
new BaseIpExtractor(new EnglishIpExtractorConfiguration(options))));
|
||||
|
||||
this.registerModel(MentionModel.class, Culture.English, (options) -> new MentionModel(new MentionParser(), new MentionExtractor()));
|
||||
|
||||
this.registerModel(HashTagModel.class, Culture.English, (options) -> new HashTagModel(new HashTagParser(), new HashTagExtractor()));
|
||||
|
||||
this.registerModel(EmailModel.class, Culture.English, (options) -> new EmailModel(new EmailParser(new BaseSequenceConfiguration(options)),
|
||||
new EmailExtractor(new BaseSequenceConfiguration(options))));
|
||||
|
||||
this.registerModel(URLModel.class, Culture.English, (options) -> new URLModel(new URLParser(),
|
||||
new BaseURLExtractor(new EnglishURLExtractorConfiguration(options))));
|
||||
|
||||
this.registerModel(GUIDModel.class, Culture.English, (options) -> new GUIDModel(new GUIDParser(), new GUIDExtractor()));
|
||||
}
|
||||
|
||||
private static List<ModelResult> recognizeByModel(Function getModelFunc, String query, SequenceOptions options) {
|
||||
SequenceRecognizer recognizer = new SequenceRecognizer(options, false);
|
||||
IModel model = (IModel)getModelFunc.apply(recognizer);
|
||||
return model.parse(query);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.config;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
|
||||
public class BaseSequenceConfiguration implements ISequenceConfiguration {
|
||||
private SequenceOptions options;
|
||||
|
||||
public BaseSequenceConfiguration(SequenceOptions options) {
|
||||
this.options = options != null ? options : SequenceOptions.None;
|
||||
}
|
||||
|
||||
public SequenceOptions getOptions() {
|
||||
return this.options;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.config;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
|
||||
public interface ISequenceConfiguration {
|
||||
SequenceOptions getOptions();
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.config;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class IpConfiguration implements ISequenceConfiguration {
|
||||
private SequenceOptions options;
|
||||
private Pattern ipv4Regex;
|
||||
private Pattern ipv6Regex;
|
||||
|
||||
public IpConfiguration(SequenceOptions options) {
|
||||
this.options = options != null ? options : SequenceOptions.None;
|
||||
}
|
||||
|
||||
public SequenceOptions getOptions() {
|
||||
return options;
|
||||
}
|
||||
|
||||
public Pattern getIpv4Regex() {
|
||||
return ipv4Regex;
|
||||
}
|
||||
|
||||
public void setIpv4Regex(Pattern withIpv4Regex) {
|
||||
ipv4Regex = withIpv4Regex;
|
||||
}
|
||||
|
||||
public Pattern getIpv6Regex() {
|
||||
return ipv6Regex;
|
||||
}
|
||||
|
||||
public void setIpv6Regex(Pattern withIpv6Regex) {
|
||||
ipv6Regex = withIpv6Regex;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.config;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class PhoneNumberConfiguration implements ISequenceConfiguration {
|
||||
private SequenceOptions options;
|
||||
private Pattern falsePositivePrefixRegex;
|
||||
private String wordBoundariesRegex;
|
||||
private String nonWordBoundariesRegex;
|
||||
private String endWordBoundariesRegex;
|
||||
private Pattern colonPrefixCheckRegex;
|
||||
private List<Character> colonMarkers;
|
||||
private List<Character> forbiddenPrefixMarkers;
|
||||
private List<Character> forbiddenSuffixMarkers;
|
||||
|
||||
public PhoneNumberConfiguration(SequenceOptions options) {
|
||||
this.options = options != null ? options : SequenceOptions.None;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceOptions getOptions() {
|
||||
return this.options;
|
||||
}
|
||||
|
||||
public Pattern getFalsePositivePrefixRegex() {
|
||||
return falsePositivePrefixRegex;
|
||||
}
|
||||
|
||||
public void setFalsePositivePrefixRegex(Pattern withFalsePositivePrefixRegex) {
|
||||
this.falsePositivePrefixRegex = withFalsePositivePrefixRegex;
|
||||
}
|
||||
|
||||
public String getWordBoundariesRegex() {
|
||||
return wordBoundariesRegex;
|
||||
}
|
||||
|
||||
public void setWordBoundariesRegex(String wordBoundariesRegex) {
|
||||
this.wordBoundariesRegex = wordBoundariesRegex;
|
||||
}
|
||||
|
||||
public String getNonWordBoundariesRegex() {
|
||||
return nonWordBoundariesRegex;
|
||||
}
|
||||
|
||||
public void setNonWordBoundariesRegex(String withNonWordBoundariesRegex) {
|
||||
this.nonWordBoundariesRegex = withNonWordBoundariesRegex;
|
||||
}
|
||||
|
||||
public String getEndWordBoundariesRegex() {
|
||||
return endWordBoundariesRegex;
|
||||
}
|
||||
|
||||
public void setEndWordBoundariesRegex(String withEndWordBoundariesRegex) {
|
||||
this.endWordBoundariesRegex = withEndWordBoundariesRegex;
|
||||
}
|
||||
|
||||
public Pattern getColonPrefixCheckRegex() {
|
||||
return colonPrefixCheckRegex;
|
||||
}
|
||||
|
||||
public void setColonPrefixCheckRegex(Pattern withColonPrefixCheckRegex) {
|
||||
this.colonPrefixCheckRegex = withColonPrefixCheckRegex;
|
||||
}
|
||||
|
||||
public List<Character> getColonMarkers() {
|
||||
return colonMarkers;
|
||||
}
|
||||
|
||||
public void setColonMarkers(List<Character> withColonMarkers) {
|
||||
this.colonMarkers = withColonMarkers;
|
||||
}
|
||||
|
||||
public List<Character> getForbiddenPrefixMarkers() {
|
||||
return forbiddenPrefixMarkers;
|
||||
}
|
||||
|
||||
public void setForbiddenPrefixMarkers(List<Character> withForbiddenPrefixMarkers) {
|
||||
this.forbiddenPrefixMarkers = withForbiddenPrefixMarkers;
|
||||
}
|
||||
|
||||
public List<Character> getForbiddenSuffixMarkers() {
|
||||
return forbiddenSuffixMarkers;
|
||||
}
|
||||
|
||||
public void setForbiddenSuffixMarkers(List<Character> withForbiddenSuffixMarkers) {
|
||||
this.forbiddenSuffixMarkers = withForbiddenSuffixMarkers;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.config;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class URLConfiguration implements ISequenceConfiguration {
|
||||
private SequenceOptions options;
|
||||
private Pattern ipUrlRegex;
|
||||
private Pattern urlRegex;
|
||||
|
||||
public URLConfiguration(SequenceOptions options) {
|
||||
this.options = options != null ? options : SequenceOptions.None;
|
||||
}
|
||||
|
||||
public SequenceOptions getOptions() {
|
||||
return this.options;
|
||||
}
|
||||
|
||||
public Pattern getIpUrlRegex() {
|
||||
return ipUrlRegex;
|
||||
}
|
||||
|
||||
public void setIpUrlRegex(Pattern ipUrlRegex) {
|
||||
this.ipUrlRegex = ipUrlRegex;
|
||||
}
|
||||
|
||||
public Pattern getUrlRegex() {
|
||||
return urlRegex;
|
||||
}
|
||||
|
||||
public void setUrlRegex(Pattern urlRegex) {
|
||||
this.urlRegex = urlRegex;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.config.BaseSequenceConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.extractors.BaseEmailExtractor;
|
||||
|
||||
public class EmailExtractor extends BaseEmailExtractor {
|
||||
public EmailExtractor(BaseSequenceConfiguration config) {
|
||||
super(config);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
import com.microsoft.recognizers.text.sequence.config.IpConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BaseIp;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class EnglishIpExtractorConfiguration extends IpConfiguration {
|
||||
public EnglishIpExtractorConfiguration(SequenceOptions options) {
|
||||
super(options);
|
||||
|
||||
this.setIpv4Regex(Pattern.compile(BaseIp.Ipv4Regex));
|
||||
this.setIpv6Regex(Pattern.compile(BaseIp.Ipv6Regex));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
import com.microsoft.recognizers.text.sequence.extractors.BasePhoneNumberExtractorConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.resources.EnglishPhoneNumbers;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class EnglishPhoneNumberExtractorConfiguration extends BasePhoneNumberExtractorConfiguration {
|
||||
public EnglishPhoneNumberExtractorConfiguration(SequenceOptions options) {
|
||||
super(options);
|
||||
|
||||
super.setFalsePositivePrefixRegex(Pattern.compile(EnglishPhoneNumbers.FalsePositivePrefixRegex));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
import com.microsoft.recognizers.text.sequence.config.URLConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BaseURL;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class EnglishURLExtractorConfiguration extends URLConfiguration {
|
||||
public EnglishURLExtractorConfiguration(SequenceOptions options) {
|
||||
super(options);
|
||||
|
||||
this.setIpUrlRegex(Pattern.compile(BaseURL.IpUrlRegex));
|
||||
this.setUrlRegex(Pattern.compile(BaseURL.UrlRegex));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.extractors.BaseGUIDExtractor;
|
||||
|
||||
public class GUIDExtractor extends BaseGUIDExtractor {
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.extractors.BaseHashTagExtractor;
|
||||
|
||||
public class HashTagExtractor extends BaseHashTagExtractor {
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.extractors.BaseMentionExtractor;
|
||||
|
||||
public class MentionExtractor extends BaseMentionExtractor {
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.parsers;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.config.BaseSequenceConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
|
||||
|
||||
public class EmailParser extends BaseSequenceParser {
|
||||
private BaseSequenceConfiguration config;
|
||||
|
||||
public EmailParser(BaseSequenceConfiguration config) {
|
||||
this.config = config;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.parsers;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.ParseResult;
|
||||
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BaseGUID;
|
||||
import com.microsoft.recognizers.text.utilities.Match;
|
||||
import com.microsoft.recognizers.text.utilities.RegExpUtility;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class GUIDParser extends BaseSequenceParser {
|
||||
private static Double SCORE_UPPER_LIMIT = 100d;
|
||||
private static Double SCORE_LOWER_LIMIT = 0d;
|
||||
private static Double BASE_SCORE = 100d;
|
||||
private static Double NO_BOUNDARY_PENALTY = 10d;
|
||||
private static Double NO_FORMAT_PENALTY = 10d;
|
||||
private static Double PURE_DIGIT_PENALTY = 15d;
|
||||
private static String PURE_DIGIT_REGEX = "^\\d*$";
|
||||
private static String FORMAT_REGEX = "-";
|
||||
|
||||
private static final Pattern GUID_ELEMENT_REGEX = Pattern.compile(BaseGUID.GUIDRegexElement);
|
||||
|
||||
public static Double scoreGUID(String textGUID) {
|
||||
Double score = BASE_SCORE;
|
||||
|
||||
Match[] elementMatch = RegExpUtility.getMatches(GUID_ELEMENT_REGEX, textGUID);
|
||||
if (elementMatch.length > 0) {
|
||||
Integer startIndex = elementMatch[0].index;
|
||||
String guidElement = elementMatch[0].value;
|
||||
score -= startIndex == 0 ? NO_BOUNDARY_PENALTY : 0;
|
||||
score -= Pattern.compile(FORMAT_REGEX).matcher(guidElement).find() ? 0 : NO_FORMAT_PENALTY;
|
||||
score -= Pattern.compile(PURE_DIGIT_REGEX).matcher(textGUID).find() ? PURE_DIGIT_PENALTY : 0;
|
||||
}
|
||||
|
||||
return Math.max(Math.min(score, SCORE_UPPER_LIMIT), SCORE_LOWER_LIMIT)
|
||||
/ (SCORE_UPPER_LIMIT - SCORE_LOWER_LIMIT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseResult parse(ExtractResult extResult) {
|
||||
return new ParseResult(extResult.getStart(), extResult.getLength(), extResult.getText(), extResult.getType(),
|
||||
null, GUIDParser.scoreGUID(extResult.getText()), extResult.getText());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.parsers;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
|
||||
|
||||
public class HashTagParser extends BaseSequenceParser {
|
||||
public HashTagParser() {
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.parsers;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.parsers.BaseIpParser;
|
||||
|
||||
public class IpParser extends BaseIpParser {
|
||||
public IpParser() {
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.parsers;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
|
||||
|
||||
public class MentionParser extends BaseSequenceParser {
|
||||
public MentionParser() {
|
||||
}
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.parsers;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.ParseResult;
|
||||
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BasePhoneNumbers;
|
||||
import com.microsoft.recognizers.text.utilities.Match;
|
||||
import com.microsoft.recognizers.text.utilities.RegExpUtility;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class PhoneNumberParser extends BaseSequenceParser {
|
||||
private static Double SCORE_UPPER_LIMIT = 100d;
|
||||
private static Double SCORE_LOWER_LIMIT = 0d;
|
||||
private static Double BASE_SCORE = 30d;
|
||||
private static Double COUNTRY_CODE_AWARD = 40d;
|
||||
private static Double AREA_CODE_AWARD = 30d;
|
||||
private static Double FORMATTED_AWARD = 20d;
|
||||
private static Double LENGTH_AWARD = 10d;
|
||||
private static Double TYPICAL_FORMAT_DEDUCTION_SCORE = 40d;
|
||||
private static Double CONTINUE_DIGIT_DEDUCTION_SCORE = 10d;
|
||||
private static Double TAIL_SAME_DEDUCTION_SCORE = 10d;
|
||||
private static Double CONTINUE_FORMAT_INDICATOR_DEDUCTION_SCORE = 20d;
|
||||
private static Double WRONG_FORMAT_DEDUCTION_SCORE = 20d;
|
||||
private static Integer MAX_FORMAT_INDICATOR_NUM = 3;
|
||||
private static Integer MAX_LENGTH_AWARD_NUM = 3;
|
||||
private static Integer TAIL_SAME_LIMIT = 2;
|
||||
private static Integer PHONE_NUMBER_LENGTH_BASE = 8;
|
||||
private static Integer PURE_DIGIT_LENGTH_LIMIT = 11;
|
||||
|
||||
// @TODO move regexes to base resource files
|
||||
private static String COMPLETE_BRACKET_REGEX = "\\(.*\\)";
|
||||
private static String SINGLE_BRACKER_REGEX = "\\(|\\)";
|
||||
private static String TAIL_SAME_DIGIT_REGEX = "([\\d])\\1{2,10}$";
|
||||
private static String PURE_DIGIT_REGEX = "^\\d*$";
|
||||
private static String CONTINUE_DIGIT_REGEX = "\\d{5}\\d*";
|
||||
private static String DIGIT_REGEX = "\\d";
|
||||
|
||||
private static final Pattern COUNTRY_CODE_REGEX = Pattern.compile(BasePhoneNumbers.CountryCodeRegex);
|
||||
private static final Pattern AREA_CODE_REGEX = Pattern.compile(BasePhoneNumbers.AreaCodeIndicatorRegex);
|
||||
private static final Pattern FORMAT_INDICATOR_REGEX = Pattern.compile(BasePhoneNumbers.FormatIndicatorRegex);
|
||||
private static final Pattern NO_AREA_CODE_US_PHONE_NUMBER_REGEX = Pattern
|
||||
.compile(BasePhoneNumbers.NoAreaCodeUSPhoneNumberRegex);
|
||||
|
||||
public static Double scorePhoneNumber(String phoneNumberText) {
|
||||
Double score = BASE_SCORE;
|
||||
|
||||
// Country code score or area code score
|
||||
score += COUNTRY_CODE_REGEX.matcher(phoneNumberText).find() ? COUNTRY_CODE_AWARD
|
||||
: AREA_CODE_REGEX.matcher(phoneNumberText).find() ? AREA_CODE_AWARD : 0;
|
||||
|
||||
// Formatted score
|
||||
Match[] formatMatches = RegExpUtility.getMatches(FORMAT_INDICATOR_REGEX, phoneNumberText);
|
||||
if (formatMatches.length > 0) {
|
||||
Integer formatIndicatorCount = formatMatches.length;
|
||||
score += Math.min(formatIndicatorCount, MAX_FORMAT_INDICATOR_NUM) * FORMATTED_AWARD;
|
||||
Boolean anyMatch = Arrays.stream(formatMatches).anyMatch(match -> match.value.length() > 1);
|
||||
score -= anyMatch ? CONTINUE_FORMAT_INDICATOR_DEDUCTION_SCORE : 0;
|
||||
if (Pattern.matches(SINGLE_BRACKER_REGEX, phoneNumberText) && !Pattern.matches(COMPLETE_BRACKET_REGEX, phoneNumberText)) {
|
||||
score -= WRONG_FORMAT_DEDUCTION_SCORE;
|
||||
}
|
||||
}
|
||||
|
||||
// Length score
|
||||
score += Math.min(RegExpUtility.getMatches(Pattern.compile(DIGIT_REGEX), phoneNumberText).length
|
||||
- PHONE_NUMBER_LENGTH_BASE, MAX_LENGTH_AWARD_NUM) * LENGTH_AWARD;
|
||||
|
||||
// Same tailing digit deduction
|
||||
Match[] tailSameDigitMatches = RegExpUtility.getMatches(Pattern.compile(TAIL_SAME_DIGIT_REGEX),
|
||||
phoneNumberText);
|
||||
if (tailSameDigitMatches.length > 0) {
|
||||
score -= (tailSameDigitMatches[0].value.length() - TAIL_SAME_LIMIT) * TAIL_SAME_DEDUCTION_SCORE;
|
||||
}
|
||||
|
||||
// Pure digit deduction
|
||||
Match[] pureDigitMatches = RegExpUtility.getMatches(Pattern.compile(PURE_DIGIT_REGEX), phoneNumberText);
|
||||
if (pureDigitMatches.length > 0) {
|
||||
score -= phoneNumberText.length() > PURE_DIGIT_LENGTH_LIMIT ? (phoneNumberText.length() - PURE_DIGIT_LENGTH_LIMIT) * LENGTH_AWARD
|
||||
: 0;
|
||||
}
|
||||
|
||||
// Special format deduction
|
||||
score -= BasePhoneNumbers.TypicalDeductionRegexList.stream().anyMatch(o -> Pattern.compile(o).matcher(phoneNumberText).find()) ? TYPICAL_FORMAT_DEDUCTION_SCORE : 0;
|
||||
|
||||
// Continue digit deduction
|
||||
Match[] continueDigitMatches = RegExpUtility.getMatches(Pattern.compile(CONTINUE_DIGIT_REGEX), phoneNumberText);
|
||||
score -= Math.max(continueDigitMatches.length - 1, 0) * CONTINUE_DIGIT_DEDUCTION_SCORE;
|
||||
|
||||
// Special award for US phonenumber without area code, i.e. 223-4567 or 223 -
|
||||
// 4567
|
||||
if (NO_AREA_CODE_US_PHONE_NUMBER_REGEX.matcher(phoneNumberText).find()) {
|
||||
score += LENGTH_AWARD * 1.5;
|
||||
}
|
||||
|
||||
return Math.max(Math.min(score, SCORE_UPPER_LIMIT), SCORE_LOWER_LIMIT)
|
||||
/ (SCORE_UPPER_LIMIT - SCORE_LOWER_LIMIT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseResult parse(ExtractResult extResult) {
|
||||
return new ParseResult(extResult.getStart(), extResult.getLength(), extResult.getText(), extResult.getType(),
|
||||
null, PhoneNumberParser.scorePhoneNumber(extResult.getText()), extResult.getText());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.english.parsers;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
|
||||
|
||||
public class URLParser extends BaseSequenceParser {
|
||||
public URLParser() {
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
import com.microsoft.recognizers.text.sequence.config.BaseSequenceConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BaseEmail;
|
||||
import com.microsoft.recognizers.text.utilities.StringUtility;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class BaseEmailExtractor extends BaseSequenceExtractor {
|
||||
private static final Pattern RFC_5322_VALIDATION_REGEX = Pattern.compile(BaseEmail.RFC5322Regex);
|
||||
private final BaseSequenceConfiguration config;
|
||||
|
||||
protected final String extractType = Constants.SYS_EMAIL;
|
||||
|
||||
protected Map<Pattern, String> getRegexes() {
|
||||
return regexes;
|
||||
}
|
||||
|
||||
protected String getExtractType() {
|
||||
return extractType;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<ExtractResult> postFilter(List<ExtractResult> results) {
|
||||
// If Relaxed is on, no extra validation is applied
|
||||
if (config.getOptions() != SequenceOptions.None) {
|
||||
return results;
|
||||
} else {
|
||||
// Not return malformed e-mail addresses and trim ending '.'
|
||||
results.forEach(result -> {
|
||||
if (result.getText().endsWith(".")) {
|
||||
result.setText(StringUtility.trimEnd(result.getText()));
|
||||
result.setLength(result.getLength() - 1);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return results.stream().filter((o -> RFC_5322_VALIDATION_REGEX.matcher((o).getText()).matches()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public BaseEmailExtractor(BaseSequenceConfiguration config) {
|
||||
this.config = config;
|
||||
Map<Pattern, String> regexes = new HashMap<Pattern, String>();
|
||||
regexes.put(Pattern.compile(BaseEmail.EmailRegex), Constants.EMAIL_REGEX);
|
||||
// EmailRegex2 will break the code as it's not supported in Java, comment out for now
|
||||
// Error: java.util.regex.PatternSyntaxException: Unknown inline modifier near index 4
|
||||
// The same issue happens in JS and it was disabled in https://github.com/microsoft/Recognizers-Text/pull/905
|
||||
// @TODO: we need to search a way to handle inline modifier.
|
||||
// See https://stackoverflow.com/questions/48189725/java-regex-pattern-compilation-error
|
||||
// regexes.put(Pattern.compile(BaseEmail.EmailRegex2), Constants.EMAIL_REGEX);
|
||||
|
||||
super.regexes = regexes;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BaseGUID;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class BaseGUIDExtractor extends BaseSequenceExtractor {
|
||||
protected final String extractType = Constants.SYS_GUID;
|
||||
|
||||
protected String getExtractType() {
|
||||
return this.extractType;
|
||||
}
|
||||
|
||||
public BaseGUIDExtractor() {
|
||||
Map<Pattern, String> regexes = new HashMap<Pattern, String>() {
|
||||
{
|
||||
put(Pattern.compile(BaseGUID.GUIDRegex), Constants.GUID_REGEX);
|
||||
}
|
||||
};
|
||||
|
||||
super.regexes = regexes;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BaseHashtag;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class BaseHashTagExtractor extends BaseSequenceExtractor {
|
||||
protected final String extractType = Constants.SYS_HASHTAG;
|
||||
|
||||
protected String getExtractType() {
|
||||
return this.extractType;
|
||||
}
|
||||
|
||||
public BaseHashTagExtractor() {
|
||||
Map<Pattern, String> regexes = new HashMap<Pattern, String>() {
|
||||
{
|
||||
put(Pattern.compile(BaseHashtag.HashtagRegex), Constants.HASHTAG_REGEX);
|
||||
}
|
||||
};
|
||||
|
||||
super.regexes = regexes;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
import com.microsoft.recognizers.text.sequence.config.IpConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BaseIp;
|
||||
import com.microsoft.recognizers.text.utilities.Match;
|
||||
import com.microsoft.recognizers.text.utilities.RegExpUtility;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
public class BaseIpExtractor extends BaseSequenceExtractor {
|
||||
private IpConfiguration config;
|
||||
protected String extractType = Constants.SYS_IP;
|
||||
|
||||
protected String getExtractType() {
|
||||
return this.extractType;
|
||||
}
|
||||
|
||||
// The Ipv6 address regexes is written following the Recommendation:
|
||||
// https://tools.ietf.org/html/rfc5952
|
||||
public BaseIpExtractor(IpConfiguration config) {
|
||||
Map<Pattern, String> regexes = new HashMap<Pattern, String>() {
|
||||
{
|
||||
put(Pattern.compile(BaseIp.Ipv4Regex), Constants.IP_REGEX_IPV4);
|
||||
put(Pattern.compile(BaseIp.Ipv6Regex), Constants.IP_REGEX_IPV6);
|
||||
}
|
||||
};
|
||||
|
||||
super.regexes = regexes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ExtractResult> extract(String text) {
|
||||
List<ExtractResult> result = new ArrayList<ExtractResult>();
|
||||
|
||||
if (StringUtils.isBlank(text)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
HashMap<Match, String> matchSource = new HashMap<>();
|
||||
boolean[] matched = new boolean[text.length()];
|
||||
|
||||
// Traverse every match results to see each position in the text is matched or
|
||||
// not.
|
||||
HashMap<Match[], String> collections = new HashMap<>();
|
||||
regexes.forEach((key, value) -> {
|
||||
Match[] matches = RegExpUtility.getMatches(key, text);
|
||||
collections.put(matches, value);
|
||||
});
|
||||
|
||||
collections.forEach((key, value) -> {
|
||||
for (Match match : key) {
|
||||
for (int j = 0; j < match.length; j++) {
|
||||
matched[match.index + j] = true;
|
||||
}
|
||||
|
||||
// Keep Source Data for extra information
|
||||
matchSource.put(match, value);
|
||||
}
|
||||
});
|
||||
|
||||
int lastNotMatched = -1;
|
||||
for (int i = 0; i < text.length(); i++) {
|
||||
if (matched[i]) {
|
||||
if (i + 1 == text.length() || !matched[i + 1]) {
|
||||
int start = lastNotMatched + 1;
|
||||
int length = i - lastNotMatched;
|
||||
String substr = text.substring(start, start + length);
|
||||
if (substr.startsWith(Constants.IPV6_ELLIPSIS) && (start > 0 && Character.isLetterOrDigit(text.charAt(start - 1)))) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (substr.endsWith(Constants.IPV6_ELLIPSIS) && (i + 1 < text.length() && Character.isLetterOrDigit(text.charAt(i + 1)))) {
|
||||
break;
|
||||
}
|
||||
|
||||
Function<Match, Boolean> matchFunc = match -> match.index == start && match.length == length;
|
||||
|
||||
if (matchSource.keySet().stream().anyMatch(o -> matchFunc.apply(o))) {
|
||||
Match srcMatch = (Match)matchSource.keySet().toArray()[0];
|
||||
ExtractResult extResult = new ExtractResult();
|
||||
|
||||
extResult.setStart(start);
|
||||
extResult.setLength(length);
|
||||
extResult.setText(substr);
|
||||
extResult.setType(this.extractType);
|
||||
extResult.setData(matchSource.getOrDefault(srcMatch, null));
|
||||
result.add(extResult);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
lastNotMatched = i;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BaseMention;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class BaseMentionExtractor extends BaseSequenceExtractor {
|
||||
protected final String extractType = Constants.SYS_MENTION;
|
||||
|
||||
protected String getExtractType() {
|
||||
return this.extractType;
|
||||
}
|
||||
|
||||
public BaseMentionExtractor() {
|
||||
Map<Pattern, String> regexes = new HashMap<Pattern, String>() {
|
||||
{
|
||||
put(Pattern.compile(BaseMention.MentionRegex), Constants.MENTION_REGEX);
|
||||
}
|
||||
};
|
||||
|
||||
super.regexes = regexes;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,216 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
import com.microsoft.recognizers.text.sequence.config.PhoneNumberConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BasePhoneNumbers;
|
||||
import com.microsoft.recognizers.text.utilities.Match;
|
||||
import com.microsoft.recognizers.text.utilities.RegExpUtility;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class BasePhoneNumberExtractor extends BaseSequenceExtractor {
|
||||
private static final Pattern INTERNATIONAL_DIALING_PREFIX_REGEX = Pattern
|
||||
.compile(BasePhoneNumbers.InternationDialingPrefixRegex);
|
||||
|
||||
private static final Pattern PRE_CHECK_PHONE_NUMBER_REGEX = Pattern
|
||||
.compile(BasePhoneNumbers.PreCheckPhoneNumberRegex);
|
||||
|
||||
private static final Pattern SSN_FILTER_REGEX = Pattern.compile(BasePhoneNumbers.SSNFilterRegex);
|
||||
|
||||
private static List<Character> SPECIAL_BOUNDARY_MARKERS = BasePhoneNumbers.SpecialBoundaryMarkers;
|
||||
|
||||
private PhoneNumberConfiguration config;
|
||||
|
||||
protected String extractType = Constants.SYS_PHONE_NUMBER;
|
||||
|
||||
protected String getExtractType() {
|
||||
return this.extractType;
|
||||
}
|
||||
|
||||
public BasePhoneNumberExtractor(PhoneNumberConfiguration config) {
|
||||
this.config = config;
|
||||
|
||||
String wordBoundariesRegex = config.getWordBoundariesRegex();
|
||||
String nonWordBoundariesRegex = config.getNonWordBoundariesRegex();
|
||||
String endWordBoundariesRegex = config.getEndWordBoundariesRegex();
|
||||
|
||||
Map<Pattern, String> regexes = new HashMap<Pattern, String>() {
|
||||
{
|
||||
put(Pattern
|
||||
.compile(BasePhoneNumbers.GeneralPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
|
||||
Constants.PHONE_NUMBER_REGEX_GENERAL);
|
||||
put(Pattern.compile(BasePhoneNumbers.BRPhoneNumberRegex(wordBoundariesRegex, nonWordBoundariesRegex,
|
||||
endWordBoundariesRegex)), Constants.PHONE_NUMBER_REGEX_BR);
|
||||
put(Pattern.compile(BasePhoneNumbers.UKPhoneNumberRegex(wordBoundariesRegex, nonWordBoundariesRegex,
|
||||
endWordBoundariesRegex)), Constants.PHONE_NUMBER_REGEX_UK);
|
||||
put(Pattern.compile(BasePhoneNumbers.DEPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
|
||||
Constants.PHONE_NUMBER_REGEX_DE);
|
||||
put(Pattern.compile(BasePhoneNumbers.USPhoneNumberRegex(wordBoundariesRegex, nonWordBoundariesRegex,
|
||||
endWordBoundariesRegex)), Constants.PHONE_NUMBER_REGEX_US);
|
||||
put(Pattern.compile(BasePhoneNumbers.CNPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
|
||||
Constants.PHONE_NUMBER_REGEX_CN);
|
||||
put(Pattern.compile(BasePhoneNumbers.DKPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
|
||||
Constants.PHONE_NUMBER_REGEX_DK);
|
||||
put(Pattern.compile(BasePhoneNumbers.ITPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
|
||||
Constants.PHONE_NUMBER_REGEX_IT);
|
||||
put(Pattern.compile(BasePhoneNumbers.NLPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
|
||||
Constants.PHONE_NUMBER_REGEX_NL);
|
||||
put(Pattern
|
||||
.compile(BasePhoneNumbers.SpecialPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
|
||||
Constants.PHONE_NUMBER_REGEX_SPECIAL);
|
||||
}
|
||||
};
|
||||
|
||||
this.regexes = regexes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ExtractResult> extract(String text) {
|
||||
if (!PRE_CHECK_PHONE_NUMBER_REGEX.matcher(text).find()) {
|
||||
return new ArrayList<ExtractResult>();
|
||||
}
|
||||
|
||||
List<ExtractResult> ers = super.extract(text);
|
||||
|
||||
for (int i = 0; i < ers.size(); i++) {
|
||||
ExtractResult er = ers.get(i);
|
||||
if ((BasePhoneNumberExtractor.countDigits(er.getText()) < 7 && er.getData().toString() != "ITPhoneNumber") ||
|
||||
Pattern.matches(SSN_FILTER_REGEX.toString(), er.getText())) {
|
||||
ers.remove(er);
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((BasePhoneNumberExtractor.countDigits(er.getText()) == 16 && !er.getText().startsWith("+"))) {
|
||||
ers.remove(er);
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (BasePhoneNumberExtractor.countDigits(er.getText()) == 15) {
|
||||
Boolean flag = false;
|
||||
for (String numSpan : er.getText().split(" ")) {
|
||||
if (BasePhoneNumberExtractor.countDigits(numSpan) == 4 || BasePhoneNumberExtractor.countDigits(numSpan) == 3) {
|
||||
flag = false;
|
||||
} else {
|
||||
flag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (flag == false) {
|
||||
ers.remove(er);
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (er.getStart() + er.getLength() < text.length()) {
|
||||
Character ch = text.charAt(er.getStart() + er.getLength());
|
||||
if (BasePhoneNumbers.ForbiddenSuffixMarkers.contains(ch)) {
|
||||
ers.remove(er);
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (er.getStart() != 0) {
|
||||
Character ch = text.charAt(er.getStart() - 1);
|
||||
String front = text.substring(0, er.getStart() - 1);
|
||||
|
||||
if (this.config.getFalsePositivePrefixRegex() != null &&
|
||||
this.config.getFalsePositivePrefixRegex().matcher(front).find()) {
|
||||
ers.remove(er);
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (BasePhoneNumbers.BoundaryMarkers.contains(ch)) {
|
||||
if (SPECIAL_BOUNDARY_MARKERS.contains(ch) &&
|
||||
BasePhoneNumberExtractor.checkFormattedPhoneNumber(er.getText()) && er.getStart() >= 2) {
|
||||
Character charGap = text.charAt(er.getStart() - 2);
|
||||
if (!Character.isDigit(charGap) && !Character.isWhitespace(charGap)) {
|
||||
// check if the extracted string has a non-digit string before "-".
|
||||
Boolean flag = Pattern.matches("^[^0-9]+$", text.substring(0, er.getStart() - 2));
|
||||
|
||||
// Handle cases like "91a-677-0060".
|
||||
if (Character.isLowerCase(charGap) && !flag) {
|
||||
ers.remove(er);
|
||||
i--;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// check the international dialing prefix
|
||||
if (INTERNATIONAL_DIALING_PREFIX_REGEX.matcher(front).find()) {
|
||||
Integer moveOffset = RegExpUtility.getMatches(INTERNATIONAL_DIALING_PREFIX_REGEX,
|
||||
front)[0].length + 1;
|
||||
er.setStart(er.getStart() - moveOffset);
|
||||
er.setLength(er.getLength() + moveOffset);
|
||||
er.setText(text.substring(er.getStart(), er.getStart() + er.getLength()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle cases like "-1234567" and "-1234+5678"
|
||||
ers.remove(er);
|
||||
i--;
|
||||
}
|
||||
|
||||
if (this.config.getForbiddenPrefixMarkers().contains(ch)) {
|
||||
{
|
||||
// Handle "tel:123456"
|
||||
if (BasePhoneNumbers.ColonMarkers.contains(ch)) {
|
||||
if (this.config.getColonPrefixCheckRegex().matcher(front).find()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
ers.remove(er);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// filter hexadecimal address like 00 10 00 31 46 D9 E9 11
|
||||
Match[] maskMatchCollection = RegExpUtility.getMatches(Pattern.compile(BasePhoneNumbers.PhoneNumberMaskRegex),
|
||||
text);
|
||||
|
||||
for (int index = ers.size() - 1; index >= 0; --index) {
|
||||
for (Match m : maskMatchCollection) {
|
||||
if (ers.get(index).getStart() >= m.index &&
|
||||
ers.get(index).getStart() + ers.get(index).getLength() <= m.index + m.length) {
|
||||
ers.remove(index);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ers;
|
||||
}
|
||||
|
||||
private static Boolean checkFormattedPhoneNumber(String phoneNumberText) {
|
||||
return Pattern.compile(BasePhoneNumbers.FormatIndicatorRegex).matcher(phoneNumberText).find();
|
||||
}
|
||||
|
||||
private static Integer countDigits(String candidateString) {
|
||||
Integer count = 0;
|
||||
for (Character t : candidateString.toCharArray()) {
|
||||
if (Character.isDigit(t)) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.sequence.SequenceOptions;
|
||||
import com.microsoft.recognizers.text.sequence.config.PhoneNumberConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BasePhoneNumbers;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class BasePhoneNumberExtractorConfiguration extends PhoneNumberConfiguration {
|
||||
public BasePhoneNumberExtractorConfiguration(SequenceOptions options) {
|
||||
super(options);
|
||||
setWordBoundariesRegex(BasePhoneNumbers.WordBoundariesRegex);
|
||||
setNonWordBoundariesRegex(BasePhoneNumbers.NonWordBoundariesRegex);
|
||||
setEndWordBoundariesRegex(BasePhoneNumbers.EndWordBoundariesRegex);
|
||||
setColonPrefixCheckRegex(Pattern.compile(BasePhoneNumbers.ColonPrefixCheckRegex));
|
||||
setColonMarkers(BasePhoneNumbers.ColonMarkers);
|
||||
setForbiddenPrefixMarkers(BasePhoneNumbers.ForbiddenPrefixMarkers);
|
||||
setForbiddenSuffixMarkers(BasePhoneNumbers.ForbiddenSuffixMarkers);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.IExtractor;
|
||||
import com.microsoft.recognizers.text.utilities.Match;
|
||||
import com.microsoft.recognizers.text.utilities.RegExpUtility;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public abstract class BaseSequenceExtractor implements IExtractor {
|
||||
protected Map<Pattern, String> regexes;
|
||||
protected String extractType = "";
|
||||
|
||||
protected List<ExtractResult> postFilter(List<ExtractResult> results) {
|
||||
return results;
|
||||
}
|
||||
|
||||
protected Map<Pattern, String> getRegexes() {
|
||||
return regexes;
|
||||
}
|
||||
|
||||
protected String getExtractType() {
|
||||
return extractType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ExtractResult> extract(String text) {
|
||||
List<ExtractResult> result = new ArrayList<>();
|
||||
|
||||
if (text.isEmpty()) {
|
||||
return result;
|
||||
}
|
||||
|
||||
HashMap<Match, String> matchSource = new HashMap<>();
|
||||
boolean[] matched = new boolean[text.length()];
|
||||
|
||||
// Traverse every match results to see each position in the text is matched or
|
||||
// not.
|
||||
HashMap<Match[], String> collections = new HashMap<>();
|
||||
regexes.forEach((key, value) -> {
|
||||
Match[] matches = RegExpUtility.getMatches(key, text);
|
||||
collections.put(matches, value);
|
||||
});
|
||||
|
||||
collections.forEach((key, value) -> {
|
||||
for (Match match : key) {
|
||||
if (isValidMatch(match)) {
|
||||
for (int j = 0; j < match.length; j++) {
|
||||
matched[match.index + j] = true;
|
||||
}
|
||||
|
||||
// Keep Source Data for extra information
|
||||
matchSource.put(match, value);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Form the extracted results mark all the matched intervals in the text.
|
||||
int lastNotMatched = -1;
|
||||
for (int i = 0; i < text.length(); i++) {
|
||||
if (matched[i]) {
|
||||
if (i + 1 == text.length() || !matched[i + 1]) {
|
||||
int start = lastNotMatched + 1;
|
||||
int length = i - lastNotMatched;
|
||||
String substr = text.substring(start, start + length);
|
||||
Function<Match, Boolean> matchFunc = match -> match.index == start && match.length == length;
|
||||
|
||||
if (matchSource.keySet().stream().anyMatch(o -> matchFunc.apply(o))) {
|
||||
Match srcMatch = (Match)matchSource.keySet().toArray()[0];
|
||||
ExtractResult extResult = new ExtractResult();
|
||||
|
||||
extResult.setStart(start);
|
||||
extResult.setLength(length);
|
||||
extResult.setText(substr);
|
||||
extResult.setType(this.extractType);
|
||||
extResult.setData(matchSource.getOrDefault(srcMatch, null));
|
||||
result.add(extResult);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
lastNotMatched = i;
|
||||
}
|
||||
}
|
||||
|
||||
return this.postFilter(result);
|
||||
}
|
||||
|
||||
public Boolean isValidMatch(Match match) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.extractors;
|
||||
|
||||
import com.microsoft.recognizers.text.matcher.MatchResult;
|
||||
import com.microsoft.recognizers.text.matcher.StringMatcher;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
import com.microsoft.recognizers.text.sequence.config.URLConfiguration;
|
||||
import com.microsoft.recognizers.text.sequence.resources.BaseURL;
|
||||
import com.microsoft.recognizers.text.utilities.Match;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
public class BaseURLExtractor extends BaseSequenceExtractor {
|
||||
private final URLConfiguration config;
|
||||
private StringMatcher tldMatcher;
|
||||
private Pattern ambiguousTimeTerm;
|
||||
|
||||
protected final String extractType = Constants.SYS_URL;
|
||||
|
||||
protected Map<Pattern, String> getRegexes() {
|
||||
return regexes;
|
||||
}
|
||||
|
||||
protected String getExtractType() {
|
||||
return extractType;
|
||||
}
|
||||
|
||||
public BaseURLExtractor(URLConfiguration config) {
|
||||
this.config = config;
|
||||
Map<Pattern, String> regexes = new HashMap<Pattern, String>() {
|
||||
{
|
||||
put(config.getUrlRegex(), Constants.URL_REGEX);
|
||||
put(config.getIpUrlRegex(), Constants.URL_REGEX);
|
||||
put(Pattern.compile(BaseURL.UrlRegex2), Constants.URL_REGEX);
|
||||
}
|
||||
};
|
||||
|
||||
super.regexes = regexes;
|
||||
this.ambiguousTimeTerm = Pattern.compile(BaseURL.AmbiguousTimeTerm);
|
||||
|
||||
this.tldMatcher = new StringMatcher();
|
||||
this.tldMatcher.init(BaseURL.TldList);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean isValidMatch(Match match) {
|
||||
// For cases like "7.am" or "8.pm" which are more likely time terms.
|
||||
return !this.ambiguousTimeTerm.matcher(match.value).find();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.models;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.IExtractor;
|
||||
import com.microsoft.recognizers.text.IModel;
|
||||
import com.microsoft.recognizers.text.IParser;
|
||||
import com.microsoft.recognizers.text.ModelResult;
|
||||
import com.microsoft.recognizers.text.ParseResult;
|
||||
import com.microsoft.recognizers.text.ResolutionKey;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public abstract class AbstractSequenceModel implements IModel {
|
||||
protected String modelTypeName;
|
||||
protected IExtractor extractor;
|
||||
protected IParser parser;
|
||||
|
||||
protected AbstractSequenceModel(IParser withParser, IExtractor withExtractor) {
|
||||
this.parser = withParser;
|
||||
this.extractor = withExtractor;
|
||||
}
|
||||
|
||||
public List<ModelResult> parse(String query) {
|
||||
List<ParseResult> parsedSequences = new ArrayList<ParseResult>();
|
||||
|
||||
try {
|
||||
List<ExtractResult> extractResults = extractor.extract(query);
|
||||
|
||||
for (ExtractResult result : extractResults) {
|
||||
parsedSequences.add(this.parser.parse(result));
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
// Nothing to do. Exceptions in parse should not break users of recognizers.
|
||||
// No result.
|
||||
}
|
||||
|
||||
return parsedSequences.stream().map(o -> {
|
||||
return new ModelResult(o.getText(), o.getStart(), o.getStart() + o.getLength() - 1, modelTypeName,
|
||||
new TreeMap<String, Object>() {
|
||||
{
|
||||
put(ResolutionKey.Value, o.getResolutionStr());
|
||||
}
|
||||
});
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public String getModelTypeName() {
|
||||
return modelTypeName;
|
||||
}
|
||||
|
||||
public IExtractor getExtractor() {
|
||||
return extractor;
|
||||
}
|
||||
|
||||
public IParser getParser() {
|
||||
return parser;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.models;
|
||||
|
||||
import com.microsoft.recognizers.text.IExtractor;
|
||||
import com.microsoft.recognizers.text.IParser;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
|
||||
public class EmailModel extends AbstractSequenceModel {
|
||||
public EmailModel(IParser parser, IExtractor extractor) {
|
||||
super(parser, extractor);
|
||||
this.modelTypeName = Constants.MODEL_EMAIL;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.models;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.IExtractor;
|
||||
import com.microsoft.recognizers.text.IParser;
|
||||
import com.microsoft.recognizers.text.ModelResult;
|
||||
import com.microsoft.recognizers.text.ParseResult;
|
||||
import com.microsoft.recognizers.text.ResolutionKey;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
import com.microsoft.recognizers.text.utilities.QueryProcessor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class GUIDModel extends AbstractSequenceModel {
|
||||
public GUIDModel(IParser parser, IExtractor extractor) {
|
||||
super(parser, extractor);
|
||||
this.modelTypeName = Constants.MODEL_GUID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ModelResult> parse(String query) {
|
||||
List<ParseResult> parsedSequences = new ArrayList<ParseResult>();
|
||||
|
||||
// Preprocess the query
|
||||
query = QueryProcessor.preprocess(query);
|
||||
|
||||
try {
|
||||
List<ExtractResult> extractResults = extractor.extract(query);
|
||||
|
||||
for (ExtractResult result : extractResults) {
|
||||
parsedSequences.add(this.parser.parse(result));
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
// Nothing to do. Exceptions in parse should not break users of recognizers.
|
||||
// No result.
|
||||
}
|
||||
|
||||
return parsedSequences.stream().map(o -> {
|
||||
return new ModelResult(o.getText(), o.getStart(), o.getStart() + o.getLength() - 1, modelTypeName,
|
||||
new TreeMap<String, Object>() {
|
||||
{
|
||||
put(ResolutionKey.Value, o.getResolutionStr());
|
||||
put(ResolutionKey.Score, o.getValue());
|
||||
}
|
||||
});
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.models;
|
||||
|
||||
import com.microsoft.recognizers.text.IExtractor;
|
||||
import com.microsoft.recognizers.text.IParser;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
|
||||
public class HashTagModel extends AbstractSequenceModel {
|
||||
public HashTagModel(IParser parser, IExtractor extractor) {
|
||||
super(parser, extractor);
|
||||
this.modelTypeName = Constants.MODEL_HASHTAG;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.models;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.IExtractor;
|
||||
import com.microsoft.recognizers.text.IParser;
|
||||
import com.microsoft.recognizers.text.ModelResult;
|
||||
import com.microsoft.recognizers.text.ParseResult;
|
||||
import com.microsoft.recognizers.text.ResolutionKey;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class IpAddressModel extends AbstractSequenceModel {
|
||||
public IpAddressModel(IParser parser, IExtractor extractor) {
|
||||
super(parser, extractor);
|
||||
this.modelTypeName = Constants.MODEL_IP;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ModelResult> parse(String query) {
|
||||
List<ParseResult> parsedSequences = new ArrayList<ParseResult>();
|
||||
|
||||
try {
|
||||
List<ExtractResult> extractResults = extractor.extract(query);
|
||||
|
||||
for (ExtractResult result : extractResults) {
|
||||
parsedSequences.add(this.parser.parse(result));
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
// Nothing to do. Exceptions in parse should not break users of recognizers.
|
||||
// No result.
|
||||
}
|
||||
|
||||
return parsedSequences.stream().filter(o -> {
|
||||
return o.getData() != null;
|
||||
}).map(o -> {
|
||||
return new ModelResult(o.getText(), o.getStart(), o.getStart() + o.getLength() - 1, modelTypeName,
|
||||
new TreeMap<String, Object>() {
|
||||
{
|
||||
put(ResolutionKey.Value, o.getResolutionStr());
|
||||
put(ResolutionKey.Type, o.getData());
|
||||
}
|
||||
});
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.models;
|
||||
|
||||
import com.microsoft.recognizers.text.IExtractor;
|
||||
import com.microsoft.recognizers.text.IParser;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
|
||||
public class MentionModel extends AbstractSequenceModel {
|
||||
public MentionModel(IParser parser, IExtractor extractor) {
|
||||
super(parser, extractor);
|
||||
this.modelTypeName = Constants.MODEL_MENTION;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.models;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.IExtractor;
|
||||
import com.microsoft.recognizers.text.IParser;
|
||||
import com.microsoft.recognizers.text.ModelResult;
|
||||
import com.microsoft.recognizers.text.ParseResult;
|
||||
import com.microsoft.recognizers.text.ResolutionKey;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
import com.microsoft.recognizers.text.utilities.QueryProcessor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class PhoneNumberModel extends AbstractSequenceModel {
|
||||
public PhoneNumberModel(IParser parser, IExtractor extractor) {
|
||||
super(parser, extractor);
|
||||
this.modelTypeName = Constants.MODEL_PHONE_NUMBER;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ModelResult> parse(String query) {
|
||||
List<ParseResult> parsedSequences = new ArrayList<ParseResult>();
|
||||
|
||||
// Preprocess the query
|
||||
query = QueryProcessor.preprocess(query);
|
||||
|
||||
try {
|
||||
List<ExtractResult> extractResults = extractor.extract(query);
|
||||
|
||||
for (ExtractResult result : extractResults) {
|
||||
parsedSequences.add(this.parser.parse(result));
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
// Nothing to do. Exceptions in parse should not break users of recognizers.
|
||||
// No result.
|
||||
}
|
||||
|
||||
return parsedSequences.stream().map(o -> {
|
||||
return new ModelResult(o.getText(), o.getStart(), o.getStart() + o.getLength() - 1, modelTypeName,
|
||||
new TreeMap<String, Object>() {
|
||||
{
|
||||
put(ResolutionKey.Value, o.getResolutionStr());
|
||||
put(ResolutionKey.Score, o.getValue());
|
||||
}
|
||||
});
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.models;
|
||||
|
||||
import com.microsoft.recognizers.text.IExtractor;
|
||||
import com.microsoft.recognizers.text.IParser;
|
||||
import com.microsoft.recognizers.text.sequence.Constants;
|
||||
|
||||
public class URLModel extends AbstractSequenceModel {
|
||||
public URLModel(IParser parser, IExtractor extractor) {
|
||||
super(parser, extractor);
|
||||
this.modelTypeName = Constants.MODEL_URL;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.parsers;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.ParseResult;
|
||||
import com.microsoft.recognizers.text.utilities.StringUtility;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
public class BaseIpParser extends BaseSequenceParser {
|
||||
@Override
|
||||
public ParseResult parse(ExtractResult extResult) {
|
||||
ParseResult result = new ParseResult(extResult.getStart(), extResult.getLength(), extResult.getText(),
|
||||
extResult.getType(), extResult.getData(), null, BaseIpParser.dropLeadingZeros(extResult.getText()));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static String dropLeadingZeros(String text) {
|
||||
String result = new String();
|
||||
String number = new String();
|
||||
for (int i = 0; i < text.length(); i++) {
|
||||
Character c = text.charAt(i);
|
||||
if (c == '.' || c == ':') {
|
||||
if (!StringUtils.isBlank(number)) {
|
||||
number = number == "0" ? number : StringUtility.trimStart(number, "^[0]+","");
|
||||
number = StringUtils.isBlank(number) ? "0" : number;
|
||||
result += number;
|
||||
}
|
||||
|
||||
result += text.charAt(i);
|
||||
number = new String();
|
||||
} else {
|
||||
number += c.toString();
|
||||
if (i == text.length() - 1) {
|
||||
number = number == "0" ? number : StringUtility.trimStart(number, "^[0]+","");
|
||||
number = StringUtils.isBlank(number) ? "0" : number;
|
||||
result += number;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.parsers;
|
||||
|
||||
import com.microsoft.recognizers.text.ExtractResult;
|
||||
import com.microsoft.recognizers.text.IParser;
|
||||
import com.microsoft.recognizers.text.ParseResult;
|
||||
|
||||
public class BaseSequenceParser implements IParser {
|
||||
@Override
|
||||
public ParseResult parse(ExtractResult extResult) {
|
||||
ParseResult result = new ParseResult(extResult.getStart(), extResult.getLength(), extResult.getText(),
|
||||
extResult.getType(), null, null, extResult.getText());
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
/**
 * Regular-expression source strings for e-mail recognition. Composite patterns are assembled by
 * substituting {@code {Name}} placeholders with the constants declared above them.
 */
public class BaseEmail {

    /** Simple e-mail pattern: local part, {@code @}, domain, dot, 2-6 character suffix. */
    public static final String EmailRegex = "(([-a-zA-Z0-9_\\+\\.]+)@([-a-zA-Z\\d\\.]+)\\.([a-zA-Z\\.]{2,6}))";

    /** Dotted-quad fragment captured in the named group {@code ipv4}. */
    public static final String IPv4Regex = "(?<ipv4>(\\d{1,3}\\.){3}\\d{1,3})";

    /** Dot-separated domain labels ending in a suffix captured in the named group {@code tld}. */
    public static final String NormalSuffixRegex = "(([0-9A-Za-z][-]*[0-9A-Za-z]*\\.)+(?<tld>[a-zA-Z][\\-a-zA-Z]{0,22}[a-zA-Z]))";

    /**
     * Local-part pattern.
     * NOTE(review): uses the {@code (?(...)} conditional construct; confirm the target regex engine
     * accepts it before compiling this pattern.
     */
    public static final String EmailPrefix = "(?(\"\")(\"\".+?(?<!\\\\)\"\")|(([0-9A-Za-z]((\\.(?!\\.))|[-!#\\$%&'\\*\\+/=\\?\\^\\{\\}\\|~\\w])*)(?<=[0-9A-Za-z])))";

    /** Domain pattern: a bracketed {@link #IPv4Regex} or a {@link #NormalSuffixRegex}. */
    public static final String EmailSuffix = "(?(\\[)(\\[{IPv4Regex}\\])|{NormalSuffixRegex})"
        .replace("{IPv4Regex}", IPv4Regex)
        .replace("{NormalSuffixRegex}", NormalSuffixRegex);

    /** {@link #EmailPrefix}, {@code @}, {@link #EmailSuffix}. */
    public static final String EmailRegex2 = "(({EmailPrefix})@({EmailSuffix}))"
        .replace("{EmailPrefix}", EmailPrefix)
        .replace("{EmailSuffix}", EmailSuffix);

    /** Whole-input e-mail pattern anchored with {@code \A} and {@code \z}. */
    public static final String RFC5322Regex = "\\A(?:[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\"\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])\\z";
}
|
|
@ -0,0 +1,20 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
/**
 * Regular-expression source strings for GUID recognition.
 */
public class BaseGUID {

    /** A bare GUID body: the 8-4-4-4-12 hyphenated form or 32 contiguous characters, from {@code [a-f0-9]}. */
    public static final String GUIDRegexElement = "(([a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12})|([a-f0-9]{32}))";

    /**
     * {@link #GUIDRegexElement} in its accepted wrappers: word-bounded, brace-enclosed,
     * {@code urn:uuid:}-prefixed, {@code %7b...%7d}-enclosed, or {@code x'...'}-quoted.
     */
    public static final String GUIDRegex = "(\\b{GUIDRegexElement}\\b|\\{{GUIDRegexElement}\\}|urn:uuid:{GUIDRegexElement}\\b|%7[b]{GUIDRegexElement}%7[d]|[x]\\'{GUIDRegexElement}\\')"
        .replace("{GUIDRegexElement}", GUIDRegexElement);
}
|
|
@ -0,0 +1,17 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
/**
 * Regular-expression source string for hashtag recognition.
 */
public class BaseHashtag {

    /** A {@code #} at the start of input or after whitespace, followed by one or more of {@code [a-zA-Z0-9_]}. */
    public static final String HashtagRegex = "((?<=\\s|^)#([a-zA-Z0-9_]+))";
}
|
|
@ -0,0 +1,64 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
/**
 * Regular-expression source strings for IPv4 and IPv6 recognition. Composite patterns are assembled
 * by substituting {@code {Name}} placeholders with the constants declared above them.
 */
public class BaseIp {

    /** Word-bounded dotted quad. */
    public static final String Ipv4Regex = "\\b(1\\d{2}|2[0-4]\\d|25[0-5]|0?[1-9]\\d|0{0,2}\\d)((\\.(1\\d{2}|2[0-4]\\d|25[0-5]|0?[1-9]\\d|0{0,2}\\d)){3})\\b";

    /** One IPv6 group: 1-4 hexadecimal digits. */
    public static final String BasicIpv6Element = "([\\da-fA-F]{1,4})";

    /** Full form: eight groups separated by colons. */
    public static final String BasicIpv6Regex = "(({BasicIpv6Element}:){7}{BasicIpv6Element})"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Elided form with no group before {@code ::} and 1-7 groups after it. */
    public static final String Ipv6EllipsisRegex1 = "(:(:{BasicIpv6Element}){1,7})"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Elided form with one group before {@code ::} and 1-6 groups after it. */
    public static final String Ipv6EllipsisRegex2 = "(({BasicIpv6Element}:){1}((:{BasicIpv6Element}){1,6}))"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Elided form with two groups before {@code ::} and 1-5 groups after it. */
    public static final String Ipv6EllipsisRegex3 = "(({BasicIpv6Element}:){2}((:{BasicIpv6Element}){1,5}))"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Elided form with three groups before {@code ::} and 1-4 groups after it. */
    public static final String Ipv6EllipsisRegex4 = "(({BasicIpv6Element}:){3}((:{BasicIpv6Element}){1,4}))"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Elided form with four groups before {@code ::} and 1-3 groups after it. */
    public static final String Ipv6EllipsisRegex5 = "(({BasicIpv6Element}:){4}((:{BasicIpv6Element}){1,3}))"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Elided form with five groups before {@code ::} and 1-2 groups after it. */
    public static final String Ipv6EllipsisRegex6 = "(({BasicIpv6Element}:){5}((:{BasicIpv6Element}){1,2}))"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Elided form with six groups before {@code ::} and one group after it. */
    public static final String Ipv6EllipsisRegex7 = "(({BasicIpv6Element}:){6}((:{BasicIpv6Element}){1}))"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Elided form with seven groups followed by a trailing {@code ::}. */
    public static final String Ipv6EllipsisRegex8 = "(({BasicIpv6Element}:){7}(:))"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Elided forms that begin or end with {@code ::}, with their own boundary assertions. */
    public static final String Ipv6EllipsisRegexOther = "\\B::\\B|\\B:(:{BasicIpv6Element}){1,7}\\b|\\b({BasicIpv6Element}:){1,7}:\\B"
        .replace("{BasicIpv6Element}", BasicIpv6Element);

    /** Alternation of the full form and the eight numbered elided forms. */
    public static final String MergedIpv6Regex = "({BasicIpv6Regex}|{Ipv6EllipsisRegex1}|{Ipv6EllipsisRegex2}|{Ipv6EllipsisRegex3}|{Ipv6EllipsisRegex4}|{Ipv6EllipsisRegex5}|{Ipv6EllipsisRegex6}|{Ipv6EllipsisRegex7}|{Ipv6EllipsisRegex8})"
        .replace("{BasicIpv6Regex}", BasicIpv6Regex)
        .replace("{Ipv6EllipsisRegex1}", Ipv6EllipsisRegex1)
        .replace("{Ipv6EllipsisRegex2}", Ipv6EllipsisRegex2)
        .replace("{Ipv6EllipsisRegex3}", Ipv6EllipsisRegex3)
        .replace("{Ipv6EllipsisRegex4}", Ipv6EllipsisRegex4)
        .replace("{Ipv6EllipsisRegex5}", Ipv6EllipsisRegex5)
        .replace("{Ipv6EllipsisRegex6}", Ipv6EllipsisRegex6)
        .replace("{Ipv6EllipsisRegex7}", Ipv6EllipsisRegex7)
        .replace("{Ipv6EllipsisRegex8}", Ipv6EllipsisRegex8);

    /** Word-bounded {@link #MergedIpv6Regex}, or {@link #Ipv6EllipsisRegexOther}. */
    public static final String Ipv6Regex = "(\\b{MergedIpv6Regex}\\b)|({Ipv6EllipsisRegexOther})"
        .replace("{MergedIpv6Regex}", MergedIpv6Regex)
        .replace("{Ipv6EllipsisRegexOther}", Ipv6EllipsisRegexOther);
}
|
|
@ -0,0 +1,17 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
/**
 * Regular-expression source string for mention recognition.
 */
public class BaseMention {

    /**
     * An {@code @} followed by one or more of {@code [a-zA-Z0-9_]}, not continued by a dot plus a
     * word character, and ending on a word boundary.
     */
    public static final String MentionRegex = "@([a-zA-Z0-9_]+)(?![.]\\w)\\b";
}
|
|
@ -0,0 +1,119 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class BasePhoneNumbers {
|
||||
|
||||
public static final String NumberReplaceToken = "@builtin.phonenumber";
|
||||
|
||||
public static final String WordBoundariesRegex = "\\b";
|
||||
|
||||
public static final String NonWordBoundariesRegex = "\\B";
|
||||
|
||||
public static final String EndWordBoundariesRegex = "\\b";
|
||||
|
||||
public static final String PreCheckPhoneNumberRegex = "(\\d{1,4}.){2,4}\\s?\\d{2,3}";
|
||||
|
||||
public static String GeneralPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "({WordBoundariesRegex}(((\\d[\\s]?){4,12}))(-?[\\d\\s?]{3}\\d)(?!-){EndWordBoundariesRegex})|(\\(\\d{5}\\)\\s?\\d{5,6})|\\+\\d{2}\\(\\d\\)\\d{10}"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static String BRPhoneNumberRegex(String WordBoundariesRegex, String NonWordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "((\\(\\s?(\\+\\s?|00)55\\s?\\)\\s?)|(((?<!\\d)\\+\\s?|{WordBoundariesRegex}00)55\\s?)|{WordBoundariesRegex})?((({NonWordBoundariesRegex}\\(\\s?))\\d{2,3}(\\s?\\))|({WordBoundariesRegex}\\d{2,3}))\\s?\\d{4,5}-?\\d{3,5}(?!-){EndWordBoundariesRegex}"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{NonWordBoundariesRegex}", NonWordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static String UKPhoneNumberRegex(String WordBoundariesRegex, String NonWordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "((({WordBoundariesRegex}(00)|{NonWordBoundariesRegex}\\+)\\s?)?({WordBoundariesRegex}\\d{2}\\s?)?((\\s?\\(0\\)[-\\s]?|{WordBoundariesRegex}|(?<=(\\b^#)\\d{2}))\\d{2,5}|\\(0\\d{3,4}\\))[/-]?\\s?(\\d{5,8}|\\d{3,4}[-\\s]?\\d{3,4})(?!-){EndWordBoundariesRegex})"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{NonWordBoundariesRegex}", NonWordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static String DEPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "((\\+\\d{2}\\s?((\\(0\\))?\\d\\s?)?|{WordBoundariesRegex})(\\d{2,4}\\s?[-/]?[\\s\\d]{7,10}\\d)(?!-){EndWordBoundariesRegex})"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static String USPhoneNumberRegex(String WordBoundariesRegex, String NonWordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "((((({NonWordBoundariesRegex}\\+)|{WordBoundariesRegex})1(\\s|-)?)|{WordBoundariesRegex})?(\\d{3}\\)[-\\s]?|\\(\\d{3}\\)[-\\.\\s]?|{WordBoundariesRegex}\\d{3}\\s?[-\\.]?\\s?)|{WordBoundariesRegex})[2-9]\\d{2}\\s?[-\\.]?\\s?\\d{4}(\\s?(x|X|ext)\\s?\\d{3,5})?(?!(-\\s?\\d)){EndWordBoundariesRegex}"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{NonWordBoundariesRegex}", NonWordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static String CNPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "(({WordBoundariesRegex}00\\s?)?\\+?(86|82|81)\\s?-?\\s?)?((({WordBoundariesRegex}|(?<=(86|82|81)))\\d{2,5}\\s?-?\\s?|\\(\\d{2,5}\\)\\s?)\\d{4}\\s?-?\\s?\\d{4}(\\s?-?\\s?\\d{4})?|(\\b|(?<=(86|82|81)))\\d{3}\\s?-?\\s?\\d{4}\\s?-?\\s?\\d{4})(?!-){EndWordBoundariesRegex}"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static String DKPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "((\\(\\s?(\\+\\s?|00)45\\s?\\)\\s?)|(((?<!\\d)\\+\\s?|\\b00)45\\s?)|{WordBoundariesRegex})(\\s?\\(0\\)\\s?)?((\\d{8})|(\\d{4}\\s?-?\\s?\\d{4,6})|((\\d{2}[\\s-]){3}\\d{2})|(\\d{2}\\s?-?\\s?\\d{3}\\s?-?\\s?\\d{3}))(?!-){EndWordBoundariesRegex}"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static String ITPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "((\\(\\s?(\\+\\s?|00)39\\s?\\)\\s?)|(((?<!\\d)\\+\\s?|\\b00)39\\s?)|{WordBoundariesRegex})((0[\\d-]{4,12}\\d)|(3[\\d-]{7,12}\\d)|(0[\\d\\s]{4,12}\\d)|(3[\\d\\s]{7,12}\\d))(?!-){EndWordBoundariesRegex}"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static String NLPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "((((\\(\\s?(\\+\\s?|00)31\\s?\\)\\s?)|(((?<!\\d)\\+\\s?|{WordBoundariesRegex}00)31\\s?))?((({WordBoundariesRegex}|(?<=31))0?\\d{1,3}|\\(\\s?0?\\d{1,3}\\s?\\)|\\(0\\)[-\\s]?\\d{1,3})((-?[\\d]{5,11})|(\\s[\\d\\s]{5,11}))\\d))|\\b\\d{10,12})(?!-){EndWordBoundariesRegex}"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static String SpecialPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
|
||||
return "({WordBoundariesRegex}(\\d{3,4}[/-]\\d{1,4}[/-]\\d{3,4}){EndWordBoundariesRegex})"
|
||||
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
|
||||
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
|
||||
}
|
||||
|
||||
public static final String NoAreaCodeUSPhoneNumberRegex = "(?<!(-|-\\s|\\d|\\)|\\)\\s|\\.))[2-9]\\d{2}\\s?[-\\.]\\s?\\d{4}(?!(-\\s?\\d))\\b";
|
||||
|
||||
public static final String InternationDialingPrefixRegex = "0(0|11)$";
|
||||
|
||||
public static final List<String> TypicalDeductionRegexList = Arrays.asList("^\\d{5}-\\d{4}$", "\\)\\.", "^0(0|11)(-)");
|
||||
|
||||
public static final String PhoneNumberMaskRegex = "([0-9a-e]{2}(\\s[0-9a-e]{2}){7})";
|
||||
|
||||
public static final String CountryCodeRegex = "^(\\(\\s?(\\+\\s?|00)\\d{1,3}\\s?\\)|(\\+\\s?|00)\\d{1,3})";
|
||||
|
||||
public static final String AreaCodeIndicatorRegex = "\\(";
|
||||
|
||||
public static final String FormatIndicatorRegex = "(\\s|-|/|\\.)+";
|
||||
|
||||
public static final List<Character> ColonMarkers = Arrays.asList(':');
|
||||
|
||||
public static final String ColonPrefixCheckRegex = "(([a-z])\\s*$)";
|
||||
|
||||
public static final List<Character> SpecialBoundaryMarkers = Arrays.asList('-', ' ');
|
||||
|
||||
public static final List<Character> BoundaryMarkers = Arrays.asList('-', '.', '/', '+', '#', '*');
|
||||
|
||||
public static final List<Character> ForbiddenPrefixMarkers = Arrays.asList(',', ':', '%');
|
||||
|
||||
public static final List<Character> ForbiddenSuffixMarkers = Arrays.asList('/', '+', '#', '*', ':', '%');
|
||||
|
||||
public static final String SSNFilterRegex = "^\\d{3}-\\d{2}-\\d{4}$";
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class BaseURL {
|
||||
|
||||
public static final String ProtocolRegex = "((https?|ftp):\\/\\/)";
|
||||
|
||||
public static final String PortRegex = "(:\\d{1,5})";
|
||||
|
||||
public static final String ExtractionRestrictionRegex = "(?<=\\s|[\\'\"\"\\(\\[:]|^)";
|
||||
|
||||
public static final String UrlPrefixRegex = "({ExtractionRestrictionRegex}{ProtocolRegex}?|{ProtocolRegex})[a-zA-Z0-9][-a-zA-Z0-9._]{0,256}(?<![.])\\."
|
||||
.replace("{ExtractionRestrictionRegex}", ExtractionRestrictionRegex)
|
||||
.replace("{ProtocolRegex}", ProtocolRegex);
|
||||
|
||||
public static final String UrlSuffixRegex = "{PortRegex}?([/#][-a-zA-Z0-9:%_\\+.~#?!&//=]*)?(?![-a-zA-Z0-9:%_\\+~#?!&//=@])"
|
||||
.replace("{PortRegex}", PortRegex);
|
||||
|
||||
public static final String UrlRegex = "{UrlPrefixRegex}(?<Tld>[a-zA-Z]{2,18}){UrlSuffixRegex}"
|
||||
.replace("{UrlPrefixRegex}", UrlPrefixRegex)
|
||||
.replace("{ProtocolRegex}", ProtocolRegex)
|
||||
.replace("{UrlSuffixRegex}", UrlSuffixRegex);
|
||||
|
||||
public static final String UrlRegex2 = "((ht|f)tp(s?)\\:\\/\\/|www\\.)[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.(?<Tld>[a-zA-Z0-9()]{1,6})\\b([-a-zA-Z0-9()@:%_\\+.~#?&//=]*)";
|
||||
|
||||
public static final String IpUrlRegex = "(?<IPurl>({ExtractionRestrictionRegex}{ProtocolRegex}({BaseIp.Ipv4Regex}|localhost){UrlSuffixRegex}))"
|
||||
.replace("{ExtractionRestrictionRegex}", ExtractionRestrictionRegex)
|
||||
.replace("{ProtocolRegex}", ProtocolRegex)
|
||||
.replace("{BaseIp.Ipv4Regex}", BaseIp.Ipv4Regex)
|
||||
.replace("{UrlSuffixRegex}", UrlSuffixRegex);
|
||||
|
||||
public static final String AmbiguousTimeTerm = "^(1?[0-9]|2[0-3]).[ap]m$";
|
||||
|
||||
public static final List<String> TldList = Arrays.asList("com", "org", "net", "int", "edu", "gov", "mil", "academy", "app", "aws", "bot", "buy", "cafe", "city", "cloud", "company", "eco", "education", "game", "games", "gmbh", "law", "limited", "live", "llc", "ltd", "ltda", "map", "med", "news", "ngo", "ong", "phd", "place", "radio", "science", "search", "shopping", "sport", "store", "tvs", "wiki", "work", "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bl", "bm", "bn", "bo", "bq", "br", "bs", "bt", "bv", "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv", "cw", "cx", "cy", "cz", "de", "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "eh", "er", "es", "et", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gb", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm", "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly", "ma", "mc", "md", "me", "mf", "mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "rs", "ru", "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk", "sl", "sm", "sn", "so", "sr", "ss", "st", "su", "sv", "sx", "sy", "sz", "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt", "tv", "tw", "tz", "ua", "ug", "uk", "um", "us", "uy", "uz", "va", "vc", "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "yt", "za", "zm", "zw");
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
public class ChineseIp {
|
||||
|
||||
private static ChinesePhoneNumbers PhoneNumbersDefinitions;
|
||||
|
||||
public static final String Ipv4Regex = "({PhoneNumbersDefinitions.WordBoundariesRegex}(1\\d{2}|2[0-4]\\d|25[0-5]|0?[1-9]\\d|0{0,2}\\d)((\\.(1\\d{2}|2[0-4]\\d|25[0-5]|0?[1-9]\\d|0{0,2}\\d)){3}){PhoneNumbersDefinitions.EndWordBoundariesRegex})"
|
||||
.replace("{PhoneNumbersDefinitions.WordBoundariesRegex}", PhoneNumbersDefinitions.WordBoundariesRegex)
|
||||
.replace("{PhoneNumbersDefinitions.EndWordBoundariesRegex}", PhoneNumbersDefinitions.EndWordBoundariesRegex);
|
||||
|
||||
public static final String Ipv6EllipsisRegexOther = "({PhoneNumbersDefinitions.NonWordBoundariesRegex}::{PhoneNumbersDefinitions.NonWordBoundariesRegex}|{PhoneNumbersDefinitions.NonWordBoundariesRegex}:(:{BaseIp.BasicIpv6Element}){1,7}{PhoneNumbersDefinitions.WordBoundariesRegex}|{PhoneNumbersDefinitions.WordBoundariesRegex}({BaseIp.BasicIpv6Element}:){1,7}:{PhoneNumbersDefinitions.NonWordBoundariesRegex})"
|
||||
.replace("{PhoneNumbersDefinitions.NonWordBoundariesRegex}", PhoneNumbersDefinitions.NonWordBoundariesRegex)
|
||||
.replace("{PhoneNumbersDefinitions.WordBoundariesRegex}", PhoneNumbersDefinitions.WordBoundariesRegex)
|
||||
.replace("{BaseIp.BasicIpv6Element}", BaseIp.BasicIpv6Element);
|
||||
|
||||
public static final String Ipv6Regex = "({PhoneNumbersDefinitions.WordBoundariesRegex}{BaseIp.MergedIpv6Regex}{PhoneNumbersDefinitions.WordBoundariesRegex})|({Ipv6EllipsisRegexOther})"
|
||||
.replace("{PhoneNumbersDefinitions.WordBoundariesRegex}", PhoneNumbersDefinitions.WordBoundariesRegex)
|
||||
.replace("{BaseIp.MergedIpv6Regex}", BaseIp.MergedIpv6Regex)
|
||||
.replace("{Ipv6EllipsisRegexOther}", Ipv6EllipsisRegexOther);
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Boundary fragments and marker lists for phone-number recognition in Chinese text. The boundary
 * fragments extend plain word boundaries with a lookaround on the character range U+0800 to U+9FFF.
 */
public class ChinesePhoneNumbers {

    public static final String NumberReplaceToken = "@builtin.phonenumber";

    /** Word boundary, or a position preceded by a character in U+0800 to U+9FFF. */
    public static final String WordBoundariesRegex = "(\\b|(?<=[\\u0800-\\u9FFF]))";

    /** Non-word boundary, or a position preceded by a character in U+0800 to U+9FFF. */
    public static final String NonWordBoundariesRegex = "(\\B|(?<=[\\u0800-\\u9FFF]))";

    /** Word boundary, or a position followed by a character in U+0800 to U+9FFF. */
    public static final String EndWordBoundariesRegex = "(\\b|(?=[\\u0800-\\u9FFF]))";

    /** A lowercase ASCII letter or a character in U+4E00 to U+9FA5, then optional whitespace, at end of input. */
    public static final String ColonPrefixCheckRegex = "(([a-z]|[\\u4E00-\\u9FA5])\\s*$)";

    public static final List<Character> ForbiddenPrefixMarkers = Arrays.asList(':', '%');
}
|
|
@ -0,0 +1,31 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
// Regex strings for URL handling in Chinese text.
// Each composite pattern is assembled by literal String.replace substitution of
// "{Name}" placeholders with other constants (from this class, BaseURL and BaseIp).
// The file header marks this class as auto-generated.
public class ChineseURL {
|
||||
|
||||
// Look-behind requiring that the match position is preceded by whitespace, by one of
// the characters ' " ( [ :, by the start of input, or by a character in U+0800..U+9FFF.
// NOTE(review): the double quote and the colon each appear twice in the character class;
// the second colon was possibly a different (e.g. full-width) colon before transcription - TODO confirm upstream.
public static final String ExtractionRestrictionRegex = "(?<=\\s|[\\'\"\"\\(\\[::]|^|[\\u0800-\\u9FFF])";
|
||||
|
||||
// Start of a URL up to and including the dot before the top-level domain:
// either the extraction restriction followed by the protocol pattern with a '?' quantifier,
// or the protocol pattern alone; then one alphanumeric character, up to 256 characters
// from [-a-zA-Z0-9._], and a literal '.' that must not itself be preceded by a '.'.
public static final String UrlPrefixRegex = "({ExtractionRestrictionRegex}{BaseURL.ProtocolRegex}?|{BaseURL.ProtocolRegex})[a-zA-Z0-9][-a-zA-Z0-9._]{0,256}(?<![.])\\."
|
||||
.replace("{ExtractionRestrictionRegex}", ExtractionRestrictionRegex)
|
||||
.replace("{BaseURL.ProtocolRegex}", BaseURL.ProtocolRegex);
|
||||
|
||||
// Full URL pattern: UrlPrefixRegex, then a named group "Tld" of 2 to 18 ASCII letters,
// then BaseURL.UrlSuffixRegex.
public static final String UrlRegex = "{UrlPrefixRegex}(?<Tld>[a-zA-Z]{2,18}){BaseURL.UrlSuffixRegex}"
|
||||
.replace("{UrlPrefixRegex}", UrlPrefixRegex)
|
||||
.replace("{BaseURL.UrlSuffixRegex}", BaseURL.UrlSuffixRegex);
|
||||
|
||||
// URL whose host is BaseIp.Ipv4Regex or the literal "localhost", captured in the named
// group "IPurl": extraction restriction, protocol pattern (no '?' here), host, then suffix.
public static final String IpUrlRegex = "(?<IPurl>({ExtractionRestrictionRegex}{BaseURL.ProtocolRegex}({BaseIp.Ipv4Regex}|localhost){BaseURL.UrlSuffixRegex}))"
|
||||
.replace("{ExtractionRestrictionRegex}", ExtractionRestrictionRegex)
|
||||
.replace("{BaseURL.ProtocolRegex}", BaseURL.ProtocolRegex)
|
||||
.replace("{BaseIp.Ipv4Regex}", BaseIp.Ipv4Regex)
|
||||
.replace("{BaseURL.UrlSuffixRegex}", BaseURL.UrlSuffixRegex);
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
// String constants for English phone-number handling.
// The file header marks this class as auto-generated.
public class EnglishPhoneNumbers {
|
||||
|
||||
// Token string "@builtin.phonenumber".
// NOTE(review): presumably the placeholder/type name for recognized phone numbers - usage is not visible here.
public static final String NumberReplaceToken = "@builtin.phonenumber";
|
||||
|
||||
// Matches text ending in "account" or "card", optionally followed by "#" or "number",
// an optional "is", an optional colon and trailing whitespace, anchored at end of input.
// NOTE(review): the name suggests it is applied to the text before a candidate number
// to discard account/card numbers - confirm in the extractor.
public static final String FalsePositivePrefixRegex = "(account|card)(\\s+(#|number))?(\\s+is)?:?\\s*$";
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
// ------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
package com.microsoft.recognizers.text.sequence.resources;
|
||||
|
||||
// String constants for Portuguese phone-number handling.
// The file header marks this class as auto-generated.
public class PortuguesePhoneNumbers {
|
||||
|
||||
// Token string "@builtin.phonenumber".
// NOTE(review): presumably the placeholder/type name for recognized phone numbers - usage is not visible here.
public static final String NumberReplaceToken = "@builtin.phonenumber";
|
||||
|
||||
// Matches text ending in "conta", optionally followed by "número" or by "bancária"
// (itself optionally followed by "número"), an optional "é", an optional colon and
// trailing whitespace, anchored at end of input.
// NOTE(review): the name suggests it is applied to the text before a candidate number
// to discard bank-account numbers - confirm in the extractor.
public static final String FalsePositivePrefixRegex = "conta(\\s+(número|bancária(\\s+número)?))?(\\s+é)?:?\\s*$";
|
||||
}
|
|
@ -1,6 +1,5 @@
|
|||
package com.microsoft.recognizers.text.utilities;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
@ -13,6 +12,7 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.javatuples.Pair;
|
||||
|
||||
public abstract class RegExpUtility {
|
||||
|
@ -23,9 +23,6 @@ public abstract class RegExpUtility {
|
|||
// Matches the literal text "(?<!" (the opening of a negative look-behind group).
private static final Pattern matchNegativeLookbehind = Pattern.compile("\\(\\?<!", Pattern.CASE_INSENSITIVE);
|
||||
// Separator string "iii".
// NOTE(review): the name suggests it separates a group name from an index - usage is not visible here.
private static final String groupNameIndexSep = "iii";
|
||||
// groupNameIndexSep quoted via Pattern.quote so it can be embedded in a regex as a literal.
private static final String groupNameIndexSepRegex = Pattern.quote(groupNameIndexSep);
|
||||
|
||||
// Initialized once from isRestrictedJavaVersion().
// NOTE(review): this text is a diff hunk (-23,9 +23,6) without +/- markers; this field is
// presumably one of the removed lines - confirm against the repository.
private static final boolean unboundedLookBehindNotSupported = isRestrictedJavaVersion();
|
||||
|
||||
// Matches "<!" or "<=" optionally preceded by a backslash; the matched text is captured in group 1.
private static final Pattern lookBehindCheckRegex = Pattern.compile("(\\\\?<[!=])");
|
||||
private static final Map<Character, String> bindings = new HashMap<Character, String>() {
|
||||
{
|
||||
|
@ -177,14 +174,8 @@ public abstract class RegExpUtility {
|
|||
}
|
||||
|
||||
// Compiles the given regex source into a Pattern with the given flags, after passing the
// source through bindLookbehinds.
// NOTE(review): this body comes from a diff hunk (-177,14 +174,8) rendered without +/-
// markers. The guarded bindLookbehinds call and the unconditional one below are presumably
// the removed and the added version respectively; as literally shown, both would run.
// Confirm against the repository before relying on this text.
public static Pattern getSafeLookbehindRegExp(String source, int flags) {
|
||||
|
||||
String result = source;
|
||||
|
||||
// Java pre 1.9 doesn't support unbounded lookbehind lengths
|
||||
if (unboundedLookBehindNotSupported) {
|
||||
result = bindLookbehinds(result);
|
||||
}
|
||||
|
||||
result = bindLookbehinds(result);
|
||||
return Pattern.compile(result, flags);
|
||||
}
|
||||
|
||||
|
@ -387,33 +378,4 @@ public abstract class RegExpUtility {
|
|||
|
||||
return resultString.toString();
|
||||
}
|
||||
|
||||
// Checks if Java version is <= 8, as they don't support look-behind groups with no maximum length.
// Reads the "java.specification.version" system property, parses it as a BigDecimal and
// compares it with 1.8; if that throws, retries with "java.version". If both attempts throw,
// the result stays false. Prints a warning to standard output when the result is true.
// NOTE(review): the comparison is compareTo(targetVersion) >= 0, which is true for 1.8 and
// every higher version and false for lower ones; that does not match the "<= 8" wording
// above - confirm the intended direction.
// NOTE(review): the enclosing diff hunk (-387,33 +378,4) suggests this method is removed by the commit.
|
||||
private static boolean isRestrictedJavaVersion() {
|
||||
|
||||
boolean result = false;
|
||||
BigDecimal targetVersion = new BigDecimal("1.8");
|
||||
|
||||
try {
|
||||
String specVersion = System.getProperty("java.specification.version");
|
||||
result = new BigDecimal(specVersion).compareTo(targetVersion) >= 0;
|
||||
} catch (Exception e1) {
|
||||
|
||||
try {
|
||||
// Could also be "java.runtime.version".
|
||||
String runtimeVersion = System.getProperty("java.version");
|
||||
// Values that BigDecimal cannot parse throw here and leave result unchanged.
result = new BigDecimal(runtimeVersion).compareTo(targetVersion) >= 0;
|
||||
|
||||
} catch (Exception e2) {
|
||||
// Nothing to do, ignore.
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (result) {
|
||||
System.out.println("WARN: Look-behind groups with no maximum length not supported. Java version <= 8.");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -10,7 +10,11 @@ public abstract class StringUtility {
|
|||
}
|
||||
|
||||
// Returns source with its leading run of whitespace (regex "^\\s+") removed.
// NOTE(review): this body comes from a diff hunk (-10,7 +10,11) rendered without +/- markers;
// the two return statements are presumably the removed line (direct replaceFirst) and the
// added line (delegation to the three-argument overload). Only one exists in the real file.
public static String trimStart(String source) {
|
||||
return source.replaceFirst("^\\s+", "");
|
||||
return trimStart(source, "^\\s+", "");
|
||||
}
|
||||
|
||||
// Returns source with the first match of regex replaced by replacement
// (a direct call to String.replaceFirst). Nothing in this method restricts the match
// to the start of the string; that depends on the regex the caller passes.
public static String trimStart(String source, String regex, String replacement) {
|
||||
return source.replaceFirst(regex, replacement);
|
||||
}
|
||||
|
||||
public static String trimEnd(String source) {
|
||||
|
|
Загрузка…
Ссылка в новой задаче