[SDK][Bot-Dialogs] Update Recognizers-Text internal library (#1021)

* Update Recognizers-Text Choice library

* Update Recognizers-Text Datetime library

* Update Recognizers-Text Number library

* Update Recognizers-Text NumberWithUnit library

* Update Recognizers-Text Main library

* Add Recognizers-Text Expression library

* Add Recognizers-Text Sequence library

* Remove deprecated workaround as it's fixed in RT
This commit is contained in:
Martin Battaglino 2021-02-25 13:28:05 -03:00 коммит произвёл GitHub
Родитель 0a7c6c1f90
Коммит 349ab351af
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
98 изменённых файлов: 5920 добавлений и 99 удалений

Просмотреть файл

@ -131,7 +131,7 @@ public final class ChoiceRecognizers {
return result.stream().map(r ->
new ModelResult<FoundChoice>() {{
setStart(r.start);
setEnd(r.end - 1); // bug in 1.0-SNAPSHOT, should not have to decrement
setEnd(r.end);
setText(r.text);
setResolution(new FoundChoice() {{
setValue(r.resolution.get("value").toString());

Просмотреть файл

@ -17,7 +17,11 @@ public class ChineseChoice {
public static final String TokenizerRegex = "[^\\u3040-\\u30ff\\u3400-\\u4dbf\\u4e00-\\u9fff\\uf900-\\ufaff\\uff66-\\uff9f]";
public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}

Просмотреть файл

@ -17,7 +17,11 @@ public class EnglishChoice {
public static final String TokenizerRegex = "[^\\w\\d]";
public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590)";
public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}

Просмотреть файл

@ -17,7 +17,11 @@ public class FrenchChoice {
public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";
public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}

Просмотреть файл

@ -17,7 +17,11 @@ public class PortugueseChoice {
public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";
public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}

Просмотреть файл

@ -17,7 +17,11 @@ public class SpanishChoice {
public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";
public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}

Просмотреть файл

@ -1231,8 +1231,11 @@ public class EnglishDateTime {
public static final ImmutableMap<String, Integer> DayOfMonth = ImmutableMap.<String, Integer>builder()
.put("1st", 1)
.put("1th", 1)
.put("2nd", 2)
.put("2th", 2)
.put("3rd", 3)
.put("3th", 3)
.put("4th", 4)
.put("5th", 5)
.put("6th", 6)
@ -1268,8 +1271,11 @@ public class EnglishDateTime {
.put("30th", 30)
.put("31st", 31)
.put("01st", 1)
.put("01th", 1)
.put("02nd", 2)
.put("02th", 2)
.put("03rd", 3)
.put("03th", 3)
.put("04th", 4)
.put("05th", 5)
.put("06th", 6)

Просмотреть файл

@ -369,6 +369,16 @@ public class SpanishDateTime {
public static final String PmTimeRegex = "(?<pm>(esta|(por|de|a|en)\\s+la)\\s+(tarde|noche))";
public static final String NightTimeRegex = "(noche)";
public static final String LastNightTimeRegex = "(anoche)";
public static final String NowTimeRegex = "(ahora|mismo|momento)";
public static final String RecentlyTimeRegex = "(mente)";
public static final String AsapTimeRegex = "(posible|pueda[ns]?|podamos)";
public static final String LessThanOneHour = "(?<lth>((\\s+y\\s+)?cuarto|(\\s*)menos cuarto|(\\s+y\\s+)media|{BaseDateTime.DeltaMinuteRegex}(\\s+(minutos?|mins?))|{DeltaMinuteNumRegex}(\\s+(minutos?|mins?))))"
.replace("{BaseDateTime.DeltaMinuteRegex}", BaseDateTime.DeltaMinuteRegex)
.replace("{DeltaMinuteNumRegex}", DeltaMinuteNumRegex);

Просмотреть файл

@ -40,6 +40,7 @@ public class SpanishDateTimeParserConfiguration extends BaseOptionsConfiguration
public final Pattern nowRegex;
public final Pattern amTimeRegex;
public final Pattern pmTimeRegex;
public final Pattern lastNightTimeRegex;
public final Pattern simpleTimeOfTodayAfterRegex;
public final Pattern simpleTimeOfTodayBeforeRegex;
public final Pattern specificTimeOfDayRegex;
@ -80,6 +81,7 @@ public class SpanishDateTimeParserConfiguration extends BaseOptionsConfiguration
pmTimeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.PmRegex);
amTimeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.AmTimeRegex);
lastNightTimeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.LastNightTimeRegex);
}
@Override
@ -122,12 +124,18 @@ public class SpanishDateTimeParserConfiguration extends BaseOptionsConfiguration
Matcher regexMatcher = SpanishDatePeriodParserConfiguration.previousPrefixRegex.matcher(trimmedText);
int swift = 0;
if (regexMatcher.find()) {
swift = 1;
swift = -1;
} else {
regexMatcher = SpanishDatePeriodParserConfiguration.nextPrefixRegex.matcher(trimmedText);
regexMatcher = this.lastNightTimeRegex.matcher(trimmedText);
if (regexMatcher.find()) {
swift = -1;
} else {
regexMatcher = SpanishDatePeriodParserConfiguration.nextPrefixRegex.matcher(trimmedText);
if (regexMatcher.find()) {
swift = 1;
}
}
}

Просмотреть файл

@ -332,7 +332,7 @@ public class SpanishDateTimePeriodParserConfiguration extends BaseOptionsConfigu
Matcher regexMatcher = regex.matcher(trimmedText);
int swift = 0;
if (regexMatcher.find() || trimmedText.equals("anoche")) {
if (regexMatcher.find() || trimmedText.startsWith("anoche")) {
swift = -1;
} else {
regex = Pattern.compile(SpanishDateTime.NextPrefixRegex);

Просмотреть файл

@ -1,6 +1,7 @@
package com.microsoft.recognizers.text.datetime.utilities;
import com.google.common.collect.ImmutableMap;
import com.microsoft.recognizers.datatypes.timex.expression.TimexHelpers;
import com.microsoft.recognizers.text.datetime.Constants;
import com.microsoft.recognizers.text.datetime.DatePeriodTimexType;
import com.microsoft.recognizers.text.datetime.DateTimeResolutionKey;
@ -14,6 +15,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class TimexUtility {
private static final HashMap<DatePeriodTimexType, String> DatePeriodTimexTypeToTimexSuffix = new HashMap<DatePeriodTimexType, String>() {
@ -28,33 +30,14 @@ public class TimexUtility {
public static String generateCompoundDurationTimex(Map<String, String> unitToTimexComponents, ImmutableMap<String, Long> unitValueMap) {
List<String> unitList = new ArrayList<>(unitToTimexComponents.keySet());
unitList.sort((x, y) -> unitValueMap.get(x) < unitValueMap.get(y) ? 1 : -1);
boolean isTimeDurationAlreadyExist = false;
StringBuilder timexBuilder = new StringBuilder(Constants.GeneralPeriodPrefix);
for (String unitKey : unitList) {
String timexComponent = unitToTimexComponents.get(unitKey);
// The Time Duration component occurs first time
if (!isTimeDurationAlreadyExist && isTimeDurationTimex(timexComponent)) {
timexBuilder.append(Constants.TimeTimexPrefix);
timexBuilder.append(getDurationTimexWithoutPrefix(timexComponent));
isTimeDurationAlreadyExist = true;
} else {
timexBuilder.append(getDurationTimexWithoutPrefix(timexComponent));
}
}
return timexBuilder.toString();
unitList = unitList.stream().map(t -> unitToTimexComponents.get(t)).collect(Collectors.toList());
return TimexHelpers.generateCompoundDurationTimex(unitList);
}
private static boolean isTimeDurationTimex(String timex) {
private static Boolean isTimeDurationTimex(String timex) {
return timex.startsWith(Constants.GeneralPeriodPrefix + Constants.TimeTimexPrefix);
}
private static String getDurationTimexWithoutPrefix(String timex) {
// Remove "PT" prefix for TimeDuration, Remove "P" prefix for DateDuration
return timex.substring(isTimeDurationTimex(timex) ? 2 : 1);
}
public static String getDatePeriodTimexUnitCount(LocalDateTime begin, LocalDateTime end,
DatePeriodTimexType timexType, Boolean equalDurationLength) {
String unitCount = "XX";

Просмотреть файл

@ -0,0 +1,55 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
/**
 * String, character and numeric constants shared by the TIMEX expression datatypes.
 */
public class Constants {
// Timex unit designators. Note that TIMEX_MONTH and TIMEX_MINUTE deliberately share the value "M".
public static final String TIMEX_YEAR = "Y";
public static final String TIMEX_MONTH = "M";
public static final String TIMEX_MONTH_FULL = "MON";
public static final String TIMEX_WEEK = "W";
public static final String TIMEX_DAY = "D";
public static final String TIMEX_BUSINESS_DAY = "BD";
public static final String TIMEX_WEEKEND = "WE";
public static final String TIMEX_HOUR = "H";
public static final String TIMEX_MINUTE = "M";
public static final String TIMEX_SECOND = "S";
public static final String TIMEX_NIGHT = "NI";
// "Fuzzy" placeholders, all built from the 'X' character.
public static final Character TIMEX_FUZZY = 'X';
public static final String TIMEX_FUZZY_YEAR = "XXXX";
public static final String TIMEX_FUZZY_MONTH = "XX";
public static final String TIMEX_FUZZY_WEEK = "WXX";
public static final String TIMEX_FUZZY_DAY = "XX";
// Separators placed between date components ("-") and between time components (":").
public static final String DATE_TIMEX_CONNECTOR = "-";
public static final String TIME_TIMEX_CONNECTOR = ":";
// Prefixes: "P" for a general period, "T" for a time component.
public static final String GENERAL_PERIOD_PREFIX = "P";
public static final String TIME_TIMEX_PREFIX = "T";
// Lower-case English unit names.
public static final String YEAR_UNIT = "year";
public static final String MONTH_UNIT = "month";
public static final String WEEK_UNIT = "week";
public static final String DAY_UNIT = "day";
public static final String HOUR_UNIT = "hour";
public static final String MINUTE_UNIT = "minute";
public static final String SECOND_UNIT = "second";
public static final String TIME_DURATION_UNIT = "s";
// Meridiem markers.
public static final String AM = "AM";
public static final String PM = "PM";
// Sentinel integer (-1) for a value that is not available.
public static final int INVALID_VALUE = -1;
/**
 * Names of the TIMEX types, as lower-case strings.
 */
public static class TimexTypes {
public static final String PRESENT = "present";
public static final String DEFINITE = "definite";
public static final String DATE = "date";
public static final String DATE_TIME = "datetime";
public static final String DATE_RANGE = "daterange";
public static final String DURATION = "duration";
public static final String TIME = "time";
public static final String TIME_RANGE = "timerange";
public static final String DATE_TIME_RANGE = "datetimerange";
}
}

Просмотреть файл

@ -0,0 +1,27 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.time.LocalDateTime;
public class DateRange {
private LocalDateTime start;
private LocalDateTime end;
public LocalDateTime getStart() {
return start;
}
public void setStart(LocalDateTime withStart) {
this.start = withStart;
}
public LocalDateTime getEnd() {
return end;
}
public void setEnd(LocalDateTime withEnd) {
this.end = withEnd;
}
}

Просмотреть файл

@ -0,0 +1,71 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.util.ArrayList;
import java.util.List;
/**
 * Container for a list of resolution {@link Entry} values.
 *
 * <p>The list starts empty and is exposed directly (not copied) by {@link #getValues()},
 * so callers add entries by mutating the returned list.
 */
public class Resolution {

    private final List<Entry> values = new ArrayList<Entry>();

    /**
     * Creates a resolution with an empty entry list.
     */
    public Resolution() {
    }

    /**
     * Returns the live list of entries.
     *
     * @return the backing list; never {@code null}
     */
    public List<Entry> getValues() {
        return values;
    }

    /**
     * One resolved value, described by five independent string properties.
     * All properties are {@code null} until set.
     */
    public static class Entry {

        private String timex;
        private String type;
        private String value;
        private String start;
        private String end;

        /** @return the timex string, or {@code null} if unset */
        public String getTimex() {
            return this.timex;
        }

        /** @return the type string, or {@code null} if unset */
        public String getType() {
            return this.type;
        }

        /** @return the value string, or {@code null} if unset */
        public String getValue() {
            return this.value;
        }

        /** @return the start string, or {@code null} if unset */
        public String getStart() {
            return this.start;
        }

        /** @return the end string, or {@code null} if unset */
        public String getEnd() {
            return this.end;
        }

        /** @param withTimex the timex string to store */
        public void setTimex(String withTimex) {
            timex = withTimex;
        }

        /** @param withType the type string to store */
        public void setType(String withType) {
            type = withType;
        }

        /** @param withValue the value string to store */
        public void setValue(String withValue) {
            value = withValue;
        }

        /** @param withStart the start string to store */
        public void setStart(String withStart) {
            start = withStart;
        }

        /** @param withEnd the end string to store */
        public void setEnd(String withEnd) {
            end = withEnd;
        }
    }
}

Просмотреть файл

@ -0,0 +1,52 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
/**
 * A time of day held as separate hour, minute and second components.
 *
 * <p>The single-argument constructor and {@link #getTime()} convert to and from a
 * millisecond count (1000 per second, 60000 per minute, 3600000 per hour).
 */
public class Time {

    private static final int MILLIS_PER_SECOND = 1000;
    private static final int MILLIS_PER_MINUTE = 60000;
    private static final int MILLIS_PER_HOUR = 3600000;

    private Integer hour;
    private Integer minute;
    private Integer second;

    /**
     * Splits a millisecond count into hour, minute and second components.
     * Any sub-second remainder is discarded.
     *
     * @param withSeconds the amount to split; despite the parameter name, the arithmetic
     *                    treats it as milliseconds
     */
    public Time(Integer withSeconds) {
        final int total = withSeconds;
        // Hours are floored (towards negative infinity), so the remainder is never negative.
        final int wholeHours = Math.floorDiv(total, MILLIS_PER_HOUR);
        final int afterHours = total - (wholeHours * MILLIS_PER_HOUR);
        final int wholeMinutes = afterHours / MILLIS_PER_MINUTE;
        final int afterMinutes = afterHours - (wholeMinutes * MILLIS_PER_MINUTE);

        this.hour = wholeHours;
        this.minute = wholeMinutes;
        this.second = afterMinutes / MILLIS_PER_SECOND;
    }

    /**
     * Creates a time from explicit components; the values are stored without validation.
     *
     * @param withHour   the hour component
     * @param withMinute the minute component
     * @param withSecond the second component
     */
    public Time(Integer withHour, Integer withMinute, Integer withSecond) {
        this.hour = withHour;
        this.minute = withMinute;
        this.second = withSecond;
    }

    /**
     * Converts the components back into a single millisecond count.
     *
     * @return {@code second * 1000 + minute * 60000 + hour * 3600000}
     */
    public Integer getTime() {
        final int secondMillis = this.second * MILLIS_PER_SECOND;
        final int minuteMillis = this.minute * MILLIS_PER_MINUTE;
        final int hourMillis = this.hour * MILLIS_PER_HOUR;
        return secondMillis + minuteMillis + hourMillis;
    }

    /** @return the hour component */
    public Integer getHour() {
        return this.hour;
    }

    /** @return the minute component */
    public Integer getMinute() {
        return this.minute;
    }

    /** @return the second component */
    public Integer getSecond() {
        return this.second;
    }

    /** @param withHour the new hour component */
    public void setHour(Integer withHour) {
        hour = withHour;
    }

    /** @param withMinute the new minute component */
    public void setMinute(Integer withMinute) {
        minute = withMinute;
    }

    /** @param withSecond the new second component */
    public void setSecond(Integer withSecond) {
        second = withSecond;
    }
}

Просмотреть файл

@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
/**
 * Mutable holder for a pair of {@link Time} values: a start and an end.
 *
 * <p>Both values are {@code null} until set; no ordering between them is enforced.
 */
public class TimeRange {

    private Time start;
    private Time end;

    /**
     * Returns the start of the range.
     *
     * @return the start value, or {@code null} when it has not been set
     */
    public Time getStart() {
        return this.start;
    }

    /**
     * Returns the end of the range.
     *
     * @return the end value, or {@code null} when it has not been set
     */
    public Time getEnd() {
        return this.end;
    }

    /**
     * Replaces the start of the range.
     *
     * @param withStart the new start value
     */
    public void setStart(Time withStart) {
        start = withStart;
    }

    /**
     * Replaces the end of the range.
     *
     * @param withEnd the new end value
     */
    public void setEnd(Time withEnd) {
        end = withEnd;
    }
}

Просмотреть файл

@ -0,0 +1,102 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.util.List;
/**
 * Helpers that repeatedly collapse overlapping {@link TimeRange} or {@link DateRange}
 * values into a single range and return the result sorted by start.
 *
 * <p>"Collapsing" two overlapping ranges keeps the later of the two starts and the
 * earlier of the two ends.
 */
public class TimexConstraintsHelper {

    /**
     * Collapses overlapping time ranges until no overlapping pair remains, then sorts
     * the list by start time.
     *
     * @param ranges the ranges to collapse; this list is modified in place
     * @return the same list instance that was passed in
     */
    public static List<TimeRange> collapseTimeRanges(List<TimeRange> ranges) {
        List<TimeRange> r = ranges;
        while (TimexConstraintsHelper.innerCollapseTimeRanges(r)) {
            // Each successful pass shrinks the list by one element, so this terminates.
        }

        // Integer.compare avoids the overflow hazard of comparing by subtraction.
        r.sort((a, b) -> Integer.compare(a.getStart().getTime(), b.getStart().getTime()));
        return r;
    }

    /**
     * Collapses overlapping date ranges until no overlapping pair remains, then sorts
     * the list by start date.
     *
     * @param ranges the ranges to collapse; this list is modified in place
     * @return the same list instance that was passed in
     */
    public static List<DateRange> collapseDateRanges(List<DateRange> ranges) {
        List<DateRange> r = ranges;
        while (TimexConstraintsHelper.innerCollapseDateRanges(r)) {
            // Each successful pass shrinks the list by one element, so this terminates.
        }

        r.sort((a, b) -> a.getStart().compareTo(b.getStart()));
        return r;
    }

    /**
     * Tests whether two time ranges overlap.
     *
     * @param r1 the first range
     * @param r2 the second range
     * @return {@code true} when {@code r1} starts at or before {@code r2}'s start and ends
     *         after it, or when {@code r1} starts inside {@code [r2.start, r2.end)}
     */
    public static Boolean isOverlapping(TimeRange r1, TimeRange r2) {
        return (r1.getEnd().getTime() > r2.getStart().getTime() && r1.getStart().getTime() <= r2.getStart().getTime())
            || (r1.getStart().getTime() < r2.getEnd().getTime() && r1.getStart().getTime() >= r2.getStart().getTime());
    }

    /** Date-range counterpart of {@link #isOverlapping(TimeRange, TimeRange)}. */
    private static Boolean isOverlapping(DateRange r1, DateRange r2) {
        return (r1.getEnd().isAfter(r2.getStart())
                && (r1.getStart().isBefore(r2.getStart()) || r1.getStart().isEqual(r2.getStart())))
            || (r1.getStart().isBefore(r2.getEnd())
                && (r1.getStart().isAfter(r2.getStart()) || r1.getStart().isEqual(r2.getStart())));
    }

    /** Builds the range spanning from the later start to the earlier end of the two inputs. */
    private static TimeRange collapseOverlapping(TimeRange r1, TimeRange r2) {
        TimeRange collapsed = new TimeRange();
        collapsed.setStart(new Time(Math.max(r1.getStart().getTime(), r2.getStart().getTime())));
        collapsed.setEnd(new Time(Math.min(r1.getEnd().getTime(), r2.getEnd().getTime())));
        return collapsed;
    }

    /** Builds the range spanning from the later start to the earlier end of the two inputs. */
    private static DateRange collapseOverlapping(DateRange r1, DateRange r2) {
        DateRange collapsed = new DateRange();
        collapsed.setStart(r1.getStart().compareTo(r2.getStart()) > 0 ? r1.getStart() : r2.getStart());
        collapsed.setEnd(r1.getEnd().compareTo(r2.getEnd()) < 0 ? r1.getEnd() : r2.getEnd());
        return collapsed;
    }

    /**
     * Finds the first overlapping pair, removes both members and appends their collapsed range.
     *
     * @return {@code true} when a pair was collapsed, {@code false} when none overlaps
     */
    private static Boolean innerCollapseTimeRanges(List<TimeRange> ranges) {
        if (ranges.size() == 1) {
            return false;
        }

        for (int i = 0; i < ranges.size(); i++) {
            TimeRange r1 = ranges.get(i);
            for (int j = i + 1; j < ranges.size(); j++) {
                TimeRange r2 = ranges.get(j);
                if (TimexConstraintsHelper.isOverlapping(r1, r2)) {
                    // remove(int) drops exactly one element. List.subList(from, to) takes an
                    // exclusive END index, not a count, so subList(i, 1) was only correct for i == 0.
                    ranges.remove(i);
                    // r2 moved one slot to the left once r1 (which precedes it) was removed.
                    ranges.remove(j - 1);
                    ranges.add(TimexConstraintsHelper.collapseOverlapping(r1, r2));
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Finds the first overlapping pair, removes both members and appends their collapsed range.
     *
     * @return {@code true} when a pair was collapsed, {@code false} when none overlaps
     */
    private static Boolean innerCollapseDateRanges(List<DateRange> ranges) {
        if (ranges.size() == 1) {
            return false;
        }

        for (int i = 0; i < ranges.size(); i++) {
            DateRange r1 = ranges.get(i);
            for (int j = i + 1; j < ranges.size(); j++) {
                DateRange r2 = ranges.get(j);
                if (TimexConstraintsHelper.isOverlapping(r1, r2)) {
                    // See innerCollapseTimeRanges: remove by index, one element at a time.
                    ranges.remove(i);
                    ranges.remove(j - 1);
                    ranges.add(TimexConstraintsHelper.collapseOverlapping(r1, r2));
                    return true;
                }
            }
        }

        return false;
    }
}

Просмотреть файл

@ -0,0 +1,16 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import com.microsoft.recognizers.datatypes.timex.expression.english.TimexConvertEnglish;
/**
 * Entry point for rendering TIMEX values as text. Both methods delegate directly to
 * {@link TimexConvertEnglish}.
 */
public class TimexConvert {

    /**
     * Renders a single TIMEX value as text.
     *
     * @param timex the value to render
     * @return whatever {@code TimexConvertEnglish.convertTimexToString} returns for it
     */
    public static String convertTimexToString(TimexProperty timex) {
        final String rendered = TimexConvertEnglish.convertTimexToString(timex);
        return rendered;
    }

    /**
     * Renders a TIMEX set as text.
     *
     * @param timexSet the set to render
     * @return whatever {@code TimexConvertEnglish.convertTimexSetToString} returns for it
     */
    public static String convertTimexSetToString(TimexSet timexSet) {
        final String rendered = TimexConvertEnglish.convertTimexSetToString(timexSet);
        return rendered;
    }
}

Просмотреть файл

@ -0,0 +1,88 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.math.BigDecimal;
import java.time.DayOfWeek;
import java.time.LocalDateTime;
/**
 * Factory methods and constants producing TIMEX strings for common relative dates.
 *
 * <p>Every method takes a reference date; passing {@code null} uses
 * {@link LocalDateTime#now()} instead.
 */
public class TimexCreator {

    // The following constants are consistent with the Recognizer results
    public static final String MONDAY = "XXXX-WXX-1";
    public static final String TUESDAY = "XXXX-WXX-2";
    public static final String WEDNESDAY = "XXXX-WXX-3";
    public static final String THURSDAY = "XXXX-WXX-4";
    public static final String FRIDAY = "XXXX-WXX-5";
    public static final String SATURDAY = "XXXX-WXX-6";
    public static final String SUNDAY = "XXXX-WXX-7";
    public static final String MORNING = "(T08,T12,PT4H)";
    public static final String AFTERNOON = "(T12,T16,PT4H)";
    public static final String EVENING = "(T16,T20,PT4H)";
    public static final String DAYTIME = "(T08,T18,PT10H)";
    // NOTE(review): T20..T24 spans four hours but the duration reads PT10H - confirm against
    // the Recognizer output before changing; the value is intentionally left untouched here.
    public static final String NIGHT = "(T20,T24,PT10H)";

    /** Returns the TIMEX value for the reference date itself. */
    public static String today(LocalDateTime date) {
        return TimexProperty.fromDate(referenceOrNow(date)).getTimexValue();
    }

    /** Returns the TIMEX value for the day after the reference date. */
    public static String tomorrow(LocalDateTime date) {
        LocalDateTime next = referenceOrNow(date).plusDays(1);
        return TimexProperty.fromDate(next).getTimexValue();
    }

    /** Returns the TIMEX value for the day before the reference date. */
    public static String yesterday(LocalDateTime date) {
        LocalDateTime previous = referenceOrNow(date).plusDays(-1);
        return TimexProperty.fromDate(previous).getTimexValue();
    }

    /** Returns a TIMEX value anchored at the reference date with a 7-day duration. */
    public static String weekFromToday(LocalDateTime date) {
        return withDays(referenceOrNow(date), new BigDecimal(7));
    }

    /** Returns a TIMEX value anchored 7 days before the reference date with a 7-day duration. */
    public static String weekBackFromToday(LocalDateTime date) {
        LocalDateTime anchor = referenceOrNow(date).plusDays(-7);
        return withDays(anchor, new BigDecimal(7));
    }

    /**
     * Returns a 7-day TIMEX value anchored at the first Monday strictly after
     * (reference date - 7 days).
     */
    public static String thisWeek(LocalDateTime date) {
        LocalDateTime weekAgo = referenceOrNow(date).plusDays(-7);
        LocalDateTime monday = TimexDateHelpers.dateOfNextDay(DayOfWeek.MONDAY, weekAgo);
        return withDays(monday, new BigDecimal(7));
    }

    /** Returns a 7-day TIMEX value anchored at the first Monday strictly after the reference date. */
    public static String nextWeek(LocalDateTime date) {
        LocalDateTime monday = TimexDateHelpers.dateOfNextDay(DayOfWeek.MONDAY, referenceOrNow(date));
        return withDays(monday, new BigDecimal(7));
    }

    /**
     * Returns a 7-day TIMEX value anchored 7 days before the last Monday strictly preceding
     * the reference date.
     */
    public static String lastWeek(LocalDateTime date) {
        LocalDateTime monday = TimexDateHelpers.dateOfLastDay(DayOfWeek.MONDAY, referenceOrNow(date));
        return withDays(monday.plusDays(-7), new BigDecimal(7));
    }

    /**
     * Returns a TIMEX value anchored at the reference date with a duration of {@code n * 7} days.
     *
     * @param n    the number of weeks
     * @param date the reference date, or {@code null} for now
     */
    public static String nextWeeksFromToday(Integer n, LocalDateTime date) {
        return withDays(referenceOrNow(date), new BigDecimal(n * 7));
    }

    /** Substitutes the current date-time when no reference date is supplied. */
    private static LocalDateTime referenceOrNow(LocalDateTime date) {
        if (date == null) {
            return LocalDateTime.now();
        }
        return date;
    }

    /** Builds a TIMEX from {@code anchor}, sets its day count and returns its string value. */
    private static String withDays(LocalDateTime anchor, BigDecimal days) {
        TimexProperty timex = TimexProperty.fromDate(anchor);
        timex.setDays(days);
        return timex.getTimexValue();
    }
}

Просмотреть файл

@ -0,0 +1,126 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.time.DayOfWeek;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
/**
 * Date arithmetic helpers used when building and interpreting TIMEX values.
 *
 * <p>All methods are static and work on {@link LocalDateTime}; methods that compare
 * "dates" look only at year, month and day-of-month.
 */
public class TimexDateHelpers {

    /** Returns {@code date} moved forward by one day. */
    public static LocalDateTime tomorrow(LocalDateTime date) {
        return date.plusDays(1);
    }

    /** Returns {@code date} moved back by one day. */
    public static LocalDateTime yesterday(LocalDateTime date) {
        return date.plusDays(-1);
    }

    /**
     * Compares two values by year, month and day-of-month only.
     *
     * @return {@code true} when both fall on the same calendar day
     */
    public static Boolean datePartEquals(LocalDateTime dateX, LocalDateTime dateY) {
        return (dateX.getYear() == dateY.getYear())
            && (dateX.getMonthValue() == dateY.getMonthValue())
            && (dateX.getDayOfMonth() == dateY.getDayOfMonth());
    }

    /**
     * Tests whether {@code date} is one of the seven consecutive days beginning at
     * {@code startOfWeek}.
     */
    public static boolean isDateInWeek(LocalDateTime date, LocalDateTime startOfWeek) {
        LocalDateTime d = startOfWeek;
        for (int i = 0; i < 7; i++) {
            if (TimexDateHelpers.datePartEquals(date, d)) {
                return true;
            }
            d = d.plusDays(1);
        }
        return false;
    }

    /**
     * Tests whether {@code date} lies in the week containing {@code referenceDate}.
     *
     * <p>The week start is found by stepping back from the reference date while its
     * {@link #getUSDayOfWeek US day index} is greater than Monday's (1). A Sunday
     * reference has index 0 and is therefore used as the week start unchanged.
     */
    public static Boolean isThisWeek(LocalDateTime date, LocalDateTime referenceDate) {
        // Note ISO 8601 week starts on a Monday
        final int mondayIndex = TimexDateHelpers.getUSDayOfWeek(DayOfWeek.MONDAY);
        LocalDateTime startOfWeek = referenceDate;
        while (TimexDateHelpers.getUSDayOfWeek(startOfWeek.getDayOfWeek()) > mondayIndex) {
            startOfWeek = startOfWeek.plusDays(-1);
        }
        return TimexDateHelpers.isDateInWeek(date, startOfWeek);
    }

    /** Tests whether {@code date} lies in the week containing {@code referenceDate + 7 days}. */
    public static Boolean isNextWeek(LocalDateTime date, LocalDateTime referenceDate) {
        return TimexDateHelpers.isThisWeek(date, referenceDate.plusDays(7));
    }

    /** Tests whether {@code date} lies in the week containing {@code referenceDate - 7 days}. */
    public static Boolean isLastWeek(LocalDateTime date, LocalDateTime referenceDate) {
        return TimexDateHelpers.isThisWeek(date, referenceDate.plusDays(-7));
    }

    /**
     * Computes a week number for {@code date}: 1 plus the number of Sundays from
     * January 1st of the same year (inclusive) up to, but excluding, {@code date}.
     */
    public static Integer weekOfYear(LocalDateTime date) {
        LocalDateTime ds = LocalDateTime.of(date.getYear(), 1, 1, 0, 0);
        LocalDateTime de = LocalDateTime.of(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), 0, 0);
        int weeks = 1;
        while (ds.compareTo(de) < 0) {
            // US index 0 is Sunday, which closes the week being counted.
            if (TimexDateHelpers.getUSDayOfWeek(ds.getDayOfWeek()) == 0) {
                weeks++;
            }
            ds = ds.plusDays(1);
        }
        return weeks;
    }

    /**
     * Formats {@code n} left-padded with zeros to at least {@code size} characters.
     *
     * @param n    the number to format
     * @param size the minimum width
     * @return the padded decimal string, e.g. {@code fixedFormatNumber(7, 2)} gives {@code "07"}
     */
    public static String fixedFormatNumber(Integer n, Integer size) {
        // Right-align in a space-padded field, then turn the padding into zeros.
        return String.format("%1$" + size + "s", n.toString()).replace(' ', '0');
    }

    /** Returns the closest date strictly before {@code referenceDate} that falls on {@code day}. */
    public static LocalDateTime dateOfLastDay(DayOfWeek day, LocalDateTime referenceDate) {
        LocalDateTime result = referenceDate.plusDays(-1);
        while (result.getDayOfWeek() != day) {
            result = result.plusDays(-1);
        }
        return result;
    }

    /** Returns the closest date strictly after {@code referenceDate} that falls on {@code day}. */
    public static LocalDateTime dateOfNextDay(DayOfWeek day, LocalDateTime referenceDate) {
        LocalDateTime result = referenceDate.plusDays(1);
        while (result.getDayOfWeek() != day) {
            result = result.plusDays(1);
        }
        return result;
    }

    /**
     * Lists every date falling on {@code day} from {@code start} (inclusive) up to the
     * calendar day of {@code end} (exclusive), stepping one day at a time from {@code start}.
     *
     * @return the matching dates in ascending order; empty when {@code start}'s calendar
     *         day is on or after {@code end}'s
     */
    public static List<LocalDateTime> datesMatchingDay(DayOfWeek day, LocalDateTime start, LocalDateTime end) {
        List<LocalDateTime> result = new ArrayList<LocalDateTime>();
        LocalDateTime d = start;
        // Compare with "before" rather than "not equal": walking forward from a start that
        // lies after end can never reach end, so an inequality test would never terminate.
        while (d.toLocalDate().isBefore(end.toLocalDate())) {
            if (d.getDayOfWeek() == day) {
                result.add(d);
            }
            d = d.plusDays(1);
        }
        return result;
    }

    /**
     * Maps a {@link DayOfWeek} to a US-style index in which Sunday is 0 and Saturday is 6.
     */
    public static Integer getUSDayOfWeek(DayOfWeek dayOfWeek) {
        return dayOfWeek.getValue() % 7;
    }
}

Просмотреть файл

@ -0,0 +1,195 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.math.BigDecimal;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
public class TimexFormat {
public static String format(TimexProperty timex) {
HashSet<String> types = timex.getTypes().size() != 0 ? timex.getTypes() : TimexInference.infer(timex);
if (types.contains(Constants.TimexTypes.PRESENT)) {
return "PRESENT_REF";
}
if ((types.contains(Constants.TimexTypes.DATE_TIME_RANGE) || types.contains(Constants.TimexTypes.DATE_RANGE) ||
types.contains(Constants.TimexTypes.TIME_RANGE)) && types.contains(Constants.TimexTypes.DURATION)) {
TimexRange range = TimexHelpers.expandDateTimeRange(timex);
return String.format("(%1$s,%2$s,%3$s)", TimexFormat.format(range.getStart()),
TimexFormat.format(range.getEnd()), TimexFormat.format(range.getDuration()));
}
if (types.contains(Constants.TimexTypes.DATE_TIME_RANGE)) {
return String.format("%1$s%2$s", TimexFormat.formatDate(timex), TimexFormat.formatTimeRange(timex));
}
if (types.contains(Constants.TimexTypes.DATE_RANGE)) {
return TimexFormat.formatDateRange(timex);
}
if (types.contains(Constants.TimexTypes.TIME_RANGE)) {
return TimexFormat.formatTimeRange(timex);
}
if (types.contains(Constants.TimexTypes.DATE_TIME)) {
return String.format("%1$s%2$s", TimexFormat.formatDate(timex), TimexFormat.formatTime(timex));
}
if (types.contains(Constants.TimexTypes.DURATION)) {
return TimexFormat.formatDuration(timex);
}
if (types.contains(Constants.TimexTypes.DATE)) {
return TimexFormat.formatDate(timex);
}
if (types.contains(Constants.TimexTypes.TIME)) {
return TimexFormat.formatTime(timex);
}
return new String();
}
/**
 * Renders the duration carried by {@code timex} as a compound duration string.
 *
 * <p>Each non-null component (years, months, weeks, days, hours, minutes, seconds, in that
 * order) is converted with {@code TimexHelpers.generateDurationTimex} and the pieces are
 * combined by {@code TimexHelpers.generateCompoundDurationTimex}.
 *
 * @param timex the value whose duration components are read
 * @return the combined duration string
 */
private static String formatDuration(TimexProperty timex) {
    List<String> timexList = new ArrayList<String>();

    // Each getter is read once; components that are absent (null) contribute nothing.
    final BigDecimal years = timex.getYears();
    if (years != null) {
        timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Year, years));
    }

    final BigDecimal months = timex.getMonths();
    if (months != null) {
        timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Month, months));
    }

    final BigDecimal weeks = timex.getWeeks();
    if (weeks != null) {
        timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Week, weeks));
    }

    final BigDecimal days = timex.getDays();
    if (days != null) {
        timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Day, days));
    }

    final BigDecimal hours = timex.getHours();
    if (hours != null) {
        timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Hour, hours));
    }

    final BigDecimal minutes = timex.getMinutes();
    if (minutes != null) {
        timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Minute, minutes));
    }

    final BigDecimal seconds = timex.getSeconds();
    if (seconds != null) {
        timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Second, seconds));
    }

    return TimexHelpers.generateCompoundDurationTimex(timexList);
}
/**
 * Renders the time of day carried by {@code timex} with two-digit components.
 *
 * <p>The result is {@code Thh} when minute and second are both zero, {@code Thh:mm} when
 * only the second is zero, and {@code Thh:mm:ss} otherwise.
 *
 * @param timex the value whose hour, minute and second are read
 * @return the formatted time string
 */
private static String formatTime(TimexProperty timex) {
    final int minute = timex.getMinute();
    // The second is not consulted when it cannot affect the "Thh" decision.
    final boolean hourOnly = minute == 0 && timex.getSecond() == 0;

    final StringBuilder text = new StringBuilder("T");
    text.append(TimexDateHelpers.fixedFormatNumber(timex.getHour(), 2));
    if (hourOnly) {
        return text.toString();
    }

    final int second = timex.getSecond();
    text.append(':').append(TimexDateHelpers.fixedFormatNumber(minute, 2));
    if (second != 0) {
        text.append(':').append(TimexDateHelpers.fixedFormatNumber(second, 2));
    }
    return text.toString();
}
/**
 * Renders the date carried by {@code timex} via {@code TimexHelpers.generateDateTimex}.
 *
 * <p>Missing components are replaced by {@code Constants.INVALID_VALUE}. The week of year
 * takes precedence over the month, and the day of week takes precedence over the day of
 * month; the final argument tells the helper whether a day of week was present.
 *
 * @param timex the value whose date components are read
 * @return the formatted date string
 */
private static String formatDate(TimexProperty timex) {
    Integer year = Constants.INVALID_VALUE;
    if (timex.getYear() != null) {
        year = timex.getYear();
    }

    // "month" slot: week of year wins over month.
    Integer month = Constants.INVALID_VALUE;
    if (timex.getWeekOfYear() != null) {
        month = timex.getWeekOfYear();
    } else if (timex.getMonth() != null) {
        month = timex.getMonth();
    }

    // "day" slot: day of week wins over day of month.
    Integer day = Constants.INVALID_VALUE;
    if (timex.getDayOfWeek() != null) {
        day = timex.getDayOfWeek();
    } else if (timex.getDayOfMonth() != null) {
        day = timex.getDayOfMonth();
    }

    Integer weekOfMonth = Constants.INVALID_VALUE;
    if (timex.getWeekOfMonth() != null) {
        weekOfMonth = timex.getWeekOfMonth();
    }

    final boolean hasDayOfWeek = timex.getDayOfWeek() != null;
    return TimexHelpers.generateDateTimex(year, month, day, weekOfMonth, hasDayOfWeek);
}
/**
 * Renders a date-range TIMEX from whichever of year, month, week and season are set.
 *
 * <p>When a year is present the first matching form wins, in this order: year + week of year
 * (with a {@code -WE} suffix when the weekend flag is set), year + month + week of month,
 * year + season, year + month, year alone. Without a year: season alone, then the month
 * based forms prefixed with {@code Constants.TIMEX_FUZZY_YEAR}. An empty string is returned
 * when nothing applies.
 *
 * @param timex value to read the range components from
 * @return the date-range fragment, or an empty string
 */
private static String formatDateRange(TimexProperty timex) {
    Integer year = timex.getYear();
    Integer month = timex.getMonth();
    Integer weekOfMonth = timex.getWeekOfMonth();
    String season = timex.getSeason();

    if (year != null) {
        String yearText = TimexDateHelpers.fixedFormatNumber(year, 4);
        if (timex.getWeekOfYear() != null) {
            String weekText = TimexDateHelpers.fixedFormatNumber(timex.getWeekOfYear(), 2);
            if (timex.getWeekend() != null) {
                return String.format("%1$s-W%2$s-WE", yearText, weekText);
            }
            return String.format("%1$s-W%2$s", yearText, weekText);
        }
        if (month != null && weekOfMonth != null) {
            return String.format("%1$s-%2$s-W%3$s", yearText,
                TimexDateHelpers.fixedFormatNumber(month, 2),
                TimexDateHelpers.fixedFormatNumber(weekOfMonth, 2));
        }
        if (season != null) {
            return String.format("%1$s-%2$s", yearText, season);
        }
        if (month != null) {
            return String.format("%1$s-%2$s", yearText, TimexDateHelpers.fixedFormatNumber(month, 2));
        }
        return yearText;
    }

    if (season != null) {
        return season;
    }

    if (month != null) {
        String monthText = TimexDateHelpers.fixedFormatNumber(month, 2);
        if (weekOfMonth != null) {
            if (timex.getDayOfWeek() != null) {
                return String.format("%1$s-%2$s-%3$s-%4$s-%5$s", Constants.TIMEX_FUZZY_YEAR,
                    monthText, Constants.TIMEX_FUZZY_WEEK, weekOfMonth, timex.getDayOfWeek());
            }
            return String.format("%1$s-%2$s-W%3$02d", Constants.TIMEX_FUZZY_YEAR, monthText, weekOfMonth);
        }
        return String.format("%1$s-%2$s", Constants.TIMEX_FUZZY_YEAR, monthText);
    }

    return "";
}
/**
 * Renders the part-of-day time range of {@code timex}.
 *
 * @param timex value whose part of day is read
 * @return {@code T} followed by the part-of-day code, or an empty string when none is set
 */
private static String formatTimeRange(TimexProperty timex) {
    String partOfDay = timex.getPartOfDay();
    return partOfDay == null ? "" : "T" + partOfDay;
}
}

Просмотреть файл

@ -0,0 +1,515 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.math.BigDecimal;
import java.time.DayOfWeek;
import java.time.LocalDateTime;
import java.time.temporal.TemporalField;
import java.time.temporal.WeekFields;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang3.tuple.Pair;
public class TimexHelpers {
/** Lookup from a duration unit to the TIMEX designator constant written after its amount. */
public static final HashMap<TimexUnit, String> TIMEX_UNIT_TO_STRING_MAP = buildTimexUnitToStringMap();

/** Units that are rendered in the time section of a duration (hour, minute, second). */
public static final List<TimexUnit> TimeTimexUnitList =
    Arrays.asList(TimexUnit.Hour, TimexUnit.Minute, TimexUnit.Second);

/** Builds the unit-to-designator table exposed as {@link #TIMEX_UNIT_TO_STRING_MAP}. */
private static HashMap<TimexUnit, String> buildTimexUnitToStringMap() {
    HashMap<TimexUnit, String> table = new HashMap<TimexUnit, String>();
    table.put(TimexUnit.Year, Constants.TIMEX_YEAR);
    table.put(TimexUnit.Month, Constants.TIMEX_MONTH);
    table.put(TimexUnit.Week, Constants.TIMEX_WEEK);
    table.put(TimexUnit.Day, Constants.TIMEX_DAY);
    table.put(TimexUnit.Hour, Constants.TIMEX_HOUR);
    table.put(TimexUnit.Minute, Constants.TIMEX_MINUTE);
    table.put(TimexUnit.Second, Constants.TIMEX_SECOND);
    return table;
}
/**
 * Expands a TIMEX into an explicit start/end range.
 *
 * <p>If the TIMEX carries a duration, the start is the TIMEX without its duration amounts,
 * the end is start + duration, and the duration is returned as well. Otherwise, when a year
 * is present, the range covers the addressed week of month, month, week of year or whole
 * year (checked in that order). In every other case both ends are empty
 * {@link TimexProperty} instances.
 *
 * @param timex the TIMEX to expand
 * @return the expanded range; the duration is only populated for duration-based input
 */
public static TimexRange expandDateTimeRange(TimexProperty timex) {
    HashSet<String> types = timex.getTypes();
    if (types.isEmpty()) {
        types = TimexInference.infer(timex);
    }

    TimexRange range = new TimexRange();

    if (types.contains(Constants.TimexTypes.DURATION)) {
        TimexProperty start = cloneDateTime(timex);
        TimexProperty duration = cloneDuration(timex);
        range.setStart(start);
        range.setEnd(timexDateTimeAdd(start, duration));
        range.setDuration(duration);
        return range;
    }

    Integer year = timex.getYear();
    if (year == null) {
        // Nothing to anchor a range on: hand back two empty end points.
        range.setStart(new TimexProperty());
        range.setEnd(new TimexProperty());
        return range;
    }

    Integer month = timex.getMonth();
    Pair<TimexProperty, TimexProperty> bounds;
    if (month != null && timex.getWeekOfMonth() != null) {
        bounds = monthWeekDateRange(year, month, timex.getWeekOfMonth());
    } else if (month != null) {
        bounds = monthDateRange(year, month);
    } else if (timex.getWeekOfYear() != null) {
        bounds = yearWeekDateRange(year, timex.getWeekOfYear(), timex.getWeekend());
    } else {
        bounds = yearDateRange(year);
    }
    range.setStart(bounds.getLeft());
    range.setEnd(bounds.getRight());
    return range;
}
/**
 * Expands a time-range TIMEX into explicit start and end times.
 *
 * <p>A part-of-day code ({@code DT}, {@code MO}, {@code AF}, {@code EV}, {@code NI}) is first
 * replaced by the corresponding {@code TimexCreator} constant. The start is the hour, minute
 * and second of the resulting TIMEX; the end is the start plus its duration.
 *
 * @param timex a TIMEX whose types include {@code TIME_RANGE}
 * @return range with start, end and duration populated
 * @throws IllegalArgumentException if {@code timex} is not a time range or its part-of-day
 *                                  code is not one of the five listed above
 */
public static TimexRange expandTimeRange(TimexProperty timex) {
    if (!timex.getTypes().contains(Constants.TimexTypes.TIME_RANGE)) {
        throw new IllegalArgumentException("argument must be a timerange: timex");
    }

    TimexProperty source = timex;
    String partOfDay = timex.getPartOfDay();
    if (partOfDay != null) {
        switch (partOfDay) {
            case "DT":
                source = new TimexProperty(TimexCreator.DAYTIME);
                break;
            case "MO":
                source = new TimexProperty(TimexCreator.MORNING);
                break;
            case "AF":
                source = new TimexProperty(TimexCreator.AFTERNOON);
                break;
            case "EV":
                source = new TimexProperty(TimexCreator.EVENING);
                break;
            case "NI":
                source = new TimexProperty(TimexCreator.NIGHT);
                break;
            default:
                throw new IllegalArgumentException("unrecognized part of day timerange: timex");
        }
    }

    Integer hour = source.getHour();
    Integer minute = source.getMinute();
    Integer second = source.getSecond();
    TimexProperty start = new TimexProperty();
    start.setHour(hour);
    start.setMinute(minute);
    start.setSecond(second);

    TimexProperty duration = cloneDuration(source);

    TimexRange range = new TimexRange();
    range.setStart(start);
    range.setEnd(timeAdd(start, duration));
    range.setDuration(duration);
    return range;
}
/**
 * Adds the date part of a duration to a start TIMEX.
 *
 * <p>Rules, applied in this order:
 * <ul>
 * <li>Start has a day of week: the rounded number of days is added to a clone's day of week
 *     (no wrap-around is applied).</li>
 * <li>Start has month and day of month and the duration has days (or, failing that, weeks
 *     converted to days): calendar arithmetic is used; the fractional part of the day count
 *     is dropped. The year is only part of the result when the start had one.</li>
 * <li>Duration has years and the start has a year: the rounded years are added.</li>
 * <li>Duration has months: the rounded months are added, rolling over into the next or
 *     previous year so the resulting month always stays within 1..12. The day of month is
 *     copied unchanged, so it is not clamped to the length of the target month.</li>
 * </ul>
 * In every other case {@code start} itself is returned.
 *
 * @param start    the TIMEX to start from
 * @param duration the TIMEX holding the duration amounts
 * @return a new TIMEX with the shifted date, or {@code start} when nothing applies
 */
public static TimexProperty timexDateAdd(TimexProperty start, TimexProperty duration) {
    if (start.getDayOfWeek() != null) {
        TimexProperty end = start.clone();
        if (duration.getDays() != null) {
            end.setDayOfWeek(end.getDayOfWeek() + (int)Math.round(duration.getDays().doubleValue()));
        }
        return end;
    }

    if (start.getMonth() != null && start.getDayOfMonth() != null) {
        Double durationDays = null;
        if (duration.getDays() != null) {
            durationDays = duration.getDays().doubleValue();
        } else if (duration.getWeeks() != null) {
            durationDays = 7 * duration.getWeeks().doubleValue();
        }

        if (durationDays != null) {
            boolean hasYear = start.getYear() != null;
            // NOTE(review): 2001 is the placeholder year used when the start has none;
            // presumably chosen because it is not a leap year - confirm.
            int baseYear = hasYear ? start.getYear() : 2001;
            LocalDateTime shifted = LocalDateTime
                .of(baseYear, start.getMonth(), start.getDayOfMonth(), 0, 0, 0)
                .plusDays(durationDays.longValue());
            TimexProperty result = new TimexProperty();
            if (hasYear) {
                result.setYear(shifted.getYear());
            }
            result.setMonth(shifted.getMonthValue());
            result.setDayOfMonth(shifted.getDayOfMonth());
            return result;
        }

        if (duration.getYears() != null && start.getYear() != null) {
            TimexProperty result = new TimexProperty();
            result.setYear(start.getYear() + (int)Math.round(duration.getYears().doubleValue()));
            result.setMonth(start.getMonth());
            result.setDayOfMonth(start.getDayOfMonth());
            return result;
        }

        if (duration.getMonths() != null) {
            // Work on a zero-based month index so that floorDiv/floorMod give the year carry
            // and the wrapped month for positive and negative offsets alike.
            int monthIndex = start.getMonth() - 1 + (int)Math.round(duration.getMonths().doubleValue());
            TimexProperty result = new TimexProperty();
            if (start.getYear() != null) {
                result.setYear(start.getYear() + Math.floorDiv(monthIndex, 12));
            }
            result.setMonth(Math.floorMod(monthIndex, 12) + 1);
            result.setDayOfMonth(start.getDayOfMonth());
            return result;
        }
    }

    return start;
}
/**
 * Merges several single-unit duration TIMEX strings into one compound duration.
 *
 * <p>The result starts with {@code Constants.GENERAL_PERIOD_PREFIX}; each component is
 * appended without its own prefix. {@code Constants.TIME_TIMEX_PREFIX} is inserted once,
 * immediately before the first time-duration component.
 *
 * @param timexList the duration components, in output order
 * @return the compound duration TIMEX
 */
public static String generateCompoundDurationTimex(List<String> timexList) {
    StringBuilder compound = new StringBuilder(Constants.GENERAL_PERIOD_PREFIX);
    boolean timePrefixWritten = false;
    for (String component : timexList) {
        if (!timePrefixWritten && isTimeDurationTimex(component)) {
            compound.append(Constants.TIME_TIMEX_PREFIX);
            timePrefixWritten = true;
        }
        compound.append(getDurationTimexWithoutPrefix(component));
    }
    return compound.toString();
}
/**
 * Builds a date TIMEX of the form {@code year-monthOrWeek-day}.
 *
 * <p>Components equal to {@code Constants.INVALID_VALUE} are replaced by the matching fuzzy
 * placeholder constant. In "by week" mode the day is written as-is and the middle section
 * becomes either {@code month-<fuzzy week>-weekOfMonth} or the week designator followed by
 * the week of year.
 *
 * @param year              year, or {@code Constants.INVALID_VALUE}
 * @param monthOrWeekOfYear month or week of year, or {@code Constants.INVALID_VALUE}
 * @param day               day of month (or day of week in "by week" mode)
 * @param weekOfMonth       week of month, or {@code Constants.INVALID_VALUE}
 * @param byWeek            whether {@code day} is a day of week
 * @return the assembled date TIMEX
 */
public static String generateDateTimex(Integer year, Integer monthOrWeekOfYear, Integer day, Integer weekOfMonth,
        boolean byWeek) {
    String yearString;
    if (year == Constants.INVALID_VALUE) {
        yearString = Constants.TIMEX_FUZZY_YEAR;
    } else {
        yearString = TimexDateHelpers.fixedFormatNumber(year, 4);
    }

    String monthWeekString;
    if (monthOrWeekOfYear == Constants.INVALID_VALUE) {
        monthWeekString = Constants.TIMEX_FUZZY_MONTH;
    } else {
        monthWeekString = TimexDateHelpers.fixedFormatNumber(monthOrWeekOfYear, 2);
    }

    String dayString;
    if (!byWeek) {
        dayString = day == Constants.INVALID_VALUE ? Constants.TIMEX_FUZZY_DAY
            : TimexDateHelpers.fixedFormatNumber(day, 2);
    } else {
        dayString = day.toString();
        if (weekOfMonth != Constants.INVALID_VALUE) {
            monthWeekString = monthWeekString + "-" + Constants.TIMEX_FUZZY_WEEK + "-" + weekOfMonth.toString();
        } else {
            monthWeekString = Constants.TIMEX_WEEK + monthWeekString;
        }
    }

    return yearString + "-" + monthWeekString + "-" + dayString;
}
/**
 * Builds a single-unit duration TIMEX: the period prefix, the time prefix for units listed
 * in {@code TimeTimexUnitList}, the amount and the unit designator.
 *
 * <p>The amount is written with {@link BigDecimal#toPlainString()} so that values with a
 * negative scale or a very small magnitude never appear in scientific notation.
 *
 * @param unit  the duration unit
 * @param value the amount; a value whose integer part equals {@code Constants.INVALID_VALUE}
 *              yields an empty string
 * @return the duration TIMEX, or an empty string for an invalid amount
 */
public static String generateDurationTimex(TimexUnit unit, BigDecimal value) {
    if (value.intValue() == Constants.INVALID_VALUE) {
        return "";
    }
    StringBuilder timexBuilder = new StringBuilder(Constants.GENERAL_PERIOD_PREFIX);
    if (TimeTimexUnitList.contains(unit)) {
        timexBuilder.append(Constants.TIME_TIMEX_PREFIX);
    }
    timexBuilder.append(value.toPlainString());
    timexBuilder.append(TIMEX_UNIT_TO_STRING_MAP.get(unit));
    return timexBuilder.toString();
}
/**
 * Adds the minutes and hours of a duration to the time-of-day of a start TIMEX.
 *
 * <p>Works on a clone of {@code start}. Minute overflow is carried into the hour using the
 * full number of whole hours contained in the minute total. Hour overflow past 23 is carried
 * into the date when year, month and day of month are all present, or into the day of week
 * when that is present. If {@code start} has no time-of-day, the time amounts are not
 * applied and the clone is returned unchanged. The seconds of the duration are not applied
 * by this method.
 *
 * @param start    the TIMEX to start from
 * @param duration the TIMEX holding the duration amounts
 * @return a new TIMEX with the shifted time
 */
public static TimexProperty timexTimeAdd(TimexProperty start, TimexProperty duration) {
    TimexProperty result = start.clone();

    if (duration.getMinutes() != null && result.getMinute() != null) {
        int totalMinutes = result.getMinute() + (int)Math.round(duration.getMinutes().doubleValue());
        if (totalMinutes > 59) {
            // Carry every whole hour, not just one, so that e.g. 30 + 90 minutes adds two hours.
            int baseHour = result.getHour() != null ? result.getHour() : 0;
            result.setHour(baseHour + totalMinutes / 60);
            totalMinutes %= 60;
        }
        result.setMinute(totalMinutes);
    }

    if (duration.getHours() != null && result.getHour() != null) {
        result.setHour(result.getHour() + (int)Math.round(duration.getHours().doubleValue()));
    }

    if (result.getHour() != null && result.getHour() > 23) {
        int extraDays = result.getHour() / 24;
        result.setHour(result.getHour() % 24);

        if (result.getYear() != null && result.getMonth() != null && result.getDayOfMonth() != null) {
            LocalDateTime shifted = LocalDateTime
                .of(result.getYear(), result.getMonth(), result.getDayOfMonth(), 0, 0, 0)
                .plusDays(extraDays);
            result.setYear(shifted.getYear());
            result.setMonth(shifted.getMonthValue());
            result.setDayOfMonth(shifted.getDayOfMonth());
        } else if (result.getDayOfWeek() != null) {
            result.setDayOfWeek(result.getDayOfWeek() + extraDays);
        }
    }

    return result;
}
/**
 * Adds a duration to a start TIMEX: first its date part, then its time part.
 *
 * @param start    the TIMEX to start from
 * @param duration the TIMEX holding the duration amounts
 * @return the result of {@code timexTimeAdd} applied to the result of {@code timexDateAdd}
 */
public static TimexProperty timexDateTimeAdd(TimexProperty start, TimexProperty duration) {
    TimexProperty dateShifted = timexDateAdd(start, duration);
    return timexTimeAdd(dateShifted, duration);
}
/**
 * Converts a TIMEX into a {@link LocalDateTime}, filling in defaults for missing parts.
 *
 * <p>Defaults: year 2001, month 1, day 1, and 0 for hour, minute and second.
 *
 * @param timex the TIMEX to convert
 * @return the corresponding date-time
 */
public static LocalDateTime dateFromTimex(TimexProperty timex) {
    Integer year = timex.getYear();
    if (year == null) {
        year = 2001;
    }
    Integer month = timex.getMonth();
    if (month == null) {
        month = 1;
    }
    Integer day = timex.getDayOfMonth();
    if (day == null) {
        day = 1;
    }
    Integer hour = timex.getHour();
    if (hour == null) {
        hour = 0;
    }
    Integer minute = timex.getMinute();
    if (minute == null) {
        minute = 0;
    }
    Integer second = timex.getSecond();
    if (second == null) {
        second = 0;
    }
    return LocalDateTime.of(year, month, day, hour, minute, second);
}
/**
 * Converts the time-of-day of a TIMEX into a {@code Time}; missing parts default to 0.
 *
 * @param timex the TIMEX to convert
 * @return a {@code Time} built from hour, minute and second
 */
public static Time timeFromTimex(TimexProperty timex) {
    Integer hour = timex.getHour();
    if (hour == null) {
        hour = 0;
    }
    Integer minute = timex.getMinute();
    if (minute == null) {
        minute = 0;
    }
    Integer second = timex.getSecond();
    if (second == null) {
        second = 0;
    }
    return new Time(hour, minute, second);
}
/**
 * Expands a TIMEX with {@code expandDateTimeRange} and converts both end points to dates.
 *
 * @param timex the TIMEX to expand
 * @return a {@code DateRange} whose start and end come from {@code dateFromTimex}
 */
public static DateRange dateRangeFromTimex(TimexProperty timex) {
    final TimexRange timexRange = expandDateTimeRange(timex);
    return new DateRange() {
        {
            setStart(dateFromTimex(timexRange.getStart()));
            setEnd(dateFromTimex(timexRange.getEnd()));
        }
    };
}
/**
 * Expands a TIMEX with {@code expandTimeRange} and converts both end points to times.
 *
 * @param timex the TIMEX to expand
 * @return a {@code TimeRange} whose start and end come from {@code timeFromTimex}
 */
public static TimeRange timeRangeFromTimex(TimexProperty timex) {
    final TimexRange timexRange = expandTimeRange(timex);
    return new TimeRange() {
        {
            setStart(timeFromTimex(timexRange.getStart()));
            setEnd(timeFromTimex(timexRange.getEnd()));
        }
    };
}
/**
 * Joins a resolved date string and a resolved time string with a single space.
 *
 * @param dateValue the date text
 * @param timeValue the time text
 * @return {@code dateValue}, a space, then {@code timeValue}
 */
public static String formatResolvedDateValue(String dateValue, String timeValue) {
    return dateValue + " " + timeValue;
}
/**
 * Computes the date range of a given week within a month.
 *
 * @param year        the year
 * @param month       the month
 * @param weekOfMonth the week within the month
 * @return pair of (start, end), where start comes from {@code generateMonthWeekDateStart}
 *         and end is seven days later; both carry year, month and day of month only
 */
public static Pair<TimexProperty, TimexProperty> monthWeekDateRange(Integer year, Integer month,
        Integer weekOfMonth) {
    LocalDateTime weekStart = generateMonthWeekDateStart(year, month, weekOfMonth);
    LocalDateTime weekEnd = weekStart.plusDays(7);

    TimexProperty from = new TimexProperty();
    from.setYear(weekStart.getYear());
    from.setMonth(weekStart.getMonthValue());
    from.setDayOfMonth(weekStart.getDayOfMonth());

    TimexProperty to = new TimexProperty();
    to.setYear(weekEnd.getYear());
    to.setMonth(weekEnd.getMonthValue());
    to.setDayOfMonth(weekEnd.getDayOfMonth());

    return Pair.of(from, to);
}
/**
 * Computes the date range covering one calendar month.
 *
 * @param year  the year
 * @param month the month (12 rolls the end over to January of the following year)
 * @return pair of (first day of the month, first day of the following month)
 */
public static Pair<TimexProperty, TimexProperty> monthDateRange(Integer year, Integer month) {
    TimexProperty from = new TimexProperty();
    from.setYear(year);
    from.setMonth(month);
    from.setDayOfMonth(1);

    boolean isDecember = month == 12;
    TimexProperty to = new TimexProperty();
    to.setYear(isDecember ? year + 1 : year);
    to.setMonth(isDecember ? 1 : month + 1);
    to.setDayOfMonth(1);

    return Pair.of(from, to);
}
/**
 * Computes the date range covering one calendar year.
 *
 * @param year the year
 * @return pair of (January 1st of {@code year}, January 1st of the following year)
 */
public static Pair<TimexProperty, TimexProperty> yearDateRange(Integer year) {
    TimexProperty from = new TimexProperty();
    from.setYear(year);
    from.setMonth(1);
    from.setDayOfMonth(1);

    TimexProperty to = new TimexProperty();
    to.setYear(year + 1);
    to.setMonth(1);
    to.setDayOfMonth(1);

    return Pair.of(from, to);
}
/**
 * Computes the date range of a numbered week of a year, optionally limited to its weekend.
 *
 * <p>The week start is obtained from {@code firstDateOfWeek}. For a weekend the range starts
 * at the date {@code TimexDateHelpers.dateOfNextDay} returns for Saturday relative to the
 * week start. The end is always seven days after the week start.
 *
 * @param year       the year
 * @param weekOfYear the week number within the year
 * @param isWeekend  {@code true} to start at the weekend; {@code null} is treated as false
 * @return pair of (start, end) carrying year, month and day of month only
 */
public static Pair<TimexProperty, TimexProperty> yearWeekDateRange(Integer year, Integer weekOfYear,
        Boolean isWeekend) {
    LocalDateTime weekStart = firstDateOfWeek(year, weekOfYear, null);

    LocalDateTime rangeStart = weekStart;
    if (isWeekend != null && isWeekend) {
        rangeStart = TimexDateHelpers.dateOfNextDay(DayOfWeek.SATURDAY, weekStart);
    }
    LocalDateTime rangeEnd = weekStart.plusDays(7);

    TimexProperty from = new TimexProperty();
    from.setYear(rangeStart.getYear());
    from.setMonth(rangeStart.getMonthValue());
    from.setDayOfMonth(rangeStart.getDayOfMonth());

    TimexProperty to = new TimexProperty();
    to.setYear(rangeEnd.getYear());
    to.setMonth(rangeEnd.getMonthValue());
    to.setDayOfMonth(rangeEnd.getDayOfMonth());

    return Pair.of(from, to);
}
/**
 * Returns the first date of a numbered week of a year.
 *
 * <p>Based on
 * https://stackoverflow.com/questions/19901666/get-date-of-first-and-last-day-of-week-knowing-week-number/34727270
 * ISO 8601 uses a first-four-day week with Monday as the first day of the week, see
 * https://en.wikipedia.org/wiki/ISO_8601
 *
 * @param year        the year
 * @param weekOfYear  the week number within the year
 * @param cultureInfo not used by this implementation
 * @return midnight on the first day of the requested week
 */
public static LocalDateTime firstDateOfWeek(Integer year, Integer weekOfYear, Locale cultureInfo) {
    LocalDateTime jan1 = LocalDateTime.of(year, 1, 1, 0, 0);
    int daysOffset = DayOfWeek.MONDAY.getValue() - TimexDateHelpers.getUSDayOfWeek(jan1.getDayOfWeek());
    LocalDateTime firstWeekDay = jan1.plusDays(daysOffset);

    TemporalField weekOfYearField = WeekFields.ISO.weekOfYear();
    int firstWeek = jan1.get(weekOfYearField);

    // When January 1st already belongs to the week found above, that week counts as week 1,
    // so one week less has to be added.
    int weeksToAdd = weekOfYear;
    boolean jan1InBoundaryWeek = firstWeek <= 1 || firstWeek >= 52;
    if (jan1InBoundaryWeek && daysOffset >= -3) {
        weeksToAdd -= 1;
    }

    return firstWeekDay.plusDays(weeksToAdd * 7);
}
/**
 * Returns the Monday that starts the given week of a month.
 *
 * <p>A probe date is taken at day {@code 1 + (weekOfMonth - 1) * 7} of the month. Following
 * the Thursday rule of ISO 8601 (https://en.wikipedia.org/wiki/ISO_8601), a probe falling
 * on Friday to Sunday is moved forward to the next Monday; otherwise it is moved back to the
 * Monday of its own week.
 *
 * @param year        the year
 * @param month       the month
 * @param weekOfMonth the one-based week within the month
 * @return midnight on the Monday that starts the week
 */
public static LocalDateTime generateMonthWeekDateStart(Integer year, Integer month, Integer weekOfMonth) {
    LocalDateTime probe = LocalDateTime.of(year, month, 1 + ((weekOfMonth - 1) * 7), 0, 0);
    int isoDay = probe.getDayOfWeek().getValue();
    int shift;
    if (isoDay > DayOfWeek.THURSDAY.getValue()) {
        shift = 8 - isoDay;
    } else {
        shift = 1 - isoDay;
    }
    return probe.plusDays(shift);
}
/**
 * Adds the seconds, minutes and hours of a duration to a start time.
 *
 * <p>Overflowing seconds carry into minutes and overflowing minutes into hours. The hour
 * wraps modulo 24, except that exactly 24:00:00 is kept as hour 24.
 *
 * @param start    time to start from; hour, minute and second must be set
 * @param duration duration whose hours, minutes and seconds are added (absent amounts count as 0)
 * @return a new TIMEX holding only the resulting hour, minute and second
 */
private static TimexProperty timeAdd(TimexProperty start, TimexProperty duration) {
    int totalSeconds = start.getSecond()
        + (duration.getSeconds() != null ? duration.getSeconds().intValue() : 0);
    int totalMinutes = start.getMinute() + totalSeconds / 60
        + (duration.getMinutes() != null ? duration.getMinutes().intValue() : 0);
    int totalHours = start.getHour() + totalMinutes / 60
        + (duration.getHours() != null ? duration.getHours().intValue() : 0);

    int minuteOfHour = totalMinutes % 60;
    int secondOfMinute = totalSeconds % 60;
    boolean exactlyMidnightEnd = totalHours == 24 && minuteOfHour == 0 && secondOfMinute == 0;

    TimexProperty result = new TimexProperty();
    result.setHour(exactlyMidnightEnd ? totalHours : totalHours % 24);
    result.setMinute(minuteOfHour);
    result.setSecond(secondOfMinute);
    return result;
}
/**
 * Copies a TIMEX and clears every duration amount on the copy, keeping only its
 * point-in-time components.
 *
 * @param timex the TIMEX to copy
 * @return the copy with years, months, weeks, days, hours, minutes and seconds set to null
 */
private static TimexProperty cloneDateTime(TimexProperty timex) {
    TimexProperty copy = timex.clone();
    // Date amounts.
    copy.setYears(null);
    copy.setMonths(null);
    copy.setWeeks(null);
    copy.setDays(null);
    // Time amounts.
    copy.setHours(null);
    copy.setMinutes(null);
    copy.setSeconds(null);
    return copy;
}
/**
 * Copies a TIMEX and clears every point-in-time component on the copy, keeping only its
 * duration amounts.
 *
 * @param timex the TIMEX to copy
 * @return the copy with all date, time, season, weekend and part-of-day fields set to null
 */
private static TimexProperty cloneDuration(TimexProperty timex) {
    TimexProperty copy = timex.clone();
    // Date components.
    copy.setYear(null);
    copy.setMonth(null);
    copy.setDayOfMonth(null);
    copy.setDayOfWeek(null);
    copy.setWeekOfYear(null);
    copy.setWeekOfMonth(null);
    copy.setSeason(null);
    // Time components.
    copy.setHour(null);
    copy.setMinute(null);
    copy.setSecond(null);
    // Range qualifiers.
    copy.setWeekend(null);
    copy.setPartOfDay(null);
    return copy;
}
/**
 * Tells whether a duration TIMEX is a time duration, i.e. starts with the period prefix
 * immediately followed by the time prefix.
 *
 * @param timex the duration TIMEX to inspect
 * @return true when {@code timex} starts with both prefixes
 */
private static Boolean isTimeDurationTimex(String timex) {
    String timeDurationPrefix = Constants.GENERAL_PERIOD_PREFIX + Constants.TIME_TIMEX_PREFIX;
    return timex.startsWith(timeDurationPrefix);
}
/**
 * Strips the leading prefix from a duration TIMEX: two characters ("PT") for a time
 * duration, one character ("P") for a date duration.
 *
 * @param timex the duration TIMEX
 * @return {@code timex} without its prefix
 */
private static String getDurationTimexWithoutPrefix(String timex) {
    int prefixLength = isTimeDurationTimex(timex) ? 2 : 1;
    return timex.substring(prefixLength);
}
}

Просмотреть файл

@ -0,0 +1,100 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.util.HashSet;
/**
 * Derives the set of TIMEX type tags (see {@code Constants.TimexTypes}) that apply to a
 * {@link TimexProperty}, based on which of its fields are populated.
 */
public class TimexInference {

    /**
     * Infers every type tag that applies to the given TIMEX.
     *
     * <p>Basic tags are derived from the populated fields first; combined tags (time range,
     * date-time, date range, date-time range) are then added from the basic ones.
     *
     * @param timexProperty the TIMEX to classify
     * @return a new set holding the applicable type tags
     */
    public static HashSet<String> infer(TimexProperty timexProperty) {
        HashSet<String> types = new HashSet<String>();

        if (TimexInference.isPresent(timexProperty)) {
            types.add(Constants.TimexTypes.PRESENT);
        }
        if (TimexInference.isDefinite(timexProperty)) {
            types.add(Constants.TimexTypes.DEFINITE);
        }
        if (TimexInference.isDate(timexProperty)) {
            types.add(Constants.TimexTypes.DATE);
        }
        if (TimexInference.isDateRange(timexProperty)) {
            types.add(Constants.TimexTypes.DATE_RANGE);
        }
        if (TimexInference.isDuration(timexProperty)) {
            types.add(Constants.TimexTypes.DURATION);
        }
        if (TimexInference.isTime(timexProperty)) {
            types.add(Constants.TimexTypes.TIME);
        }
        if (TimexInference.isTimeRange(timexProperty)) {
            types.add(Constants.TimexTypes.TIME_RANGE);
        }

        // "Now" is both a date and a time.
        if (types.contains(Constants.TimexTypes.PRESENT)) {
            types.add(Constants.TimexTypes.DATE);
            types.add(Constants.TimexTypes.TIME);
        }
        if (types.contains(Constants.TimexTypes.TIME) && types.contains(Constants.TimexTypes.DURATION)) {
            types.add(Constants.TimexTypes.TIME_RANGE);
        }
        if (types.contains(Constants.TimexTypes.DATE) && types.contains(Constants.TimexTypes.TIME)) {
            types.add(Constants.TimexTypes.DATE_TIME);
        }
        if (types.contains(Constants.TimexTypes.DATE) && types.contains(Constants.TimexTypes.DURATION)) {
            types.add(Constants.TimexTypes.DATE_RANGE);
        }
        if (types.contains(Constants.TimexTypes.DATE_TIME) && types.contains(Constants.TimexTypes.DURATION)) {
            types.add(Constants.TimexTypes.DATE_TIME_RANGE);
        }
        if (types.contains(Constants.TimexTypes.DATE) && types.contains(Constants.TimexTypes.TIME_RANGE)) {
            types.add(Constants.TimexTypes.DATE_TIME_RANGE);
        }

        return types;
    }

    /** True when the "now" flag is set to true. */
    private static Boolean isPresent(TimexProperty timexProperty) {
        return Boolean.TRUE.equals(timexProperty.getNow());
    }

    /** True when at least one duration amount (years down to seconds) is set. */
    private static Boolean isDuration(TimexProperty timexProperty) {
        return timexProperty.getYears() != null
            || timexProperty.getMonths() != null
            || timexProperty.getWeeks() != null
            || timexProperty.getDays() != null
            || timexProperty.getHours() != null
            || timexProperty.getMinutes() != null
            || timexProperty.getSeconds() != null;
    }

    /** True when hour, minute and second are all set. */
    private static Boolean isTime(TimexProperty timexProperty) {
        return timexProperty.getHour() != null
            && timexProperty.getMinute() != null
            && timexProperty.getSecond() != null;
    }

    /** True when a day of month or a day of week is set. */
    private static Boolean isDate(TimexProperty timexProperty) {
        return timexProperty.getDayOfMonth() != null || timexProperty.getDayOfWeek() != null;
    }

    /** True when a part of day is set. */
    private static Boolean isTimeRange(TimexProperty timexProperty) {
        return timexProperty.getPartOfDay() != null;
    }

    /** True when no specific day is set but a year, month, season or week is. */
    private static Boolean isDateRange(TimexProperty timexProperty) {
        boolean noSpecificDay = timexProperty.getDayOfMonth() == null && timexProperty.getDayOfWeek() == null;
        return noSpecificDay
            && (timexProperty.getYear() != null
                || timexProperty.getMonth() != null
                || timexProperty.getSeason() != null
                || timexProperty.getWeekOfYear() != null
                || timexProperty.getWeekOfMonth() != null);
    }

    /** True when year, month and day of month are all set. */
    private static Boolean isDefinite(TimexProperty timexProperty) {
        return timexProperty.getYear() != null
            && timexProperty.getMonth() != null
            && timexProperty.getDayOfMonth() != null;
    }
}

Просмотреть файл

@ -0,0 +1,56 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.util.HashMap;
import java.util.Map;
/**
 * Parses TIMEX strings into the fields of a {@link TimexProperty}.
 */
public class TimexParsing {

    /**
     * Parses {@code timex} and stores the extracted values on {@code timexProperty}.
     *
     * <p>The string is dispatched by shape: the literal {@code PRESENT_REF}, a duration
     * (starts with {@code P}), a parenthesised start/end/duration range, or otherwise a
     * date and/or time expression.
     *
     * @param timex         the TIMEX string to parse
     * @param timexProperty the object that receives the parsed values
     */
    public static void parseString(String timex, TimexProperty timexProperty) {
        // Compare by value: callers may pass strings that are not interned.
        if ("PRESENT_REF".equals(timex)) {
            // a reference to the present
            timexProperty.setNow(true);
        } else if (timex.startsWith("P")) {
            // duration
            TimexParsing.extractDuration(timex, timexProperty);
        } else if (timex.startsWith("(") && timex.endsWith(")")) {
            // range indicated with start and end dates and a duration
            TimexParsing.extractStartEndRange(timex, timexProperty);
        } else {
            // date and time and their respective ranges
            TimexParsing.extractDateTime(timex, timexProperty);
        }
    }

    /** Extracts the "period" groups from {@code s} and assigns them to {@code timexProperty}. */
    private static void extractDuration(String s, TimexProperty timexProperty) {
        Map<String, String> extracted = new HashMap<String, String>();
        TimexRegex.extract("period", s, extracted);
        timexProperty.assignProperties(extracted);
    }

    /**
     * Handles the {@code (start,end,duration)} form: the first part is parsed as a date-time
     * and the third as a duration. Input that does not split into exactly three parts is
     * ignored.
     */
    private static void extractStartEndRange(String s, TimexProperty timexProperty) {
        String[] parts = s.substring(1, s.length() - 1).split(",");
        if (parts.length == 3) {
            TimexParsing.extractDateTime(parts[0], timexProperty);
            TimexParsing.extractDuration(parts[2], timexProperty);
        }
    }

    /**
     * Extracts date groups from the text before the first {@code T} and time groups from the
     * text starting at that {@code T}; without a {@code T} the whole string is treated as a date.
     */
    private static void extractDateTime(String s, TimexProperty timexProperty) {
        Map<String, String> extracted = new HashMap<String, String>();
        int indexOfT = s.indexOf("T");
        if (indexOfT == -1) {
            TimexRegex.extract("date", s, extracted);
        } else {
            TimexRegex.extract("date", s.substring(0, indexOfT), extracted);
            TimexRegex.extract("time", s.substring(indexOfT), extracted);
        }
        timexProperty.assignProperties(extracted);
    }
}

Просмотреть файл

@ -0,0 +1,445 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.math.BigDecimal;
import java.time.LocalDateTime;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang3.StringUtils;
/**
 * Mutable, structured form of a TIMEX expression.
 *
 * <p>The plural fields ({@code years} ... {@code seconds}) hold duration amounts; the singular
 * ones ({@code year} ... {@code second}) hold point-in-time components. Hour, minute and
 * second are backed by a single {@code Time} object: setting any of them to null clears the
 * whole time, and setting one on an empty time initialises the other two to 0.
 */
public class TimexProperty {
    private Time time;

    // Stored by setTimexValue/setTypes but never read by this class: the getters recompute.
    private String timexValue;
    private HashSet<String> types;

    private Boolean now;

    private BigDecimal years;
    private BigDecimal months;
    private BigDecimal weeks;
    private BigDecimal days;
    private BigDecimal hours;
    private BigDecimal minutes;
    private BigDecimal seconds;

    private Integer year;
    private Integer month;
    private Integer dayOfMonth;
    private Integer dayOfWeek;
    private String season;
    private Integer weekOfYear;
    private Boolean weekend;
    public Integer weekOfMonth;

    // Not used: the hour/minute/second accessors read and write the time field instead.
    private Integer hour;
    private Integer minute;
    private Integer second;

    private String partOfDay;

    /** Creates an empty TIMEX with no field set. */
    public TimexProperty() {
    }

    /**
     * Creates a TIMEX by parsing the given TIMEX string.
     *
     * @param timex the TIMEX string, handed to {@code TimexParsing.parseString}
     */
    public TimexProperty(String timex) {
        TimexParsing.parseString(timex, this);
    }

    /** Returns the TIMEX string for the current state, as produced by {@code TimexFormat.format}. */
    public String getTimexValue() {
        return TimexFormat.format(this);
    }

    public void setTimexValue(String withTimexValue) {
        this.timexValue = withTimexValue;
    }

    /** Returns the type tags for the current state, as produced by {@code TimexInference.infer}. */
    public HashSet<String> getTypes() {
        return TimexInference.infer(this);
    }

    public void setTypes(HashSet<String> withTypes) {
        this.types = withTypes;
    }

    public Boolean getNow() {
        return now;
    }

    public void setNow(Boolean withNow) {
        this.now = withNow;
    }

    public BigDecimal getYears() {
        return years;
    }

    public void setYears(BigDecimal withYears) {
        this.years = withYears;
    }

    public BigDecimal getMonths() {
        return months;
    }

    public void setMonths(BigDecimal withMonths) {
        this.months = withMonths;
    }

    public BigDecimal getWeeks() {
        return weeks;
    }

    public void setWeeks(BigDecimal withWeeks) {
        this.weeks = withWeeks;
    }

    public BigDecimal getDays() {
        return days;
    }

    public void setDays(BigDecimal withDays) {
        this.days = withDays;
    }

    public BigDecimal getHours() {
        return hours;
    }

    public void setHours(BigDecimal withHours) {
        this.hours = withHours;
    }

    public BigDecimal getMinutes() {
        return minutes;
    }

    public void setMinutes(BigDecimal withMinutes) {
        this.minutes = withMinutes;
    }

    public BigDecimal getSeconds() {
        return seconds;
    }

    public void setSeconds(BigDecimal withSeconds) {
        this.seconds = withSeconds;
    }

    public Integer getYear() {
        return year;
    }

    public void setYear(Integer withYear) {
        this.year = withYear;
    }

    public Integer getMonth() {
        return month;
    }

    public void setMonth(Integer withMonth) {
        this.month = withMonth;
    }

    public Integer getDayOfMonth() {
        return dayOfMonth;
    }

    public void setDayOfMonth(Integer withDayOfMonth) {
        this.dayOfMonth = withDayOfMonth;
    }

    public Integer getDayOfWeek() {
        return dayOfWeek;
    }

    public void setDayOfWeek(Integer withDayOfWeek) {
        this.dayOfWeek = withDayOfWeek;
    }

    public String getSeason() {
        return season;
    }

    public void setSeason(String withSeason) {
        this.season = withSeason;
    }

    public Integer getWeekOfYear() {
        return weekOfYear;
    }

    public void setWeekOfYear(Integer withWeekOfYear) {
        this.weekOfYear = withWeekOfYear;
    }

    public Boolean getWeekend() {
        return weekend;
    }

    public void setWeekend(Boolean withWeekend) {
        this.weekend = withWeekend;
    }

    public Integer getWeekOfMonth() {
        return weekOfMonth;
    }

    public void setWeekOfMonth(Integer withWeekOfMonth) {
        this.weekOfMonth = withWeekOfMonth;
    }

    /** Returns the hour of the backing time, or null when no time is set. */
    public Integer getHour() {
        if (this.time != null) {
            return this.time.getHour();
        }
        return null;
    }

    /**
     * Sets the hour. A null value clears the whole time (minute and second included); a
     * non-null value on an empty time creates it with minute and second 0.
     */
    public void setHour(Integer withHour) {
        if (withHour != null) {
            if (this.time == null) {
                this.time = new Time(withHour, 0, 0);
            } else {
                this.time.setHour(withHour);
            }
        } else {
            this.time = null;
        }
    }

    /** Returns the minute of the backing time, or null when no time is set. */
    public Integer getMinute() {
        if (this.time != null) {
            return this.time.getMinute();
        }
        return null;
    }

    /**
     * Sets the minute. A null value clears the whole time (hour and second included); a
     * non-null value on an empty time creates it with hour and second 0.
     */
    public void setMinute(Integer withMinute) {
        if (withMinute != null) {
            if (this.time == null) {
                time = new Time(0, withMinute, 0);
            } else {
                time.setMinute(withMinute);
            }
        } else {
            this.time = null;
        }
    }

    /** Returns the second of the backing time, or null when no time is set. */
    public Integer getSecond() {
        if (this.time != null) {
            return this.time.getSecond();
        }
        return null;
    }

    /**
     * Sets the second. A null value clears the whole time (hour and minute included); a
     * non-null value on an empty time creates it with hour and minute 0.
     */
    public void setSecond(Integer withSecond) {
        if (withSecond != null) {
            if (this.time == null) {
                this.time = new Time(0, 0, withSecond);
            } else {
                this.time.setSecond(withSecond);
            }
        } else {
            this.time = null;
        }
    }

    public String getPartOfDay() {
        return partOfDay;
    }

    public void setPartOfDay(String wthPartOfDay) {
        this.partOfDay = wthPartOfDay;
    }

    /**
     * Creates a TIMEX holding only the year, month and day of month of {@code date}.
     *
     * @param date the source date-time; its time-of-day is ignored
     * @return the new TIMEX
     */
    public static TimexProperty fromDate(LocalDateTime date) {
        TimexProperty timex = new TimexProperty();
        timex.setYear(date.getYear());
        timex.setMonth(date.getMonthValue());
        timex.setDayOfMonth(date.getDayOfMonth());
        return timex;
    }

    /**
     * Creates a TIMEX holding the date and the hour, minute and second of {@code datetime}.
     *
     * @param datetime the source date-time
     * @return the new TIMEX
     */
    public static TimexProperty fromDateTime(LocalDateTime datetime) {
        TimexProperty timex = TimexProperty.fromDate(datetime);
        timex.setHour(datetime.getHour());
        timex.setMinute(datetime.getMinute());
        timex.setSecond(datetime.getSecond());
        return timex;
    }

    /**
     * Creates a TIMEX holding only the hour, minute and second of {@code time}.
     *
     * @param time the source time
     * @return the new TIMEX
     */
    public static TimexProperty fromTime(Time time) {
        TimexProperty timex = new TimexProperty();
        timex.setHour(time.getHour());
        timex.setMinute(time.getMinute());
        timex.setSecond(time.getSecond());
        return timex;
    }

    @Override
    public String toString() {
        return TimexConvert.convertTimexToString(this);
    }

    /**
     * Describes this TIMEX relative to a reference date, via
     * {@code TimexRelativeConvert.convertTimexToStringRelative}.
     *
     * @param referenceDate the date the description is relative to
     * @return the description
     */
    public String toNaturalLanguage(LocalDateTime referenceDate) {
        return TimexRelativeConvert.convertTimexToStringRelative(this, referenceDate);
    }

    /**
     * Returns an independent copy of this TIMEX.
     *
     * <p>The copy is a plain {@code TimexProperty} built through the setters, so it keeps no
     * reference back to this instance. The stored {@code timexValue} and {@code types} are
     * not copied; both are recomputed by their getters.
     *
     * @return the copy
     */
    @Override
    public TimexProperty clone() {
        TimexProperty copy = new TimexProperty();
        copy.setNow(this.getNow());
        copy.setYears(this.getYears());
        copy.setMonths(this.getMonths());
        copy.setWeeks(this.getWeeks());
        copy.setDays(this.getDays());
        copy.setHours(this.getHours());
        copy.setMinutes(this.getMinutes());
        copy.setSeconds(this.getSeconds());
        copy.setYear(this.getYear());
        copy.setMonth(this.getMonth());
        copy.setDayOfMonth(this.getDayOfMonth());
        copy.setDayOfWeek(this.getDayOfWeek());
        copy.setSeason(this.getSeason());
        copy.setWeekOfYear(this.getWeekOfYear());
        copy.setWeekend(this.getWeekend());
        copy.setWeekOfMonth(this.getWeekOfMonth());
        copy.setHour(this.getHour());
        copy.setMinute(this.getMinute());
        copy.setSecond(this.getSecond());
        copy.setPartOfDay(this.getPartOfDay());
        return copy;
    }

    /**
     * Assigns fields from a map of extracted group names to their text values.
     *
     * <p>Entries with a blank value and entries with an unknown key are skipped. A non-blank
     * {@code weekend} entry sets the weekend flag to true whatever its text is. A
     * {@code dateUnit} entry triggers the date-duration assignment, which also reads the
     * {@code amount} entry of the same map.
     *
     * @param source group name to captured text
     */
    public void assignProperties(Map<String, String> source) {
        for (Entry<String, String> item : source.entrySet()) {
            if (StringUtils.isBlank(item.getValue())) {
                continue;
            }

            switch (item.getKey()) {
                case "year":
                    setYear(Integer.parseInt(item.getValue()));
                    break;
                case "month":
                    setMonth(Integer.parseInt(item.getValue()));
                    break;
                case "dayOfMonth":
                    setDayOfMonth(Integer.parseInt(item.getValue()));
                    break;
                case "dayOfWeek":
                    setDayOfWeek(Integer.parseInt(item.getValue()));
                    break;
                case "season":
                    setSeason(item.getValue());
                    break;
                case "weekOfYear":
                    setWeekOfYear(Integer.parseInt(item.getValue()));
                    break;
                case "weekend":
                    setWeekend(true);
                    break;
                case "weekOfMonth":
                    setWeekOfMonth(Integer.parseInt(item.getValue()));
                    break;
                case "hour":
                    setHour(Integer.parseInt(item.getValue()));
                    break;
                case "minute":
                    setMinute(Integer.parseInt(item.getValue()));
                    break;
                case "second":
                    setSecond(Integer.parseInt(item.getValue()));
                    break;
                case "partOfDay":
                    setPartOfDay(item.getValue());
                    break;
                case "dateUnit":
                    this.assignDateDuration(source);
                    break;
                case "hourAmount":
                    setHours(new BigDecimal(item.getValue()));
                    break;
                case "minuteAmount":
                    setMinutes(new BigDecimal(item.getValue()));
                    break;
                case "secondAmount":
                    setSeconds(new BigDecimal(item.getValue()));
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Stores the {@code amount} entry of {@code source} in the duration field selected by
     * its {@code dateUnit} entry (Y, M, W or D); other units are ignored.
     */
    private void assignDateDuration(Map<String, String> source) {
        switch (source.get("dateUnit")) {
            case "Y":
                this.years = new BigDecimal(source.get("amount"));
                break;
            case "M":
                this.months = new BigDecimal(source.get("amount"));
                break;
            case "W":
                this.weeks = new BigDecimal(source.get("amount"));
                break;
            case "D":
                this.days = new BigDecimal(source.get("amount"));
                break;
            default:
                break;
        }
    }
}

Просмотреть файл

@ -0,0 +1,36 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
/**
 * Simple holder for a range expressed as a start TIMEX, an end TIMEX and, optionally, the
 * duration between them.
 */
public class TimexRange {
    private TimexProperty start;
    private TimexProperty end;
    private TimexProperty duration;

    /** Returns the start of the range. */
    public TimexProperty getStart() {
        return start;
    }

    /** Sets the start of the range. */
    public void setStart(TimexProperty withStart) {
        start = withStart;
    }

    /** Returns the end of the range. */
    public TimexProperty getEnd() {
        return end;
    }

    /** Sets the end of the range. */
    public void setEnd(TimexProperty withEnd) {
        end = withEnd;
    }

    /** Returns the duration of the range; null unless one was set. */
    public TimexProperty getDuration() {
        return duration;
    }

    /** Sets the duration of the range. */
    public void setDuration(TimexProperty withDuration) {
        duration = withDuration;
    }
}

Просмотреть файл

@ -0,0 +1,266 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.time.DayOfWeek;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
public class TimexRangeResolver {
/**
 * Resolves candidate timex strings against a list of constraint timex strings.
 *
 * <p>The candidates pass through four stages in order: duration resolution, date-range
 * constraints, time constraints and time-range constraints. Each stage consumes the output
 * of the previous one.
 *
 * @param candidates  timex strings to resolve
 * @param constraints timex strings describing the constraints
 * @return the surviving candidates, each parsed into a {@link TimexProperty}
 */
public static List<TimexProperty> evaluate(Set<String> candidates, List<String> constraints) {
    List<TimexProperty> parsedConstraints = constraints.stream()
            .map(TimexProperty::new)
            .collect(Collectors.toList());

    Set<String> remaining = TimexRangeResolver.resolveDurations(candidates, parsedConstraints);
    remaining = TimexRangeResolver.resolveByDateRangeConstraints(remaining, parsedConstraints);
    remaining = TimexRangeResolver.resolveByTimeConstraints(remaining, parsedConstraints);
    remaining = TimexRangeResolver.resolveByTimeRangeConstraints(remaining, parsedConstraints);

    return remaining.stream()
            .map(TimexProperty::new)
            .collect(Collectors.toList());
}
/**
 * Replaces every duration candidate with the timex values obtained by combining it with the
 * constraints; candidates that are not durations are passed through unchanged.
 *
 * @param candidates  candidate timex strings
 * @param constraints parsed constraints
 * @return a new set holding the pass-through candidates and the resolved duration values
 */
public static Set<String> resolveDurations(Set<String> candidates, List<TimexProperty> constraints) {
    Set<String> resolvedValues = new HashSet<String>();
    for (String candidate : candidates) {
        TimexProperty parsed = new TimexProperty(candidate);
        if (!parsed.getTypes().contains(Constants.TimexTypes.DURATION)) {
            resolvedValues.add(candidate);
            continue;
        }
        for (TimexProperty combined : TimexRangeResolver.resolveDuration(parsed, constraints)) {
            resolvedValues.add(combined.getTimexValue());
        }
    }
    return resolvedValues;
}
/**
 * Combines a duration candidate with each date-time or time constraint.
 *
 * <p>Date-time constraints go through {@code TimexHelpers.timexDateTimeAdd}; constraints that
 * are times (and not date-times) go through {@code TimexHelpers.timexTimeAdd}. Every other
 * constraint is ignored.
 *
 * @param candidate   the duration timex
 * @param constraints parsed constraints
 * @return one result per matching constraint, in constraint order
 */
private static List<TimexProperty> resolveDuration(TimexProperty candidate, List<TimexProperty> constraints) {
    List<TimexProperty> combined = new ArrayList<TimexProperty>();
    for (TimexProperty constraint : constraints) {
        boolean isDateTime = constraint.getTypes().contains(Constants.TimexTypes.DATE_TIME);
        if (isDateTime) {
            combined.add(TimexHelpers.timexDateTimeAdd(constraint, candidate));
            continue;
        }
        boolean isTime = constraint.getTypes().contains(Constants.TimexTypes.TIME);
        if (isTime) {
            combined.add(TimexHelpers.timexTimeAdd(constraint, candidate));
        }
    }
    return combined;
}
/**
 * Resolves the candidates against the date-range constraints.
 *
 * <p>The date-range constraints are converted to {@link DateRange}s and collapsed by
 * {@code TimexConstraintsHelper.collapseDateRanges}. When nothing remains, the candidate set
 * is returned as-is; otherwise every candidate is resolved against the collapsed ranges and
 * duplicate results are dropped.
 *
 * @param candidates       candidate timex strings
 * @param timexConstraints parsed constraints of any type
 * @return the input set, or a new set of resolved timex values
 */
private static Set<String> resolveByDateRangeConstraints(Set<String> candidates,
        List<TimexProperty> timexConstraints) {
    List<DateRange> ranges = timexConstraints.stream()
            .filter(constraint -> constraint.getTypes().contains(Constants.TimexTypes.DATE_RANGE))
            .map(constraint -> TimexHelpers.dateRangeFromTimex(constraint))
            .collect(Collectors.toList());
    List<DateRange> collapsed = TimexConstraintsHelper.collapseDateRanges(ranges);
    if (collapsed.isEmpty()) {
        return candidates;
    }

    List<String> resolvedValues = new ArrayList<String>();
    for (String candidate : candidates) {
        resolvedValues.addAll(TimexRangeResolver.resolveDate(new TimexProperty(candidate), collapsed));
    }
    return TimexRangeResolver.removeDuplicates(resolvedValues);
}
/**
 * Resolves one timex against every date-range constraint and concatenates the results in
 * constraint order.
 *
 * @param timex       the candidate timex
 * @param constraints date ranges to resolve against
 * @return all timex values produced for the constraints
 */
private static List<String> resolveDate(TimexProperty timex, List<DateRange> constraints) {
    return constraints.stream()
            .flatMap(constraint -> TimexRangeResolver.resolveDateAgainstConstraint(timex, constraint).stream())
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Resolves the candidates against the time-range constraints.
 *
 * <p>When no time-range constraint remains after collapsing, the candidate set is returned
 * as-is. Otherwise only time-range and time candidates are resolved; candidates of any other
 * type contribute nothing to the result.
 *
 * @param candidates       candidate timex strings
 * @param timexConstrainst parsed constraints of any type
 * @return the input set, or a new set of resolved timex values
 */
private static Set<String> resolveByTimeRangeConstraints(Set<String> candidates,
        List<TimexProperty> timexConstrainst) {
    List<TimeRange> ranges = timexConstrainst.stream()
            .filter(constraint -> constraint.getTypes().contains(Constants.TimexTypes.TIME_RANGE))
            .map(constraint -> TimexHelpers.timeRangeFromTimex(constraint))
            .collect(Collectors.toList());
    List<TimeRange> collapsed = TimexConstraintsHelper.collapseTimeRanges(ranges);
    if (collapsed.isEmpty()) {
        return candidates;
    }

    List<String> resolvedValues = new ArrayList<String>();
    for (String candidate : candidates) {
        TimexProperty parsed = new TimexProperty(candidate);
        if (parsed.getTypes().contains(Constants.TimexTypes.TIME_RANGE)) {
            resolvedValues.addAll(TimexRangeResolver.resolveTimeRange(parsed, collapsed));
            continue;
        }
        if (parsed.getTypes().contains(Constants.TimexTypes.TIME)) {
            resolvedValues.addAll(TimexRangeResolver.resolveTime(parsed, collapsed));
        }
    }
    return TimexRangeResolver.removeDuplicates(resolvedValues);
}
/**
 * Resolves a time-range timex against the time-range constraints it overlaps.
 *
 * <p>For each overlapping constraint, a clone of {@code timex} is emitted whose part-of-day
 * and duration fields are cleared and whose hour, minute and second are set to the later of
 * the two range starts.
 *
 * @param timex       the candidate time range
 * @param constraints collapsed time-range constraints
 * @return one timex value per overlapping constraint
 */
private static List<String> resolveTimeRange(TimexProperty timex, List<TimeRange> constraints) {
    TimeRange candidate = TimexHelpers.timeRangeFromTimex(timex);
    List<String> resolvedValues = new ArrayList<String>();
    for (TimeRange constraint : constraints) {
        if (!TimexConstraintsHelper.isOverlapping(candidate, constraint)) {
            continue;
        }
        Integer laterStart = Math.max(candidate.getStart().getTime(), constraint.getStart().getTime());
        Time startTime = new Time(laterStart);

        // TODO: consider a method on TimexProperty to do this clone/overwrite pattern
        TimexProperty narrowed = timex.clone();
        narrowed.setPartOfDay(null);
        narrowed.setSeconds(null);
        narrowed.setMinutes(null);
        narrowed.setHours(null);
        narrowed.setSecond(startTime.getSecond());
        narrowed.setMinute(startTime.getMinute());
        narrowed.setHour(startTime.getHour());
        resolvedValues.add(narrowed.getTimexValue());
    }
    return resolvedValues;
}
/**
 * Resolves a time timex against every time-range constraint and concatenates the results in
 * constraint order.
 *
 * @param timex       the candidate time
 * @param constraints collapsed time-range constraints
 * @return all timex values produced for the constraints
 */
private static List<String> resolveTime(TimexProperty timex, List<TimeRange> constraints) {
    return constraints.stream()
            .flatMap(constraint -> TimexRangeResolver.resolveTimeAgainstConstraint(timex, constraint).stream())
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Keeps a time timex only when it falls inside the constraint, start inclusive and end
 * exclusive.
 *
 * @param timex      the candidate time
 * @param constraint the time range to test against
 * @return a list holding the timex value when it is in range, otherwise an empty list
 */
private static List<String> resolveTimeAgainstConstraint(TimexProperty timex, TimeRange constraint) {
    Time candidate = new Time(timex.getHour(), timex.getMinute(), timex.getSecond());
    List<String> matches = new ArrayList<String>();
    boolean notBeforeStart = candidate.getTime() >= constraint.getStart().getTime();
    if (notBeforeStart && candidate.getTime() < constraint.getEnd().getTime()) {
        matches.add(timex.getTimexValue());
    }
    return matches;
}
/**
 * Collapses a list into a {@link HashSet}, discarding repeated values.
 *
 * @param original values that may contain duplicates
 * @return a new set with the distinct values
 */
private static Set<String> removeDuplicates(List<String> original) {
    Set<String> distinct = new HashSet<String>(original);
    return distinct;
}
/**
 * Keeps a definite timex only when its date falls inside the constraint, start inclusive and
 * end exclusive.
 *
 * @param timex      a timex that {@code TimexHelpers.dateFromTimex} can convert to a date
 * @param constraint the date range to test against
 * @return a list holding the timex value when it is in range, otherwise an empty list
 */
private static List<String> resolveDefiniteAgainstConstraint(TimexProperty timex, DateRange constraint) {
    LocalDateTime timexDate = TimexHelpers.dateFromTimex(timex);
    List<String> matches = new ArrayList<String>();
    boolean notBeforeStart = !timexDate.isBefore(constraint.getStart());
    if (notBeforeStart && timexDate.isBefore(constraint.getEnd())) {
        matches.add(timex.getTimexValue());
    }
    return matches;
}
/**
 * Expands a partially specified timex into the concrete dates it can denote within a date
 * range.
 *
 * <ul>
 * <li>Month and day-of-month set: tries every year from the constraint's start year to its
 * end year and keeps the dates that fall inside the constraint.</li>
 * <li>Day-of-week set: emits one dated timex for every date returned by
 * {@code TimexDateHelpers.datesMatchingDay} for the constraint.</li>
 * <li>Hour set: tries every day from the constraint start up to and including its end and
 * keeps those that fall inside the constraint.</li>
 * </ul>
 *
 * @param timex      the candidate timex
 * @param constraint the date range to resolve against
 * @return the resolved timex values, or an empty list when none of the cases applies
 */
private static List<String> resolveDateAgainstConstraint(TimexProperty timex, DateRange constraint) {
    List<String> resolvedValues = new ArrayList<String>();

    if (timex.getMonth() != null && timex.getDayOfMonth() != null) {
        int year = constraint.getStart().getYear();
        while (year <= constraint.getEnd().getYear()) {
            TimexProperty dated = timex.clone();
            dated.setYear(year);
            resolvedValues.addAll(TimexRangeResolver.resolveDefiniteAgainstConstraint(dated, constraint));
            year++;
        }
        return resolvedValues;
    }

    if (timex.getDayOfWeek() != null) {
        // The timex day of week is ISO numbered (1 = Monday ... 7 = Sunday), as is DayOfWeek.of.
        DayOfWeek weekday = DayOfWeek.of(timex.getDayOfWeek());
        for (LocalDateTime match : TimexDateHelpers.datesMatchingDay(weekday, constraint.getStart(),
                constraint.getEnd())) {
            TimexProperty dated = timex.clone();
            dated.setDayOfWeek(null);
            dated.setYear(match.getYear());
            dated.setMonth(match.getMonthValue());
            dated.setDayOfMonth(match.getDayOfMonth());
            resolvedValues.add(dated.getTimexValue());
        }
        return resolvedValues;
    }

    if (timex.getHour() != null) {
        for (LocalDateTime day = constraint.getStart();
                day.compareTo(constraint.getEnd()) <= 0;
                day = day.plusDays(1)) {
            TimexProperty dated = timex.clone();
            dated.setYear(day.getYear());
            dated.setMonth(day.getMonthValue());
            dated.setDayOfMonth(day.getDayOfMonth());
            resolvedValues.addAll(TimexRangeResolver.resolveDefiniteAgainstConstraint(dated, constraint));
        }
        return resolvedValues;
    }

    return resolvedValues;
}
/**
 * Adds the time of every time constraint to candidates that are dates without a time.
 *
 * <p>When there is no time constraint, the candidate set is returned as-is. Otherwise each
 * date-only candidate yields one timex value per constraint time, every other candidate is
 * passed through, and duplicates are dropped.
 *
 * @param candidates       candidate timex strings
 * @param timexConstrainst parsed constraints of any type
 * @return the input set, or a new set of resolved timex values
 */
private static Set<String> resolveByTimeConstraints(Set<String> candidates, List<TimexProperty> timexConstrainst) {
    List<Time> times = timexConstrainst.stream()
            .filter(constraint -> constraint.getTypes().contains(Constants.TimexTypes.TIME))
            .map(constraint -> TimexHelpers.timeFromTimex(constraint))
            .collect(Collectors.toList());
    if (times.isEmpty()) {
        return candidates;
    }

    List<String> resolvedValues = new ArrayList<String>();
    for (String candidate : candidates) {
        TimexProperty parsed = new TimexProperty(candidate);
        boolean dateWithoutTime = parsed.getTypes().contains(Constants.TimexTypes.DATE)
                && !parsed.getTypes().contains(Constants.TimexTypes.TIME);
        if (!dateWithoutTime) {
            resolvedValues.add(parsed.getTimexValue());
            continue;
        }
        // The same instance is overwritten for each time; its value is captured after each overwrite.
        for (Time time : times) {
            parsed.setHour(time.getHour());
            parsed.setMinute(time.getMinute());
            parsed.setSecond(time.getSecond());
            resolvedValues.add(parsed.getTimexValue());
        }
    }
    return TimexRangeResolver.removeDuplicates(resolvedValues);
}
}

Просмотреть файл

@ -0,0 +1,93 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import com.microsoft.recognizers.text.utilities.RegExpUtility;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class TimexRegex {
private static final String DATE_TIME_COLLECTION_NAME = "datetime";
private static final String DATE_COLLECTION_NAME = "date";
private static final String TIME_COLLECTION_NAME = "time";
private static final String PERIOD_COLLECTION_NAME = "period";
private static Pattern[] DATE_COLLECTION_NAME_PATTERNS = {
// date
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-(?<month>\\d\\d)(-(?<dayOfMonth>\\d\\d))?"),
Pattern.compile("^XXXX-WXX-(?<dayOfWeek>\\d)"),
Pattern.compile("^XXXX-XX-(?<dayOfMonth>\\d\\d)"),
// daterange
Pattern.compile("^(?<year>\\d\\d\\d\\d)"),
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-(?<month>\\d\\d)-W(?<weekOfMonth>\\d\\d)"),
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-(?<month>\\d\\d)-WXX-(?<weekOfMonth>\\d{1,2})(-(?<dayOfWeek>\\d))?"),
Pattern.compile("^(?<season>SP|SU|FA|WI)"),
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-(?<season>SP|SU|FA|WI)"),
Pattern.compile("^(XXXX|(?<year>\\d\\d\\d\\d))-W(?<weekOfYear>\\d\\d)(-(?<dayOfWeek>\\d)|-(?<weekend>WE))?"), };
private static Pattern[] TIME_COLLECTION_NAME_PATTERNS = {
// time
Pattern.compile("T(?<hour>\\d\\d)Z?$"), Pattern.compile("T(?<hour>\\d\\d):(?<minute>\\d\\d)Z?$"),
Pattern.compile("T(?<hour>\\d\\d):(?<minute>\\d\\d):(?<second>\\d\\d)Z?$"),
// timerange
Pattern.compile("^T(?<partOfDay>DT|NI|MO|AF|EV)$") };
private static Pattern[] PERIOD_COLLECTION_NAME_PATTERNS = {
Pattern.compile("^P(?<amount>\\d*\\.?\\d+)(?<dateUnit>Y|M|W|D)$"),
Pattern.compile("^PT(?<hourAmount>\\d*\\.?\\d+)H(\\d*\\.?\\d+(M|S)){0,2}$"),
Pattern.compile("^PT(\\d*\\.?\\d+H)?(?<minuteAmount>\\d*\\.?\\d+)M(\\d*\\.?\\d+S)?$"),
Pattern.compile("^PT(\\d*\\.?\\d+(H|M)){0,2}(?<secondAmount>\\d*\\.?\\d+)S$"), };
private static Map<String, Pattern[]> TIMEX_REGEX = new HashMap<String, Pattern[]>() {
{
put(DATE_COLLECTION_NAME, DATE_COLLECTION_NAME_PATTERNS);
put(TIME_COLLECTION_NAME, TIME_COLLECTION_NAME_PATTERNS);
put(PERIOD_COLLECTION_NAME, PERIOD_COLLECTION_NAME_PATTERNS);
}
};
public static Boolean extract(String name, String timex, Map<String, String> result) {
String lowerName = name.toLowerCase();
String[] nameGroup = new String[lowerName == DATE_TIME_COLLECTION_NAME ? 2 : 1];
if (lowerName == DATE_TIME_COLLECTION_NAME) {
nameGroup[0] = DATE_COLLECTION_NAME;
nameGroup[1] = TIME_COLLECTION_NAME;
} else {
nameGroup[0] = lowerName;
}
Boolean anyTrue = false;
for (String nameItem : nameGroup) {
for (Pattern entry : TIMEX_REGEX.get(nameItem)) {
if (TimexRegex.tryExtract(entry, timex, result)) {
anyTrue = true;
}
}
}
return anyTrue;
}
private static Boolean tryExtract(Pattern regex, String timex, Map<String, String> result) {
Matcher regexResult = regex.matcher(timex);
if (!regexResult.find()) {
return false;
}
Map<String, String> regexGroupNames = RegExpUtility.getNamedGroups(regexResult, true);
for (Entry<String, String> entry : regexGroupNames.entrySet()) {
result.put(entry.getKey(), entry.getValue());
}
return true;
}
}

Просмотреть файл

@ -0,0 +1,14 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import com.microsoft.recognizers.datatypes.timex.expression.english.TimexRelativeConvertEnglish;
import java.time.LocalDateTime;
/**
 * Entry point for rendering a timex as text relative to a reference date. The work is
 * delegated to {@link TimexRelativeConvertEnglish}.
 */
public class TimexRelativeConvert {

    /**
     * Describes {@code timex} relative to {@code referenceDate}.
     *
     * @param timex         the timex to describe
     * @param referenceDate the date the description is relative to
     * @return the text produced by {@code TimexRelativeConvertEnglish.convertTimexToStringRelative}
     */
    public static String convertTimexToStringRelative(TimexProperty timex, LocalDateTime referenceDate) {
        String relativeText = TimexRelativeConvertEnglish.convertTimexToStringRelative(timex, referenceDate);
        return relativeText;
    }
}

Просмотреть файл

@ -0,0 +1,572 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import com.google.common.collect.Streams;
import java.time.DayOfWeek;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
public class TimexResolver {
/**
 * Resolves each timex string and gathers all resulting entries into one {@link Resolution}.
 *
 * @param timexArray timex strings to resolve
 * @param date       reference date; the current local date-time is used when {@code null}
 * @return a resolution whose values hold the entries of every timex, in input order
 */
public static Resolution resolve(String[] timexArray, LocalDateTime date) {
    final LocalDateTime reference = (date == null) ? LocalDateTime.now() : date;

    Resolution resolution = new Resolution();
    for (String timexText : timexArray) {
        TimexProperty parsed = new TimexProperty(timexText);
        List<Resolution.Entry> entries = TimexResolver.resolveTimex(parsed, reference);
        resolution.getValues().addAll(entries);
    }
    return resolution;
}
/**
 * Dispatches a timex to the resolver matching its types. The types come from the timex
 * itself, or from {@code TimexInference.infer} when the timex reports none.
 *
 * <p>The checks run in a fixed priority order and the first match wins: date-time range,
 * definite time, definite date range, date range, definite, time range, date-time,
 * duration, date, time.
 *
 * @param timex the timex to resolve
 * @param date  reference date
 * @return the resolved entries, or an empty list when no type applies
 */
private static List<Resolution.Entry> resolveTimex(TimexProperty timex, LocalDateTime date) {
    HashSet<String> types = timex.getTypes().isEmpty() ? TimexInference.infer(timex) : timex.getTypes();

    if (types.contains(Constants.TimexTypes.DATE_TIME_RANGE)) {
        return TimexResolver.resolveDateTimeRange(timex, date);
    }

    final boolean definite = types.contains(Constants.TimexTypes.DEFINITE);
    final boolean dateRange = types.contains(Constants.TimexTypes.DATE_RANGE);
    final boolean time = types.contains(Constants.TimexTypes.TIME);

    if (definite) {
        if (time) {
            return TimexResolver.resolveDefiniteTime(timex, date);
        }
        if (dateRange) {
            return TimexResolver.resolveDefiniteDateRange(timex, date);
        }
    }
    if (dateRange) {
        return TimexResolver.resolveDateRange(timex, date);
    }
    if (definite) {
        return TimexResolver.resolveDefinite(timex);
    }
    if (types.contains(Constants.TimexTypes.TIME_RANGE)) {
        return TimexResolver.resolveTimeRange(timex, date);
    }
    if (types.contains(Constants.TimexTypes.DATE_TIME)) {
        return TimexResolver.resolveDateTime(timex, date);
    }
    if (types.contains(Constants.TimexTypes.DURATION)) {
        return TimexResolver.resolveDuration(timex);
    }
    if (types.contains(Constants.TimexTypes.DATE)) {
        return TimexResolver.resolveDate(timex, date);
    }
    if (time) {
        return TimexResolver.resolveTime(timex, date);
    }
    return new ArrayList<Resolution.Entry>();
}
/**
 * Builds the single {@code "datetime"} entry for a timex with a definite date and a time.
 * The value is the date value and the time value separated by one space.
 *
 * @param timex the timex to resolve
 * @param date  reference date, forwarded to {@code TimexValue.timeValue}
 * @return a one-element list
 */
private static List<Resolution.Entry> resolveDefiniteTime(TimexProperty timex, LocalDateTime date) {
    List<Resolution.Entry> entries = new ArrayList<Resolution.Entry>();
    entries.add(new Resolution.Entry() {
        {
            setTimex(timex.getTimexValue());
            setType("datetime");
            setValue(String.format("%1$s %2$s", TimexValue.dateValue(timex),
                    TimexValue.timeValue(timex, date)));
        }
    });
    return entries;
}
/**
 * Builds the single {@code "date"} entry for a definite timex.
 *
 * @param timex the timex to resolve
 * @return a one-element list whose entry value is {@code TimexValue.dateValue(timex)}
 */
private static List<Resolution.Entry> resolveDefinite(TimexProperty timex) {
    List<Resolution.Entry> entries = new ArrayList<Resolution.Entry>();
    entries.add(new Resolution.Entry() {
        {
            setTimex(timex.getTimexValue());
            setType("date");
            setValue(TimexValue.dateValue(timex));
        }
    });
    return entries;
}
/**
 * Builds the single {@code "daterange"} entry for a definite date range. The range is
 * expanded with {@code TimexHelpers.expandDateTimeRange}, and its start and end are
 * rendered as date values.
 *
 * @param timex the timex to resolve
 * @param date  reference date (not used by this method)
 * @return a one-element list
 */
private static List<Resolution.Entry> resolveDefiniteDateRange(TimexProperty timex, LocalDateTime date) {
    TimexRange range = TimexHelpers.expandDateTimeRange(timex);

    List<Resolution.Entry> entries = new ArrayList<Resolution.Entry>();
    entries.add(new Resolution.Entry() {
        {
            setTimex(timex.getTimexValue());
            setType("daterange");
            setStart(TimexValue.dateValue(range.getStart()));
            setEnd(TimexValue.dateValue(range.getEnd()));
        }
    });
    return entries;
}
/**
 * Builds one {@code "date"} entry for every date value the timex can denote relative to the
 * reference date.
 *
 * @param timex the timex to resolve
 * @param date  reference date
 * @return one entry per value returned by {@code getDateValues}, in the same order
 */
private static List<Resolution.Entry> resolveDate(TimexProperty timex, LocalDateTime date) {
    List<Resolution.Entry> entries = new ArrayList<Resolution.Entry>();
    TimexResolver.getDateValues(timex, date).forEach(dateValue -> entries.add(new Resolution.Entry() {
        {
            setTimex(timex.getTimexValue());
            setType("date");
            setValue(dateValue);
        }
    }));
    return entries;
}
/**
 * Computes the most recent past occurrence of a partially specified date, rendered as a
 * date value.
 *
 * <p>With a day of month:
 * <ul>
 * <li>month given: the reference year is used, or the previous year when the month/day has
 * not yet passed in the reference year (later month, or same month and the reference day of
 * month is on or before the timex day);</li>
 * <li>month missing: the reference month is used, or the previous month when the reference
 * day of month is on or before the timex day. Stepping back from January yields December of
 * the previous year.</li>
 * </ul>
 * With only a day of week, the date comes from {@code generateWeekDate(timex, date, true)}.
 *
 * @param timex the partially specified date
 * @param date  reference date
 * @return the date value, or an empty string when the timex has neither a day of month nor
 *         a day of week
 */
private static String lastDateValue(TimexProperty timex, LocalDateTime date) {
    if (timex.getDayOfMonth() != null) {
        int year = date.getYear();
        int month = date.getMonthValue();

        if (timex.getMonth() != null) {
            month = timex.getMonth();
            // Only go back a year when this year's occurrence is still ahead of (or on) the
            // reference date; a same-month occurrence that already passed stays in this year.
            boolean laterMonth = date.getMonthValue() < month;
            boolean sameMonthNotPassed = date.getMonthValue() == month
                    && date.getDayOfMonth() <= timex.getDayOfMonth();
            if (laterMonth || sameMonthNotPassed) {
                year--;
            }
        } else if (date.getDayOfMonth() <= timex.getDayOfMonth()) {
            month--;
            if (month < 1) {
                // Stepping back from January lands in December of the previous year.
                month = 12;
                year--;
            }
        }

        TimexProperty resolved = new TimexProperty();
        resolved.setYear(year);
        resolved.setMonth(month);
        resolved.setDayOfMonth(timex.getDayOfMonth());
        return TimexValue.dateValue(resolved);
    }

    if (timex.getDayOfWeek() != null) {
        LocalDateTime start = generateWeekDate(timex, date, true);
        TimexProperty resolved = new TimexProperty();
        resolved.setYear(start.getYear());
        resolved.setMonth(start.getMonthValue());
        resolved.setDayOfMonth(start.getDayOfMonth());
        return TimexValue.dateValue(resolved);
    }

    return "";
}
/**
 * Computes the next occurrence (today included) of a partially specified date, rendered as
 * a date value.
 *
 * <p>With a day of month:
 * <ul>
 * <li>month given: the reference year is used, or the following year when the month/day has
 * already passed in the reference year;</li>
 * <li>month missing: the reference month is used, or the following month when the reference
 * day of month is after the timex day. Stepping forward from December yields January of the
 * following year.</li>
 * </ul>
 * With only a day of week, the date comes from {@code generateWeekDate(timex, date, false)}.
 *
 * @param timex the partially specified date
 * @param date  reference date
 * @return the date value, or an empty string when the timex has neither a day of month nor
 *         a day of week
 */
private static String nextDateValue(TimexProperty timex, LocalDateTime date) {
    if (timex.getDayOfMonth() != null) {
        int year = date.getYear();
        int month = date.getMonthValue();

        if (timex.getMonth() != null) {
            month = timex.getMonth();
            boolean earlierMonth = date.getMonthValue() > month;
            boolean sameMonthPassed = date.getMonthValue() == month
                    && date.getDayOfMonth() > timex.getDayOfMonth();
            if (earlierMonth || sameMonthPassed) {
                year++;
            }
        } else if (date.getDayOfMonth() > timex.getDayOfMonth()) {
            month++;
            if (month > 12) {
                // Stepping forward from December lands in January of the FOLLOWING year.
                month = 1;
                year++;
            }
        }

        TimexProperty resolved = new TimexProperty();
        resolved.setYear(year);
        resolved.setMonth(month);
        resolved.setDayOfMonth(timex.getDayOfMonth());
        return TimexValue.dateValue(resolved);
    }

    if (timex.getDayOfWeek() != null) {
        LocalDateTime start = generateWeekDate(timex, date, false);
        TimexProperty resolved = new TimexProperty();
        resolved.setYear(start.getYear());
        resolved.setMonth(start.getMonthValue());
        resolved.setDayOfMonth(start.getDayOfMonth());
        return TimexValue.dateValue(resolved);
    }

    return "";
}
/**
 * Builds the single {@code "time"} entry for a time timex.
 *
 * @param timex the timex to resolve
 * @param date  reference date, forwarded to {@code TimexValue.timeValue}
 * @return a one-element list
 */
private static List<Resolution.Entry> resolveTime(TimexProperty timex, LocalDateTime date) {
    List<Resolution.Entry> entries = new ArrayList<Resolution.Entry>();
    entries.add(new Resolution.Entry() {
        {
            setTimex(timex.getTimexValue());
            setType("time");
            setValue(TimexValue.timeValue(timex, date));
        }
    });
    return entries;
}
/**
 * Builds the single {@code "duration"} entry for a duration timex.
 *
 * @param timex the timex to resolve
 * @return a one-element list whose entry value is {@code TimexValue.durationValue(timex)}
 */
private static List<Resolution.Entry> resolveDuration(TimexProperty timex) {
    List<Resolution.Entry> entries = new ArrayList<Resolution.Entry>();
    entries.add(new Resolution.Entry() {
        {
            setTimex(timex.getTimexValue());
            setType("duration");
            setValue(TimexValue.durationValue(timex));
        }
    });
    return entries;
}
/**
 * Renders the pair returned by {@code TimexHelpers.yearDateRange(year)} as date values.
 *
 * @param year the year to expand
 * @return the left and right bounds as date-value strings
 */
private static Pair<String, String> yearDateRange(Integer year) {
    Pair<TimexProperty, TimexProperty> bounds = TimexHelpers.yearDateRange(year);
    String lowerBound = TimexValue.dateValue(bounds.getLeft());
    String upperBound = TimexValue.dateValue(bounds.getRight());
    return Pair.of(lowerBound, upperBound);
}
/**
 * Renders the pair returned by {@code TimexHelpers.monthDateRange(year, month)} as date
 * values.
 *
 * @param year  the year of the month
 * @param month the month to expand
 * @return the left and right bounds as date-value strings
 */
private static Pair<String, String> monthDateRange(Integer year, Integer month) {
    Pair<TimexProperty, TimexProperty> bounds = TimexHelpers.monthDateRange(year, month);
    String lowerBound = TimexValue.dateValue(bounds.getLeft());
    String upperBound = TimexValue.dateValue(bounds.getRight());
    return Pair.of(lowerBound, upperBound);
}
/**
 * Renders the pair returned by
 * {@code TimexHelpers.yearWeekDateRange(year, weekOfYear, isWeekend)} as date values.
 *
 * @param year       the year of the week
 * @param weekOfYear the week number within the year
 * @param isWeekend  weekend flag, forwarded unchanged to the helper
 * @return the left and right bounds as date-value strings
 */
private static Pair<String, String> yearWeekDateRange(Integer year, Integer weekOfYear, Boolean isWeekend) {
    Pair<TimexProperty, TimexProperty> bounds = TimexHelpers.yearWeekDateRange(year, weekOfYear, isWeekend);
    String lowerBound = TimexValue.dateValue(bounds.getLeft());
    String upperBound = TimexValue.dateValue(bounds.getRight());
    return Pair.of(lowerBound, upperBound);
}
/**
 * Renders the pair returned by
 * {@code TimexHelpers.monthWeekDateRange(year, month, weekOfMonth)} as date values.
 *
 * @param year        the year of the month
 * @param month       the month containing the week
 * @param weekOfMonth the week number within the month
 * @return the left and right bounds as date-value strings
 */
private static Pair<String, String> monthWeekDateRange(Integer year, Integer month, Integer weekOfMonth) {
    Pair<TimexProperty, TimexProperty> bounds = TimexHelpers.monthWeekDateRange(year, month, weekOfMonth);
    String lowerBound = TimexValue.dateValue(bounds.getLeft());
    String upperBound = TimexValue.dateValue(bounds.getRight());
    return Pair.of(lowerBound, upperBound);
}
/**
 * Finds the concrete date of a day-of-week timex, looking backwards or forwards from the
 * reference date.
 *
 * <ul>
 * <li>No week-of-month and no week-of-year: the last/next date with that weekday, as
 * returned by {@code TimexDateHelpers.dateOfLastDay} / {@code dateOfNextDay}.</li>
 * <li>Week-of-year given: the weekday inside that week of the timex year (or the reference
 * year). When the timex has no year and the date lies on the wrong side of the reference
 * date, the same week of the adjacent year is used.</li>
 * <li>Week-of-month given: the weekday inside that week of the timex month/year (or the
 * reference month/year). When year or month is missing and the date lies on the wrong side
 * of the reference date, the adjacent year is used if the month was given, otherwise the
 * adjacent month.</li>
 * </ul>
 *
 * @param timex    a timex with a day of week set
 * @param date     reference date
 * @param isBefore {@code true} to look for a past date, {@code false} for a future one
 * @return the matching date
 */
private static LocalDateTime generateWeekDate(TimexProperty timex, LocalDateTime date, boolean isBefore) {
    if (timex.getWeekOfMonth() == null && timex.getWeekOfYear() == null) {
        // The timex day of week is ISO numbered (1 = Monday ... 7 = Sunday), as is DayOfWeek.of.
        DayOfWeek weekday = DayOfWeek.of(timex.getDayOfWeek());
        return isBefore
                ? TimexDateHelpers.dateOfLastDay(weekday, date)
                : TimexDateHelpers.dateOfNextDay(weekday, date);
    }

    // Offset of the requested weekday from the first day of the week.
    Integer dayOffset = timex.getDayOfWeek() - 1;
    Integer year = timex.getYear() != null ? timex.getYear() : date.getYear();

    if (timex.getWeekOfYear() != null) {
        Integer weekOfYear = timex.getWeekOfYear();
        LocalDateTime candidate = TimexHelpers.firstDateOfWeek(year, weekOfYear, Locale.getDefault())
                .plusDays(dayOffset);
        if (timex.getYear() != null) {
            return candidate;
        }
        if (isBefore && candidate.isAfter(date)) {
            return TimexHelpers.firstDateOfWeek(year - 1, weekOfYear, Locale.getDefault()).plusDays(dayOffset);
        }
        if (!isBefore && candidate.isBefore(date)) {
            return TimexHelpers.firstDateOfWeek(year + 1, weekOfYear, Locale.getDefault()).plusDays(dayOffset);
        }
        return candidate;
    }

    final boolean monthGiven = timex.getMonth() != null;
    Integer month = monthGiven ? timex.getMonth() : date.getMonthValue();
    Integer weekOfMonth = timex.getWeekOfMonth();
    LocalDateTime candidate = TimexHelpers.generateMonthWeekDateStart(year, month, weekOfMonth)
            .plusDays(dayOffset);
    if (timex.getYear() != null && monthGiven) {
        return candidate;
    }

    final int step;
    if (isBefore && candidate.isAfter(date)) {
        step = -1;
    } else if (!isBefore && candidate.isBefore(date)) {
        step = 1;
    } else {
        return candidate;
    }
    // NOTE(review): when the month is taken from the reference date, month + step can become
    // 0 or 13 (January / December); confirm generateMonthWeekDateStart tolerates that.
    return TimexHelpers.generateMonthWeekDateStart(monthGiven ? year + step : year,
            monthGiven ? month : month + step, weekOfMonth).plusDays(dayOffset);
}
/**
 * Resolves a date-range timex into {@code "daterange"} entries. The first matching case
 * wins:
 *
 * <ol>
 * <li>season set: one entry with the value {@code "not resolved"};</li>
 * <li>month and week-of-month set: one entry per range from {@code getMonthWeekDateRange};</li>
 * <li>year and month set: that month;</li>
 * <li>year and week-of-year set: that week of the timex year;</li>
 * <li>only month set: that month in the year before the reference year, then in the
 * reference year;</li>
 * <li>only year set: that year;</li>
 * <li>otherwise: no entries.</li>
 * </ol>
 *
 * @param timex the timex to resolve
 * @param date  reference date, used when the timex carries no year
 * @return the resolved entries; possibly empty
 */
private static List<Resolution.Entry> resolveDateRange(TimexProperty timex, LocalDateTime date) {
    List<Resolution.Entry> entries = new ArrayList<Resolution.Entry>();

    if (timex.getSeason() != null) {
        entries.add(new Resolution.Entry() {
            {
                setTimex(timex.getTimexValue());
                setType("daterange");
                setValue("not resolved");
            }
        });
        return entries;
    }

    if (timex.getMonth() != null && timex.getWeekOfMonth() != null) {
        List<Pair<String, String>> yearDateRangeList = getMonthWeekDateRange(
                timex.getYear() != null ? timex.getYear() : Constants.INVALID_VALUE,
                timex.getMonth(), timex.getWeekOfMonth(), date.getYear());
        for (Pair<String, String> yearDateRange : yearDateRangeList) {
            entries.add(newDateRangeEntry(timex, yearDateRange));
        }
        return entries;
    }

    if (timex.getYear() != null && timex.getMonth() != null) {
        entries.add(newDateRangeEntry(timex,
                TimexResolver.monthDateRange(timex.getYear(), timex.getMonth())));
        return entries;
    }

    if (timex.getYear() != null && timex.getWeekOfYear() != null) {
        // Use the year carried by the timex; the reference year is irrelevant once a year is explicit.
        entries.add(newDateRangeEntry(timex,
                TimexResolver.yearWeekDateRange(timex.getYear(), timex.getWeekOfYear(), timex.getWeekend())));
        return entries;
    }

    if (timex.getMonth() != null) {
        Integer referenceYear = date.getYear();
        entries.add(newDateRangeEntry(timex,
                TimexResolver.monthDateRange(referenceYear - 1, timex.getMonth())));
        entries.add(newDateRangeEntry(timex,
                TimexResolver.monthDateRange(referenceYear, timex.getMonth())));
        return entries;
    }

    if (timex.getYear() != null) {
        entries.add(newDateRangeEntry(timex, TimexResolver.yearDateRange(timex.getYear())));
        return entries;
    }

    return entries;
}

/** Creates a {@code "daterange"} entry for {@code timex} whose start/end are the pair's left/right values. */
private static Resolution.Entry newDateRangeEntry(TimexProperty timex, Pair<String, String> dateRange) {
    return new Resolution.Entry() {
        {
            setTimex(timex.getTimexValue());
            setType("daterange");
            setStart(dateRange.getLeft());
            setEnd(dateRange.getRight());
        }
    };
}
/**
 * Maps a part-of-day code to its fixed time range: {@code MO} 08-12, {@code AF} 12-16,
 * {@code EV} 16-20, {@code NI} 20-24. Any other code yields
 * {@code ("not resolved", "not resolved")}.
 *
 * @param timex a timex whose part of day is set
 * @return the start and end times as {@code HH:mm:ss} strings
 */
private static Pair<String, String> partOfDayTimeRange(TimexProperty timex) {
    final String rangeStart;
    final String rangeEnd;
    switch (timex.getPartOfDay()) {
        case "MO":
            rangeStart = "08:00:00";
            rangeEnd = "12:00:00";
            break;
        case "AF":
            rangeStart = "12:00:00";
            rangeEnd = "16:00:00";
            break;
        case "EV":
            rangeStart = "16:00:00";
            rangeEnd = "20:00:00";
            break;
        case "NI":
            rangeStart = "20:00:00";
            rangeEnd = "24:00:00";
            break;
        default:
            rangeStart = "not resolved";
            rangeEnd = "not resolved";
            break;
    }
    return Pair.of(rangeStart, rangeEnd);
}
/**
 * Builds the single {@code "timerange"} entry for a time-range timex. A part-of-day timex
 * uses the fixed range from {@code partOfDayTimeRange}; any other timex is expanded with
 * {@code TimexHelpers.expandTimeRange} and its bounds are rendered as time values.
 *
 * @param timex the timex to resolve
 * @param date  reference date, forwarded to {@code TimexValue.timeValue}
 * @return a one-element list
 */
private static List<Resolution.Entry> resolveTimeRange(TimexProperty timex, LocalDateTime date) {
    List<Resolution.Entry> entries = new ArrayList<Resolution.Entry>();

    if (timex.getPartOfDay() == null) {
        TimexRange range = TimexHelpers.expandTimeRange(timex);
        entries.add(new Resolution.Entry() {
            {
                setTimex(timex.getTimexValue());
                setType("timerange");
                setStart(TimexValue.timeValue(range.getStart(), date));
                setEnd(TimexValue.timeValue(range.getEnd(), date));
            }
        });
        return entries;
    }

    Pair<String, String> range = TimexResolver.partOfDayTimeRange(timex);
    entries.add(new Resolution.Entry() {
        {
            setTimex(timex.getTimexValue());
            setType("timerange");
            setStart(range.getLeft());
            setEnd(range.getRight());
        }
    });
    return entries;
}
/**
 * Resolves the date part with {@code resolveDate}, then turns every entry into a
 * {@code "datetime"} entry by appending a space and the timex's time value.
 *
 * @param timex the timex to resolve
 * @param date  reference date
 * @return the entries produced by {@code resolveDate}, modified in place
 */
private static List<Resolution.Entry> resolveDateTime(TimexProperty timex, LocalDateTime date) {
    List<Resolution.Entry> entries = TimexResolver.resolveDate(timex, date);
    entries.forEach(entry -> {
        entry.setType("datetime");
        String withTime = String.format("%1$s %2$s", entry.getValue(), TimexValue.timeValue(timex, date));
        entry.setValue(withTime);
    });
    return entries;
}
/**
 * Lists the date values a timex can denote. A timex with year, month and day of month
 * yields exactly its own date value. Any other timex yields its last occurrence and, when
 * it has no year, also its next occurrence.
 *
 * @param timex the timex to expand
 * @param date  reference date
 * @return one or two date-value strings
 */
private static List<String> getDateValues(TimexProperty timex, LocalDateTime date) {
    ArrayList<String> values = new ArrayList<String>();

    boolean fullySpecified = timex.getYear() != null && timex.getMonth() != null
            && timex.getDayOfMonth() != null;
    if (fullySpecified) {
        values.add(TimexValue.dateValue(timex));
        return values;
    }

    values.add(lastDateValue(timex, date));
    if (timex.getYear() == null) {
        values.add(nextDateValue(timex, date));
    }
    return values;
}
/**
 * Lists the date ranges of a week-of-month. When {@code year} equals
 * {@code Constants.INVALID_VALUE}, the week is resolved in the year before {@code referYear}
 * and then in {@code referYear}; otherwise only in {@code year}.
 *
 * @param year        the explicit year, or {@code Constants.INVALID_VALUE} when unknown
 * @param month       the month containing the week
 * @param weekOfMonth the week number within the month
 * @param referYear   fallback year used when {@code year} is unknown
 * @return one or two start/end pairs
 */
private static List<Pair<String, String>> getMonthWeekDateRange(Integer year, Integer month, Integer weekOfMonth,
        Integer referYear) {
    List<Pair<String, String>> ranges = new ArrayList<Pair<String, String>>();
    if (year == Constants.INVALID_VALUE) {
        ranges.add(monthWeekDateRange(referYear - 1, month, weekOfMonth));
        ranges.add(monthWeekDateRange(referYear, month, weekOfMonth));
        return ranges;
    }
    ranges.add(monthWeekDateRange(year, month, weekOfMonth));
    return ranges;
}
/**
 * Resolves a date-time range into {@code "datetimerange"} entries.
 *
 * <p>A part-of-day timex produces one entry per date value of the timex, combined with the
 * fixed part-of-day time range. Any other timex is expanded with
 * {@code TimexHelpers.expandDateTimeRange}; the date values of its start and end are paired
 * by position (extra values of the longer list are ignored) and combined with the start and
 * end time values.
 *
 * @param timex the timex to resolve
 * @param date  reference date
 * @return the resolved entries
 */
private static List<Resolution.Entry> resolveDateTimeRange(TimexProperty timex, LocalDateTime date) {
    List<Resolution.Entry> entries = new ArrayList<Resolution.Entry>();

    if (timex.getPartOfDay() != null) {
        List<String> dateValues = getDateValues(timex, date);
        Pair<String, String> timeRange = partOfDayTimeRange(timex);
        for (String dateValue : dateValues) {
            entries.add(new Resolution.Entry() {
                {
                    setTimex(timex.getTimexValue());
                    setType("datetimerange");
                    setStart(TimexHelpers.formatResolvedDateValue(dateValue, timeRange.getLeft()));
                    setEnd(TimexHelpers.formatResolvedDateValue(dateValue, timeRange.getRight()));
                }
            });
        }
        return entries;
    }

    TimexRange range = TimexHelpers.expandDateTimeRange(timex);
    List<String> startDateValues = getDateValues(range.getStart(), date);
    List<String> endDateValues = getDateValues(range.getEnd(), date);
    DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
    LocalTime defaultTime = LocalDateTime.MIN.toLocalTime();

    // Pair start and end values by position, stopping at the end of the shorter list.
    int pairCount = Math.min(startDateValues.size(), endDateValues.size());
    List<DateRange> dateRanges = new ArrayList<DateRange>();
    for (int index = 0; index < pairCount; index++) {
        String startText = startDateValues.get(index);
        String endText = endDateValues.get(index);
        dateRanges.add(new DateRange() {
            {
                setStart(LocalDateTime.of(LocalDate.parse(startText, formatter), defaultTime));
                setEnd(LocalDateTime.of(LocalDate.parse(endText, formatter), defaultTime));
            }
        });
    }

    for (DateRange dateRange : dateRanges) {
        entries.add(new Resolution.Entry() {
            {
                setTimex(timex.getTimexValue());
                setType("datetimerange");
                setStart(TimexHelpers.formatResolvedDateValue(dateRange.getStart().toLocalDate().toString(),
                        TimexValue.timeValue(range.getStart(), date)));
                setEnd(TimexHelpers.formatResolvedDateValue(dateRange.getEnd().toLocalDate().toString(),
                        TimexValue.timeValue(range.getEnd(), date)));
            }
        });
    }
    return entries;
}
}

Просмотреть файл

@ -0,0 +1,20 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
/**
 * Wraps a single {@link TimexProperty} that is parsed from a timex string at construction
 * time.
 */
public class TimexSet {

    private TimexProperty timex;

    /**
     * Parses {@code timex} into the wrapped {@link TimexProperty}.
     *
     * @param timex the timex string to parse
     */
    public TimexSet(String timex) {
        TimexProperty parsed = new TimexProperty(timex);
        this.timex = parsed;
    }

    public TimexProperty getTimex() {
        return this.timex;
    }

    public void setTimex(TimexProperty withTimex) {
        this.timex = withTimex;
    }
}

Просмотреть файл

@ -0,0 +1,41 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
/**
 * Time units, declared from the largest ({@link #Year}) to the smallest ({@link #Second}).
 */
public enum TimexUnit {

    /** Year unit. */
    Year,

    /** Month unit. */
    Month,

    /** Week unit. */
    Week,

    /** Day unit. */
    Day,

    /** Hour unit. */
    Hour,

    /** Minute unit. */
    Minute,

    /** Second unit. */
    Second
}

Просмотреть файл

@ -0,0 +1,75 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression;
import java.math.BigDecimal;
import java.time.LocalDateTime;
/**
 * Renders the fields of a {@link TimexProperty} as resolution value strings.
 */
public class TimexValue {

    private static final long SECONDS_PER_YEAR = 31536000L;

    private static final long SECONDS_PER_MONTH = 2592000L;

    private static final long SECONDS_PER_WEEK = 604800L;

    private static final long SECONDS_PER_DAY = 86400L;

    private static final long SECONDS_PER_HOUR = 3600L;

    private static final long SECONDS_PER_MINUTE = 60L;

    /**
     * Formats the date of a timex as {@code year-month-day}, using
     * {@code TimexDateHelpers.fixedFormatNumber} with widths 4, 2 and 2.
     *
     * @param timexProperty the timex to render
     * @return the date value, or an empty string when year, month or day of month is missing
     */
    public static String dateValue(TimexProperty timexProperty) {
        boolean complete = timexProperty.getYear() != null && timexProperty.getMonth() != null
                && timexProperty.getDayOfMonth() != null;
        if (!complete) {
            return "";
        }
        return String.format("%1$s-%2$s-%3$s", TimexDateHelpers.fixedFormatNumber(timexProperty.getYear(), 4),
                TimexDateHelpers.fixedFormatNumber(timexProperty.getMonth(), 2),
                TimexDateHelpers.fixedFormatNumber(timexProperty.getDayOfMonth(), 2));
    }

    /**
     * Formats the time of a timex as {@code hour:minute:second}, using
     * {@code TimexDateHelpers.fixedFormatNumber} with width 2 for each part.
     *
     * @param timexProperty the timex to render
     * @param date          reference date (not used by this method)
     * @return the time value, or an empty string when hour, minute or second is missing
     */
    public static String timeValue(TimexProperty timexProperty, LocalDateTime date) {
        boolean complete = timexProperty.getHour() != null && timexProperty.getMinute() != null
                && timexProperty.getSecond() != null;
        if (!complete) {
            return "";
        }
        return String.format("%1$s:%2$s:%3$s", TimexDateHelpers.fixedFormatNumber(timexProperty.getHour(), 2),
                TimexDateHelpers.fixedFormatNumber(timexProperty.getMinute(), 2),
                TimexDateHelpers.fixedFormatNumber(timexProperty.getSecond(), 2));
    }

    /**
     * Joins {@link #dateValue} and {@link #timeValue} with a single space.
     *
     * @param timexProperty the timex to render
     * @param date          reference date, forwarded to {@link #timeValue}
     * @return the combined value; either part may be empty
     */
    public static String datetimeValue(TimexProperty timexProperty, LocalDateTime date) {
        return String.format("%1$s %2$s", TimexValue.dateValue(timexProperty),
                TimexValue.timeValue(timexProperty, date));
    }

    /**
     * Converts the duration fields of a timex into a whole number of seconds, using fixed
     * factors: a year is 365 days and a month is 30 days. Fractional seconds are truncated.
     *
     * <p>The sum is computed exactly in {@link BigDecimal}, so totals larger than
     * {@code Integer.MAX_VALUE} seconds (roughly 68 years) are rendered correctly rather
     * than wrapping around.
     *
     * @param timexProperty the timex whose duration fields are summed; missing fields count
     *                      as zero
     * @return the total seconds as a plain decimal integer string
     */
    public static String durationValue(TimexProperty timexProperty) {
        BigDecimal duration = BigDecimal.ZERO;
        duration = addScaled(duration, timexProperty.getYears(), SECONDS_PER_YEAR);
        duration = addScaled(duration, timexProperty.getMonths(), SECONDS_PER_MONTH);
        duration = addScaled(duration, timexProperty.getWeeks(), SECONDS_PER_WEEK);
        duration = addScaled(duration, timexProperty.getDays(), SECONDS_PER_DAY);
        duration = addScaled(duration, timexProperty.getHours(), SECONDS_PER_HOUR);
        duration = addScaled(duration, timexProperty.getMinutes(), SECONDS_PER_MINUTE);
        duration = addScaled(duration, timexProperty.getSeconds(), 1L);

        // toBigInteger() drops the fraction (truncating toward zero) without narrowing to 32 bits.
        return duration.toBigInteger().toString();
    }

    /** Returns {@code total + amount * secondsPerUnit}, or {@code total} when {@code amount} is null. */
    private static BigDecimal addScaled(BigDecimal total, BigDecimal amount, long secondsPerUnit) {
        if (amount == null) {
            return total;
        }
        return total.add(amount.multiply(BigDecimal.valueOf(secondsPerUnit)));
    }
}

Просмотреть файл

@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression.english;
import java.util.HashMap;
import java.util.Map;
/**
 * English words and lookup tables used when rendering TIMEX expressions as text.
 */
public class TimexConstantsEnglish {

    public static final String EVERY = "every";

    public static final String NOW = "now";

    public static final String MIDNIGHT = "midnight";

    public static final String MIDDAY = "midday";

    public static final String TODAY = "today";

    public static final String TOMORROW = "tomorrow";

    public static final String YESTERDAY = "yesterday";

    public static final String WEEKEND = "weekend";

    public static final String TONIGHT = "tonight";

    public static final String THIS = "this";

    public static final String LAST = "last";

    public static final String NEXT = "next";

    /** Day names, Monday first (index 0) through Sunday (index 6). */
    public static final String[] DAYS = { "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
        "Sunday" };

    /** Month names, January first (index 0) through December (index 11). */
    public static final String[] MONTHS = { "January", "February", "March", "April", "May", "June", "July", "August",
        "September", "October", "November", "December", };

    /** Ordinal suffixes indexed by the last decimal digit of a number (0 -> "th", 1 -> "st", ...). */
    public static final String[] DATE_ABBREVIATION = { "th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th", };

    /** Spoken names for each hour of the day, indexed 0 to 23. */
    public static final String[] HOURS = { "midnight", "1AM", "2AM", "3AM", "4AM", "5AM", "6AM", "7AM", "8AM", "9AM",
        "10AM", "11AM", "midday", "1PM", "2PM", "3PM", "4PM", "5PM", "6PM", "7PM", "8PM", "9PM", "10PM", "11PM", };

    /** Season names keyed by their two-letter TIMEX code. */
    public static final Map<String, String> SEASONS = buildSeasons();

    /** Ordinal words for the week of a month, first week at index 0. */
    public static final String[] WEEKS = { "first", "second", "third", "fourth", };

    /** Part-of-day names keyed by their two-letter TIMEX code. */
    public static final Map<String, String> DAY_PARTS = buildDayParts();

    // Plain builders replace double-brace initialisation, which creates an anonymous HashMap
    // subclass per map.
    private static Map<String, String> buildSeasons() {
        Map<String, String> seasons = new HashMap<String, String>();
        seasons.put("SP", "spring");
        seasons.put("SU", "summer");
        seasons.put("FA", "fall");
        seasons.put("WI", "winter");
        return seasons;
    }

    private static Map<String, String> buildDayParts() {
        Map<String, String> dayParts = new HashMap<String, String>();
        dayParts.put("DT", "daytime");
        dayParts.put("NI", "night");
        dayParts.put("MO", "morning");
        dayParts.put("AF", "afternoon");
        dayParts.put("EV", "evening");
        return dayParts;
    }
}

Просмотреть файл

@ -0,0 +1,206 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression.english;
import com.microsoft.recognizers.datatypes.timex.expression.Constants;
import com.microsoft.recognizers.datatypes.timex.expression.TimexInference;
import com.microsoft.recognizers.datatypes.timex.expression.TimexProperty;
import com.microsoft.recognizers.datatypes.timex.expression.TimexSet;
import java.math.BigDecimal;
import java.util.HashSet;
/**
 * Renders a {@link TimexProperty} as English text using the vocabulary in
 * {@code TimexConstantsEnglish}.
 */
public class TimexConvertEnglish {

    /**
     * Converts a TIMEX to English text.
     *
     * <p>The TIMEX's own type set is used when it is non-empty; otherwise the types are inferred
     * with {@code TimexInference.infer}. The first matching type, in the order checked below,
     * decides which conversion is applied.
     *
     * @param timex the TIMEX to render
     * @return the English text, or an empty string when none of the handled types applies
     */
    public static String convertTimexToString(TimexProperty timex) {
        HashSet<String> types = timex.getTypes().size() != 0 ? timex.getTypes() : TimexInference.infer(timex);

        if (types.contains(Constants.TimexTypes.PRESENT)) {
            return TimexConstantsEnglish.NOW;
        }

        if (types.contains(Constants.TimexTypes.DATE_TIME_RANGE)) {
            return TimexConvertEnglish.convertDateTimeRange(timex);
        }

        if (types.contains(Constants.TimexTypes.DATE_RANGE)) {
            return TimexConvertEnglish.convertDateRange(timex);
        }

        if (types.contains(Constants.TimexTypes.DURATION)) {
            return TimexConvertEnglish.convertDuration(timex);
        }

        if (types.contains(Constants.TimexTypes.TIME_RANGE)) {
            return TimexConvertEnglish.convertTimeRange(timex);
        }

        // TODO: where appropriate, delegate most of the formatting to a locale-aware API
        // (this note originally referred to Date.toLocaleString(options)).
        if (types.contains(Constants.TimexTypes.DATE_TIME)) {
            return TimexConvertEnglish.convertDateTime(timex);
        }

        if (types.contains(Constants.TimexTypes.DATE)) {
            return TimexConvertEnglish.convertDate(timex);
        }

        if (types.contains(Constants.TimexTypes.TIME)) {
            return TimexConvertEnglish.convertTime(timex);
        }

        return "";
    }

    /**
     * Converts a TIMEX set to English text of the form "every ...".
     *
     * @param timexSet the set whose underlying TIMEX is rendered
     * @return "every" followed by the duration text (a count of one is omitted) when the TIMEX's
     *         own types contain DURATION, otherwise "every" followed by
     *         {@link #convertTimexToString(TimexProperty)}
     */
    public static String convertTimexSetToString(TimexSet timexSet) {
        TimexProperty timex = timexSet.getTimex();
        if (timex.getTypes().contains(Constants.TimexTypes.DURATION)) {
            return String.format("%1$s %2$s", TimexConstantsEnglish.EVERY,
                    TimexConvertEnglish.convertTimexDurationToString(timex, false));
        } else {
            return String.format("%1$s %2$s", TimexConstantsEnglish.EVERY,
                    TimexConvertEnglish.convertTimexToString(timex));
        }
    }

    /**
     * Converts the time of day of a TIMEX to 12-hour English text.
     *
     * <p>00:00:00 becomes "midnight" and 12:00:00 becomes "midday". Otherwise the result is the
     * 12-hour hour, then the two-digit minute (omitted when minute and second are both zero),
     * then the two-digit second (omitted when zero), each introduced by
     * {@code Constants.TIME_TIMEX_CONNECTOR}, followed by {@code Constants.AM} or
     * {@code Constants.PM}.
     *
     * @param timex the TIMEX to read; hour, minute and second are unboxed, so all three must be set
     * @return the English time text
     */
    public static String convertTime(TimexProperty timex) {
        if (timex.getHour() == 0 && timex.getMinute() == 0 && timex.getSecond() == 0) {
            return TimexConstantsEnglish.MIDNIGHT;
        }

        if (timex.getHour() == 12 && timex.getMinute() == 0 && timex.getSecond() == 0) {
            return TimexConstantsEnglish.MIDDAY;
        }

        String hour = (timex.getHour() == 0) ? "12"
                : (timex.getHour() > 12) ? String.valueOf(timex.getHour() - 12) : String.valueOf(timex.getHour());
        // Left-pad to two characters with spaces, then turn the padding into zeros.
        String minute = (timex.getMinute() == 0 && timex.getSecond() == 0) ? ""
                : Constants.TIME_TIMEX_CONNECTOR
                        + String.format("%1$2s", String.valueOf(timex.getMinute())).replace(' ', '0');
        String second = (timex.getSecond() == 0) ? ""
                : Constants.TIME_TIMEX_CONNECTOR
                        + String.format("%1$2s", String.valueOf(timex.getSecond())).replace(' ', '0');
        String period = timex.getHour() < 12 ? Constants.AM : Constants.PM;

        return String.format("%1$s%2$s%3$s%4$s", hour, minute, second, period);
    }

    /**
     * Converts the date of a TIMEX to English text.
     *
     * @param timex the TIMEX to read
     * @return the day name when a day of week is set; otherwise the ordinal day of month
     *         (for example "21st"), followed by the month name and the year when those are set
     */
    public static String convertDate(TimexProperty timex) {
        if (timex.getDayOfWeek() != null) {
            return TimexConstantsEnglish.DAYS[timex.getDayOfWeek() - 1];
        }

        String date = String.valueOf(timex.getDayOfMonth());
        String abbreviation = ordinalSuffix(date);

        if (timex.getMonth() != null) {
            String month = TimexConstantsEnglish.MONTHS[timex.getMonth() - 1];
            if (timex.getYear() != null) {
                return String.format("%1$s%2$s %3$s %4$s", date, abbreviation, month, timex.getYear()).trim();
            }

            return String.format("%1$s%2$s %3$s", date, abbreviation, month);
        }

        return date.concat(abbreviation);
    }

    /**
     * Picks the English ordinal suffix for a number given as decimal text.
     *
     * <p>The suffix normally depends on the last digit only, but 11, 12 and 13 take "th" rather
     * than "st", "nd" and "rd".
     */
    private static String ordinalSuffix(String number) {
        if (number.endsWith("11") || number.endsWith("12") || number.endsWith("13")) {
            // Index 0 is the suffix used for a trailing zero, i.e. "th".
            return TimexConstantsEnglish.DATE_ABBREVIATION[0];
        }

        int lastDigit = Integer.parseInt(String.valueOf(number.charAt(number.length() - 1)));
        return TimexConstantsEnglish.DATE_ABBREVIATION[lastDigit];
    }

    /**
     * Renders one duration component.
     *
     * <p>An amount of exactly one yields the bare unit name, prefixed with "1 " when
     * {@code includeSingleCount} is set. Any other amount, including fractional ones such as
     * 1.5, yields the amount, the unit name and {@code Constants.TIME_DURATION_UNIT}.
     */
    private static String convertDurationPropertyToString(BigDecimal value, String property,
            boolean includeSingleCount) {
        // compareTo ignores scale, so 1, 1.0 and 1.00 all count as a single unit, while 1.5
        // (which intValue() would truncate to 1) does not.
        if (value.compareTo(BigDecimal.ONE) == 0) {
            return includeSingleCount ? "1 " + property : property;
        } else {
            return String.format("%1$s %2$s%3$s", value, property, Constants.TIME_DURATION_UNIT);
        }
    }

    /**
     * Renders every duration component that is set, from years down to seconds, and
     * concatenates the pieces without a separator.
     */
    private static String convertTimexDurationToString(TimexProperty timex, boolean includeSingleCount) {
        String result = "";
        if (timex.getYears() != null) {
            result += TimexConvertEnglish.convertDurationPropertyToString(timex.getYears(), Constants.YEAR_UNIT,
                    includeSingleCount);
        }

        if (timex.getMonths() != null) {
            result += TimexConvertEnglish.convertDurationPropertyToString(timex.getMonths(), Constants.MONTH_UNIT,
                    includeSingleCount);
        }

        if (timex.getWeeks() != null) {
            result += TimexConvertEnglish.convertDurationPropertyToString(timex.getWeeks(), Constants.WEEK_UNIT,
                    includeSingleCount);
        }

        if (timex.getDays() != null) {
            result += TimexConvertEnglish.convertDurationPropertyToString(timex.getDays(), Constants.DAY_UNIT,
                    includeSingleCount);
        }

        if (timex.getHours() != null) {
            result += TimexConvertEnglish.convertDurationPropertyToString(timex.getHours(), Constants.HOUR_UNIT,
                    includeSingleCount);
        }

        if (timex.getMinutes() != null) {
            result += TimexConvertEnglish.convertDurationPropertyToString(timex.getMinutes(), Constants.MINUTE_UNIT,
                    includeSingleCount);
        }

        if (timex.getSeconds() != null) {
            result += TimexConvertEnglish.convertDurationPropertyToString(timex.getSeconds(), Constants.SECOND_UNIT,
                    includeSingleCount);
        }

        return result;
    }

    /** Renders a duration, spelling out a count of one (for example "1 hour"). */
    private static String convertDuration(TimexProperty timex) {
        return TimexConvertEnglish.convertTimexDurationToString(timex, true);
    }

    /**
     * Renders a date range as "Nth week of Month", "Month Year" or "season Year", depending on
     * which fields are set.
     *
     * @throws UnsupportedOperationException when both a week of year and a weekend flag are set
     */
    private static String convertDateRange(TimexProperty timex) {
        String season = (timex.getSeason() != null) ? TimexConstantsEnglish.SEASONS.get(timex.getSeason())
                : "";

        String year = (timex.getYear() != null) ? timex.getYear().toString() : "";

        if (timex.getWeekOfYear() != null && timex.getWeekend() != null) {
            throw new UnsupportedOperationException(
                    "Converting a week-of-year weekend range to text is not supported");
        }

        if (timex.getMonth() != null) {
            String month = TimexConstantsEnglish.MONTHS[timex.getMonth() - 1];
            if (timex.getWeekOfMonth() != null) {
                return String.format("%1$s week of %2$s", TimexConstantsEnglish.WEEKS[timex.getWeekOfMonth() - 1],
                        month);
            } else {
                return String.format("%1$s %2$s", month, year).trim();
            }
        }

        return String.format("%1$s %2$s", season, year).trim();
    }

    /** Looks up the English name of the TIMEX's part of day; {@code null} when it is not in the table. */
    private static String convertTimeRange(TimexProperty timex) {
        return TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay());
    }

    /** Renders a date-time as the time text followed by the date text. */
    private static String convertDateTime(TimexProperty timex) {
        return String.format("%1$s %2$s", TimexConvertEnglish.convertTime(timex),
                TimexConvertEnglish.convertDate(timex));
    }

    /**
     * Renders a date-time range as the date text followed by the part of day.
     *
     * @return the text when the TIMEX's own types contain TIME_RANGE, otherwise an empty string
     */
    private static String convertDateTimeRange(TimexProperty timex) {
        if (timex.getTypes().contains(Constants.TimexTypes.TIME_RANGE)) {
            return String.format("%1$s %2$s", TimexConvertEnglish.convertDate(timex),
                    TimexConvertEnglish.convertTimeRange(timex));
        }

        // date + time + duration
        // - OR -
        // date + duration
        return "";
    }
}

Просмотреть файл

@ -0,0 +1,184 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.datatypes.timex.expression.english;
import com.microsoft.recognizers.datatypes.timex.expression.Constants;
import com.microsoft.recognizers.datatypes.timex.expression.TimexConvert;
import com.microsoft.recognizers.datatypes.timex.expression.TimexDateHelpers;
import com.microsoft.recognizers.datatypes.timex.expression.TimexInference;
import com.microsoft.recognizers.datatypes.timex.expression.TimexProperty;
import java.time.DayOfWeek;
import java.time.LocalDateTime;
import java.util.HashSet;
/**
 * Renders a {@link TimexProperty} as English text relative to a reference date, producing
 * phrases such as "today", "next Monday" or "last month" where possible.
 */
public class TimexRelativeConvertEnglish {

    /**
     * Converts a TIMEX to English text relative to {@code date}.
     *
     * <p>The TIMEX's own type set is used when it is non-empty; otherwise the types are inferred
     * with {@code TimexInference.infer}. Date-time ranges, date ranges, date-times and dates
     * (checked in that order) get relative wording; everything else is delegated to
     * {@code TimexConvert.convertTimexToString}.
     *
     * @param timex the TIMEX to render
     * @param date  the reference date that "today", "this week" and so on are measured against
     * @return the English text
     */
    public static String convertTimexToStringRelative(TimexProperty timex, LocalDateTime date) {
        HashSet<String> types = timex.getTypes().size() != 0 ? timex.getTypes() : TimexInference.infer(timex);

        if (types.contains(Constants.TimexTypes.DATE_TIME_RANGE)) {
            return TimexRelativeConvertEnglish.convertDateTimeRange(timex, date);
        }

        if (types.contains(Constants.TimexTypes.DATE_RANGE)) {
            return TimexRelativeConvertEnglish.convertDateRange(timex, date);
        }

        if (types.contains(Constants.TimexTypes.DATE_TIME)) {
            return TimexRelativeConvertEnglish.convertDateTime(timex, date);
        }

        if (types.contains(Constants.TimexTypes.DATE)) {
            return TimexRelativeConvertEnglish.convertDate(timex, date);
        }

        return TimexConvert.convertTimexToString(timex);
    }

    /** Maps a {@link DayOfWeek} to its English name. */
    private static String getDateDay(DayOfWeek day) {
        // DayOfWeek.getValue() runs from 1 (Monday) to 7 (Sunday), matching the Monday-first
        // DAYS table once shifted to a zero-based index.
        return TimexConstantsEnglish.DAYS[day.getValue() - 1];
    }

    /**
     * Renders a fully specified date as "today", "tomorrow", "yesterday" or "this/next/last
     * weekday" when it falls close to {@code date}; otherwise falls back to the absolute form.
     */
    private static String convertDate(TimexProperty timex, LocalDateTime date) {
        if (timex.getYear() != null && timex.getMonth() != null && timex.getDayOfMonth() != null) {
            LocalDateTime timexDate = LocalDateTime.of(timex.getYear(), timex.getMonth(), timex.getDayOfMonth(), 0, 0);

            if (TimexDateHelpers.datePartEquals(timexDate, date)) {
                return TimexConstantsEnglish.TODAY;
            }

            LocalDateTime tomorrow = TimexDateHelpers.tomorrow(date);
            if (TimexDateHelpers.datePartEquals(timexDate, tomorrow)) {
                return TimexConstantsEnglish.TOMORROW;
            }

            LocalDateTime yesterday = TimexDateHelpers.yesterday(date);
            if (TimexDateHelpers.datePartEquals(timexDate, yesterday)) {
                return TimexConstantsEnglish.YESTERDAY;
            }

            if (TimexDateHelpers.isThisWeek(timexDate, date)) {
                return String.format("%1$s %2$s", TimexConstantsEnglish.THIS,
                        TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()));
            }

            if (TimexDateHelpers.isNextWeek(timexDate, date)) {
                return String.format("%1$s %2$s", TimexConstantsEnglish.NEXT,
                        TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()));
            }

            if (TimexDateHelpers.isLastWeek(timexDate, date)) {
                return String.format("%1$s %2$s", TimexConstantsEnglish.LAST,
                        TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()));
            }
        }

        return TimexConvertEnglish.convertDate(timex);
    }

    /** Renders a date-time as the relative date text followed by the time text. */
    private static String convertDateTime(TimexProperty timex, LocalDateTime date) {
        return String.format("%1$s %2$s", TimexRelativeConvertEnglish.convertDate(timex, date),
                TimexConvertEnglish.convertTime(timex));
    }

    /**
     * Renders a date range relative to {@code date}.
     *
     * <p>Within the reference year the week, then the month, is compared against the reference
     * date to produce "this/last/next week|weekend|month"; failing that, the season or the year
     * is used. For the years directly after and before the reference year only the season or
     * the year is used.
     *
     * @return the relative text, or an empty string when the TIMEX has no year or the year is
     *         more than one away from the reference year
     */
    private static String convertDateRange(TimexProperty timex, LocalDateTime date) {
        if (timex.getYear() == null) {
            return "";
        }

        int year = date.getYear();
        if (timex.getYear() == year) {
            if (timex.getWeekOfYear() != null) {
                // Unbox both sides: comparing two Integer objects with == tests reference
                // identity rather than numeric equality.
                int thisWeek = TimexDateHelpers.weekOfYear(date);
                int timexWeek = timex.getWeekOfYear();

                if (thisWeek == timexWeek) {
                    return weekOrWeekend(TimexConstantsEnglish.THIS, timex);
                }

                if (thisWeek == timexWeek + 1) {
                    return weekOrWeekend(TimexConstantsEnglish.LAST, timex);
                }

                if (thisWeek == timexWeek - 1) {
                    return weekOrWeekend(TimexConstantsEnglish.NEXT, timex);
                }
            }

            if (timex.getMonth() != null) {
                if (timex.getMonth() == date.getMonthValue()) {
                    return String.format("%1$s %2$s", TimexConstantsEnglish.THIS, Constants.MONTH_UNIT);
                }

                if (timex.getMonth() == date.getMonthValue() + 1) {
                    return String.format("%1$s %2$s", TimexConstantsEnglish.NEXT, Constants.MONTH_UNIT);
                }

                if (timex.getMonth() == date.getMonthValue() - 1) {
                    return String.format("%1$s %2$s", TimexConstantsEnglish.LAST, Constants.MONTH_UNIT);
                }
            }

            return seasonOrYear(TimexConstantsEnglish.THIS, timex);
        }

        if (timex.getYear() == year + 1) {
            return seasonOrYear(TimexConstantsEnglish.NEXT, timex);
        }

        if (timex.getYear() == year - 1) {
            return seasonOrYear(TimexConstantsEnglish.LAST, timex);
        }

        return "";
    }

    /** Builds "prefix weekend" when the TIMEX has a weekend flag set, otherwise "prefix week". */
    private static String weekOrWeekend(String prefix, TimexProperty timex) {
        String unit = timex.getWeekend() != null ? TimexConstantsEnglish.WEEKEND : Constants.WEEK_UNIT;
        return String.format("%1$s %2$s", prefix, unit);
    }

    /** Builds "prefix season" when the TIMEX has a season, otherwise "prefix year". */
    private static String seasonOrYear(String prefix, TimexProperty timex) {
        String unit = timex.getSeason() != null ? TimexConstantsEnglish.SEASONS.get(timex.getSeason())
                : Constants.YEAR_UNIT;
        return String.format("%1$s %2$s", prefix, unit);
    }

    /**
     * Renders a date plus part of day relative to {@code date}, for example "tonight",
     * "this morning", "tomorrow evening" or "last Tuesday afternoon".
     *
     * @return the relative text, or an empty string when the date is not fully specified, no
     *         part of day is set, or the date is not close enough to {@code date}
     */
    private static String convertDateTimeRange(TimexProperty timex, LocalDateTime date) {
        if (timex.getYear() == null || timex.getMonth() == null || timex.getDayOfMonth() == null) {
            return "";
        }

        LocalDateTime timexDate = LocalDateTime.of(timex.getYear(), timex.getMonth(), timex.getDayOfMonth(), 0, 0);
        if (timex.getPartOfDay() == null) {
            return "";
        }

        if (TimexDateHelpers.datePartEquals(timexDate, date)) {
            if (timex.getPartOfDay().equals(Constants.TIMEX_NIGHT)) {
                return TimexConstantsEnglish.TONIGHT;
            } else {
                return String.format("%1$s %2$s", TimexConstantsEnglish.THIS,
                        TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
            }
        }

        LocalDateTime tomorrow = TimexDateHelpers.tomorrow(date);
        if (TimexDateHelpers.datePartEquals(timexDate, tomorrow)) {
            return String.format("%1$s %2$s", TimexConstantsEnglish.TOMORROW,
                    TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
        }

        LocalDateTime yesterday = TimexDateHelpers.yesterday(date);
        if (TimexDateHelpers.datePartEquals(timexDate, yesterday)) {
            return String.format("%1$s %2$s", TimexConstantsEnglish.YESTERDAY,
                    TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
        }

        if (TimexDateHelpers.isNextWeek(timexDate, date)) {
            return String.format("%1$s %2$s %3$s", TimexConstantsEnglish.NEXT,
                    TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()),
                    TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
        }

        if (TimexDateHelpers.isLastWeek(timexDate, date)) {
            // Three placeholders, matching the next-week branch; with only two the part of
            // day was silently dropped from the output.
            return String.format("%1$s %2$s %3$s", TimexConstantsEnglish.LAST,
                    TimexRelativeConvertEnglish.getDateDay(timexDate.getDayOfWeek()),
                    TimexConstantsEnglish.DAY_PARTS.get(timex.getPartOfDay()));
        }

        return "";
    }
}

Просмотреть файл

@ -28,7 +28,7 @@ public class NumberRangeExtractor extends BaseNumberRangeExtractor {
// less than ... more than ...
builder.put(RegExpUtility.getSafeRegExp(EnglishNumeric.TwoNumberRangeRegex3, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.TWONUM);
// from ... to/~/- ...
builder.put(Pattern.compile(EnglishNumeric.TwoNumberRangeRegex4, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.TWONUMTILL);
builder.put(RegExpUtility.getSafeLookbehindRegExp(EnglishNumeric.TwoNumberRangeRegex4, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.TWONUMTILL);
// more/greater/higher than ...
builder.put(Pattern.compile(EnglishNumeric.OneNumberRangeMoreRegex1, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.MORE);
// 30 and/or greater/higher
@ -38,13 +38,13 @@ public class NumberRangeExtractor extends BaseNumberRangeExtractor {
// 30 and/or less/smaller/lower
builder.put(Pattern.compile(EnglishNumeric.OneNumberRangeLessRegex2, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.LESS);
// equal to ...
builder.put(Pattern.compile(EnglishNumeric.OneNumberRangeEqualRegex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.EQUAL);
builder.put(RegExpUtility.getSafeLookbehindRegExp(EnglishNumeric.OneNumberRangeEqualRegex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS), NumberRangeConstants.EQUAL);
// equal to 30 or more than, larger than 30 or equal to ...
builder.put(RegExpUtility.getSafeRegExp(EnglishNumeric.OneNumberRangeMoreSeparateRegex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS),
NumberRangeConstants.MORE);
NumberRangeConstants.MORE);
// equal to 30 or less, smaller than 30 or equal ...
builder.put(RegExpUtility.getSafeRegExp(EnglishNumeric.OneNumberRangeLessSeparateRegex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS),
NumberRangeConstants.LESS);
NumberRangeConstants.LESS);
this.regexes = Collections.unmodifiableMap(builder);
}

Просмотреть файл

@ -51,10 +51,11 @@ public abstract class AbstractNumberModel implements IModel {
SortedMap<String, Object> sortedMap = new TreeMap<String, Object>();
sortedMap.put(ResolutionKey.Value, o.getResolutionStr());
// We decreased the end property by 1 in order to keep parity with other platforms (C#/JS).
return new ModelResult(
o.getText(),
o.getStart(),
o.getStart() + o.getLength(),
o.getStart() + o.getLength() - 1,
getModelTypeName(),
sortedMap
);

Просмотреть файл

@ -37,11 +37,15 @@ public final class NumberFormatUtility {
// EXPONENTIAL_AT: [-5, 15] });
// For small positive decimal places. E.g.: 0,000015 or 0,0000015 -> 1.5E-05 or 1.5E-06
if (doubleValue > 0 && doubleValue != Math.round(doubleValue) && doubleValue < 1E-4) {
result = doubleValue.toString();
} else {
BigDecimal bc = new BigDecimal(doubleValue, new MathContext(15, RoundingMode.HALF_EVEN));
result = bc.toString();
try {
if (doubleValue > 0 && doubleValue != Math.round(doubleValue) && doubleValue < 1E-4) {
result = doubleValue.toString();
} else {
BigDecimal bc = new BigDecimal(doubleValue, new MathContext(15, RoundingMode.HALF_EVEN));
result = bc.toString();
}
} catch (NumberFormatException ex) {
return value.toString();
}
result = result.replace('e', 'E');

Просмотреть файл

@ -25,7 +25,7 @@ public class EnglishNumeric {
public static final Boolean MultiDecimalSeparatorCulture = true;
public static final String RoundNumberIntegerRegex = "(?:hundred|thousand|million|billion|trillion|lakh|crore)";
public static final String RoundNumberIntegerRegex = "(?:hundred|thousand|million|mln|billion|bln|trillion|tln|lakh|crore)s?";
public static final String ZeroToNineIntegerRegex = "(?:three|seven|eight|four|five|zero|nine|one|two|six)";
@ -319,7 +319,7 @@ public class EnglishNumeric {
public static final String HalfADozenRegex = "half\\s+a\\s+dozen";
public static final String DigitalNumberRegex = "((?<=\\b)(hundred|thousand|[mb]illion|trillion|lakh|crore|dozen(s)?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))"
public static final String DigitalNumberRegex = "((?<=\\b)(hundred|thousand|[mb]illion|trillion|[mbt]ln|lakh|crore|dozen(s)?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))"
.replace("{BaseNumbers.MultiplierLookupRegex}", BaseNumbers.MultiplierLookupRegex);
public static final ImmutableMap<String, Long> CardinalNumberMap = ImmutableMap.<String, Long>builder()
@ -358,8 +358,11 @@ public class EnglishNumeric {
.put("hundred", 100L)
.put("thousand", 1000L)
.put("million", 1000000L)
.put("mln", 1000000L)
.put("billion", 1000000000L)
.put("bln", 1000000000L)
.put("trillion", 1000000000000L)
.put("tln", 1000000000000L)
.put("lakh", 100000L)
.put("crore", 10000000L)
.build();
@ -439,8 +442,11 @@ public class EnglishNumeric {
.put("hundred", 100L)
.put("thousand", 1000L)
.put("million", 1000000L)
.put("mln", 1000000L)
.put("billion", 1000000000L)
.put("bln", 1000000000L)
.put("trillion", 1000000000000L)
.put("tln", 1000000000000L)
.put("lakh", 100000L)
.put("crore", 10000000L)
.put("hundredth", 100L)

Просмотреть файл

@ -25,11 +25,11 @@ public class GermanNumeric {
public static final Boolean MultiDecimalSeparatorCulture = false;
public static final String ZeroToNineIntegerRegex = "(drei|sieben|acht|vier|fuenf|fünf|null|neun|eins|(ein(?!($|\\.|,|!|\\?)))|eine|einer|einen|zwei|zwo|sechs)";
public static final String ZeroToNineIntegerRegex = "(drei|sieben|acht|vier|fuenf|fünf|null|neun|eins|(ein(?!($|\\.|,|!|\\?)))|eine[rn]?|zwei|zwo|sechs)";
public static final String RoundNumberIntegerRegex = "(hundert|einhundert|tausend|(\\s*million\\s*)|(\\s*millionen\\s*)|(\\s*mio\\s*)|(\\s*milliarde\\s*)|(\\s*milliarden\\s*)|(\\s*mrd\\s*)|(\\s*billion\\s*)|(\\s*billionen\\s*))";
public static final String RoundNumberIntegerRegex = "((ein)?hundert|tausend|(\\s*(million(en)?|mio|milliarden?|mrd|billion(en)?)\\s*))";
public static final String AnIntRegex = "(eine|ein)(?=\\s)";
public static final String AnIntRegex = "(eine?)(?=\\s)";
public static final String TenToNineteenIntegerRegex = "(siebzehn|dreizehn|vierzehn|achtzehn|neunzehn|fuenfzehn|sechzehn|elf|zwoelf|zwölf|zehn)";
@ -60,7 +60,7 @@ public class GermanNumeric {
public static final String PlaceHolderDefault = "\\D|\\b";
public static String NumbersWithPlaceHolder(String placeholder) {
return "(((?<!\\d+\\s*)-\\s*)|(?<=\\b))\\d+(?!(\\,\\d+[a-zA-Z]))(?={placeholder})"
return "(((?<!\\d+(\\s*(K|k|MM?|mil|G|T|B|b))?\\s*)-\\s*)|(?<=\\b))\\d+(?!(,\\d+[a-zA-Z]))(?={placeholder})"
.replace("{placeholder}", placeholder);
}
@ -80,6 +80,8 @@ public class GermanNumeric {
public static final String RoundNumberOrdinalRegex = "(hundertst(er|es|en|el|e)?|tausendst(er|es|en|el|e)?|millionst(er|es|en|el|e)?|milliardst(er|es|en|el|e)?|billionst(er|es|en|el|e)?)";
public static final String RelativeOrdinalRegex = "(?<relativeOrdinal>(ante)?penultimate|letzter|nächster|vorheriger|aktueller)";
public static final String BasicOrdinalRegex = "(zuerst|erst(er|es|en|e)|zweit(er|es|en|e)?|dritt(er|es|en|el|e)?|viert(er|es|en|el|e)?|fünft(er|es|en|el|e)?|fuenft(er|es|en|el|e)?|sechst(er|es|en|el|e)?|siebt(er|es|en|el|e)?|acht(er|es|en|el|e)?|neunt(er|es|en|el|e)?|zehnt(er|es|en|el|e)?|elft(er|es|en|el|e)?|zwölft(er|es|en|el|e)?|zwoelft(er|es|en|el|e)?|dreizehnt(er|es|en|el|e)?|vierzehnt(er|es|en|el|e)?|fünfzehnt(er|es|en|el|e)?|fuenfzehnt(er|es|en|el|e)?|sechzehnt(er|es|en|el|e)?|siebzehnt(er|es|en|el|e)?|achtzehnt(er|es|en|el|e)?|neunzehnt(er|es|en|el|e)?|zwanzigst(er|es|en|el|e)?|dreißigst(er|es|en|el|e)?|vierziegt(er|es|en|el|e)?|fünfzigst(er|es|en|el|e)?|fuenfzigst(er|es|en|el|e)?|sechzigst(er|es|en|el|e)?|siebzigst(er|es|en|el|e)?|achtzigst(er|es|en|el|e)?|neunzigst(er|es|en|el|e)?)";
public static final String SuffixBasicOrdinalRegex = "({BasicOrdinalRegex}|({ZeroToNineIntegerRegex}(und|\\s){BasicOrdinalRegex}))"
@ -166,12 +168,107 @@ public class GermanNumeric {
public static final String DoubleAllFloatRegex = "((?<=\\b){AllFloatRegex}(?=\\b))"
.replace("{AllFloatRegex}", AllFloatRegex);
public static final String ConnectorRegex = "(?<spacer>und)";
public static final String NumberWithSuffixPercentage = "(?<!%)({BaseNumbers.NumberReplaceToken})(\\s*)(%(?!{BaseNumbers.NumberReplaceToken})|prozent(punkte)?\\b)"
.replace("{BaseNumbers.NumberReplaceToken}", BaseNumbers.NumberReplaceToken);
public static final String NumberWithPrefixPercentage = "(Prozent)(\\s*)({BaseNumbers.NumberReplaceToken})"
public static final String NumberWithPrefixPercentage = "(prozent)(\\s*)({BaseNumbers.NumberReplaceToken})"
.replace("{BaseNumbers.NumberReplaceToken}", BaseNumbers.NumberReplaceToken);
public static final String TillRegex = "(bis(\\s+zu)?|--|-|—|——|~|–)";
public static final String MoreRegex = "(?:(größer|höher|mehr)(\\s+als)?|über|darüber(hinaus)?|(?<!<|=)>)";
public static final String LessRegex = "(?:(weniger|winziger|kleiner|wenig)(\\s+als)?|darunter|unter|(?<!>|=)<)";
public static final String EqualRegex = "(gleich(\\s+(als|zu))?|(?<!<|>)=)";
public static final String MoreOrEqualPrefix = "((nicht\\s+{LessRegex})|(als\\s+letzte(r)?))"
.replace("{LessRegex}", LessRegex);
public static final String MoreOrEqual = "(?:({MoreRegex}\\s+(oder)?\\s+{EqualRegex})|({EqualRegex}\\s+(oder)?\\s+{MoreRegex})|{MoreOrEqualPrefix}(\\s+(oder)?\\s+{EqualRegex})?|({EqualRegex}\\s+(oder)?\\s+)?{MoreOrEqualPrefix}|>\\s*=|≥)"
.replace("{MoreRegex}", MoreRegex)
.replace("{EqualRegex}", EqualRegex)
.replace("{LessRegex}", LessRegex)
.replace("{MoreOrEqualPrefix}", MoreOrEqualPrefix);
public static final String MoreOrEqualSuffix = "((und|oder)\\s+(((mehr|größer|höher)((?!\\s+als)|(\\s+als(?!(\\s*\\d+)))))|((über|darüber)(?!\\s+als))))";
public static final String LessOrEqualPrefix = "((nicht\\s+{MoreRegex})|(at\\s+viele)|(bis\\s+zu))"
.replace("{MoreRegex}", MoreRegex);
public static final String LessOrEqual = "(({LessRegex}\\s+(oder)?\\s+{EqualRegex})|({EqualRegex}\\s+(oder)?\\s+{LessRegex})|{LessOrEqualPrefix}(\\s+(oder)?\\s+{EqualRegex})?|({EqualRegex}\\s+(oder)?\\s+)?{LessOrEqualPrefix}|<\\s*=|≤)"
.replace("{LessRegex}", LessRegex)
.replace("{EqualRegex}", EqualRegex)
.replace("{MoreRegex}", MoreRegex)
.replace("{LessOrEqualPrefix}", LessOrEqualPrefix);
public static final String LessOrEqualSuffix = "((und|oder)\\s+(weniger|geringer|kleiner|winziger)((?!\\s+als)|(\\s+als(?!(\\s*\\d+)))))";
public static final String NumberSplitMark = "(?![,.](?!\\d+))";
public static final String MoreRegexNoNumberSucceed = "((größer|mehr|höhrer|breiter)((?!\\s+als)|\\s+(als(?!(\\s*\\d+))))|((dar)?über)(?!(\\s*\\d+)))";
public static final String LessRegexNoNumberSucceed = "((kleiner|weniger|winziger)((?!\\s+als)|\\s+(als(?!(\\s*\\d+))))|((dar)?unter)(?!(\\s*\\d+)))";
public static final String EqualRegexNoNumberSucceed = "(gleich(s|ing)?((?!\\s+(zu|als))|(\\s+(zu|als)(?!(\\s*\\d+)))))";
public static final String OneNumberRangeMoreRegex1 = "({MoreOrEqual}|{MoreRegex})\\s*(der\\s+)?(?<number1>({NumberSplitMark}.)+)"
.replace("{MoreOrEqual}", MoreOrEqual)
.replace("{MoreRegex}", MoreRegex)
.replace("{NumberSplitMark}", NumberSplitMark);
public static final String OneNumberRangeMoreRegex2 = "(?<number1>({NumberSplitMark}.)+)\\s*{MoreOrEqualSuffix}"
.replace("{MoreOrEqualSuffix}", MoreOrEqualSuffix)
.replace("{NumberSplitMark}", NumberSplitMark);
public static final String OneNumberRangeMoreSeparateRegex = "({EqualRegex}\\s+(?<number1>({NumberSplitMark}.)+)(\\s+or\\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\\s+(?<number1>({NumberSplitMark}.)+)(\\s+oder\\s+){EqualRegexNoNumberSucceed})"
.replace("{EqualRegex}", EqualRegex)
.replace("{MoreRegex}", MoreRegex)
.replace("{EqualRegexNoNumberSucceed}", EqualRegexNoNumberSucceed)
.replace("{MoreRegexNoNumberSucceed}", MoreRegexNoNumberSucceed)
.replace("{NumberSplitMark}", NumberSplitMark);
public static final String OneNumberRangeLessRegex1 = "({LessOrEqual}|{LessRegex})\\s*(the\\s+)?(?<number2>({NumberSplitMark}.)+)"
.replace("{LessOrEqual}", LessOrEqual)
.replace("{LessRegex}", LessRegex)
.replace("{NumberSplitMark}", NumberSplitMark);
public static final String OneNumberRangeLessRegex2 = "(?<number2>({NumberSplitMark}.)+)\\s*{LessOrEqualSuffix}"
.replace("{LessOrEqualSuffix}", LessOrEqualSuffix)
.replace("{NumberSplitMark}", NumberSplitMark);
public static final String OneNumberRangeLessSeparateRegex = "({EqualRegex}\\s+(?<number1>({NumberSplitMark}.)+)(\\s+or\\s+){LessRegexNoNumberSucceed})|({LessRegex}\\s+(?<number1>({NumberSplitMark}.)+)(\\s+oder\\s+){EqualRegexNoNumberSucceed})"
.replace("{EqualRegex}", EqualRegex)
.replace("{LessRegex}", LessRegex)
.replace("{EqualRegexNoNumberSucceed}", EqualRegexNoNumberSucceed)
.replace("{LessRegexNoNumberSucceed}", LessRegexNoNumberSucceed)
.replace("{NumberSplitMark}", NumberSplitMark);
public static final String OneNumberRangeEqualRegex = "{EqualRegex}\\s*(the\\s+)?(?<number1>({NumberSplitMark}.)+)"
.replace("{EqualRegex}", EqualRegex)
.replace("{NumberSplitMark}", NumberSplitMark);
public static final String TwoNumberRangeRegex1 = "zwischen\\s*(der\\s+)?(?<number1>({NumberSplitMark}.)+)\\s*und\\s*(der\\s+)?(?<number2>({NumberSplitMark}.)+)"
.replace("{NumberSplitMark}", NumberSplitMark);
public static final String TwoNumberRangeRegex2 = "({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(und|aber|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"
.replace("{OneNumberRangeMoreRegex1}", OneNumberRangeMoreRegex1)
.replace("{OneNumberRangeMoreRegex2}", OneNumberRangeMoreRegex2)
.replace("{OneNumberRangeLessRegex1}", OneNumberRangeLessRegex1)
.replace("{OneNumberRangeLessRegex2}", OneNumberRangeLessRegex2);
public static final String TwoNumberRangeRegex3 = "({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(und|aber|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"
.replace("{OneNumberRangeMoreRegex1}", OneNumberRangeMoreRegex1)
.replace("{OneNumberRangeMoreRegex2}", OneNumberRangeMoreRegex2)
.replace("{OneNumberRangeLessRegex1}", OneNumberRangeLessRegex1)
.replace("{OneNumberRangeLessRegex2}", OneNumberRangeLessRegex2);
public static final String TwoNumberRangeRegex4 = "(von\\s+)?(?<number1>({NumberSplitMark}(?!\\bvon\\b).)+)\\s*{TillRegex}\\s*(der\\s+)?(?<number2>({NumberSplitMark}.)+)"
.replace("{TillRegex}", TillRegex)
.replace("{NumberSplitMark}", NumberSplitMark);
public static final String AmbiguousFractionConnectorsRegex = "^[.]";
public static final Character DecimalSeparatorChar = ',';

Просмотреть файл

@ -162,13 +162,13 @@ public class FrenchNumericWithUnit {
.put("Ban", "bani|-ban|ban")
.put("Roupie népalaise", "roupie népalaise|roupie nepalaise|npr")
.put("Roupie pakistanaise", "roupie pakistanaise|pkr")
.put("Roupie indienne", "roupie indienne|inr|roupie indien|inr|₹")
.put("Roupie indienne", "roupie indienne|roupies indiennes|inr|roupie indien|inr|₹")
.put("Roupie seychelloise", "roupie seychelloise|scr|sr|sre")
.put("Roupie mauricienne", "roupie mauricienne|mur")
.put("Rufiyaa maldives", "rufiyaa maldives|mvr|.ރ|rf")
.put("Roupie srilankaise", "roupie srilankaise|lrk|රු|ரூ")
.put("Rupiah Indonésie", "rupia indonésie|rupia indonesie|rupiah|rp|idr")
.put("Roupie", "roupie")
.put("Roupie", "roupie|roupies")
.put("Couronne danoise", "couronne danoise|dkk|couronnes danoise|couronne danemark|couronnes danemark")
.put("Couronne norvégienne", "couronne norvégienne|couronne norvegienne|couronnes norvégienne|couronnes norvegienne|nok")
.put("Couronne féroïenne", "couronne féroïenne|couronne feroienne")

Просмотреть файл

@ -161,7 +161,7 @@ public class GermanNumericWithUnit {
.put("United Arab Emirates dirham", "vae dirham|vae-dirham|dirham der vereinigten arabischen emirate|د.إ|aed")
.put("Azerbaijani manat", "aserbaidschan-manat|azn")
.put("Turkmenistan manat", "turkmenistan-manat|tmt")
.put("Manat", "manat")
.put("Manat", "manat|manats")
.put("Qəpik", "qəpik")
.put("Somali shilling", "somalia-schilling|sh.so.|sos")
.put("Somaliland shilling", "somaliland-schilling")
@ -179,7 +179,7 @@ public class GermanNumericWithUnit {
.put("Maldivian rufiyaa", "maledivischer rufiyaa|maledivische rufiyaa|maledivischen rufiyaa|mvr|.ރ")
.put("Sri Lankan rupee", "sri-lanka-rupie|sri-lanka-rupien|lkr|රු|ரூ")
.put("Indonesian rupiah", "indonesischer rupiah|indonesische rupiah|indonesischen rupiah|rupiah|perak|rp|idr")
.put("Rupee", "rupie|rs")
.put("Rupee", "rupie|rupien|rs")
.put("Danish krone", "dänische krone|dänischen krone|dänischer kronen|dänische kronen|dänischen kronen|daenische krone|daenischen krone|daenischer kronen|daenische kronen|daenischen kronen|dkk")
.put("Norwegian krone", "norwegische krone|norwegischen krone|norwegischer kronen|norwegische kronen|norwegischen kronen|nok")
.put("Faroese króna", "färöische króna|färöische krone|färöischen krone|färöischer kronen|färöische kronen|färöischen kronen")
@ -232,7 +232,7 @@ public class GermanNumericWithUnit {
.put("Mexican peso", "mexikanischer peso|mexikanische peso|mexikanischen peso|mxn")
.put("Philippine peso", "piso|philippinischer peso|philippinische peso|philippinischen peso|₱|php")
.put("Uruguayan peso", "uruguayischer peso|uruguayische peso|uruguayischen peso|uyu")
.put("Peso", "peso")
.put("Peso", "peso|pesos")
.put("Centavo", "centavos|centavo")
.put("Alderney pound", "alderney pfund|alderney £")
.put("British pound", "britischer pfund|britische pfund|britischen pfund|british £|gbp|pfund sterling")

Просмотреть файл

@ -0,0 +1,71 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence;
/**
 * String constants shared by the sequence recognizers: built-in type identifiers,
 * model type names and the names used to identify individual regexes.
 */
public class Constants {
// Built-in type identifiers, all in the "builtin." namespace.
public static final String SYS_PHONE_NUMBER = "builtin.phonenumber";
public static final String SYS_IP = "builtin.ip";
public static final String SYS_MENTION = "builtin.mention";
public static final String SYS_HASHTAG = "builtin.hashtag";
public static final String SYS_EMAIL = "builtin.email";
public static final String SYS_URL = "builtin.url";
public static final String SYS_GUID = "builtin.guid";
// Model type names (the built-in identifiers above without the "builtin." prefix).
public static final String MODEL_PHONE_NUMBER = "phonenumber";
public static final String MODEL_IP = "ip";
public static final String MODEL_MENTION = "mention";
public static final String MODEL_HASHTAG = "hashtag";
public static final String MODEL_EMAIL = "email";
public static final String MODEL_URL = "url";
public static final String MODEL_GUID = "guid";
// Names of the two IP regex variants, plus the "::" token of IPv6 notation.
public static final String IP_REGEX_IPV4 = "ipv4";
public static final String IP_REGEX_IPV6 = "ipv6";
public static final String IPV6_ELLIPSIS = "::";
// Names of the phone number regexes: one general, one per region code, one special.
public static final String PHONE_NUMBER_REGEX_GENERAL = "GeneralPhoneNumber";
public static final String PHONE_NUMBER_REGEX_BR = "BRPhoneNumber";
public static final String PHONE_NUMBER_REGEX_UK = "UKPhoneNumber";
public static final String PHONE_NUMBER_REGEX_DE = "DEPhoneNumber";
public static final String PHONE_NUMBER_REGEX_US = "USPhoneNumber";
public static final String PHONE_NUMBER_REGEX_CN = "CNPhoneNumber";
public static final String PHONE_NUMBER_REGEX_DK = "DKPhoneNumber";
public static final String PHONE_NUMBER_REGEX_IT = "ITPhoneNumber";
public static final String PHONE_NUMBER_REGEX_NL = "NLPhoneNumber";
public static final String PHONE_NUMBER_REGEX_SPECIAL = "SpecialPhoneNumber";
// Names of the remaining single-regex sequence types.
public static final String MENTION_REGEX = "Mention";
public static final String HASHTAG_REGEX = "Hashtag";
public static final String EMAIL_REGEX = "Email";
public static final String URL_REGEX = "Url";
public static final String GUID_REGEX = "Guid";
}

Просмотреть файл

@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence;
/**
 * Options accepted by the sequence recognizers.
 *
 * <p>The declaration order is significant: SequenceRecognizer has constructors that turn an
 * {@code int} into a constant through {@code SequenceOptions.values()[options]}, so
 * {@code None} must stay at ordinal 0 and {@code Relaxed} at ordinal 1.
 */
public enum SequenceOptions {
// No option set; this is the value used when callers pass no options.
None,
// Relaxed. Likely match, don't perform extra validation.
Relaxed
}

Просмотреть файл

@ -0,0 +1,238 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence;
import com.microsoft.recognizers.text.Culture;
import com.microsoft.recognizers.text.IModel;
import com.microsoft.recognizers.text.ModelResult;
import com.microsoft.recognizers.text.Recognizer;
import com.microsoft.recognizers.text.sequence.config.BaseSequenceConfiguration;
import com.microsoft.recognizers.text.sequence.english.extractors.EmailExtractor;
import com.microsoft.recognizers.text.sequence.english.extractors.EnglishIpExtractorConfiguration;
import com.microsoft.recognizers.text.sequence.english.extractors.EnglishPhoneNumberExtractorConfiguration;
import com.microsoft.recognizers.text.sequence.english.extractors.EnglishURLExtractorConfiguration;
import com.microsoft.recognizers.text.sequence.english.extractors.GUIDExtractor;
import com.microsoft.recognizers.text.sequence.english.extractors.HashTagExtractor;
import com.microsoft.recognizers.text.sequence.english.extractors.MentionExtractor;
import com.microsoft.recognizers.text.sequence.english.parsers.EmailParser;
import com.microsoft.recognizers.text.sequence.english.parsers.GUIDParser;
import com.microsoft.recognizers.text.sequence.english.parsers.HashTagParser;
import com.microsoft.recognizers.text.sequence.english.parsers.IpParser;
import com.microsoft.recognizers.text.sequence.english.parsers.MentionParser;
import com.microsoft.recognizers.text.sequence.english.parsers.PhoneNumberParser;
import com.microsoft.recognizers.text.sequence.english.parsers.URLParser;
import com.microsoft.recognizers.text.sequence.extractors.BaseIpExtractor;
import com.microsoft.recognizers.text.sequence.extractors.BasePhoneNumberExtractor;
import com.microsoft.recognizers.text.sequence.extractors.BasePhoneNumberExtractorConfiguration;
import com.microsoft.recognizers.text.sequence.extractors.BaseURLExtractor;
import com.microsoft.recognizers.text.sequence.models.EmailModel;
import com.microsoft.recognizers.text.sequence.models.GUIDModel;
import com.microsoft.recognizers.text.sequence.models.HashTagModel;
import com.microsoft.recognizers.text.sequence.models.IpAddressModel;
import com.microsoft.recognizers.text.sequence.models.MentionModel;
import com.microsoft.recognizers.text.sequence.models.PhoneNumberModel;
import com.microsoft.recognizers.text.sequence.models.URLModel;
import java.util.List;
import java.util.Locale;
import java.util.function.Function;
/**
 * Recognizer for sequence entities: phone numbers, IP addresses, mentions, hashtags,
 * e-mail addresses, URLs and GUIDs.
 *
 * <p>The static {@code recognizeXxx} methods create a fresh recognizer per call, look up the
 * matching model and parse the query with it. The instance {@code getXxxModel} methods expose
 * the models directly.
 */
public class SequenceRecognizer extends Recognizer<SequenceOptions> {

    /** Creates a lazily initialized recognizer with no target culture and {@link SequenceOptions#None}. */
    public SequenceRecognizer() {
        this(null, SequenceOptions.None, true);
    }

    /** Creates a non-lazily initialized recognizer for {@code culture} with {@link SequenceOptions#None}. */
    public SequenceRecognizer(String culture) {
        this(culture, SequenceOptions.None, false);
    }

    /**
     * Creates a recognizer; all other constructors delegate here.
     *
     * @param targetCulture culture passed to the base recognizer, may be {@code null}
     * @param options recognizer options
     * @param lazyInitialization lazy-initialization flag passed to the base recognizer
     */
    public SequenceRecognizer(String targetCulture, SequenceOptions options, boolean lazyInitialization) {
        super(targetCulture, options, lazyInitialization);
    }

    /**
     * Same as the {@link SequenceOptions} overload, with the options given as the ordinal of the constant.
     *
     * @throws ArrayIndexOutOfBoundsException if {@code options} is not a valid ordinal
     */
    public SequenceRecognizer(String targetCulture, int options, boolean lazyInitialization) {
        this(targetCulture, SequenceOptions.values()[options], lazyInitialization);
    }

    /**
     * Creates a recognizer with no target culture, with the options given as the ordinal of the constant.
     *
     * @throws ArrayIndexOutOfBoundsException if {@code options} is not a valid ordinal
     */
    public SequenceRecognizer(int options, boolean lazyInitialization) {
        this(null, SequenceOptions.values()[options], lazyInitialization);
    }

    /** Creates a recognizer with no target culture. */
    public SequenceRecognizer(SequenceOptions options, boolean lazyInitialization) {
        this(null, options, lazyInitialization);
    }

    /** Creates a recognizer with no target culture and {@link SequenceOptions#None}. */
    public SequenceRecognizer(boolean lazyInitialization) {
        this(null, SequenceOptions.None, lazyInitialization);
    }

    /**
     * Creates a lazily initialized recognizer with no target culture, with the options given as an ordinal.
     *
     * @throws ArrayIndexOutOfBoundsException if {@code options} is not a valid ordinal
     */
    public SequenceRecognizer(int options) {
        this(null, SequenceOptions.values()[options], true);
    }

    /** Creates a lazily initialized recognizer with no target culture. */
    public SequenceRecognizer(SequenceOptions options) {
        this(null, options, true);
    }

    /** Recognizes phone numbers in {@code query} with default options and culture fallback enabled. */
    public static List<ModelResult> recognizePhoneNumber(String query, String culture) {
        return recognizePhoneNumber(query, culture, SequenceOptions.None, true);
    }

    /**
     * Recognizes phone numbers in {@code query}.
     *
     * @param query text to analyse
     * @param culture culture used to select the model
     * @param options recognizer options; {@code null} means {@link SequenceOptions#None}
     * @param fallbackToDefaultCulture fallback flag for the model lookup; {@code null} means {@code true}
     * @return the model's parse results
     */
    public static List<ModelResult> recognizePhoneNumber(String query, String culture, SequenceOptions options,
            Boolean fallbackToDefaultCulture) {
        return recognizeByModel(recognizer -> recognizer.getPhoneNumberModel(culture, fallbackToDefaultCulture),
            query, resolveOptions(options));
    }

    /** Recognizes IP addresses in {@code query} with default options and culture fallback enabled. */
    public static List<ModelResult> recognizeIpAddress(String query, String culture) {
        return recognizeIpAddress(query, culture, SequenceOptions.None, true);
    }

    /**
     * Recognizes IP addresses in {@code query}.
     *
     * @param query text to analyse
     * @param culture culture used to select the model
     * @param options recognizer options; {@code null} means {@link SequenceOptions#None}
     * @param fallbackToDefaultCulture fallback flag for the model lookup; {@code null} means {@code true}
     * @return the model's parse results
     */
    public static List<ModelResult> recognizeIpAddress(String query, String culture, SequenceOptions options,
            Boolean fallbackToDefaultCulture) {
        return recognizeByModel(recognizer -> recognizer.getIpAddressModel(culture, fallbackToDefaultCulture),
            query, resolveOptions(options));
    }

    /** Recognizes mentions in {@code query} with default options and culture fallback enabled. */
    public static List<ModelResult> recognizeMention(String query, String culture) {
        return recognizeMention(query, culture, SequenceOptions.None, true);
    }

    /**
     * Recognizes mentions in {@code query}.
     *
     * @param query text to analyse
     * @param culture accepted for API symmetry; the mention model lookup always uses English
     * @param options recognizer options; {@code null} means {@link SequenceOptions#None}
     * @param fallbackToDefaultCulture fallback flag for the model lookup; {@code null} means {@code true}
     * @return the model's parse results
     */
    public static List<ModelResult> recognizeMention(String query, String culture, SequenceOptions options,
            Boolean fallbackToDefaultCulture) {
        return recognizeByModel(recognizer -> recognizer.getMentionModel(culture, fallbackToDefaultCulture),
            query, resolveOptions(options));
    }

    /** Recognizes hashtags in {@code query} with default options and culture fallback enabled. */
    public static List<ModelResult> recognizeHashtag(String query, String culture) {
        return recognizeHashtag(query, culture, SequenceOptions.None, true);
    }

    /**
     * Recognizes hashtags in {@code query}.
     *
     * @param query text to analyse
     * @param culture accepted for API symmetry; the hashtag model lookup always uses English
     * @param options recognizer options; {@code null} means {@link SequenceOptions#None}
     * @param fallbackToDefaultCulture fallback flag for the model lookup; {@code null} means {@code true}
     * @return the model's parse results
     */
    public static List<ModelResult> recognizeHashtag(String query, String culture, SequenceOptions options,
            Boolean fallbackToDefaultCulture) {
        return recognizeByModel(recognizer -> recognizer.getHashtagModel(culture, fallbackToDefaultCulture),
            query, resolveOptions(options));
    }

    /** Recognizes e-mail addresses in {@code query} with default options and culture fallback enabled. */
    public static List<ModelResult> recognizeEmail(String query, String culture) {
        return recognizeEmail(query, culture, SequenceOptions.None, true);
    }

    /**
     * Recognizes e-mail addresses in {@code query}.
     *
     * @param query text to analyse
     * @param culture accepted for API symmetry; the e-mail model lookup always uses English
     * @param options recognizer options; {@code null} means {@link SequenceOptions#None}
     * @param fallbackToDefaultCulture fallback flag for the model lookup; {@code null} means {@code true}
     * @return the model's parse results
     */
    public static List<ModelResult> recognizeEmail(String query, String culture, SequenceOptions options,
            Boolean fallbackToDefaultCulture) {
        return recognizeByModel(recognizer -> recognizer.getEmailModel(culture, fallbackToDefaultCulture),
            query, resolveOptions(options));
    }

    /** Recognizes URLs in {@code query} with default options and culture fallback enabled. */
    public static List<ModelResult> recognizeURL(String query, String culture) {
        return recognizeURL(query, culture, SequenceOptions.None, true);
    }

    /**
     * Recognizes URLs in {@code query}.
     *
     * @param query text to analyse
     * @param culture culture used to select the model
     * @param options recognizer options; {@code null} means {@link SequenceOptions#None}
     * @param fallbackToDefaultCulture fallback flag for the model lookup; {@code null} means {@code true}
     * @return the model's parse results
     */
    public static List<ModelResult> recognizeURL(String query, String culture, SequenceOptions options,
            Boolean fallbackToDefaultCulture) {
        return recognizeByModel(recognizer -> recognizer.getURLModel(culture, fallbackToDefaultCulture),
            query, resolveOptions(options));
    }

    /** Recognizes GUIDs in {@code query} with default options and culture fallback enabled. */
    public static List<ModelResult> recognizeGUID(String query, String culture) {
        return recognizeGUID(query, culture, SequenceOptions.None, true);
    }

    /**
     * Recognizes GUIDs in {@code query}.
     *
     * @param query text to analyse
     * @param culture accepted for API symmetry; the GUID model lookup always uses English
     * @param options recognizer options; {@code null} means {@link SequenceOptions#None}
     * @param fallbackToDefaultCulture fallback flag for the model lookup; {@code null} means {@code true}
     * @return the model's parse results
     */
    public static List<ModelResult> recognizeGUID(String query, String culture, SequenceOptions options,
            Boolean fallbackToDefaultCulture) {
        return recognizeByModel(recognizer -> recognizer.getGUIDModel(culture, fallbackToDefaultCulture),
            query, resolveOptions(options));
    }

    //region Helper methods for less verbosity

    /** Returns the phone number model for a {@code null} culture with fallback enabled. */
    public IModel getPhoneNumberModel() {
        return getPhoneNumberModel(null, true);
    }

    /**
     * Returns the phone number model. Cultures starting with "zh-" or "ja-" are looked up as
     * {@code Culture.Chinese}; any other culture (including {@code null}) is looked up as given.
     *
     * @param culture requested culture, may be {@code null}
     * @param fallbackToDefaultCulture fallback flag for the lookup; {@code null} means {@code true}
     */
    public IModel getPhoneNumberModel(String culture, Boolean fallbackToDefaultCulture) {
        boolean fallback = resolveFallback(fallbackToDefaultCulture);
        String modelCulture = usesChineseModel(culture) ? Culture.Chinese : culture;
        return this.getModel(PhoneNumberModel.class, modelCulture, fallback);
    }

    /**
     * Returns the IP address model. Cultures starting with "zh-" or "ja-" are looked up as
     * {@code Culture.Chinese}; everything else is looked up as {@code Culture.English}.
     *
     * @param culture requested culture, may be {@code null}
     * @param fallbackToDefaultCulture fallback flag for the lookup; {@code null} means {@code true}
     */
    public IModel getIpAddressModel(String culture, Boolean fallbackToDefaultCulture) {
        boolean fallback = resolveFallback(fallbackToDefaultCulture);
        String modelCulture = usesChineseModel(culture) ? Culture.Chinese : Culture.English;
        return this.getModel(IpAddressModel.class, modelCulture, fallback);
    }

    /**
     * Returns the mention model, always looked up as {@code Culture.English}.
     *
     * @param culture ignored
     * @param fallbackToDefaultCulture fallback flag for the lookup; {@code null} means {@code true}
     */
    public IModel getMentionModel(String culture, Boolean fallbackToDefaultCulture) {
        return this.getModel(MentionModel.class, Culture.English, resolveFallback(fallbackToDefaultCulture));
    }

    /**
     * Returns the hashtag model, always looked up as {@code Culture.English}.
     *
     * @param culture ignored
     * @param fallbackToDefaultCulture fallback flag for the lookup; {@code null} means {@code true}
     */
    public IModel getHashtagModel(String culture, Boolean fallbackToDefaultCulture) {
        return this.getModel(HashTagModel.class, Culture.English, resolveFallback(fallbackToDefaultCulture));
    }

    /**
     * Returns the e-mail model, always looked up as {@code Culture.English}.
     *
     * @param culture ignored
     * @param fallbackToDefaultCulture fallback flag for the lookup; {@code null} means {@code true}
     */
    public IModel getEmailModel(String culture, Boolean fallbackToDefaultCulture) {
        return this.getModel(EmailModel.class, Culture.English, resolveFallback(fallbackToDefaultCulture));
    }

    /**
     * Returns the URL model. Cultures starting with "zh-" or "ja-" are looked up as
     * {@code Culture.Chinese}; everything else is looked up as {@code Culture.English}.
     *
     * @param culture requested culture, may be {@code null}
     * @param fallbackToDefaultCulture fallback flag for the lookup; {@code null} means {@code true}
     */
    public IModel getURLModel(String culture, Boolean fallbackToDefaultCulture) {
        boolean fallback = resolveFallback(fallbackToDefaultCulture);
        String modelCulture = usesChineseModel(culture) ? Culture.Chinese : Culture.English;
        return this.getModel(URLModel.class, modelCulture, fallback);
    }

    /**
     * Returns the GUID model, always looked up as {@code Culture.English}.
     *
     * @param culture ignored
     * @param fallbackToDefaultCulture fallback flag for the lookup; {@code null} means {@code true}
     */
    public IModel getGUIDModel(String culture, Boolean fallbackToDefaultCulture) {
        return this.getModel(GUIDModel.class, Culture.English, resolveFallback(fallbackToDefaultCulture));
    }

    /**
     * Registers the model factories: phone numbers for English and Spanish, every other
     * sequence type for English only.
     */
    @Override
    protected void initializeConfiguration() {
        this.registerModel(PhoneNumberModel.class, Culture.English, (options) -> new PhoneNumberModel(
            new PhoneNumberParser(),
            new BasePhoneNumberExtractor(new EnglishPhoneNumberExtractorConfiguration(options))));
        this.registerModel(PhoneNumberModel.class, Culture.Spanish, (options) -> new PhoneNumberModel(
            new PhoneNumberParser(),
            new BasePhoneNumberExtractor(new BasePhoneNumberExtractorConfiguration(options))));

        this.registerModel(IpAddressModel.class, Culture.English, (options) -> new IpAddressModel(
            new IpParser(),
            new BaseIpExtractor(new EnglishIpExtractorConfiguration(options))));

        this.registerModel(MentionModel.class, Culture.English, (options) -> new MentionModel(
            new MentionParser(), new MentionExtractor()));

        this.registerModel(HashTagModel.class, Culture.English, (options) -> new HashTagModel(
            new HashTagParser(), new HashTagExtractor()));

        this.registerModel(EmailModel.class, Culture.English, (options) -> new EmailModel(
            new EmailParser(new BaseSequenceConfiguration(options)),
            new EmailExtractor(new BaseSequenceConfiguration(options))));

        this.registerModel(URLModel.class, Culture.English, (options) -> new URLModel(
            new URLParser(),
            new BaseURLExtractor(new EnglishURLExtractorConfiguration(options))));

        this.registerModel(GUIDModel.class, Culture.English, (options) -> new GUIDModel(
            new GUIDParser(), new GUIDExtractor()));
    }

    /**
     * Builds a new non-lazy recognizer with {@code options}, obtains a model from it through
     * {@code getModelFunc} and parses {@code query} with that model.
     */
    private static List<ModelResult> recognizeByModel(Function<SequenceRecognizer, IModel> getModelFunc, String query,
            SequenceOptions options) {
        SequenceRecognizer recognizer = new SequenceRecognizer(options, false);
        IModel model = getModelFunc.apply(recognizer);
        return model.parse(query);
    }

    /** Substitutes {@link SequenceOptions#None} for {@code null} options. */
    private static SequenceOptions resolveOptions(SequenceOptions options) {
        return options != null ? options : SequenceOptions.None;
    }

    /** Substitutes {@code true} for a {@code null} fallback flag. */
    private static boolean resolveFallback(Boolean fallbackToDefaultCulture) {
        return fallbackToDefaultCulture == null || fallbackToDefaultCulture;
    }

    /** Tells whether {@code culture} starts with "zh-" or "ja-", ignoring case; {@code null} yields {@code false}. */
    private static boolean usesChineseModel(String culture) {
        if (culture == null) {
            return false;
        }
        String lowered = culture.toLowerCase(Locale.ROOT);
        return lowered.startsWith("zh-") || lowered.startsWith("ja-");
    }
}

Просмотреть файл

@ -0,0 +1,18 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.config;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
public class BaseSequenceConfiguration implements ISequenceConfiguration {
private SequenceOptions options;
public BaseSequenceConfiguration(SequenceOptions options) {
this.options = options != null ? options : SequenceOptions.None;
}
public SequenceOptions getOptions() {
return this.options;
}
}

Просмотреть файл

@ -0,0 +1,10 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.config;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
/**
 * Common contract of the sequence configuration classes: each one exposes the
 * {@link SequenceOptions} it was created with.
 */
public interface ISequenceConfiguration {
// Returns the recognizer options held by this configuration.
SequenceOptions getOptions();
}

Просмотреть файл

@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.config;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
import java.util.regex.Pattern;
public class IpConfiguration implements ISequenceConfiguration {
private SequenceOptions options;
private Pattern ipv4Regex;
private Pattern ipv6Regex;
public IpConfiguration(SequenceOptions options) {
this.options = options != null ? options : SequenceOptions.None;
}
public SequenceOptions getOptions() {
return options;
}
public Pattern getIpv4Regex() {
return ipv4Regex;
}
public void setIpv4Regex(Pattern withIpv4Regex) {
ipv4Regex = withIpv4Regex;
}
public Pattern getIpv6Regex() {
return ipv6Regex;
}
public void setIpv6Regex(Pattern withIpv6Regex) {
ipv6Regex = withIpv6Regex;
}
}

Просмотреть файл

@ -0,0 +1,94 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.config;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Sequence configuration for phone numbers: the recognizer options plus the regexes and
 * marker characters a phone number extractor is configured with. Every property except the
 * options is {@code null} until set.
 */
public class PhoneNumberConfiguration implements ISequenceConfiguration {
    private SequenceOptions options;

    private Pattern falsePositivePrefixRegex;
    private Pattern colonPrefixCheckRegex;

    private String wordBoundariesRegex;
    private String nonWordBoundariesRegex;
    private String endWordBoundariesRegex;

    private List<Character> colonMarkers;
    private List<Character> forbiddenPrefixMarkers;
    private List<Character> forbiddenSuffixMarkers;

    /**
     * @param options recognizer options; {@code null} is replaced by {@link SequenceOptions#None}
     */
    public PhoneNumberConfiguration(SequenceOptions options) {
        if (options == null) {
            this.options = SequenceOptions.None;
        } else {
            this.options = options;
        }
    }

    /** Returns the recognizer options, never {@code null}. */
    @Override
    public SequenceOptions getOptions() {
        return options;
    }

    /** Returns the false-positive prefix pattern. */
    public Pattern getFalsePositivePrefixRegex() {
        return this.falsePositivePrefixRegex;
    }

    /** Replaces the false-positive prefix pattern. */
    public void setFalsePositivePrefixRegex(Pattern withFalsePositivePrefixRegex) {
        falsePositivePrefixRegex = withFalsePositivePrefixRegex;
    }

    /** Returns the word-boundaries regex source. */
    public String getWordBoundariesRegex() {
        return this.wordBoundariesRegex;
    }

    /** Replaces the word-boundaries regex source. */
    public void setWordBoundariesRegex(String wordBoundariesRegex) {
        this.wordBoundariesRegex = wordBoundariesRegex;
    }

    /** Returns the non-word-boundaries regex source. */
    public String getNonWordBoundariesRegex() {
        return this.nonWordBoundariesRegex;
    }

    /** Replaces the non-word-boundaries regex source. */
    public void setNonWordBoundariesRegex(String withNonWordBoundariesRegex) {
        nonWordBoundariesRegex = withNonWordBoundariesRegex;
    }

    /** Returns the end-word-boundaries regex source. */
    public String getEndWordBoundariesRegex() {
        return this.endWordBoundariesRegex;
    }

    /** Replaces the end-word-boundaries regex source. */
    public void setEndWordBoundariesRegex(String withEndWordBoundariesRegex) {
        endWordBoundariesRegex = withEndWordBoundariesRegex;
    }

    /** Returns the colon-prefix check pattern. */
    public Pattern getColonPrefixCheckRegex() {
        return this.colonPrefixCheckRegex;
    }

    /** Replaces the colon-prefix check pattern. */
    public void setColonPrefixCheckRegex(Pattern withColonPrefixCheckRegex) {
        colonPrefixCheckRegex = withColonPrefixCheckRegex;
    }

    /** Returns the colon marker characters (the stored list itself, not a copy). */
    public List<Character> getColonMarkers() {
        return this.colonMarkers;
    }

    /** Replaces the colon marker characters; the list is stored as given. */
    public void setColonMarkers(List<Character> withColonMarkers) {
        colonMarkers = withColonMarkers;
    }

    /** Returns the forbidden prefix marker characters (the stored list itself, not a copy). */
    public List<Character> getForbiddenPrefixMarkers() {
        return this.forbiddenPrefixMarkers;
    }

    /** Replaces the forbidden prefix marker characters; the list is stored as given. */
    public void setForbiddenPrefixMarkers(List<Character> withForbiddenPrefixMarkers) {
        forbiddenPrefixMarkers = withForbiddenPrefixMarkers;
    }

    /** Returns the forbidden suffix marker characters (the stored list itself, not a copy). */
    public List<Character> getForbiddenSuffixMarkers() {
        return this.forbiddenSuffixMarkers;
    }

    /** Replaces the forbidden suffix marker characters; the list is stored as given. */
    public void setForbiddenSuffixMarkers(List<Character> withForbiddenSuffixMarkers) {
        forbiddenSuffixMarkers = withForbiddenSuffixMarkers;
    }
}

Просмотреть файл

@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.config;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
import java.util.regex.Pattern;
public class URLConfiguration implements ISequenceConfiguration {
private SequenceOptions options;
private Pattern ipUrlRegex;
private Pattern urlRegex;
public URLConfiguration(SequenceOptions options) {
this.options = options != null ? options : SequenceOptions.None;
}
public SequenceOptions getOptions() {
return this.options;
}
public Pattern getIpUrlRegex() {
return ipUrlRegex;
}
public void setIpUrlRegex(Pattern ipUrlRegex) {
this.ipUrlRegex = ipUrlRegex;
}
public Pattern getUrlRegex() {
return urlRegex;
}
public void setUrlRegex(Pattern urlRegex) {
this.urlRegex = urlRegex;
}
}

Просмотреть файл

@ -0,0 +1,13 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.extractors;
import com.microsoft.recognizers.text.sequence.config.BaseSequenceConfiguration;
import com.microsoft.recognizers.text.sequence.extractors.BaseEmailExtractor;
/**
 * E-mail extractor of the English sequence package. It adds no behavior of its own:
 * the configuration is handed straight to {@link BaseEmailExtractor}.
 */
public class EmailExtractor extends BaseEmailExtractor {
// Passes the configuration through to the base extractor unchanged.
public EmailExtractor(BaseSequenceConfiguration config) {
super(config);
}
}

Просмотреть файл

@ -0,0 +1,19 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.extractors;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
import com.microsoft.recognizers.text.sequence.config.IpConfiguration;
import com.microsoft.recognizers.text.sequence.resources.BaseIp;
import java.util.regex.Pattern;
public class EnglishIpExtractorConfiguration extends IpConfiguration {
public EnglishIpExtractorConfiguration(SequenceOptions options) {
super(options);
this.setIpv4Regex(Pattern.compile(BaseIp.Ipv4Regex));
this.setIpv6Regex(Pattern.compile(BaseIp.Ipv6Regex));
}
}

Просмотреть файл

@ -0,0 +1,18 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.extractors;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
import com.microsoft.recognizers.text.sequence.extractors.BasePhoneNumberExtractorConfiguration;
import com.microsoft.recognizers.text.sequence.resources.EnglishPhoneNumbers;
import java.util.regex.Pattern;
public class EnglishPhoneNumberExtractorConfiguration extends BasePhoneNumberExtractorConfiguration {
public EnglishPhoneNumberExtractorConfiguration(SequenceOptions options) {
super(options);
super.setFalsePositivePrefixRegex(Pattern.compile(EnglishPhoneNumbers.FalsePositivePrefixRegex));
}
}

Просмотреть файл

@ -0,0 +1,19 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.extractors;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
import com.microsoft.recognizers.text.sequence.config.URLConfiguration;
import com.microsoft.recognizers.text.sequence.resources.BaseURL;
import java.util.regex.Pattern;
public class EnglishURLExtractorConfiguration extends URLConfiguration {
public EnglishURLExtractorConfiguration(SequenceOptions options) {
super(options);
this.setIpUrlRegex(Pattern.compile(BaseURL.IpUrlRegex));
this.setUrlRegex(Pattern.compile(BaseURL.UrlRegex));
}
}

Просмотреть файл

@ -0,0 +1,9 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.extractors;
import com.microsoft.recognizers.text.sequence.extractors.BaseGUIDExtractor;
/**
 * GUID extractor of the English sequence package; inherits everything from
 * {@link BaseGUIDExtractor} and adds nothing.
 */
public class GUIDExtractor extends BaseGUIDExtractor {
}

Просмотреть файл

@ -0,0 +1,9 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.extractors;
import com.microsoft.recognizers.text.sequence.extractors.BaseHashTagExtractor;
/**
 * Hashtag extractor of the English sequence package; inherits everything from
 * {@link BaseHashTagExtractor} and adds nothing.
 */
public class HashTagExtractor extends BaseHashTagExtractor {
}

Просмотреть файл

@ -0,0 +1,9 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.extractors;
import com.microsoft.recognizers.text.sequence.extractors.BaseMentionExtractor;
/**
 * Mention extractor of the English sequence package; inherits everything from
 * {@link BaseMentionExtractor} and adds nothing.
 */
public class MentionExtractor extends BaseMentionExtractor {
}

Просмотреть файл

@ -0,0 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.parsers;
import com.microsoft.recognizers.text.sequence.config.BaseSequenceConfiguration;
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
/**
 * E-mail parser of the English sequence package. Parsing itself is inherited from
 * {@link BaseSequenceParser}; this class only keeps the configuration it was given.
 */
public class EmailParser extends BaseSequenceParser {
// Stored at construction time; not read anywhere inside this class.
private BaseSequenceConfiguration config;
// Keeps a reference to the supplied configuration.
public EmailParser(BaseSequenceConfiguration config) {
this.config = config;
}
}
}

Просмотреть файл

@ -0,0 +1,48 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.parsers;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.ParseResult;
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
import com.microsoft.recognizers.text.sequence.resources.BaseGUID;
import com.microsoft.recognizers.text.utilities.Match;
import com.microsoft.recognizers.text.utilities.RegExpUtility;
import java.util.regex.Pattern;
public class GUIDParser extends BaseSequenceParser {
private static Double SCORE_UPPER_LIMIT = 100d;
private static Double SCORE_LOWER_LIMIT = 0d;
private static Double BASE_SCORE = 100d;
private static Double NO_BOUNDARY_PENALTY = 10d;
private static Double NO_FORMAT_PENALTY = 10d;
private static Double PURE_DIGIT_PENALTY = 15d;
private static String PURE_DIGIT_REGEX = "^\\d*$";
private static String FORMAT_REGEX = "-";
private static final Pattern GUID_ELEMENT_REGEX = Pattern.compile(BaseGUID.GUIDRegexElement);
public static Double scoreGUID(String textGUID) {
Double score = BASE_SCORE;
Match[] elementMatch = RegExpUtility.getMatches(GUID_ELEMENT_REGEX, textGUID);
if (elementMatch.length > 0) {
Integer startIndex = elementMatch[0].index;
String guidElement = elementMatch[0].value;
score -= startIndex == 0 ? NO_BOUNDARY_PENALTY : 0;
score -= Pattern.compile(FORMAT_REGEX).matcher(guidElement).find() ? 0 : NO_FORMAT_PENALTY;
score -= Pattern.compile(PURE_DIGIT_REGEX).matcher(textGUID).find() ? PURE_DIGIT_PENALTY : 0;
}
return Math.max(Math.min(score, SCORE_UPPER_LIMIT), SCORE_LOWER_LIMIT)
/ (SCORE_UPPER_LIMIT - SCORE_LOWER_LIMIT);
}
@Override
public ParseResult parse(ExtractResult extResult) {
return new ParseResult(extResult.getStart(), extResult.getLength(), extResult.getText(), extResult.getType(),
null, GUIDParser.scoreGUID(extResult.getText()), extResult.getText());
}
}

Просмотреть файл

@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.parsers;
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
/**
 * Hashtag parser of the English sequence package; all parsing behavior is inherited
 * from {@link BaseSequenceParser}.
 */
public class HashTagParser extends BaseSequenceParser {
// No state to initialize.
public HashTagParser() {
}
}
}

Просмотреть файл

@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.parsers;
import com.microsoft.recognizers.text.sequence.parsers.BaseIpParser;
/**
 * IP address parser of the English sequence package; all parsing behavior is inherited
 * from {@link BaseIpParser}.
 */
public class IpParser extends BaseIpParser {
// No state to initialize.
public IpParser() {
}
}
}

Просмотреть файл

@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.parsers;
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
/**
 * Mention parser of the English sequence package; all parsing behavior is inherited
 * from {@link BaseSequenceParser}.
 */
public class MentionParser extends BaseSequenceParser {
// No state to initialize.
public MentionParser() {
}
}
}

Просмотреть файл

@ -0,0 +1,108 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.parsers;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.ParseResult;
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
import com.microsoft.recognizers.text.sequence.resources.BasePhoneNumbers;
import com.microsoft.recognizers.text.utilities.Match;
import com.microsoft.recognizers.text.utilities.RegExpUtility;
import java.util.Arrays;
import java.util.regex.Pattern;
public class PhoneNumberParser extends BaseSequenceParser {
private static Double SCORE_UPPER_LIMIT = 100d;
private static Double SCORE_LOWER_LIMIT = 0d;
private static Double BASE_SCORE = 30d;
private static Double COUNTRY_CODE_AWARD = 40d;
private static Double AREA_CODE_AWARD = 30d;
private static Double FORMATTED_AWARD = 20d;
private static Double LENGTH_AWARD = 10d;
private static Double TYPICAL_FORMAT_DEDUCTION_SCORE = 40d;
private static Double CONTINUE_DIGIT_DEDUCTION_SCORE = 10d;
private static Double TAIL_SAME_DEDUCTION_SCORE = 10d;
private static Double CONTINUE_FORMAT_INDICATOR_DEDUCTION_SCORE = 20d;
private static Double WRONG_FORMAT_DEDUCTION_SCORE = 20d;
private static Integer MAX_FORMAT_INDICATOR_NUM = 3;
private static Integer MAX_LENGTH_AWARD_NUM = 3;
private static Integer TAIL_SAME_LIMIT = 2;
private static Integer PHONE_NUMBER_LENGTH_BASE = 8;
private static Integer PURE_DIGIT_LENGTH_LIMIT = 11;
// @TODO move regexes to base resource files
private static String COMPLETE_BRACKET_REGEX = "\\(.*\\)";
private static String SINGLE_BRACKER_REGEX = "\\(|\\)";
private static String TAIL_SAME_DIGIT_REGEX = "([\\d])\\1{2,10}$";
private static String PURE_DIGIT_REGEX = "^\\d*$";
private static String CONTINUE_DIGIT_REGEX = "\\d{5}\\d*";
private static String DIGIT_REGEX = "\\d";
private static final Pattern COUNTRY_CODE_REGEX = Pattern.compile(BasePhoneNumbers.CountryCodeRegex);
private static final Pattern AREA_CODE_REGEX = Pattern.compile(BasePhoneNumbers.AreaCodeIndicatorRegex);
private static final Pattern FORMAT_INDICATOR_REGEX = Pattern.compile(BasePhoneNumbers.FormatIndicatorRegex);
private static final Pattern NO_AREA_CODE_US_PHONE_NUMBER_REGEX = Pattern
.compile(BasePhoneNumbers.NoAreaCodeUSPhoneNumberRegex);
/**
 * Computes a heuristic confidence score for a phone-number candidate.
 *
 * <p>The score starts at {@code BASE_SCORE}, is increased by a series of awards (country/area code,
 * format indicators, digit count, US number without area code) and decreased by a series of
 * deductions (repeated format indicators, unbalanced brackets, repeated trailing digits, long
 * pure-digit strings, typical non-phone formats, several long digit runs). The total is clamped to
 * {@code [SCORE_LOWER_LIMIT, SCORE_UPPER_LIMIT]} and divided by the width of that range.
 *
 * @param phoneNumberText the candidate text to score
 * @return the clamped score divided by {@code SCORE_UPPER_LIMIT - SCORE_LOWER_LIMIT}
 */
public static Double scorePhoneNumber(String phoneNumberText) {
Double score = BASE_SCORE;
// Country code score or area code score (the country-code award takes precedence; at most one applies)
score += COUNTRY_CODE_REGEX.matcher(phoneNumberText).find() ? COUNTRY_CODE_AWARD
: AREA_CODE_REGEX.matcher(phoneNumberText).find() ? AREA_CODE_AWARD : 0;
// Formatted score
Match[] formatMatches = RegExpUtility.getMatches(FORMAT_INDICATOR_REGEX, phoneNumberText);
if (formatMatches.length > 0) {
Integer formatIndicatorCount = formatMatches.length;
// The award grows with the number of format indicators, capped at MAX_FORMAT_INDICATOR_NUM.
score += Math.min(formatIndicatorCount, MAX_FORMAT_INDICATOR_NUM) * FORMATTED_AWARD;
// A single match longer than one character means format indicators appear back to back.
Boolean anyMatch = Arrays.stream(formatMatches).anyMatch(match -> match.value.length() > 1);
score -= anyMatch ? CONTINUE_FORMAT_INDICATOR_DEDUCTION_SCORE : 0;
// NOTE(review): Pattern.matches only succeeds when the ENTIRE text matches the expression,
// unlike the find() calls used for the other checks in this method - confirm that a
// whole-string match is what the bracket expressions are meant to be tested with.
if (Pattern.matches(SINGLE_BRACKER_REGEX, phoneNumberText) && !Pattern.matches(COMPLETE_BRACKET_REGEX, phoneNumberText)) {
score -= WRONG_FORMAT_DEDUCTION_SCORE;
}
}
// Length score: digits beyond PHONE_NUMBER_LENGTH_BASE earn LENGTH_AWARD each, capped at
// MAX_LENGTH_AWARD_NUM (the term is negative when there are fewer digits than the base).
score += Math.min(RegExpUtility.getMatches(Pattern.compile(DIGIT_REGEX), phoneNumberText).length
- PHONE_NUMBER_LENGTH_BASE, MAX_LENGTH_AWARD_NUM) * LENGTH_AWARD;
// Same tailing digit deduction (only the first match is considered)
Match[] tailSameDigitMatches = RegExpUtility.getMatches(Pattern.compile(TAIL_SAME_DIGIT_REGEX),
phoneNumberText);
if (tailSameDigitMatches.length > 0) {
score -= (tailSameDigitMatches[0].value.length() - TAIL_SAME_LIMIT) * TAIL_SAME_DEDUCTION_SCORE;
}
// Pure digit deduction: every character beyond PURE_DIGIT_LENGTH_LIMIT costs LENGTH_AWARD
Match[] pureDigitMatches = RegExpUtility.getMatches(Pattern.compile(PURE_DIGIT_REGEX), phoneNumberText);
if (pureDigitMatches.length > 0) {
score -= phoneNumberText.length() > PURE_DIGIT_LENGTH_LIMIT ? (phoneNumberText.length() - PURE_DIGIT_LENGTH_LIMIT) * LENGTH_AWARD
: 0;
}
// Special format deduction: applied once if any expression in TypicalDeductionRegexList is found
score -= BasePhoneNumbers.TypicalDeductionRegexList.stream().anyMatch(o -> Pattern.compile(o).matcher(phoneNumberText).find()) ? TYPICAL_FORMAT_DEDUCTION_SCORE : 0;
// Continue digit deduction: the first match is free, each additional one is penalised
Match[] continueDigitMatches = RegExpUtility.getMatches(Pattern.compile(CONTINUE_DIGIT_REGEX), phoneNumberText);
score -= Math.max(continueDigitMatches.length - 1, 0) * CONTINUE_DIGIT_DEDUCTION_SCORE;
// Special award for US phone numbers without area code, i.e. 223-4567 or 223 - 4567
if (NO_AREA_CODE_US_PHONE_NUMBER_REGEX.matcher(phoneNumberText).find()) {
score += LENGTH_AWARD * 1.5;
}
// Clamp to the allowed range, then normalise by the width of that range.
return Math.max(Math.min(score, SCORE_UPPER_LIMIT), SCORE_LOWER_LIMIT)
/ (SCORE_UPPER_LIMIT - SCORE_LOWER_LIMIT);
}
/**
 * Wraps an extracted phone number in a {@link ParseResult}.
 *
 * <p>The extracted text is reused verbatim as the resolution string, the data slot is left
 * {@code null}, and the value slot carries the score from {@link #scorePhoneNumber(String)}.
 *
 * @param extResult the extraction to convert
 * @return the parse result for {@code extResult}
 */
@Override
public ParseResult parse(ExtractResult extResult) {
    final String phoneNumberText = extResult.getText();
    final Double confidence = PhoneNumberParser.scorePhoneNumber(phoneNumberText);
    return new ParseResult(
        extResult.getStart(),
        extResult.getLength(),
        phoneNumberText,
        extResult.getType(),
        null,
        confidence,
        phoneNumberText);
}
}

Просмотреть файл

@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.english.parsers;
import com.microsoft.recognizers.text.sequence.parsers.BaseSequenceParser;
/**
 * English URL parser.
 *
 * <p>Declares no members of its own beyond a no-argument constructor; all parsing behaviour is
 * inherited from {@link BaseSequenceParser}.
 */
public class URLParser extends BaseSequenceParser {

    /** Creates the parser; there is no state to initialise. */
    public URLParser() {
        super();
    }
}

Просмотреть файл

@ -0,0 +1,65 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.extractors;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.sequence.Constants;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
import com.microsoft.recognizers.text.sequence.config.BaseSequenceConfiguration;
import com.microsoft.recognizers.text.sequence.resources.BaseEmail;
import com.microsoft.recognizers.text.utilities.StringUtility;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * Extracts e-mail addresses using {@code BaseEmail.EmailRegex} and, unless the configured options
 * differ from {@code SequenceOptions.None}, validates every candidate against
 * {@code BaseEmail.RFC5322Regex}.
 */
public class BaseEmailExtractor extends BaseSequenceExtractor {

    private static final Pattern RFC_5322_VALIDATION_REGEX = Pattern.compile(BaseEmail.RFC5322Regex);

    private final BaseSequenceConfiguration config;

    // Note: this declaration hides BaseSequenceExtractor.extractType; the constructor copies the
    // value into the inherited field so both views agree.
    protected final String extractType = Constants.SYS_EMAIL;

    /**
     * Creates the extractor.
     *
     * @param config configuration whose options decide whether {@link #postFilter(List)} validates
     */
    public BaseEmailExtractor(BaseSequenceConfiguration config) {
        this.config = config;

        // Java fields are not overridden: code in the base class that reads its own extractType
        // field would otherwise see the base default instead of SYS_EMAIL.
        super.extractType = this.extractType;

        Map<Pattern, String> regexes = new HashMap<Pattern, String>();
        regexes.put(Pattern.compile(BaseEmail.EmailRegex), Constants.EMAIL_REGEX);

        // EmailRegex2 will break the code as it's not supported in Java, comment out for now
        // Error: java.util.regex.PatternSyntaxException: Unknown inline modifier near index 4
        // The same issue happens in JS and it was disabled in https://github.com/microsoft/Recognizers-Text/pull/905
        // @TODO: we need to search a way to handle inline modifier.
        // See https://stackoverflow.com/questions/48189725/java-regex-pattern-compilation-error
        // regexes.put(Pattern.compile(BaseEmail.EmailRegex2), Constants.EMAIL_REGEX);

        super.regexes = regexes;
    }

    @Override
    protected Map<Pattern, String> getRegexes() {
        return regexes;
    }

    @Override
    protected String getExtractType() {
        return extractType;
    }

    /**
     * Removes trailing dots from each candidate and drops candidates that are not well-formed
     * addresses. When the configured options are anything other than {@code SequenceOptions.None}
     * the list is returned untouched.
     *
     * @param results raw extraction results; entries may be modified in place
     * @return {@code results} itself when validation is skipped, otherwise a new filtered list
     */
    @Override
    protected List<ExtractResult> postFilter(List<ExtractResult> results) {
        // If Relaxed is on, no extra validation is applied
        if (config.getOptions() != SequenceOptions.None) {
            return results;
        }

        // Trim ending '.' characters. The dots are stripped explicitly (rather than through a
        // generic trim helper that is not told which character to remove) and the length is
        // reduced by exactly the number of characters dropped, so text and length stay in sync.
        results.forEach(result -> {
            String text = result.getText();
            int end = text.length();
            while (end > 0 && text.charAt(end - 1) == '.') {
                end--;
            }
            int removed = text.length() - end;
            if (removed > 0) {
                result.setText(text.substring(0, end));
                result.setLength(result.getLength() - removed);
            }
        });

        // Do not return malformed e-mail addresses: the whole text must match the validation regex.
        return results.stream()
            .filter(candidate -> RFC_5322_VALIDATION_REGEX.matcher(candidate.getText()).matches())
            .collect(Collectors.toList());
    }
}

Просмотреть файл

@ -0,0 +1,29 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.extractors;
import com.microsoft.recognizers.text.sequence.Constants;
import com.microsoft.recognizers.text.sequence.resources.BaseGUID;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
public class BaseGUIDExtractor extends BaseSequenceExtractor {
protected final String extractType = Constants.SYS_GUID;
protected String getExtractType() {
return this.extractType;
}
public BaseGUIDExtractor() {
Map<Pattern, String> regexes = new HashMap<Pattern, String>() {
{
put(Pattern.compile(BaseGUID.GUIDRegex), Constants.GUID_REGEX);
}
};
super.regexes = regexes;
}
}

Просмотреть файл

@ -0,0 +1,29 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.extractors;
import com.microsoft.recognizers.text.sequence.Constants;
import com.microsoft.recognizers.text.sequence.resources.BaseHashtag;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
public class BaseHashTagExtractor extends BaseSequenceExtractor {
protected final String extractType = Constants.SYS_HASHTAG;
protected String getExtractType() {
return this.extractType;
}
public BaseHashTagExtractor() {
Map<Pattern, String> regexes = new HashMap<Pattern, String>() {
{
put(Pattern.compile(BaseHashtag.HashtagRegex), Constants.HASHTAG_REGEX);
}
};
super.regexes = regexes;
}
}

Просмотреть файл

@ -0,0 +1,111 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.extractors;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.sequence.Constants;
import com.microsoft.recognizers.text.sequence.config.IpConfiguration;
import com.microsoft.recognizers.text.sequence.resources.BaseIp;
import com.microsoft.recognizers.text.utilities.Match;
import com.microsoft.recognizers.text.utilities.RegExpUtility;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
/**
 * Extracts IPv4 and IPv6 addresses. Each result carries, as its data, the label of the pattern
 * that produced it ({@code Constants.IP_REGEX_IPV4} or {@code Constants.IP_REGEX_IPV6}).
 */
public class BaseIpExtractor extends BaseSequenceExtractor {

    private IpConfiguration config;

    protected String extractType = Constants.SYS_IP;

    // The Ipv6 address regexes is written following the Recommendation:
    // https://tools.ietf.org/html/rfc5952
    /**
     * Creates the extractor and registers the IPv4 and IPv6 patterns.
     *
     * @param config extractor configuration (stored; not otherwise read by this class)
     */
    public BaseIpExtractor(IpConfiguration config) {
        // Previously the parameter was silently dropped and the field stayed null.
        this.config = config;

        Map<Pattern, String> regexes = new HashMap<Pattern, String>();
        regexes.put(Pattern.compile(BaseIp.Ipv4Regex), Constants.IP_REGEX_IPV4);
        regexes.put(Pattern.compile(BaseIp.Ipv6Regex), Constants.IP_REGEX_IPV6);
        super.regexes = regexes;
    }

    @Override
    protected String getExtractType() {
        return this.extractType;
    }

    /**
     * Finds IP addresses in {@code text}.
     *
     * <p>All pattern matches are projected onto a per-character mask; every maximal run of marked
     * characters that coincides exactly with one match becomes a result. A run that starts or ends
     * with {@code Constants.IPV6_ELLIPSIS} and directly touches a letter or digit on that side is
     * skipped.
     *
     * @param text the text to scan; blank or {@code null} yields an empty list
     * @return the extracted addresses in order of appearance
     */
    @Override
    public List<ExtractResult> extract(String text) {
        List<ExtractResult> result = new ArrayList<ExtractResult>();
        if (StringUtils.isBlank(text)) {
            return result;
        }

        Map<Match, String> matchSource = new HashMap<>();
        boolean[] matched = new boolean[text.length()];

        // Traverse every match result to record which positions in the text are matched.
        regexes.forEach((pattern, sourceName) -> {
            for (Match match : RegExpUtility.getMatches(pattern, text)) {
                for (int j = 0; j < match.length; j++) {
                    matched[match.index + j] = true;
                }
                // Keep Source Data for extra information
                matchSource.put(match, sourceName);
            }
        });

        int lastNotMatched = -1;
        for (int i = 0; i < text.length(); i++) {
            if (!matched[i]) {
                lastNotMatched = i;
                continue;
            }
            if (i + 1 < text.length() && matched[i + 1]) {
                // Still inside a run of matched characters.
                continue;
            }

            // Position i closes a run of matched characters.
            int start = lastNotMatched + 1;
            int length = i - lastNotMatched;
            String substr = text.substring(start, start + length);

            boolean gluedBefore = substr.startsWith(Constants.IPV6_ELLIPSIS)
                && start > 0
                && Character.isLetterOrDigit(text.charAt(start - 1));
            boolean gluedAfter = substr.endsWith(Constants.IPV6_ELLIPSIS)
                && i + 1 < text.length()
                && Character.isLetterOrDigit(text.charAt(i + 1));
            if (gluedBefore || gluedAfter) {
                // Skip only this candidate. The former 'break' stopped the scan altogether, so
                // every address appearing later in the text was lost as well.
                continue;
            }

            // Use the match that actually covers this span. The former code took the first key of
            // the map regardless of its position, so an address could be labelled with the wrong
            // pattern (e.g. IPv4 instead of IPv6).
            Map.Entry<Match, String> source = findSource(matchSource, start, length);
            if (source != null) {
                ExtractResult extResult = new ExtractResult();
                extResult.setStart(start);
                extResult.setLength(length);
                extResult.setText(substr);
                extResult.setType(this.extractType);
                extResult.setData(source.getValue());
                result.add(extResult);
            }
        }

        return result;
    }

    /** Returns the entry whose match spans exactly [start, start + length), or null if none does. */
    private static Map.Entry<Match, String> findSource(Map<Match, String> matchSource, int start, int length) {
        for (Map.Entry<Match, String> entry : matchSource.entrySet()) {
            Match candidate = entry.getKey();
            if (candidate.index == start && candidate.length == length) {
                return entry;
            }
        }
        return null;
    }
}

Просмотреть файл

@ -0,0 +1,29 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.extractors;
import com.microsoft.recognizers.text.sequence.Constants;
import com.microsoft.recognizers.text.sequence.resources.BaseMention;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
public class BaseMentionExtractor extends BaseSequenceExtractor {
protected final String extractType = Constants.SYS_MENTION;
protected String getExtractType() {
return this.extractType;
}
public BaseMentionExtractor() {
Map<Pattern, String> regexes = new HashMap<Pattern, String>() {
{
put(Pattern.compile(BaseMention.MentionRegex), Constants.MENTION_REGEX);
}
};
super.regexes = regexes;
}
}

Просмотреть файл

@ -0,0 +1,216 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.extractors;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.sequence.Constants;
import com.microsoft.recognizers.text.sequence.config.PhoneNumberConfiguration;
import com.microsoft.recognizers.text.sequence.resources.BasePhoneNumbers;
import com.microsoft.recognizers.text.utilities.Match;
import com.microsoft.recognizers.text.utilities.RegExpUtility;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Extracts phone numbers. Candidates are produced by the inherited pattern matching and then
 * filtered by digit count, surrounding characters and a mask for hexadecimal-looking sequences.
 */
public class BasePhoneNumberExtractor extends BaseSequenceExtractor {

    private static final Pattern INTERNATIONAL_DIALING_PREFIX_REGEX = Pattern
        .compile(BasePhoneNumbers.InternationDialingPrefixRegex);
    private static final Pattern PRE_CHECK_PHONE_NUMBER_REGEX = Pattern
        .compile(BasePhoneNumbers.PreCheckPhoneNumberRegex);
    private static final Pattern SSN_FILTER_REGEX = Pattern.compile(BasePhoneNumbers.SSNFilterRegex);
    // Compiled once instead of on every call.
    private static final Pattern FORMAT_INDICATOR_REGEX = Pattern.compile(BasePhoneNumbers.FormatIndicatorRegex);
    private static final Pattern PHONE_NUMBER_MASK_REGEX = Pattern.compile(BasePhoneNumbers.PhoneNumberMaskRegex);
    private static final Pattern NON_DIGIT_TEXT_REGEX = Pattern.compile("^[^0-9]+$");
    // Source label that exempts a candidate from the minimum-digit rule.
    private static final String IT_PHONE_NUMBER_SOURCE = "ITPhoneNumber";
    private static List<Character> SPECIAL_BOUNDARY_MARKERS = BasePhoneNumbers.SpecialBoundaryMarkers;

    private PhoneNumberConfiguration config;

    // Note: this declaration hides BaseSequenceExtractor.extractType; the constructor copies the
    // value into the inherited field so both views agree.
    protected String extractType = Constants.SYS_PHONE_NUMBER;

    /**
     * Creates the extractor and compiles one pattern per supported phone-number format.
     *
     * @param config supplies the boundary expressions used to build the patterns and the prefix
     *               rules applied while filtering
     */
    public BasePhoneNumberExtractor(PhoneNumberConfiguration config) {
        this.config = config;

        // Java fields are not overridden: the inherited extract() would otherwise stamp results
        // with the base default type instead of SYS_PHONE_NUMBER.
        super.extractType = this.extractType;

        String wordBoundariesRegex = config.getWordBoundariesRegex();
        String nonWordBoundariesRegex = config.getNonWordBoundariesRegex();
        String endWordBoundariesRegex = config.getEndWordBoundariesRegex();

        Map<Pattern, String> regexes = new HashMap<Pattern, String>();
        regexes.put(
            Pattern.compile(BasePhoneNumbers.GeneralPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_GENERAL);
        regexes.put(
            Pattern.compile(BasePhoneNumbers.BRPhoneNumberRegex(wordBoundariesRegex, nonWordBoundariesRegex,
                endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_BR);
        regexes.put(
            Pattern.compile(BasePhoneNumbers.UKPhoneNumberRegex(wordBoundariesRegex, nonWordBoundariesRegex,
                endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_UK);
        regexes.put(
            Pattern.compile(BasePhoneNumbers.DEPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_DE);
        regexes.put(
            Pattern.compile(BasePhoneNumbers.USPhoneNumberRegex(wordBoundariesRegex, nonWordBoundariesRegex,
                endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_US);
        regexes.put(
            Pattern.compile(BasePhoneNumbers.CNPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_CN);
        regexes.put(
            Pattern.compile(BasePhoneNumbers.DKPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_DK);
        regexes.put(
            Pattern.compile(BasePhoneNumbers.ITPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_IT);
        regexes.put(
            Pattern.compile(BasePhoneNumbers.NLPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_NL);
        regexes.put(
            Pattern.compile(BasePhoneNumbers.SpecialPhoneNumberRegex(wordBoundariesRegex, endWordBoundariesRegex)),
            Constants.PHONE_NUMBER_REGEX_SPECIAL);
        this.regexes = regexes;
    }

    @Override
    protected String getExtractType() {
        return this.extractType;
    }

    /**
     * Extracts phone numbers from {@code text}.
     *
     * <p>A candidate is dropped when it has fewer than 7 digits (unless its source label is
     * "ITPhoneNumber"), looks like an SSN, has 16 digits without a leading '+', has 15 digits split
     * only into space-separated groups of 3 or 4 digits, is followed by a forbidden suffix marker,
     * is preceded by a false-positive prefix, a boundary marker or a forbidden prefix marker
     * (with the exceptions handled below), or lies inside a span matched by the phone-number mask.
     * A candidate preceded by an international dialing prefix is widened to include that prefix.
     *
     * @param text the text to scan; {@code null} yields an empty list
     * @return the surviving candidates
     */
    @Override
    public List<ExtractResult> extract(String text) {
        if (text == null || !PRE_CHECK_PHONE_NUMBER_REGEX.matcher(text).find()) {
            return new ArrayList<ExtractResult>();
        }

        List<ExtractResult> ers = super.extract(text);

        for (int i = 0; i < ers.size(); i++) {
            ExtractResult er = ers.get(i);
            int digitCount = countDigits(er.getText());

            // Compare the source label by value (the former '!=' compared references) and tolerate
            // a missing label; reuse the compiled SSN pattern instead of recompiling it per item.
            if ((digitCount < 7 && !IT_PHONE_NUMBER_SOURCE.equals(String.valueOf(er.getData())))
                || SSN_FILTER_REGEX.matcher(er.getText()).matches()) {
                ers.remove(i);
                i--;
                continue;
            }

            if (digitCount == 16 && !er.getText().startsWith("+")) {
                ers.remove(i);
                i--;
                continue;
            }

            if (digitCount == 15) {
                // Keep the candidate only if some space-separated group has a digit count other
                // than 3 or 4.
                boolean hasIrregularGroup = false;
                for (String numSpan : er.getText().split(" ")) {
                    int groupDigits = countDigits(numSpan);
                    if (groupDigits != 4 && groupDigits != 3) {
                        hasIrregularGroup = true;
                        break;
                    }
                }
                if (!hasIrregularGroup) {
                    ers.remove(i);
                    i--;
                    continue;
                }
            }

            if (er.getStart() + er.getLength() < text.length()) {
                Character ch = text.charAt(er.getStart() + er.getLength());
                if (BasePhoneNumbers.ForbiddenSuffixMarkers.contains(ch)) {
                    ers.remove(i);
                    i--;
                    continue;
                }
            }

            if (er.getStart() != 0) {
                Character ch = text.charAt(er.getStart() - 1);
                String front = text.substring(0, er.getStart() - 1);

                if (this.config.getFalsePositivePrefixRegex() != null
                    && this.config.getFalsePositivePrefixRegex().matcher(front).find()) {
                    ers.remove(i);
                    i--;
                    continue;
                }

                if (BasePhoneNumbers.BoundaryMarkers.contains(ch)) {
                    if (SPECIAL_BOUNDARY_MARKERS.contains(ch)
                        && checkFormattedPhoneNumber(er.getText())
                        && er.getStart() >= 2) {
                        Character charGap = text.charAt(er.getStart() - 2);
                        if (!Character.isDigit(charGap) && !Character.isWhitespace(charGap)) {
                            // check if the extracted string has a non-digit string before "-".
                            boolean onlyNonDigitsBefore = NON_DIGIT_TEXT_REGEX
                                .matcher(text.substring(0, er.getStart() - 2)).matches();

                            // Handle cases like "91a-677-0060".
                            if (Character.isLowerCase(charGap) && !onlyNonDigitsBefore) {
                                ers.remove(i);
                                i--;
                            }
                            continue;
                        }

                        // check the international dialing prefix
                        if (INTERNATIONAL_DIALING_PREFIX_REGEX.matcher(front).find()) {
                            Integer moveOffset = RegExpUtility.getMatches(INTERNATIONAL_DIALING_PREFIX_REGEX,
                                front)[0].length + 1;
                            er.setStart(er.getStart() - moveOffset);
                            er.setLength(er.getLength() + moveOffset);
                            er.setText(text.substring(er.getStart(), er.getStart() + er.getLength()));
                            continue;
                        }
                    }

                    // Handle cases like "-1234567" and "-1234+5678"
                    ers.remove(i);
                    i--;
                    // The candidate is gone: stop here so the prefix-marker check below cannot
                    // decrement the index a second time for the same element.
                    continue;
                }

                if (this.config.getForbiddenPrefixMarkers().contains(ch)) {
                    // Handle "tel:123456"
                    if (BasePhoneNumbers.ColonMarkers.contains(ch)
                        && this.config.getColonPrefixCheckRegex().matcher(front).find()) {
                        continue;
                    }

                    ers.remove(i);
                    i--;
                }
            }
        }

        // filter hexadecimal address like 00 10 00 31 46 D9 E9 11
        Match[] maskMatchCollection = RegExpUtility.getMatches(PHONE_NUMBER_MASK_REGEX, text);
        for (int index = ers.size() - 1; index >= 0; --index) {
            for (Match m : maskMatchCollection) {
                if (ers.get(index).getStart() >= m.index
                    && ers.get(index).getStart() + ers.get(index).getLength() <= m.index + m.length) {
                    ers.remove(index);
                    break;
                }
            }
        }

        return ers;
    }

    /** Returns true when the text contains at least one format indicator. */
    private static boolean checkFormattedPhoneNumber(String phoneNumberText) {
        return FORMAT_INDICATOR_REGEX.matcher(phoneNumberText).find();
    }

    /** Counts the characters of {@code candidateString} for which Character.isDigit is true. */
    private static int countDigits(String candidateString) {
        int count = 0;
        for (int k = 0; k < candidateString.length(); k++) {
            if (Character.isDigit(candidateString.charAt(k))) {
                ++count;
            }
        }
        return count;
    }
}

Просмотреть файл

@ -0,0 +1,23 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.extractors;
import com.microsoft.recognizers.text.sequence.SequenceOptions;
import com.microsoft.recognizers.text.sequence.config.PhoneNumberConfiguration;
import com.microsoft.recognizers.text.sequence.resources.BasePhoneNumbers;
import java.util.regex.Pattern;
/**
 * Default phone-number extractor configuration: every setting is taken from the shared
 * {@code BasePhoneNumbers} resource definitions.
 */
public class BasePhoneNumberExtractorConfiguration extends PhoneNumberConfiguration {

    /**
     * Builds the configuration.
     *
     * @param options options forwarded unchanged to {@link PhoneNumberConfiguration}
     */
    public BasePhoneNumberExtractorConfiguration(SequenceOptions options) {
        super(options);

        // Boundary expressions.
        this.setWordBoundariesRegex(BasePhoneNumbers.WordBoundariesRegex);
        this.setNonWordBoundariesRegex(BasePhoneNumbers.NonWordBoundariesRegex);
        this.setEndWordBoundariesRegex(BasePhoneNumbers.EndWordBoundariesRegex);

        // Colon prefix handling.
        final Pattern colonPrefixCheck = Pattern.compile(BasePhoneNumbers.ColonPrefixCheckRegex);
        this.setColonPrefixCheckRegex(colonPrefixCheck);
        this.setColonMarkers(BasePhoneNumbers.ColonMarkers);

        // Characters that may not directly precede or follow a phone number.
        this.setForbiddenPrefixMarkers(BasePhoneNumbers.ForbiddenPrefixMarkers);
        this.setForbiddenSuffixMarkers(BasePhoneNumbers.ForbiddenSuffixMarkers);
    }
}

Просмотреть файл

@ -0,0 +1,101 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.extractors;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.IExtractor;
import com.microsoft.recognizers.text.utilities.Match;
import com.microsoft.recognizers.text.utilities.RegExpUtility;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Stream;
/**
 * Base class for regex-driven sequence extractors. Subclasses fill {@link #regexes} with compiled
 * patterns mapped to a source label and may override {@link #getExtractType()},
 * {@link #isValidMatch(Match)} and {@link #postFilter(List)}.
 */
public abstract class BaseSequenceExtractor implements IExtractor {

    protected Map<Pattern, String> regexes;

    protected String extractType = "";

    /**
     * Hook applied to the extraction results before they are returned.
     *
     * @param results the raw results
     * @return the results to return to the caller; this implementation returns its argument
     */
    protected List<ExtractResult> postFilter(List<ExtractResult> results) {
        return results;
    }

    protected Map<Pattern, String> getRegexes() {
        return regexes;
    }

    protected String getExtractType() {
        return extractType;
    }

    /**
     * Extracts sequences from {@code text}.
     *
     * <p>All valid pattern matches are projected onto a per-character mask; every maximal run of
     * marked characters that coincides exactly with one match becomes a result whose data is the
     * source label of that match.
     *
     * @param text the text to scan; {@code null} or empty yields an empty list
     * @return the extracted results after {@link #postFilter(List)}
     */
    @Override
    public List<ExtractResult> extract(String text) {
        List<ExtractResult> result = new ArrayList<>();
        if (text == null || text.isEmpty()) {
            return result;
        }

        Map<Match, String> matchSource = new HashMap<>();
        boolean[] matched = new boolean[text.length()];

        // Traverse every match result to record which positions in the text are matched.
        regexes.forEach((pattern, sourceName) -> {
            for (Match match : RegExpUtility.getMatches(pattern, text)) {
                if (isValidMatch(match)) {
                    for (int j = 0; j < match.length; j++) {
                        matched[match.index + j] = true;
                    }
                    // Keep Source Data for extra information
                    matchSource.put(match, sourceName);
                }
            }
        });

        // Form the extracted results from the matched intervals in the text.
        int lastNotMatched = -1;
        for (int i = 0; i < text.length(); i++) {
            if (!matched[i]) {
                lastNotMatched = i;
                continue;
            }
            if (i + 1 < text.length() && matched[i + 1]) {
                // Still inside a run of matched characters.
                continue;
            }

            // Position i closes a run of matched characters.
            int start = lastNotMatched + 1;
            int length = i - lastNotMatched;

            // Use the match that actually covers this span. The former code took the first key of
            // the map regardless of its position, so a result could carry the label of an
            // unrelated pattern.
            Map.Entry<Match, String> source = findSource(matchSource, start, length);
            if (source != null) {
                ExtractResult extResult = new ExtractResult();
                extResult.setStart(start);
                extResult.setLength(length);
                extResult.setText(text.substring(start, start + length));
                // Go through the accessor: subclasses that declare their own extractType field
                // hide this class's field, and reading the field directly would yield "".
                extResult.setType(getExtractType());
                extResult.setData(source.getValue());
                result.add(extResult);
            }
        }

        return this.postFilter(result);
    }

    /**
     * Decides whether a raw regex match may contribute to the results.
     *
     * @param match the match to inspect
     * @return {@code true} in this implementation; subclasses may veto matches
     */
    public Boolean isValidMatch(Match match) {
        return true;
    }

    /** Returns the entry whose match spans exactly [start, start + length), or null if none does. */
    private static Map.Entry<Match, String> findSource(Map<Match, String> matchSource, int start, int length) {
        for (Map.Entry<Match, String> entry : matchSource.entrySet()) {
            Match candidate = entry.getKey();
            if (candidate.index == start && candidate.length == length) {
                return entry;
            }
        }
        return null;
    }
}

Просмотреть файл

@ -0,0 +1,57 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.extractors;
import com.microsoft.recognizers.text.matcher.MatchResult;
import com.microsoft.recognizers.text.matcher.StringMatcher;
import com.microsoft.recognizers.text.sequence.Constants;
import com.microsoft.recognizers.text.sequence.config.URLConfiguration;
import com.microsoft.recognizers.text.sequence.resources.BaseURL;
import com.microsoft.recognizers.text.utilities.Match;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
/**
 * Extracts URLs using the two patterns supplied by the configuration plus
 * {@code BaseURL.UrlRegex2}. Matches that contain an ambiguous time term are rejected.
 */
public class BaseURLExtractor extends BaseSequenceExtractor {

    private final URLConfiguration config;

    private StringMatcher tldMatcher;

    private Pattern ambiguousTimeTerm;

    // Note: this declaration hides BaseSequenceExtractor.extractType; the constructor copies the
    // value into the inherited field so both views agree.
    protected final String extractType = Constants.SYS_URL;

    /**
     * Creates the extractor.
     *
     * @param config supplies the URL and IP-URL patterns
     */
    public BaseURLExtractor(URLConfiguration config) {
        this.config = config;

        // Java fields are not overridden: code in the base class that reads its own extractType
        // field would otherwise see the base default instead of SYS_URL.
        super.extractType = this.extractType;

        // A plain map instead of double-brace initialisation, which would create an anonymous
        // HashMap subclass holding a reference to this extractor.
        Map<Pattern, String> regexes = new HashMap<Pattern, String>();
        regexes.put(config.getUrlRegex(), Constants.URL_REGEX);
        regexes.put(config.getIpUrlRegex(), Constants.URL_REGEX);
        regexes.put(Pattern.compile(BaseURL.UrlRegex2), Constants.URL_REGEX);
        super.regexes = regexes;

        this.ambiguousTimeTerm = Pattern.compile(BaseURL.AmbiguousTimeTerm);

        this.tldMatcher = new StringMatcher();
        this.tldMatcher.init(BaseURL.TldList);
    }

    @Override
    protected Map<Pattern, String> getRegexes() {
        return regexes;
    }

    @Override
    protected String getExtractType() {
        return extractType;
    }

    /**
     * Rejects matches in which the ambiguous-time-term pattern is found.
     *
     * @param match the raw regex match
     * @return {@code false} when the match value contains an ambiguous time term
     */
    @Override
    public Boolean isValidMatch(Match match) {
        // For cases like "7.am" or "8.pm" which are more likely time terms.
        return !this.ambiguousTimeTerm.matcher(match.value).find();
    }
}

Просмотреть файл

@ -0,0 +1,64 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.models;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.IExtractor;
import com.microsoft.recognizers.text.IModel;
import com.microsoft.recognizers.text.IParser;
import com.microsoft.recognizers.text.ModelResult;
import com.microsoft.recognizers.text.ParseResult;
import com.microsoft.recognizers.text.ResolutionKey;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import java.util.stream.Collectors;
/**
 * Base model for sequence recognizers: runs an extractor over the query, parses every extraction
 * and converts the parse results into {@link ModelResult}s.
 */
public abstract class AbstractSequenceModel implements IModel {

    protected String modelTypeName;

    protected IExtractor extractor;

    protected IParser parser;

    /**
     * Creates the model.
     *
     * @param withParser    parser applied to every extraction
     * @param withExtractor extractor applied to the query
     */
    protected AbstractSequenceModel(IParser withParser, IExtractor withExtractor) {
        this.parser = withParser;
        this.extractor = withExtractor;
    }

    /**
     * Recognizes sequences in {@code query}.
     *
     * @param query the text to analyse
     * @return one result per parsed extraction, with an inclusive end index and a resolution
     *         containing {@code ResolutionKey.Value}; results parsed before an exception are kept
     */
    public List<ModelResult> parse(String query) {
        List<ParseResult> parsedSequences = new ArrayList<ParseResult>();

        try {
            for (ExtractResult result : extractor.extract(query)) {
                parsedSequences.add(this.parser.parse(result));
            }
        } catch (Exception ex) {
            // Nothing to do. Exceptions in parse should not break users of recognizers.
            // No result.
        }

        return parsedSequences.stream().map(o -> {
            // A plain TreeMap instead of double-brace initialisation: the latter creates an
            // anonymous subclass per result that keeps this model and the ParseResult reachable
            // for as long as the resolution map is alive.
            TreeMap<String, Object> resolution = new TreeMap<String, Object>();
            resolution.put(ResolutionKey.Value, o.getResolutionStr());
            return new ModelResult(o.getText(), o.getStart(), o.getStart() + o.getLength() - 1, modelTypeName,
                resolution);
        }).collect(Collectors.toList());
    }

    public String getModelTypeName() {
        return modelTypeName;
    }

    public IExtractor getExtractor() {
        return extractor;
    }

    public IParser getParser() {
        return parser;
    }
}

Просмотреть файл

@ -0,0 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.models;
import com.microsoft.recognizers.text.IExtractor;
import com.microsoft.recognizers.text.IParser;
import com.microsoft.recognizers.text.sequence.Constants;
/**
 * Sequence model for e-mail addresses; recognition itself is inherited from
 * {@link AbstractSequenceModel}.
 */
public class EmailModel extends AbstractSequenceModel {

    /**
     * Creates the model and labels its results with {@code Constants.MODEL_EMAIL}.
     *
     * @param parser    parser applied to every extraction
     * @param extractor extractor applied to the query
     */
    public EmailModel(IParser parser, IExtractor extractor) {
        super(parser, extractor);
        modelTypeName = Constants.MODEL_EMAIL;
    }
}

Просмотреть файл

@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.models;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.IExtractor;
import com.microsoft.recognizers.text.IParser;
import com.microsoft.recognizers.text.ModelResult;
import com.microsoft.recognizers.text.ParseResult;
import com.microsoft.recognizers.text.ResolutionKey;
import com.microsoft.recognizers.text.sequence.Constants;
import com.microsoft.recognizers.text.utilities.QueryProcessor;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import java.util.stream.Collectors;
/**
 * Sequence model for GUIDs. Unlike the base model it preprocesses the query and adds the parser's
 * value to the resolution under {@code ResolutionKey.Score}.
 */
public class GUIDModel extends AbstractSequenceModel {

    /**
     * Creates the model and labels its results with {@code Constants.MODEL_GUID}.
     *
     * @param parser    parser applied to every extraction
     * @param extractor extractor applied to the query
     */
    public GUIDModel(IParser parser, IExtractor extractor) {
        super(parser, extractor);
        this.modelTypeName = Constants.MODEL_GUID;
    }

    /**
     * Recognizes GUIDs in {@code query}.
     *
     * @param query the text to analyse; it is passed through {@code QueryProcessor.preprocess} first
     * @return one result per parsed extraction, with an inclusive end index and a resolution
     *         containing {@code ResolutionKey.Value} and {@code ResolutionKey.Score}
     */
    @Override
    public List<ModelResult> parse(String query) {
        List<ParseResult> parsedSequences = new ArrayList<ParseResult>();

        // Preprocess the query
        query = QueryProcessor.preprocess(query);

        try {
            for (ExtractResult result : extractor.extract(query)) {
                parsedSequences.add(this.parser.parse(result));
            }
        } catch (Exception ex) {
            // Nothing to do. Exceptions in parse should not break users of recognizers.
            // No result.
        }

        return parsedSequences.stream().map(o -> {
            // A plain TreeMap instead of double-brace initialisation: the latter creates an
            // anonymous subclass per result that keeps this model and the ParseResult reachable
            // for as long as the resolution map is alive.
            TreeMap<String, Object> resolution = new TreeMap<String, Object>();
            resolution.put(ResolutionKey.Value, o.getResolutionStr());
            resolution.put(ResolutionKey.Score, o.getValue());
            return new ModelResult(o.getText(), o.getStart(), o.getStart() + o.getLength() - 1, modelTypeName,
                resolution);
        }).collect(Collectors.toList());
    }
}

Просмотреть файл

@ -0,0 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.models;
import com.microsoft.recognizers.text.IExtractor;
import com.microsoft.recognizers.text.IParser;
import com.microsoft.recognizers.text.sequence.Constants;
/**
 * Sequence model for hashtags; recognition itself is inherited from
 * {@link AbstractSequenceModel}.
 */
public class HashTagModel extends AbstractSequenceModel {

    /**
     * Creates the model and labels its results with {@code Constants.MODEL_HASHTAG}.
     *
     * @param parser    parser applied to every extraction
     * @param extractor extractor applied to the query
     */
    public HashTagModel(IParser parser, IExtractor extractor) {
        super(parser, extractor);
        modelTypeName = Constants.MODEL_HASHTAG;
    }
}

Просмотреть файл

@ -0,0 +1,52 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.models;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.IExtractor;
import com.microsoft.recognizers.text.IParser;
import com.microsoft.recognizers.text.ModelResult;
import com.microsoft.recognizers.text.ParseResult;
import com.microsoft.recognizers.text.ResolutionKey;
import com.microsoft.recognizers.text.sequence.Constants;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import java.util.stream.Collectors;
/**
 * Sequence model for IP addresses. Parse results without data are discarded; for the others the
 * data is exposed in the resolution under {@code ResolutionKey.Type}.
 */
public class IpAddressModel extends AbstractSequenceModel {

    /**
     * Creates the model and labels its results with {@code Constants.MODEL_IP}.
     *
     * @param parser    parser applied to every extraction
     * @param extractor extractor applied to the query
     */
    public IpAddressModel(IParser parser, IExtractor extractor) {
        super(parser, extractor);
        this.modelTypeName = Constants.MODEL_IP;
    }

    /**
     * Recognizes IP addresses in {@code query}.
     *
     * @param query the text to analyse
     * @return one result per parsed extraction that carries data, with an inclusive end index and
     *         a resolution containing {@code ResolutionKey.Value} and {@code ResolutionKey.Type}
     */
    @Override
    public List<ModelResult> parse(String query) {
        List<ParseResult> parsedSequences = new ArrayList<ParseResult>();

        try {
            for (ExtractResult result : extractor.extract(query)) {
                parsedSequences.add(this.parser.parse(result));
            }
        } catch (Exception ex) {
            // Nothing to do. Exceptions in parse should not break users of recognizers.
            // No result.
        }

        return parsedSequences.stream()
            .filter(o -> o.getData() != null)
            .map(o -> {
                // A plain TreeMap instead of double-brace initialisation: the latter creates an
                // anonymous subclass per result that keeps this model and the ParseResult
                // reachable for as long as the resolution map is alive.
                TreeMap<String, Object> resolution = new TreeMap<String, Object>();
                resolution.put(ResolutionKey.Value, o.getResolutionStr());
                resolution.put(ResolutionKey.Type, o.getData());
                return new ModelResult(o.getText(), o.getStart(), o.getStart() + o.getLength() - 1,
                    modelTypeName, resolution);
            })
            .collect(Collectors.toList());
    }
}

Просмотреть файл

@ -0,0 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.models;
import com.microsoft.recognizers.text.IExtractor;
import com.microsoft.recognizers.text.IParser;
import com.microsoft.recognizers.text.sequence.Constants;
/**
 * Sequence model for mentions; recognition itself is inherited from
 * {@link AbstractSequenceModel}.
 */
public class MentionModel extends AbstractSequenceModel {

    /**
     * Creates the model and labels its results with {@code Constants.MODEL_MENTION}.
     *
     * @param parser    parser applied to every extraction
     * @param extractor extractor applied to the query
     */
    public MentionModel(IParser parser, IExtractor extractor) {
        super(parser, extractor);
        modelTypeName = Constants.MODEL_MENTION;
    }
}

Просмотреть файл

@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.models;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.IExtractor;
import com.microsoft.recognizers.text.IParser;
import com.microsoft.recognizers.text.ModelResult;
import com.microsoft.recognizers.text.ParseResult;
import com.microsoft.recognizers.text.ResolutionKey;
import com.microsoft.recognizers.text.sequence.Constants;
import com.microsoft.recognizers.text.utilities.QueryProcessor;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import java.util.stream.Collectors;
/**
 * Sequence model for phone numbers. Unlike the base model it preprocesses the query and adds the
 * parser's value to the resolution under {@code ResolutionKey.Score}.
 */
public class PhoneNumberModel extends AbstractSequenceModel {

    /**
     * Creates the model and labels its results with {@code Constants.MODEL_PHONE_NUMBER}.
     *
     * @param parser    parser applied to every extraction
     * @param extractor extractor applied to the query
     */
    public PhoneNumberModel(IParser parser, IExtractor extractor) {
        super(parser, extractor);
        this.modelTypeName = Constants.MODEL_PHONE_NUMBER;
    }

    /**
     * Recognizes phone numbers in {@code query}.
     *
     * @param query the text to analyse; it is passed through {@code QueryProcessor.preprocess} first
     * @return one result per parsed extraction, with an inclusive end index and a resolution
     *         containing {@code ResolutionKey.Value} and {@code ResolutionKey.Score}
     */
    @Override
    public List<ModelResult> parse(String query) {
        List<ParseResult> parsedSequences = new ArrayList<ParseResult>();

        // Preprocess the query
        query = QueryProcessor.preprocess(query);

        try {
            for (ExtractResult result : extractor.extract(query)) {
                parsedSequences.add(this.parser.parse(result));
            }
        } catch (Exception ex) {
            // Nothing to do. Exceptions in parse should not break users of recognizers.
            // No result.
        }

        return parsedSequences.stream().map(o -> {
            // A plain TreeMap instead of double-brace initialisation: the latter creates an
            // anonymous subclass per result that keeps this model and the ParseResult reachable
            // for as long as the resolution map is alive.
            TreeMap<String, Object> resolution = new TreeMap<String, Object>();
            resolution.put(ResolutionKey.Value, o.getResolutionStr());
            resolution.put(ResolutionKey.Score, o.getValue());
            return new ModelResult(o.getText(), o.getStart(), o.getStart() + o.getLength() - 1, modelTypeName,
                resolution);
        }).collect(Collectors.toList());
    }
}

Просмотреть файл

@ -0,0 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.models;
import com.microsoft.recognizers.text.IExtractor;
import com.microsoft.recognizers.text.IParser;
import com.microsoft.recognizers.text.sequence.Constants;
/**
 * Sequence model for URLs; recognition itself is inherited from {@link AbstractSequenceModel}.
 */
public class URLModel extends AbstractSequenceModel {

    /**
     * Creates the model and labels its results with {@code Constants.MODEL_URL}.
     *
     * @param parser    parser applied to every extraction
     * @param extractor extractor applied to the query
     */
    public URLModel(IParser parser, IExtractor extractor) {
        super(parser, extractor);
        modelTypeName = Constants.MODEL_URL;
    }
}

Просмотреть файл

@ -0,0 +1,47 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.parsers;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.ParseResult;
import com.microsoft.recognizers.text.utilities.StringUtility;
import org.apache.commons.lang3.StringUtils;
/**
 * Parser for IP addresses: the resolution string is the extracted text with the leading zeros of
 * every '.'- or ':'-separated group removed, and the extraction's data is carried over.
 */
public class BaseIpParser extends BaseSequenceParser {

    /**
     * Converts an extraction into a parse result.
     *
     * @param extResult the extraction to convert
     * @return a parse result with the extraction's position, text, type and data, a {@code null}
     *         value, and the zero-stripped text as resolution string
     */
    @Override
    public ParseResult parse(ExtractResult extResult) {
        return new ParseResult(extResult.getStart(), extResult.getLength(), extResult.getText(),
            extResult.getType(), extResult.getData(), null, BaseIpParser.dropLeadingZeros(extResult.getText()));
    }

    /**
     * Removes leading zeros from each group of {@code text}; separators ('.' and ':') are copied
     * unchanged and a group that becomes blank is written as "0". Blank groups between separators
     * are left out.
     */
    private static String dropLeadingZeros(String text) {
        // Builders instead of repeated String concatenation inside the loop.
        StringBuilder result = new StringBuilder(text.length());
        StringBuilder group = new StringBuilder();

        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '.' || c == ':') {
                String pending = group.toString();
                if (!StringUtils.isBlank(pending)) {
                    result.append(normalizeGroup(pending));
                }
                result.append(c);
                group.setLength(0);
            } else {
                group.append(c);
            }
        }

        // A non-empty builder here means the text ended inside a group.
        if (group.length() > 0) {
            result.append(normalizeGroup(group.toString()));
        }

        return result.toString();
    }

    /** Strips the leading zeros of one group; a group left blank becomes "0". */
    private static String normalizeGroup(String group) {
        // Value comparison: the former 'number == "0"' compared object references.
        String stripped = "0".equals(group) ? group : StringUtility.trimStart(group, "^[0]+", "");
        return StringUtils.isBlank(stripped) ? "0" : stripped;
    }
}

Просмотреть файл

@ -0,0 +1,17 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.microsoft.recognizers.text.sequence.parsers;
import com.microsoft.recognizers.text.ExtractResult;
import com.microsoft.recognizers.text.IParser;
import com.microsoft.recognizers.text.ParseResult;
/**
 * Default parser for sequence entities.
 *
 * <p>It performs no transformation: the parse result mirrors the extraction's start, length, text
 * and type, and the extracted text is passed again as the final constructor argument.</p>
 */
public class BaseSequenceParser implements IParser {

    /**
     * Wraps an extraction in a {@link ParseResult} without altering its text.
     *
     * @param extResult the extraction to wrap
     * @return a parse result built from {@code extResult}
     */
    @Override
    public ParseResult parse(ExtractResult extResult) {
        return new ParseResult(
            extResult.getStart(),
            extResult.getLength(),
            extResult.getText(),
            extResult.getType(),
            null,
            null,
            extResult.getText());
    }
}

Просмотреть файл

@ -0,0 +1,33 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
/**
 * Regular-expression source strings for e-mail address recognition.
 *
 * <p>Tokens written as {@code {Name}} inside a template are filled in with {@link String#replace}
 * when the class is initialized. The constants are plain strings, not compiled patterns.</p>
 */
public class BaseEmail {
// Simple form: local part, '@', dotted domain, then a suffix of 2-6 letters or dots.
public static final String EmailRegex = "(([-a-zA-Z0-9_\\+\\.]+)@([-a-zA-Z\\d\\.]+)\\.([a-zA-Z\\.]{2,6}))";
// Four dot-separated runs of 1-3 digits captured as group "ipv4"; the value of each run is not range-checked.
public static final String IPv4Regex = "(?<ipv4>(\\d{1,3}\\.){3}\\d{1,3})";
// One or more dot-terminated labels followed by a top-level domain captured as group "tld".
public static final String NormalSuffixRegex = "(([0-9A-Za-z][-]*[0-9A-Za-z]*\\.)+(?<tld>[a-zA-Z][\\-a-zA-Z]{0,22}[a-zA-Z]))";
// NOTE(review): "(?(...)yes|no)" is a conditional group. java.util.regex.Pattern does not list that
// construct, so this string (and EmailSuffix / EmailRegex2, which embed it) is presumably not
// compilable in Java -- confirm that callers never pass them to Pattern.compile.
// Each \"\" in the literal yields two consecutive double-quote characters in the resulting string.
public static final String EmailPrefix = "(?(\"\")(\"\".+?(?<!\\\\)\"\")|(([0-9A-Za-z]((\\.(?!\\.))|[-!#\\$%&'\\*\\+/=\\?\\^\\{\\}\\|~\\w])*)(?<=[0-9A-Za-z])))";
// Either a bracketed IPv4 literal or a normal dotted domain; uses the same conditional-group syntax as EmailPrefix.
public static final String EmailSuffix = "(?(\\[)(\\[{IPv4Regex}\\])|{NormalSuffixRegex})"
.replace("{IPv4Regex}", IPv4Regex)
.replace("{NormalSuffixRegex}", NormalSuffixRegex);
// Full address assembled from EmailPrefix and EmailSuffix.
public static final String EmailRegex2 = "(({EmailPrefix})@({EmailSuffix}))"
.replace("{EmailPrefix}", EmailPrefix)
.replace("{EmailSuffix}", EmailSuffix);
// Anchored with \A and \z, so it only matches when the entire input is an address.
public static final String RFC5322Regex = "\\A(?:[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\"\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])\\z";
}

Просмотреть файл

@ -0,0 +1,20 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
/**
 * Regular-expression source strings for GUID recognition.
 *
 * <p>The {@code {GUIDRegexElement}} token is filled in with {@link String#replace} when the class
 * is initialized.</p>
 */
public class BaseGUID {
// A bare GUID body: either the hyphenated 8-4-4-4-12 layout or 32 contiguous characters.
// Only lowercase a-f is listed, so matching uppercase input depends on the flags used at compile time.
public static final String GUIDRegexElement = "(([a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12})|([a-f0-9]{32}))";
// Accepted wrappers around the body: word-bounded, {...}, "urn:uuid:" prefix, %7b...%7d, and x'...'.
public static final String GUIDRegex = "(\\b{GUIDRegexElement}\\b|\\{{GUIDRegexElement}\\}|urn:uuid:{GUIDRegexElement}\\b|%7[b]{GUIDRegexElement}%7[d]|[x]\\'{GUIDRegexElement}\\')"
.replace("{GUIDRegexElement}", GUIDRegexElement);
}

Просмотреть файл

@ -0,0 +1,17 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
/** Regular-expression source string for hashtag recognition. */
public class BaseHashtag {
// '#' followed by letters, digits or underscores; the lookbehind requires whitespace or start of input before '#'.
public static final String HashtagRegex = "((?<=\\s|^)#([a-zA-Z0-9_]+))";
}

Просмотреть файл

@ -0,0 +1,64 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
/**
 * Regular-expression source strings for IPv4 and IPv6 address recognition.
 *
 * <p>Tokens written as {@code {Name}} inside a template are filled in with {@link String#replace}
 * when the class is initialized. Sequences such as {@code {1,7}} are ordinary regex quantifiers and
 * are left untouched.</p>
 */
public class BaseIp {
// Four dot-separated octets, each limited to 0-255 and allowed up to two leading zeros, between word boundaries.
public static final String Ipv4Regex = "\\b(1\\d{2}|2[0-4]\\d|25[0-5]|0?[1-9]\\d|0{0,2}\\d)((\\.(1\\d{2}|2[0-4]\\d|25[0-5]|0?[1-9]\\d|0{0,2}\\d)){3})\\b";
// One IPv6 group: 1-4 hexadecimal digits.
public static final String BasicIpv6Element = "([\\da-fA-F]{1,4})";
// Uncompressed form: eight colon-separated groups.
public static final String BasicIpv6Regex = "(({BasicIpv6Element}:){7}{BasicIpv6Element})"
.replace("{BasicIpv6Element}", BasicIpv6Element);
// Compressed forms containing "::". Ipv6EllipsisRegex1 has no group before the "::" and 1-7 after it.
// Ipv6EllipsisRegexN for N = 2..7 has N-1 groups before the "::" and between 1 and 8-N groups after it.
public static final String Ipv6EllipsisRegex1 = "(:(:{BasicIpv6Element}){1,7})"
.replace("{BasicIpv6Element}", BasicIpv6Element);
public static final String Ipv6EllipsisRegex2 = "(({BasicIpv6Element}:){1}((:{BasicIpv6Element}){1,6}))"
.replace("{BasicIpv6Element}", BasicIpv6Element);
public static final String Ipv6EllipsisRegex3 = "(({BasicIpv6Element}:){2}((:{BasicIpv6Element}){1,5}))"
.replace("{BasicIpv6Element}", BasicIpv6Element);
public static final String Ipv6EllipsisRegex4 = "(({BasicIpv6Element}:){3}((:{BasicIpv6Element}){1,4}))"
.replace("{BasicIpv6Element}", BasicIpv6Element);
public static final String Ipv6EllipsisRegex5 = "(({BasicIpv6Element}:){4}((:{BasicIpv6Element}){1,3}))"
.replace("{BasicIpv6Element}", BasicIpv6Element);
public static final String Ipv6EllipsisRegex6 = "(({BasicIpv6Element}:){5}((:{BasicIpv6Element}){1,2}))"
.replace("{BasicIpv6Element}", BasicIpv6Element);
public static final String Ipv6EllipsisRegex7 = "(({BasicIpv6Element}:){6}((:{BasicIpv6Element}){1}))"
.replace("{BasicIpv6Element}", BasicIpv6Element);
// Seven groups followed by a trailing "::".
public static final String Ipv6EllipsisRegex8 = "(({BasicIpv6Element}:){7}(:))"
.replace("{BasicIpv6Element}", BasicIpv6Element);
// Forms that start or end with "::" (or are just "::"). They are kept separate because they are anchored
// with \B next to the colons instead of \b.
public static final String Ipv6EllipsisRegexOther = "\\B::\\B|\\B:(:{BasicIpv6Element}){1,7}\\b|\\b({BasicIpv6Element}:){1,7}:\\B"
.replace("{BasicIpv6Element}", BasicIpv6Element);
// Alternation of the uncompressed form and Ipv6EllipsisRegex1 through Ipv6EllipsisRegex8.
public static final String MergedIpv6Regex = "({BasicIpv6Regex}|{Ipv6EllipsisRegex1}|{Ipv6EllipsisRegex2}|{Ipv6EllipsisRegex3}|{Ipv6EllipsisRegex4}|{Ipv6EllipsisRegex5}|{Ipv6EllipsisRegex6}|{Ipv6EllipsisRegex7}|{Ipv6EllipsisRegex8})"
.replace("{BasicIpv6Regex}", BasicIpv6Regex)
.replace("{Ipv6EllipsisRegex1}", Ipv6EllipsisRegex1)
.replace("{Ipv6EllipsisRegex2}", Ipv6EllipsisRegex2)
.replace("{Ipv6EllipsisRegex3}", Ipv6EllipsisRegex3)
.replace("{Ipv6EllipsisRegex4}", Ipv6EllipsisRegex4)
.replace("{Ipv6EllipsisRegex5}", Ipv6EllipsisRegex5)
.replace("{Ipv6EllipsisRegex6}", Ipv6EllipsisRegex6)
.replace("{Ipv6EllipsisRegex7}", Ipv6EllipsisRegex7)
.replace("{Ipv6EllipsisRegex8}", Ipv6EllipsisRegex8);
// Top-level IPv6 pattern: the merged forms between word boundaries, or one of the "other" ellipsis forms.
public static final String Ipv6Regex = "(\\b{MergedIpv6Regex}\\b)|({Ipv6EllipsisRegexOther})"
.replace("{MergedIpv6Regex}", MergedIpv6Regex)
.replace("{Ipv6EllipsisRegexOther}", Ipv6EllipsisRegexOther);
}

Просмотреть файл

@ -0,0 +1,17 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
public class BaseMention {
public static final String MentionRegex = "@([a-zA-Z0-9_]+)(?![.]\\w)\\b";
}

Просмотреть файл

@ -0,0 +1,119 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
import java.util.Arrays;
import java.util.List;
/**
 * Regular-expression source strings and marker lists for phone number recognition.
 *
 * <p>The country-specific patterns are exposed as static methods rather than constants: each method
 * takes the boundary sub-patterns to use and substitutes them for the {@code {Name}} tokens of its
 * template with {@link String#replace}. The method parameters deliberately carry the same names as
 * the default boundary constants declared below and shadow them inside the method bodies.</p>
 */
public class BasePhoneNumbers {
public static final String NumberReplaceToken = "@builtin.phonenumber";
// Default boundary sub-patterns. ChinesePhoneNumbers in this package declares alternatives under the same names.
public static final String WordBoundariesRegex = "\\b";
public static final String NonWordBoundariesRegex = "\\B";
public static final String EndWordBoundariesRegex = "\\b";
// NOTE(review): the '.' after \d{1,4} is unescaped, so it matches any character -- confirm this is intended.
public static final String PreCheckPhoneNumberRegex = "(\\d{1,4}.){2,4}\\s?\\d{2,3}";
// Builds the generic (country-independent) phone number pattern.
public static String GeneralPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
return "({WordBoundariesRegex}(((\\d[\\s]?){4,12}))(-?[\\d\\s?]{3}\\d)(?!-){EndWordBoundariesRegex})|(\\(\\d{5}\\)\\s?\\d{5,6})|\\+\\d{2}\\(\\d\\)\\d{10}"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Builds the BR pattern; the optional prefix is built around the literal country code 55.
public static String BRPhoneNumberRegex(String WordBoundariesRegex, String NonWordBoundariesRegex, String EndWordBoundariesRegex) {
return "((\\(\\s?(\\+\\s?|00)55\\s?\\)\\s?)|(((?<!\\d)\\+\\s?|{WordBoundariesRegex}00)55\\s?)|{WordBoundariesRegex})?((({NonWordBoundariesRegex}\\(\\s?))\\d{2,3}(\\s?\\))|({WordBoundariesRegex}\\d{2,3}))\\s?\\d{4,5}-?\\d{3,5}(?!-){EndWordBoundariesRegex}"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{NonWordBoundariesRegex}", NonWordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Builds the UK pattern.
public static String UKPhoneNumberRegex(String WordBoundariesRegex, String NonWordBoundariesRegex, String EndWordBoundariesRegex) {
return "((({WordBoundariesRegex}(00)|{NonWordBoundariesRegex}\\+)\\s?)?({WordBoundariesRegex}\\d{2}\\s?)?((\\s?\\(0\\)[-\\s]?|{WordBoundariesRegex}|(?<=(\\b^#)\\d{2}))\\d{2,5}|\\(0\\d{3,4}\\))[/-]?\\s?(\\d{5,8}|\\d{3,4}[-\\s]?\\d{3,4})(?!-){EndWordBoundariesRegex})"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{NonWordBoundariesRegex}", NonWordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Builds the DE pattern.
public static String DEPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
return "((\\+\\d{2}\\s?((\\(0\\))?\\d\\s?)?|{WordBoundariesRegex})(\\d{2,4}\\s?[-/]?[\\s\\d]{7,10}\\d)(?!-){EndWordBoundariesRegex})"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Builds the US pattern, including an optional "x"/"X"/"ext" extension of 3-5 digits.
public static String USPhoneNumberRegex(String WordBoundariesRegex, String NonWordBoundariesRegex, String EndWordBoundariesRegex) {
return "((((({NonWordBoundariesRegex}\\+)|{WordBoundariesRegex})1(\\s|-)?)|{WordBoundariesRegex})?(\\d{3}\\)[-\\s]?|\\(\\d{3}\\)[-\\.\\s]?|{WordBoundariesRegex}\\d{3}\\s?[-\\.]?\\s?)|{WordBoundariesRegex})[2-9]\\d{2}\\s?[-\\.]?\\s?\\d{4}(\\s?(x|X|ext)\\s?\\d{3,5})?(?!(-\\s?\\d)){EndWordBoundariesRegex}"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{NonWordBoundariesRegex}", NonWordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Builds the CN pattern; the optional prefix accepts the literal codes 86, 82 and 81.
public static String CNPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
return "(({WordBoundariesRegex}00\\s?)?\\+?(86|82|81)\\s?-?\\s?)?((({WordBoundariesRegex}|(?<=(86|82|81)))\\d{2,5}\\s?-?\\s?|\\(\\d{2,5}\\)\\s?)\\d{4}\\s?-?\\s?\\d{4}(\\s?-?\\s?\\d{4})?|(\\b|(?<=(86|82|81)))\\d{3}\\s?-?\\s?\\d{4}\\s?-?\\s?\\d{4})(?!-){EndWordBoundariesRegex}"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Builds the DK pattern; the optional prefix is built around the literal country code 45.
public static String DKPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
return "((\\(\\s?(\\+\\s?|00)45\\s?\\)\\s?)|(((?<!\\d)\\+\\s?|\\b00)45\\s?)|{WordBoundariesRegex})(\\s?\\(0\\)\\s?)?((\\d{8})|(\\d{4}\\s?-?\\s?\\d{4,6})|((\\d{2}[\\s-]){3}\\d{2})|(\\d{2}\\s?-?\\s?\\d{3}\\s?-?\\s?\\d{3}))(?!-){EndWordBoundariesRegex}"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Builds the IT pattern; the optional prefix is built around the literal country code 39.
public static String ITPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
return "((\\(\\s?(\\+\\s?|00)39\\s?\\)\\s?)|(((?<!\\d)\\+\\s?|\\b00)39\\s?)|{WordBoundariesRegex})((0[\\d-]{4,12}\\d)|(3[\\d-]{7,12}\\d)|(0[\\d\\s]{4,12}\\d)|(3[\\d\\s]{7,12}\\d))(?!-){EndWordBoundariesRegex}"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Builds the NL pattern; the optional prefix is built around the literal country code 31.
public static String NLPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
return "((((\\(\\s?(\\+\\s?|00)31\\s?\\)\\s?)|(((?<!\\d)\\+\\s?|{WordBoundariesRegex}00)31\\s?))?((({WordBoundariesRegex}|(?<=31))0?\\d{1,3}|\\(\\s?0?\\d{1,3}\\s?\\)|\\(0\\)[-\\s]?\\d{1,3})((-?[\\d]{5,11})|(\\s[\\d\\s]{5,11}))\\d))|\\b\\d{10,12})(?!-){EndWordBoundariesRegex}"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Builds a pattern for three digit groups joined by '/' or '-'.
public static String SpecialPhoneNumberRegex(String WordBoundariesRegex, String EndWordBoundariesRegex) {
return "({WordBoundariesRegex}(\\d{3,4}[/-]\\d{1,4}[/-]\\d{3,4}){EndWordBoundariesRegex})"
.replace("{WordBoundariesRegex}", WordBoundariesRegex)
.replace("{EndWordBoundariesRegex}", EndWordBoundariesRegex);
}
// Auxiliary patterns and marker lists. How each one is applied is decided by code outside this class;
// the descriptions below only restate what the literals themselves match.
// Seven-digit number (3 + 4 with a '-' or '.' separator) not preceded by '-', a digit, ')' or '.'.
public static final String NoAreaCodeUSPhoneNumberRegex = "(?<!(-|-\\s|\\d|\\)|\\)\\s|\\.))[2-9]\\d{2}\\s?[-\\.]\\s?\\d{4}(?!(-\\s?\\d))\\b";
// Text ending in "00" or "011".
public static final String InternationDialingPrefixRegex = "0(0|11)$";
public static final List<String> TypicalDeductionRegexList = Arrays.asList("^\\d{5}-\\d{4}$", "\\)\\.", "^0(0|11)(-)");
// Eight space-separated pairs of characters from 0-9 and a-e.
public static final String PhoneNumberMaskRegex = "([0-9a-e]{2}(\\s[0-9a-e]{2}){7})";
// Leading country code of 1-3 digits introduced by '+' or "00", optionally parenthesized.
public static final String CountryCodeRegex = "^(\\(\\s?(\\+\\s?|00)\\d{1,3}\\s?\\)|(\\+\\s?|00)\\d{1,3})";
public static final String AreaCodeIndicatorRegex = "\\(";
public static final String FormatIndicatorRegex = "(\\s|-|/|\\.)+";
public static final List<Character> ColonMarkers = Arrays.asList(':');
public static final String ColonPrefixCheckRegex = "(([a-z])\\s*$)";
public static final List<Character> SpecialBoundaryMarkers = Arrays.asList('-', ' ');
public static final List<Character> BoundaryMarkers = Arrays.asList('-', '.', '/', '+', '#', '*');
public static final List<Character> ForbiddenPrefixMarkers = Arrays.asList(',', ':', '%');
public static final List<Character> ForbiddenSuffixMarkers = Arrays.asList('/', '+', '#', '*', ':', '%');
// Whole-string match for the 3-2-4 digit layout.
public static final String SSNFilterRegex = "^\\d{3}-\\d{2}-\\d{4}$";
}

Просмотреть файл

@ -0,0 +1,48 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
import java.util.Arrays;
import java.util.List;
/**
 * Regular-expression source strings and the top-level-domain list for URL recognition.
 *
 * <p>Tokens written as {@code {Name}} inside a template are filled in with {@link String#replace}
 * when the class is initialized. Sequences such as {@code {0,256}} are ordinary regex quantifiers.</p>
 */
public class BaseURL {
// "http://", "https://" or "ftp://".
public static final String ProtocolRegex = "((https?|ftp):\\/\\/)";
// ':' followed by 1-5 digits.
public static final String PortRegex = "(:\\d{1,5})";
// Lookbehind limiting where a URL may start: after whitespace, one of ' " ( [ :, or at the start of input.
// The \"\" in the literal puts the double-quote character into the class twice, which has no extra effect.
public static final String ExtractionRestrictionRegex = "(?<=\\s|[\\'\"\"\\(\\[:]|^)";
// Host part up to and including the dot before the TLD. The protocol is optional when the
// extraction restriction holds; otherwise an explicit protocol is required.
public static final String UrlPrefixRegex = "({ExtractionRestrictionRegex}{ProtocolRegex}?|{ProtocolRegex})[a-zA-Z0-9][-a-zA-Z0-9._]{0,256}(?<![.])\\."
.replace("{ExtractionRestrictionRegex}", ExtractionRestrictionRegex)
.replace("{ProtocolRegex}", ProtocolRegex);
// Optional port and optional path/fragment; the negative lookahead stops a match from ending in the middle of URL characters.
public static final String UrlSuffixRegex = "{PortRegex}?([/#][-a-zA-Z0-9:%_\\+.~#?!&//=]*)?(?![-a-zA-Z0-9:%_\\+~#?!&//=@])"
.replace("{PortRegex}", PortRegex);
// Main URL pattern; the top-level domain is captured as group "Tld".
// The {ProtocolRegex} replacement below finds nothing to replace: the template has no such token and
// UrlPrefixRegex was already expanded above.
public static final String UrlRegex = "{UrlPrefixRegex}(?<Tld>[a-zA-Z]{2,18}){UrlSuffixRegex}"
.replace("{UrlPrefixRegex}", UrlPrefixRegex)
.replace("{ProtocolRegex}", ProtocolRegex)
.replace("{UrlSuffixRegex}", UrlSuffixRegex);
// Alternative pattern requiring a protocol or a leading "www."; also captures group "Tld".
public static final String UrlRegex2 = "((ht|f)tp(s?)\\:\\/\\/|www\\.)[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.(?<Tld>[a-zA-Z0-9()]{1,6})\\b([-a-zA-Z0-9()@:%_\\+.~#?&//=]*)";
// URL whose host is an IPv4 address or "localhost"; the protocol is mandatory. Captured as group "IPurl".
public static final String IpUrlRegex = "(?<IPurl>({ExtractionRestrictionRegex}{ProtocolRegex}({BaseIp.Ipv4Regex}|localhost){UrlSuffixRegex}))"
.replace("{ExtractionRestrictionRegex}", ExtractionRestrictionRegex)
.replace("{ProtocolRegex}", ProtocolRegex)
.replace("{BaseIp.Ipv4Regex}", BaseIp.Ipv4Regex)
.replace("{UrlSuffixRegex}", UrlSuffixRegex);
// Whole-string match for an hour 0-23, one arbitrary character, then "am" or "pm" (e.g. "7.pm").
// Presumably used to discard time expressions mistaken for URLs -- confirm against the extractor.
public static final String AmbiguousTimeTerm = "^(1?[0-9]|2[0-3]).[ap]m$";
// Lowercase top-level-domain names; how they are matched against the "Tld" group is decided by callers.
public static final List<String> TldList = Arrays.asList("com", "org", "net", "int", "edu", "gov", "mil", "academy", "app", "aws", "bot", "buy", "cafe", "city", "cloud", "company", "eco", "education", "game", "games", "gmbh", "law", "limited", "live", "llc", "ltd", "ltda", "map", "med", "news", "ngo", "ong", "phd", "place", "radio", "science", "search", "shopping", "sport", "store", "tvs", "wiki", "work", "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bl", "bm", "bn", "bo", "bq", "br", "bs", "bt", "bv", "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv", "cw", "cx", "cy", "cz", "de", "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "eh", "er", "es", "et", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gb", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm", "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly", "ma", "mc", "md", "me", "mf", "mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "rs", "ru", "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk", "sl", "sm", "sn", "so", "sr", "ss", "st", "su", "sv", "sx", "sy", "sz", "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt", "tv", "tw", "tz", "ua", "ug", "uk", "um", "us", "uy", "uz", "va", "vc", "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "yt", "za", "zm", "zw");
}

Просмотреть файл

@ -0,0 +1,31 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
/**
 * Regular-expression source strings for IP address recognition using the boundary sub-patterns of
 * {@link ChinesePhoneNumbers} in place of plain {@code \b} / {@code \B}.
 *
 * <p>Tokens written as {@code {Name}} inside a template are filled in with {@link String#replace}
 * when the class is initialized.</p>
 *
 * <p>The boundary constants are referenced through the {@code ChinesePhoneNumbers} class name.
 * Reading a static field through an instance reference that is never assigned (and therefore
 * {@code null}) is legal Java, but it reads like a guaranteed {@code NullPointerException} and hides
 * where the value comes from.</p>
 *
 * <p>NOTE(review): this file is marked as auto-generated; mirror the change in the generator,
 * otherwise regeneration will bring the placeholder field back.</p>
 */
public class ChineseIp {

    // Four dot-separated octets (0-255, up to two leading zeros) between the Chinese-aware boundaries.
    public static final String Ipv4Regex = "({PhoneNumbersDefinitions.WordBoundariesRegex}(1\\d{2}|2[0-4]\\d|25[0-5]|0?[1-9]\\d|0{0,2}\\d)((\\.(1\\d{2}|2[0-4]\\d|25[0-5]|0?[1-9]\\d|0{0,2}\\d)){3}){PhoneNumbersDefinitions.EndWordBoundariesRegex})"
        .replace("{PhoneNumbersDefinitions.WordBoundariesRegex}", ChinesePhoneNumbers.WordBoundariesRegex)
        .replace("{PhoneNumbersDefinitions.EndWordBoundariesRegex}", ChinesePhoneNumbers.EndWordBoundariesRegex);

    // IPv6 forms that start or end with "::" (or are just "::").
    public static final String Ipv6EllipsisRegexOther = "({PhoneNumbersDefinitions.NonWordBoundariesRegex}::{PhoneNumbersDefinitions.NonWordBoundariesRegex}|{PhoneNumbersDefinitions.NonWordBoundariesRegex}:(:{BaseIp.BasicIpv6Element}){1,7}{PhoneNumbersDefinitions.WordBoundariesRegex}|{PhoneNumbersDefinitions.WordBoundariesRegex}({BaseIp.BasicIpv6Element}:){1,7}:{PhoneNumbersDefinitions.NonWordBoundariesRegex})"
        .replace("{PhoneNumbersDefinitions.NonWordBoundariesRegex}", ChinesePhoneNumbers.NonWordBoundariesRegex)
        .replace("{PhoneNumbersDefinitions.WordBoundariesRegex}", ChinesePhoneNumbers.WordBoundariesRegex)
        .replace("{BaseIp.BasicIpv6Element}", BaseIp.BasicIpv6Element);

    // Top-level IPv6 pattern: the merged base forms between boundaries, or one of the forms above.
    public static final String Ipv6Regex = "({PhoneNumbersDefinitions.WordBoundariesRegex}{BaseIp.MergedIpv6Regex}{PhoneNumbersDefinitions.WordBoundariesRegex})|({Ipv6EllipsisRegexOther})"
        .replace("{PhoneNumbersDefinitions.WordBoundariesRegex}", ChinesePhoneNumbers.WordBoundariesRegex)
        .replace("{BaseIp.MergedIpv6Regex}", BaseIp.MergedIpv6Regex)
        .replace("{Ipv6EllipsisRegexOther}", Ipv6EllipsisRegexOther);
}

Просмотреть файл

@ -0,0 +1,30 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
import java.util.Arrays;
import java.util.List;
/**
 * Phone number recognition resources for Chinese text.
 *
 * <p>The boundary sub-patterns extend {@code \b} / {@code \B} so that a position next to a
 * character in the range U+0800 to U+9FFF also counts as a boundary.</p>
 */
public class ChinesePhoneNumbers {
public static final String NumberReplaceToken = "@builtin.phonenumber";
// Word boundary, or a position directly after a character in U+0800..U+9FFF.
public static final String WordBoundariesRegex = "(\\b|(?<=[\\u0800-\\u9FFF]))";
// Non-word boundary, or a position directly after a character in U+0800..U+9FFF.
public static final String NonWordBoundariesRegex = "(\\B|(?<=[\\u0800-\\u9FFF]))";
// Word boundary, or a position directly before a character in U+0800..U+9FFF.
public static final String EndWordBoundariesRegex = "(\\b|(?=[\\u0800-\\u9FFF]))";
// Text ending in a lowercase Latin letter or a character in U+4E00..U+9FA5, followed by optional whitespace.
public static final String ColonPrefixCheckRegex = "(([a-z]|[\\u4E00-\\u9FA5])\\s*$)";
public static final List<Character> ForbiddenPrefixMarkers = Arrays.asList(':', '%');
}

Просмотреть файл

@ -0,0 +1,31 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
/**
 * URL recognition patterns for Chinese text, built from the shared pieces in {@code BaseURL} and
 * {@code BaseIp}.
 *
 * <p>Tokens written as {@code {Name}} inside a template are filled in with {@link String#replace}
 * when the class is initialized.</p>
 */
public class ChineseURL {
// Same start-of-URL restriction as BaseURL.ExtractionRestrictionRegex, with one extra alternative:
// a URL may also start directly after a character in the range U+0800 to U+9FFF.
public static final String ExtractionRestrictionRegex = "(?<=\\s|[\\'\"\"\\(\\[:]|^|[\\u0800-\\u9FFF])";
// Host part up to and including the dot before the TLD; the protocol is optional when the restriction holds.
public static final String UrlPrefixRegex = "({ExtractionRestrictionRegex}{BaseURL.ProtocolRegex}?|{BaseURL.ProtocolRegex})[a-zA-Z0-9][-a-zA-Z0-9._]{0,256}(?<![.])\\."
.replace("{ExtractionRestrictionRegex}", ExtractionRestrictionRegex)
.replace("{BaseURL.ProtocolRegex}", BaseURL.ProtocolRegex);
// Main URL pattern; the top-level domain is captured as group "Tld".
public static final String UrlRegex = "{UrlPrefixRegex}(?<Tld>[a-zA-Z]{2,18}){BaseURL.UrlSuffixRegex}"
.replace("{UrlPrefixRegex}", UrlPrefixRegex)
.replace("{BaseURL.UrlSuffixRegex}", BaseURL.UrlSuffixRegex);
// URL whose host is an IPv4 address or "localhost"; the protocol is mandatory. Captured as group "IPurl".
public static final String IpUrlRegex = "(?<IPurl>({ExtractionRestrictionRegex}{BaseURL.ProtocolRegex}({BaseIp.Ipv4Regex}|localhost){BaseURL.UrlSuffixRegex}))"
.replace("{ExtractionRestrictionRegex}", ExtractionRestrictionRegex)
.replace("{BaseURL.ProtocolRegex}", BaseURL.ProtocolRegex)
.replace("{BaseIp.Ipv4Regex}", BaseIp.Ipv4Regex)
.replace("{BaseURL.UrlSuffixRegex}", BaseURL.UrlSuffixRegex);
}

Просмотреть файл

@ -0,0 +1,19 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
/** Phone number recognition resources specific to English text. */
public class EnglishPhoneNumbers {
public static final String NumberReplaceToken = "@builtin.phonenumber";
// Matches text that ends with "account" or "card", optionally followed by "#" or "number", "is" and ':'.
// Presumably applied to the text preceding a candidate number to reject account/card numbers --
// confirm against the extractor.
public static final String FalsePositivePrefixRegex = "(account|card)(\\s+(#|number))?(\\s+is)?:?\\s*$";
}

Просмотреть файл

@ -0,0 +1,19 @@
// ------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------
package com.microsoft.recognizers.text.sequence.resources;
/** Phone number recognition resources specific to Portuguese text. */
public class PortuguesePhoneNumbers {
public static final String NumberReplaceToken = "@builtin.phonenumber";
// Matches text that ends with "conta", optionally followed by "número" or "bancária" (itself optionally
// followed by "número"), then an optional "é" and an optional ':'.
// Presumably applied to the text preceding a candidate number to reject account numbers --
// confirm against the extractor.
public static final String FalsePositivePrefixRegex = "conta(\\s+(número|bancária(\\s+número)?))?(\\s+é)?:?\\s*$";
}

Просмотреть файл

@ -1,6 +1,5 @@
package com.microsoft.recognizers.text.utilities;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@ -13,6 +12,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.javatuples.Pair;
public abstract class RegExpUtility {
@ -23,9 +23,6 @@ public abstract class RegExpUtility {
private static final Pattern matchNegativeLookbehind = Pattern.compile("\\(\\?<!", Pattern.CASE_INSENSITIVE);
private static final String groupNameIndexSep = "iii";
private static final String groupNameIndexSepRegex = Pattern.quote(groupNameIndexSep);
private static final boolean unboundedLookBehindNotSupported = isRestrictedJavaVersion();
private static final Pattern lookBehindCheckRegex = Pattern.compile("(\\\\?<[!=])");
private static final Map<Character, String> bindings = new HashMap<Character, String>() {
{
@ -177,14 +174,8 @@ public abstract class RegExpUtility {
}
/**
 * Compiles {@code source} into a {@link Pattern} after passing it through {@code bindLookbehinds}.
 *
 * <p>NOTE(review): as shown here, {@code bindLookbehinds} is applied once under the
 * {@code unboundedLookBehindNotSupported} guard and once more unconditionally. This looks like the
 * removed and the added lines of a diff rendered together -- confirm which form is current before
 * relying on either.</p>
 *
 * @param source the regular-expression source text
 * @param flags  match flags forwarded to {@link Pattern#compile(String, int)}
 * @return the compiled pattern
 */
public static Pattern getSafeLookbehindRegExp(String source, int flags) {
String result = source;
// Java pre 1.9 doesn't support unbounded lookbehind lengths
if (unboundedLookBehindNotSupported) {
result = bindLookbehinds(result);
}
result = bindLookbehinds(result);
return Pattern.compile(result, flags);
}
@ -387,33 +378,4 @@ public abstract class RegExpUtility {
return resultString.toString();
}
// Checks if Java version is <= 8, as they don't support look-behind groups with no maximum length.
//
// The version is read from the "java.specification.version" system property and parsed as a
// BigDecimal; if that throws, "java.version" is tried the same way; if that throws as well the
// result stays false.
//
// NOTE(review): both comparisons use ">= 0", so the method returns true when the reported version
// is 1.8 or HIGHER, which is the opposite of the "<= 8" wording above and in the warning below.
// Confirm which direction is intended before changing either the code or the text.
private static boolean isRestrictedJavaVersion() {
boolean result = false;
BigDecimal targetVersion = new BigDecimal("1.8");
try {
String specVersion = System.getProperty("java.specification.version");
result = new BigDecimal(specVersion).compareTo(targetVersion) >= 0;
} catch (Exception e1) {
try {
// Could also be "java.runtime.version".
// A value with more than one dot or with a suffix (such as "1.8.0_292") is not a valid
// BigDecimal and ends up in the catch block below.
String runtimeVersion = System.getProperty("java.version");
result = new BigDecimal(runtimeVersion).compareTo(targetVersion) >= 0;
} catch (Exception e2) {
// Nothing to do, ignore.
}
}
if (result) {
System.out.println("WARN: Look-behind groups with no maximum length not supported. Java version <= 8.");
}
return result;
}
}

Просмотреть файл

@ -10,7 +10,11 @@ public abstract class StringUtility {
}
/**
 * Removes the leading run of whitespace from {@code source}.
 *
 * @param source the string to trim
 * @return {@code source} without leading whitespace
 */
public static String trimStart(String source) {
    return trimStart(source, "^\\s+", "");
}

/**
 * Replaces the first match of {@code regex} in {@code source} with {@code replacement}.
 *
 * <p>The method only trims the start of the string when {@code regex} is anchored with {@code ^};
 * the anchoring is the caller's responsibility.</p>
 *
 * @param source      the string to process
 * @param regex       the regular expression to look for
 * @param replacement the text substituted for the first match
 * @return the resulting string
 */
public static String trimStart(String source, String regex, String replacement) {
    return source.replaceFirst(regex, replacement);
}
public static String trimEnd(String source) {