diff --git a/pom.xml b/pom.xml index 4e92078..f62b56c 100755 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.zoho hawking - 0.1.5 + 0.1.6 jar Hawking Hawking is a natural language date time parser that extracts date and time from text with context and parse to the required format. diff --git a/src/main/java/com/zoho/hawking/datetimeparser/configuration/HawkingConfiguration.java b/src/main/java/com/zoho/hawking/datetimeparser/configuration/HawkingConfiguration.java index 3561316..8e73592 100755 --- a/src/main/java/com/zoho/hawking/datetimeparser/configuration/HawkingConfiguration.java +++ b/src/main/java/com/zoho/hawking/datetimeparser/configuration/HawkingConfiguration.java @@ -48,6 +48,7 @@ public class HawkingConfiguration { private int weekRange = 0; private String dateFormat = ""; private String timeZone = ""; + private int maxParseDate= 0; /** * @return the year range @@ -643,4 +644,19 @@ public void setTimeZone(String timeZone) { public void setDayhourEnd(int dayhourEnd) { this.dayhourEnd = dayhourEnd; } + /** + * @return the maximum number of date expressions to parse from a text; 0 means no limit + * */ + public int getMaxParseDate() { + return maxParseDate; + } + + /** + * @param maxParseDate the maximum number of date expressions to parse from a given text; + * a positive value caps the count, 0 (the default) means no limit + */ + + public void setMaxParseDate(int maxParseDate) { + this.maxParseDate = maxParseDate; + } } diff --git a/src/main/java/com/zoho/hawking/language/english/EnglishLanguage.java b/src/main/java/com/zoho/hawking/language/english/EnglishLanguage.java index 7eea8d6..974b94b 100755 --- a/src/main/java/com/zoho/hawking/language/english/EnglishLanguage.java +++ b/src/main/java/com/zoho/hawking/language/english/EnglishLanguage.java @@ -49,6 +49,8 @@ public List>>> getSeparateD public List predict(String inputSentence, Date referenceDate, HawkingConfiguration config) { List dateList = new ArrayList<>(); List inputSentences = CoreNlpUtils.sentenceTokenize(inputSentence); + int maxParseDates = config.getMaxParseDate(); + int dateCounter = 0; 
for(String sent: inputSentences){ List>>> singleDatesList = getSeparateDates(Parser.parse(sent)); for (Pair>> relAndDate : singleDatesList) { @@ -73,12 +75,14 @@ public List predict(String inputSentence, Date referenceDate dateTimeEssentials.setTimeZoneOffSet(dateTimeOffsetReturn.getTimeOffset()); try { dateList.addAll(DateTimeGateWay.getDateAndTime(dateTimeEssentials)); + dateCounter += 1; + if (maxParseDates != 0 && dateCounter == maxParseDates){ + return dateList; + } } catch (Exception e) { LOGGER.info("HawkingTimeParser :: Exception in Hawking :: Unparsed date component Present"); } } - - } } return dateList; diff --git a/src/main/java/com/zoho/hawking/language/english/Recognizer.java b/src/main/java/com/zoho/hawking/language/english/Recognizer.java index 526a559..1f593b9 100755 --- a/src/main/java/com/zoho/hawking/language/english/Recognizer.java +++ b/src/main/java/com/zoho/hawking/language/english/Recognizer.java @@ -108,7 +108,7 @@ private static ParsedDate tagShrinker(String parseText, ParsedDate parserDateCur parserDateCurrent.setTaggedWithXML(parserDateCurrent.getTaggedWithXML().replace("" + textOne + " " + " " + textTwo + "", "" + textOne + textTwo + "")); //NO I18n } Triple triplePrev = i > 0 ? triples.get(i - 1) : null; - String tagPrev = triplePrev != null ? triplePrev.first() : null; + String tagPrev = triplePrev != null ? 
triplePrev.first() : ""; if (tag.equals("exact_number") && tagg.equals("exact_time") && !((tagPrev.equals("month_of_year")) && (TIMEFORMATREGEX.matcher(textTwo).find() || TIMEFORMATREGEXHMS.matcher(textTwo).find()))) { Triple tripleLocal = new Triple<>("exact_time", triple.second(), triplee.third()); //NO I18n @@ -126,7 +126,7 @@ private static ParsedDate tagShrinker(String parseText, ParsedDate parserDateCur private static ParsedDate tagAlternator(String parseText, ParsedDate parserDateCurrent) { List> triples = parserDateCurrent.getOutputWithOffsets(); String tag_xml = parserDateCurrent.getTaggedWithXML(); - if ((tag_xml.contains("day_of_week") || tag_xml.contains("current_day")) && tag_xml.contains("month_of_year") && tag_xml.contains("exact_number")) { + if ((tag_xml.contains("day_of_week") || tag_xml.contains("current_day")) && (tag_xml.contains("month_of_year") || tag_xml.contains("month_span")) && tag_xml.contains("exact_number")) { List> triple = parserDateCurrent .getOutputWithOffsets(); String date_xml = parserDateCurrent.getTaggedWithXML(); diff --git a/src/main/java/com/zoho/hawking/utils/DateTimeProperties.java b/src/main/java/com/zoho/hawking/utils/DateTimeProperties.java index 72676ff..a8e0d65 100755 --- a/src/main/java/com/zoho/hawking/utils/DateTimeProperties.java +++ b/src/main/java/com/zoho/hawking/utils/DateTimeProperties.java @@ -101,7 +101,7 @@ public static String cardinalNumberFinder(String dateText) { } else if (dateText.contains("end day")) { dateText = dateText.replaceAll("\\b(?i)" + "end day" + "\\b", "");//No I18N dateText = "last day of " + dateText;//No I18N - } else if (dateText.contains("to go")) { + } else if (dateText.endsWith("to go")) { dateText = dateText.replaceAll("\\b(?i)" + "to go" + "\\b", "");//No I18N dateText = "until " + dateText;//No I18N } diff --git a/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java b/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java index e0d151d..250298b 100644 --- 
a/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java +++ b/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java @@ -31,7 +31,7 @@ public class RecognizerTagger { private final static Pattern exact_time_1 = Pattern .compile("^(((0[0-9]|1[0-9]|2[0-3]|[0-9])([:.][0-5][0-9])?([:.][0-5][0-9])?)([AaPp][.]?[Mm]))$"); private final static Pattern exact_time_2 = Pattern - .compile("^(((0[0-9]|1[0-9]|2[0-3]|[0-9])([:.][0-5][0-9])([:.][0-5][0-9])?))$"); + .compile("^(((0[0-9]|1[0-9]|2[0-3]|[0-9])([:.][0-5][0-9])([:.][0-5][0-9])?([:.][0-9]{3})?))$"); private final static Pattern exact_time_3 = Pattern.compile("^([AaPp][.]?[Mm])$"); private final static Pattern exact_year = Pattern.compile("^\\d{4}$");