Skip to content

Commit

Permalink
Merge pull request #28 from zoho/hawking_dev
Browse files Browse the repository at this point in the history
Master Merge Hawking Enhancement v0.1.6
  • Loading branch information
ArulVendhan committed Aug 25, 2022
2 parents e0d20bc + 1cae7a1 commit f320cf1
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>com.zoho</groupId>
<artifactId>hawking</artifactId>
<version>0.1.5</version>
<version>0.1.6</version>
<packaging>jar</packaging>
<name>Hawking</name>
<description>Hawking is a natural language date time parser that extracts date and time from text with context and parse to the required format.</description>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class HawkingConfiguration {
private int weekRange = 0;
private String dateFormat = "";
private String timeZone = "";
private int maxParseDate= 0;

/**
* @return the year range
Expand Down Expand Up @@ -643,4 +644,19 @@ public void setTimeZone(String timeZone) {

/**
 * Stores the supplied value in the {@code dayhourEnd} field.
 *
 * @param dayhourEnd the new value for {@code dayhourEnd}
 *                   (presumably the hour at which the "day" span ends; confirm against the field's usage)
 */
public void setDayhourEnd(final int dayhourEnd) {
    this.dayhourEnd = dayhourEnd;
}

/**
 * Returns the configured cap on how many date texts are parsed from one input.
 *
 * @return the configured maximum; {@code 0} (the field's initial value) means no cap is applied
 */
public int getMaxParseDate() {
    return this.maxParseDate;
}

/**
 * Sets the cap on how many date texts are parsed from a given input text.
 *
 * <p>The value is stored as-is; no range validation is performed here.
 *
 * @param maxParseDate maximum number of date texts to parse; use a value greater than
 *                     {@code 0} to enable the cap, or {@code 0} (the field's initial value)
 *                     to leave parsing uncapped
 */
public void setMaxParseDate(final int maxParseDate) {
    this.maxParseDate = maxParseDate;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ public List<Pair<Boolean, List<Triple<String, Integer, Integer>>>> getSeparateD
public List<DateTimeProperties> predict(String inputSentence, Date referenceDate, HawkingConfiguration config) {
List<DateTimeProperties> dateList = new ArrayList<>();
List<String> inputSentences = CoreNlpUtils.sentenceTokenize(inputSentence);
int maxParseDates = config.getMaxParseDate();
int dateCounter = 0;
for(String sent: inputSentences){
List<Pair<Boolean, List<Triple<String, Integer, Integer>>>> singleDatesList = getSeparateDates(Parser.parse(sent));
for (Pair<Boolean, List<Triple<String, Integer, Integer>>> relAndDate : singleDatesList) {
Expand All @@ -73,12 +75,14 @@ public List<DateTimeProperties> predict(String inputSentence, Date referenceDate
dateTimeEssentials.setTimeZoneOffSet(dateTimeOffsetReturn.getTimeOffset());
try {
dateList.addAll(DateTimeGateWay.getDateAndTime(dateTimeEssentials));
dateCounter += 1;
if (maxParseDates != 0 && dateCounter == maxParseDates){
return dateList;
}
} catch (Exception e) {
LOGGER.info("HawkingTimeParser :: Exception in Hawking :: Unparsed date component Present");
}
}


}
}
return dateList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ private static ParsedDate tagShrinker(String parseText, ParsedDate parserDateCur
parserDateCurrent.setTaggedWithXML(parserDateCurrent.getTaggedWithXML().replace("<exact_number>" + textOne + "</exact_number> " + "<exact_number> " + textTwo + "</exact_number>", "<exact_number>" + textOne + textTwo + "</exact_number>")); //NO I18n
}
Triple<String, Integer, Integer> triplePrev = i > 0 ? triples.get(i - 1) : null;
String tagPrev = triplePrev != null ? triplePrev.first() : null;
String tagPrev = triplePrev != null ? triplePrev.first() : "";

if (tag.equals("exact_number") && tagg.equals("exact_time") && !((tagPrev.equals("month_of_year")) && (TIMEFORMATREGEX.matcher(textTwo).find() || TIMEFORMATREGEXHMS.matcher(textTwo).find()))) {
Triple<String, Integer, Integer> tripleLocal = new Triple<>("exact_time", triple.second(), triplee.third()); //NO I18n
Expand All @@ -126,7 +126,7 @@ private static ParsedDate tagShrinker(String parseText, ParsedDate parserDateCur
private static ParsedDate tagAlternator(String parseText, ParsedDate parserDateCurrent) {
List<Triple<String, Integer, Integer>> triples = parserDateCurrent.getOutputWithOffsets();
String tag_xml = parserDateCurrent.getTaggedWithXML();
if ((tag_xml.contains("day_of_week") || tag_xml.contains("current_day")) && tag_xml.contains("month_of_year") && tag_xml.contains("exact_number")) {
if ((tag_xml.contains("day_of_week") || tag_xml.contains("current_day")) && (tag_xml.contains("month_of_year") || tag_xml.contains("month_span")) && tag_xml.contains("exact_number")) {
List<Triple<String, Integer, Integer>> triple = parserDateCurrent
.getOutputWithOffsets();
String date_xml = parserDateCurrent.getTaggedWithXML();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public static String cardinalNumberFinder(String dateText) {
} else if (dateText.contains("end day")) {
dateText = dateText.replaceAll("\\b(?i)" + "end day" + "\\b", "");//No I18N
dateText = "last day of " + dateText;//No I18N
} else if (dateText.contains("to go")) {
} else if (dateText.endsWith("to go")) {
dateText = dateText.replaceAll("\\b(?i)" + "to go" + "\\b", "");//No I18N
dateText = "until " + dateText;//No I18N
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/zoho/hawking/utils/RecognizerTagger.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class RecognizerTagger {
private final static Pattern exact_time_1 = Pattern
.compile("^(((0[0-9]|1[0-9]|2[0-3]|[0-9])([:.][0-5][0-9])?([:.][0-5][0-9])?)([AaPp][.]?[Mm]))$");
private final static Pattern exact_time_2 = Pattern
.compile("^(((0[0-9]|1[0-9]|2[0-3]|[0-9])([:.][0-5][0-9])([:.][0-5][0-9])?))$");
.compile("^(((0[0-9]|1[0-9]|2[0-3]|[0-9])([:.][0-5][0-9])([:.][0-5][0-9])?([:.][0-9]{3})?))$");
private final static Pattern exact_time_3 = Pattern.compile("^([AaPp][.]?[Mm])$");

private final static Pattern exact_year = Pattern.compile("^\\d{4}$");
Expand Down

0 comments on commit f320cf1

Please sign in to comment.