diff --git a/pom.xml b/pom.xml
index dd8353b..4e92078 100755
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
com.zoho
hawking
- 0.1.4
+ 0.1.5
jar
Hawking
Hawking is a natural language date time parser that extracts date and time from text with context and parse to the required format.
diff --git a/src/main/java/com/zoho/hawking/language/english/DateTimeGateWay.java b/src/main/java/com/zoho/hawking/language/english/DateTimeGateWay.java
index b292e34..61f37bf 100644
--- a/src/main/java/com/zoho/hawking/language/english/DateTimeGateWay.java
+++ b/src/main/java/com/zoho/hawking/language/english/DateTimeGateWay.java
@@ -105,9 +105,12 @@ private static int findDateType(DateTimeEssentials dateTimeEssentials) {
for (Triple triple : triples) {
if (triple.first.equals("R")) {
String relationWord = dateTimeEssentials.getSentence().substring(triple.second, triple.third).replace("[^a-zA-z]", ""); //No I18N
+ // NOTE(review): String.replace above treats "[^a-zA-z]" as literal text, so no characters are stripped - confirm whether replaceAll("[^a-zA-Z]", "") was intended.
+ // Lower-case once with a fixed locale so the lookups below do not depend on the JVM default locale (e.g. the Turkish dotless i).
+ String relationKey = relationWord.toLowerCase(java.util.Locale.ROOT);
- if (PrepositionConstants.RELATIONSHIP_RANGE.contains(relationWord)) {
+ if (PrepositionConstants.RELATIONSHIP_RANGE.contains(relationKey)) {
dateType = 0;
- } else if (PrepositionConstants.RELATIONSHIP_SET.contains(relationWord)) {
+ } else if (PrepositionConstants.RELATIONSHIP_SET.contains(relationKey)) {
dateType = 1;
} else {
dateType = 2;
diff --git a/src/main/java/com/zoho/hawking/language/english/DateTimeWordProperties.java b/src/main/java/com/zoho/hawking/language/english/DateTimeWordProperties.java
index 3bec475..435ba60 100755
--- a/src/main/java/com/zoho/hawking/language/english/DateTimeWordProperties.java
+++ b/src/main/java/com/zoho/hawking/language/english/DateTimeWordProperties.java
@@ -195,7 +195,7 @@ public class DateTimeWordProperties {
public static final WordProperty AUGUST = new WordProperty("august", MonthsOfYear.MONTH_EIGHT, new String[]{"aug"});
- public static final WordProperty SEPTEMBER = new WordProperty("september", MonthsOfYear.MONTH_NINE, new String[]{"sep"});
+ public static final WordProperty SEPTEMBER = new WordProperty("september", MonthsOfYear.MONTH_NINE, new String[]{"sep","sept"}); // both "sep" and "sept" are listed as short forms (presumably aliases WordProperty matches - confirm)
public static final WordProperty OCTOBER = new WordProperty("october", MonthsOfYear.MONTH_TEN, new String[]{"oct"});
diff --git a/src/main/java/com/zoho/hawking/language/english/Parser.java b/src/main/java/com/zoho/hawking/language/english/Parser.java
index c9da9bd..3b3b3bd 100755
--- a/src/main/java/com/zoho/hawking/language/english/Parser.java
+++ b/src/main/java/com/zoho/hawking/language/english/Parser.java
@@ -41,6 +41,9 @@ private static CRFClassifier getCRFInstance() {
}
public static List> parse(String input) {
+ // Mask URL markers with dashes of the same length: the text keeps its length, so the character offsets returned below still index into the caller's original string.
+ input = input.replace("http", "----"); //No I18N
+ input = input.replace("www", "---"); //No I18N
return crf.classifyToCharacterOffsets(input);
}
diff --git a/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java b/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java
index b8e7fa1..e0d151d 100644
--- a/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java
+++ b/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java
@@ -37,7 +37,9 @@ public class RecognizerTagger {
private final static Pattern exact_year = Pattern.compile("^\\d{4}$");
+ // Spelled-out ordinals and cardinals plus bare ordinal suffixes, matched against the whole token. The misspelled "eightheenth"/"ninteenth" stay next to the correct forms so input accepted before still matches.
private final static Pattern exact_number_1 = Pattern.compile(
- "^(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eightheenth|ninteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|st|nd|rd|th)$");
+ "^(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|eightheenth|nineteenth|ninteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|st|nd|rd|th)$");
+ // Whole-token match: digits in group 1, optional ordinal suffix in group 2 (same group numbering as before the anchors were added).
- private final static Pattern exact_number_2 = Pattern.compile("(\\d+)(st|nd|rd|th|ᵗʰ|ˢᵗ|ⁿᵈ|ʳᵈ)?");
+ private final static Pattern exact_number_2 = Pattern.compile("^(\\d+)(st|nd|rd|th|ᵗʰ|ˢᵗ|ⁿᵈ|ʳᵈ)?$");
public static String getTagger(String word) {
String tag = "";
diff --git a/src/main/resources/parser/parser.crf.ser.gz b/src/main/resources/parser/parser.crf.ser.gz
index 3c82336..e2308a8 100644
Binary files a/src/main/resources/parser/parser.crf.ser.gz and b/src/main/resources/parser/parser.crf.ser.gz differ