diff --git a/pom.xml b/pom.xml index dd8353b..4e92078 100755 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.zoho hawking - 0.1.4 + 0.1.5 jar Hawking Hawking is a natural language date time parser that extracts date and time from text with context and parse to the required format. diff --git a/src/main/java/com/zoho/hawking/language/english/DateTimeGateWay.java b/src/main/java/com/zoho/hawking/language/english/DateTimeGateWay.java index b292e34..61f37bf 100644 --- a/src/main/java/com/zoho/hawking/language/english/DateTimeGateWay.java +++ b/src/main/java/com/zoho/hawking/language/english/DateTimeGateWay.java @@ -105,9 +105,9 @@ private static int findDateType(DateTimeEssentials dateTimeEssentials) { for (Triple triple : triples) { if (triple.first.equals("R")) { String relationWord = dateTimeEssentials.getSentence().substring(triple.second, triple.third).replace("[^a-zA-z]", ""); //No I18N - if (PrepositionConstants.RELATIONSHIP_RANGE.contains(relationWord)) { + if (PrepositionConstants.RELATIONSHIP_RANGE.contains(relationWord.toLowerCase())) { dateType = 0; - } else if (PrepositionConstants.RELATIONSHIP_SET.contains(relationWord)) { + } else if (PrepositionConstants.RELATIONSHIP_SET.contains(relationWord.toLowerCase())) { dateType = 1; } else { dateType = 2; diff --git a/src/main/java/com/zoho/hawking/language/english/DateTimeWordProperties.java b/src/main/java/com/zoho/hawking/language/english/DateTimeWordProperties.java index 3bec475..435ba60 100755 --- a/src/main/java/com/zoho/hawking/language/english/DateTimeWordProperties.java +++ b/src/main/java/com/zoho/hawking/language/english/DateTimeWordProperties.java @@ -195,7 +195,7 @@ public class DateTimeWordProperties { public static final WordProperty AUGUST = new WordProperty("august", MonthsOfYear.MONTH_EIGHT, new String[]{"aug"}); - public static final WordProperty SEPTEMBER = new WordProperty("september", MonthsOfYear.MONTH_NINE, new String[]{"sep"}); + public static final WordProperty SEPTEMBER = new WordProperty("september", MonthsOfYear.MONTH_NINE, new String[]{"sep","sept"}); public static final WordProperty OCTOBER = new WordProperty("october", MonthsOfYear.MONTH_TEN, new String[]{"oct"}); diff --git a/src/main/java/com/zoho/hawking/language/english/Parser.java b/src/main/java/com/zoho/hawking/language/english/Parser.java index c9da9bd..3b3b3bd 100755 --- a/src/main/java/com/zoho/hawking/language/english/Parser.java +++ b/src/main/java/com/zoho/hawking/language/english/Parser.java @@ -41,6 +41,8 @@ private static CRFClassifier getCRFInstance() { } public static List> parse(String input) { + input = input.replaceAll("http","----"); //No I18N + input = input.replaceAll("www","---"); //No I18N return crf.classifyToCharacterOffsets(input); } diff --git a/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java b/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java index b8e7fa1..e0d151d 100644 --- a/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java +++ b/src/main/java/com/zoho/hawking/utils/RecognizerTagger.java @@ -37,7 +37,7 @@ public class RecognizerTagger { private final static Pattern exact_year = Pattern.compile("^\\d{4}$"); private final static Pattern exact_number_1 = Pattern.compile( "^(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eightheenth|ninteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|st|nd|rd|th)$"); - private final static Pattern exact_number_2 = Pattern.compile("(\\d+)(st|nd|rd|th|ᵗʰ|ˢᵗ|ⁿᵈ|ʳᵈ)?"); + private final static Pattern exact_number_2 = Pattern.compile("^((\\d+)(st|nd|rd|th|ᵗʰ|ˢᵗ|ⁿᵈ|ʳᵈ)?)$"); public static String getTagger(String word) { String tag = ""; diff --git a/src/main/resources/parser/parser.crf.ser.gz b/src/main/resources/parser/parser.crf.ser.gz index 3c82336..e2308a8 100644 Binary files a/src/main/resources/parser/parser.crf.ser.gz and b/src/main/resources/parser/parser.crf.ser.gz differ