From d5d4d0c5cdee1116268d52331bcb627f39f4e703 Mon Sep 17 00:00:00 2001 From: Viktor Govako Date: Wed, 23 Aug 2023 14:17:52 -0300 Subject: [PATCH] [search] Follow up b80e7c9ed05692443bc4a359806b4d6866c5a1a0. Signed-off-by: Viktor Govako --- search/house_numbers_matcher.cpp | 88 +++++++++++-------- .../search_quality_tests/real_mwm_tests.cpp | 48 +++++++++- .../house_numbers_matcher_test.cpp | 3 + 3 files changed, 98 insertions(+), 41 deletions(-) diff --git a/search/house_numbers_matcher.cpp b/search/house_numbers_matcher.cpp index 96bb38c0d9eef..b383493ea1997 100644 --- a/search/house_numbers_matcher.cpp +++ b/search/house_numbers_matcher.cpp @@ -32,51 +32,62 @@ namespace /// @todo By VNG: This list looks hillarious :) Definitely should set some lower bound number /// to filter very exotic entries in addr:housenumber. -/// Removed street keywords for now. -vector const g_strings = { - "a", "aa", "ab", "abc", "ac", "ad", "ae", "af", "ag", + +// Removed street keywords for now and ALL one-letter strings. This list affects search speed, because: +// LooksLikeHouseNumber -> MatchBuildingsWithStreets -> *heavy* StreetVicinityLoader::GetStreet +// "av", "avenida", +// "ca", "cal", "calle", "carrera", "court", +// "da", "de", "di", 
+// "ga", +// "ł", "la", +// "ne", +// "pa", "par", "park", "plaza", +// "rd", "ro", "road", +// "so", "south", "st", "street", +// "vi", +// "way", "we", "west", + +char const * g_strings[] = { + "aa", "ab", "abc", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj", "ak", "al", "am", "an", "ao", "ap", - "aq", "ar", "are", "as", "at", "au", "av", /*"avenida",*/ "aw", - "ax", "ay", "az", "azm", "b", "ba", "bab", "bah", "bak", + "aq", "ar", "are", "as", "at", "au", "aw", + "ax", "ay", "az", "azm", "ba", "bab", "bah", "bak", "bb", "bc", "bd", "be", "bedr", "ben", "bf", "bg", "bh", "bij", "bis", "bk", "bl", "bldg", "blk", "bloc", "block", "bloco", "blok", "bm", "bmn", "bn", "bo", "boe", "bol", "bor", "bov", "box", "bp", "br", "bra", "brc", "bs", "bsa", "bu", "building", - "bv", "bwn", "bx", "by", "c", "ca", "cab", "cal", - /*"calle", "carrera",*/ "cat", "cbi", "cbu", "cc", "ccz", "cd", "ce", - "centre", "cfn", "cgc", "cjg", "cl", "club", "cottage", "cottages", /*"court",*/ - "cso", "cum", "d", "da", "db", "dd", "de", "df", "di", - "dia", "dvu", "e", "ec", "ee", "eh", "em", "en", "esm", - "ev", "f", "fdo", "fer", "ff", "flat", "flats", - "floor", /*"g",*/ "ga", "gar", "gara", "gas", "gb", "gg", "gr", - "grg", "h", "ha", "haus", "hh", "hl", "ho", "house", "hr", - "hs", "hv", "i", "ii", "iii", "int", "iv", "ix", "j", - "jab", "jf", "jj", "jms", "jtg", "k", "ka", "kab", "kk", - "kmb", "kmk", "knn", "koy", "kp", "kra", "ksn", "kud", - "l", "ł", "la", "ldo", "ll", "local", "loja", "lot", "lote", - "lsb", "lt", "m", "mac", "mad", "mah", "mak", "mat", "mb", + "bv", "bwn", "bx", "by", "cab", "cat", "cbi", "cbu", "cc", + "ccz", "cd", "ce", "centre", "cfn", "cgc", "cjg", "cl", "club", + "cottage", "cottages", "cso", "cum", "db", "dd", "df", + "dia", "dvu", "ec", "ee", "eh", "em", "en", "esm", + "ev", "fdo", "fer", "ff", "flat", "flats", "floor", + "gar", "gara", "gas", "gb", "gg", "gr", + "grg", "ha", "haus", "hh", "hl", "ho", "house", "hr", + "hs", "hv", "ii", "iii", "int", "iv", 
"ix", + "jab", "jf", "jj", "jms", "jtg", "ka", "kab", "kk", + "kmb", "kmk", "knn", "koy", "kp", "kra", "ksn", "kud", + "ldo", "ll", "local", "loja", "lot", "lote", + "lsb", "lt", "mac", "mad", "mah", "mak", "mat", "mb", "mbb", "mbn", "mch", "mei", "mks", "mm", "mny", "mo", "mok", - "mor", "msb", "mtj", "mtk", "mvd", "n", "na", - "ncc", "ne", "nij", "nn", "no", "nr", "nst", "nu", "nut", - "o", "of", "ofof", "old", "one", "oo", "opl", "p", "pa", - "pap", "par", /*"park",*/ "pav", "pb", "pch", "pg", "ph", "phd", - "pkf", /*"plaza",*/ "plot", "po", "pos", "pp", "pr", "pra", "pya", - "q", "qq", "quater", "r", "ra", "rbo", "rd", "rear", "reisach", - "rk", "rm", "ro", /*"road",*/ "rosso", "rs", "rw", "s", + "mor", "msb", "mtj", "mtk", "mvd", "na", + "ncc", "nij", "nn", "no", "nr", "nst", "nu", "nut", + "of", "ofof", "old", "one", "oo", "opl", "pa", + "pap", "pav", "pb", "pch", "pg", "ph", "phd", + "pkf", "plot", "po", "pos", "pp", "pr", "pra", "pya", + "qq", "quater", "ra", "rbo", "rear", "reisach", + "rk", "rm", "rosso", "rs", "rw", "sab", "sal", "sav", "sb", "sba", "sbb", "sbl", "sbn", "sbx", "sc", "sch", "sco", "seb", "sep", "sf", "sgr", "sir", - "sj", "sl", "sm", "sn", "snc", "so", "som", /*"south",*/ "sp", - "spi", "spn", "ss", "st", "sta", "stc", "std", "stiege", /*"street",*/ - "suite", "sur", "t", "tam", "ter", "terrace", "tf", "th", "the", + "sj", "sl", "sm", "sn", "snc", "som", "sp", + "spi", "spn", "ss", "sta", "stc", "std", "stiege", + "suite", "sur", "tam", "ter", "terrace", "tf", "th", "the", "tl", "to", "torre", "tr", "traf", "trd", "ts", "tt", "tu", - "u", "uhm", "unit", "utc", "v", "vi", "vii", "w", "wa", - /*"way",*/ "we", /*"west",*/ "wf", "wink", "wrh", "ws", "wsb", "x", - "xx", "y", "z", "za", "zh", "zona", "zu", "zw", "א", - "ב", "ג", "α", "а", "б", "бб", "бл", "в", "вл", - "вх", "г", "д", "е", "ж", "з", "и", "к", "л", - "лит", "м", "н", "о", "п", "р", "разр", "с", - "стр", "т", "тп", "у", "уч", "участок", "ф", "ц", "ა", - "丁目", "之", "号", "號", + 
"uhm", "unit", "utc", "vii", "wa", + "wf", "wink", "wrh", "ws", "wsb", + "xx", "za", "zh", "zona", "zu", "zw", "א", + "ב", "ג", "α", "бб", "бл", "вл", + "вх", "лит", "разр", "стр", "тп", "уч", "участок", "ა", + "丁目", "之", "号", "號", // List of exceptions "владение" @@ -105,9 +116,10 @@ vector const g_patternsStrict = { // List of common synonyms for building parts. Constructed by hand. -vector const g_buildingPartSynonyms = { +char const * g_buildingPartSynonyms[] = { "building", "bldg", "bld", "bl", "unit", "block", "blk", "корпус", - "корп", "кор", "литер", "лит", "строение", "стр", "блок", "бл"}; + "корп", "кор", "литер", "лит", "строение", "стр", "блок", "бл" +}; // List of common stop words for buildings. Constructed by hand. UniString const g_stopWords[] = {MakeUniString("дом"), MakeUniString("house"), MakeUniString("д")}; diff --git a/search/search_quality/search_quality_tests/real_mwm_tests.cpp b/search/search_quality/search_quality_tests/real_mwm_tests.cpp index 48f71352ddcc4..38d8c03995932 100644 --- a/search/search_quality/search_quality_tests/real_mwm_tests.cpp +++ b/search/search_quality/search_quality_tests/real_mwm_tests.cpp @@ -129,14 +129,15 @@ class MwmTestsFixture : public search::tests_support::SearchTestBase } /// @param[in] street, house May be empty. 
- static void HasAddress(Range const & results, std::string const & street, std::string const & house) + static void HasAddress(Range const & results, std::string const & street, std::string const & house, + base::StringIL classifType = {"building"}) { - auto const buildingType = classif().GetTypeByPath({"building"}); + auto const type = classif().GetTypeByPath(classifType); bool found = false; for (auto const & r : results) { - if (r.GetResultType() == search::Result::Type::Feature && EqualClassifType(r.GetFeatureType(), buildingType)) + if (r.GetResultType() == search::Result::Type::Feature && EqualClassifType(r.GetFeatureType(), type)) { auto const & addr = r.GetAddress(); if ((street.empty() || addr.find(street) != std::string::npos) && @@ -852,4 +853,45 @@ UNIT_CLASS_TEST(MwmTestsFixture, BA_SanMartin) classif().GetTypeByPath({"railway", "station"})), 2, ()); } } + +UNIT_CLASS_TEST(MwmTestsFixture, Full_Address) +{ + { + // Krakow + ms::LatLon const center(50.061431, 19.9361584); + SetViewportAndLoadMaps(center); + + auto request = MakeRequest("Sucha Beskidzka Armii Krajowej b-1 kozikowka 34-200 Poland"); + auto const & results = request->Results(); + TEST_GREATER(results.size(), kPopularPoiResultsCount, ()); + + HasAddress(Range(results, 0, 3), "Armii Krajowej", "B-1"); + HasAddress(Range(results, 0, 3), "Armii Krajowej", "B-1A"); + } + + { + // Regensburg (DE) + ms::LatLon const center(49.0195332, 12.0974856); + SetViewportAndLoadMaps(center); + + { + auto request = MakeRequest("Wörth an der Donau Gewerbepark A 1 93086 Germany"); + auto const & results = request->Results(); + TEST_GREATER(results.size(), kPopularPoiResultsCount, ()); + + HasAddress(Range(results, 0, 1), "Gewerbepark A", "A 1", {"shop", "car"}); + } + { + auto request = MakeRequest("Wörth an der Donau Gewerbepark C 1 93086 Germany"); + auto const & results = request->Results(); + TEST_GREATER(results.size(), kPopularPoiResultsCount, ()); + + /// @todo There is a tricky neighborhood here, so 
ranking gets dumb :) + /// 1: "Gewerbepark A", "A 1" near "Gewerbepark C" st + /// 2: "Gewerbepark B", "1" near "Gewerbepark C" st + /// 3: "Gewerbepark C", "1" + HasAddress(Range(results, 0, 3), "Gewerbepark C", "1"); + } + } +} } // namespace real_mwm_tests diff --git a/search/search_tests/house_numbers_matcher_test.cpp b/search/search_tests/house_numbers_matcher_test.cpp index 1861f68e5ba35..bfde0c68add70 100644 --- a/search/search_tests/house_numbers_matcher_test.cpp +++ b/search/search_tests/house_numbers_matcher_test.cpp @@ -215,6 +215,7 @@ UNIT_TEST(HouseNumber_LooksLike) { TEST(LooksLikeHouseNumber("1", false /* isPrefix */), ()); TEST(LooksLikeHouseNumber("ev 10", false /* isPrefix */), ()); + TEST(LooksLikeHouseNumber("ev.1", false /* isPrefix */), ()); TEST(LooksLikeHouseNumber("14 к", true /* isPrefix */), ()); TEST(LooksLikeHouseNumber("14 кор", true /* isPrefix */), ()); @@ -243,6 +244,8 @@ UNIT_TEST(HouseNumber_LooksLike) TEST(LooksLikeHouseNumber("дом ", true /* isPrefix */), ()); TEST(LooksLikeHouseNumber("дом ", false /* isPrefix */), ()); + TEST(LooksLikeHouseNumber("house", true /* isPrefix */), ()); + TEST(LooksLikeHouseNumber("house ", false /* isPrefix */), ()); TEST(LooksLikeHouseNumber("дом 39 строение 79", false /* isPrefix */), ());