Skip to content

Commit

Permalink
[search] Follow up b80e7c9.
Browse files Browse the repository at this point in the history
Signed-off-by: Viktor Govako <[email protected]>
  • Loading branch information
vng committed Aug 27, 2023
1 parent 5224d3d commit d5d4d0c
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 41 deletions.
88 changes: 50 additions & 38 deletions search/house_numbers_matcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,51 +32,62 @@ namespace

/// @todo By VNG: This list looks hilarious :) Definitely should set some lower bound number
/// to filter very exotic entries in addr:housenumber.
/// Removed street keywords for now.
vector<string> const g_strings = {
"a", "aa", "ab", "abc", "ac", "ad", "ae", "af", "ag",

// Removed street keywords for now and ALL one-letter strings. It is sensitive for search speed, because:
// LooksLikeHouseNumber -> MatchBuildingsWithStreets -> *heavy* StreetVicinityLoader::GetStreet
// "av", "avenida",
// "ca", "cal", "calle", "carrera", "court",
// "da", "de", "di",
// "ga",
// "ł", "la",
// "ne",
// "pa", "par", "park", "plaza",
// "rd", "ro", "road",
// "so", "south", "st", "street",
// "vi",
// "way", "we", "west",

char const * g_strings[] = {
"aa", "ab", "abc", "ac", "ad", "ae", "af", "ag",
"ah", "ai", "aj", "ak", "al", "am", "an", "ao", "ap",
"aq", "ar", "are", "as", "at", "au", "av", /*"avenida",*/ "aw",
"ax", "ay", "az", "azm", "b", "ba", "bab", "bah", "bak",
"aq", "ar", "are", "as", "at", "au", "aw",
"ax", "ay", "az", "azm", "ba", "bab", "bah", "bak",
"bb", "bc", "bd", "be", "bedr", "ben", "bf", "bg", "bh",
"bij", "bis", "bk", "bl", "bldg", "blk", "bloc", "block", "bloco",
"blok", "bm", "bmn", "bn", "bo", "boe", "bol", "bor", "bov",
"box", "bp", "br", "bra", "brc", "bs", "bsa", "bu", "building",
"bv", "bwn", "bx", "by", "c", "ca", "cab", "cal",
/*"calle", "carrera",*/ "cat", "cbi", "cbu", "cc", "ccz", "cd", "ce",
"centre", "cfn", "cgc", "cjg", "cl", "club", "cottage", "cottages", /*"court",*/
"cso", "cum", "d", "da", "db", "dd", "de", "df", "di",
"dia", "dvu", "e", "ec", "ee", "eh", "em", "en", "esm",
"ev", "f", "fdo", "fer", "ff", "flat", "flats",
"floor", /*"g",*/ "ga", "gar", "gara", "gas", "gb", "gg", "gr",
"grg", "h", "ha", "haus", "hh", "hl", "ho", "house", "hr",
"hs", "hv", "i", "ii", "iii", "int", "iv", "ix", "j",
"jab", "jf", "jj", "jms", "jtg", "k", "ka", "kab", "kk",
"kmb", "kmk", "knn", "koy", "kp", "kra", "ksn", "kud",
"l", "ł", "la", "ldo", "ll", "local", "loja", "lot", "lote",
"lsb", "lt", "m", "mac", "mad", "mah", "mak", "mat", "mb",
"bv", "bwn", "bx", "by", "cab", "cat", "cbi", "cbu", "cc",
"ccz", "cd", "ce", "centre", "cfn", "cgc", "cjg", "cl", "club",
"cottage", "cottages", "cso", "cum", "db", "dd", "df",
"dia", "dvu", "ec", "ee", "eh", "em", "en", "esm",
"ev", "fdo", "fer", "ff", "flat", "flats", "floor",
"gar", "gara", "gas", "gb", "gg", "gr",
"grg", "ha", "haus", "hh", "hl", "ho", "house", "hr",
"hs", "hv", "ii", "iii", "int", "iv", "ix",
"jab", "jf", "jj", "jms", "jtg", "ka", "kab", "kk",
"kmb", "kmk", "knn", "koy", "kp", "kra", "ksn", "kud",
"ldo", "ll", "local", "loja", "lot", "lote",
"lsb", "lt", "mac", "mad", "mah", "mak", "mat", "mb",
"mbb", "mbn", "mch", "mei", "mks", "mm", "mny", "mo", "mok",
"mor", "msb", "mtj", "mtk", "mvd", "n", "na",
"ncc", "ne", "nij", "nn", "no", "nr", "nst", "nu", "nut",
"o", "of", "ofof", "old", "one", "oo", "opl", "p", "pa",
"pap", "par", /*"park",*/ "pav", "pb", "pch", "pg", "ph", "phd",
"pkf", /*"plaza",*/ "plot", "po", "pos", "pp", "pr", "pra", "pya",
"q", "qq", "quater", "r", "ra", "rbo", "rd", "rear", "reisach",
"rk", "rm", "ro", /*"road",*/ "rosso", "rs", "rw", "s",
"mor", "msb", "mtj", "mtk", "mvd", "na",
"ncc", "nij", "nn", "no", "nr", "nst", "nu", "nut",
"of", "ofof", "old", "one", "oo", "opl", "pa",
"pap", "pav", "pb", "pch", "pg", "ph", "phd",
"pkf", "plot", "po", "pos", "pp", "pr", "pra", "pya",
"qq", "quater", "ra", "rbo", "rear", "reisach",
"rk", "rm", "rosso", "rs", "rw",
"sab", "sal", "sav", "sb", "sba", "sbb", "sbl", "sbn", "sbx",
"sc", "sch", "sco", "seb", "sep", "sf", "sgr", "sir",
"sj", "sl", "sm", "sn", "snc", "so", "som", /*"south",*/ "sp",
"spi", "spn", "ss", "st", "sta", "stc", "std", "stiege", /*"street",*/
"suite", "sur", "t", "tam", "ter", "terrace", "tf", "th", "the",
"sj", "sl", "sm", "sn", "snc", "som", "sp",
"spi", "spn", "ss", "sta", "stc", "std", "stiege",
"suite", "sur", "tam", "ter", "terrace", "tf", "th", "the",
"tl", "to", "torre", "tr", "traf", "trd", "ts", "tt", "tu",
"u", "uhm", "unit", "utc", "v", "vi", "vii", "w", "wa",
/*"way",*/ "we", /*"west",*/ "wf", "wink", "wrh", "ws", "wsb", "x",
"xx", "y", "z", "za", "zh", "zona", "zu", "zw", "א",
"ב", "ג", "α", "а", "б", "бб", "бл", "в", "вл",
"вх", "г", "д", "е", "ж", "з", "и", "к", "л",
"лит", "м", "н", "о", "п", "р", "разр", "с",
"стр", "т", "тп", "у", "уч", "участок", "ф", "ц", "",
"丁目", "", "", "",
"uhm", "unit", "utc", "vii", "wa",
"wf", "wink", "wrh", "ws", "wsb",
"xx", "za", "zh", "zona", "zu", "zw", "א",
"ב", "ג", "α", "бб", "бл", "вл",
"вх", "лит", "разр", "стр", "тп", "уч", "участок", "",
"丁目", "", "", "",

// List of exceptions
"владение"
Expand Down Expand Up @@ -105,9 +116,10 @@ vector<string> const g_patternsStrict = {


// List of common synonyms for building parts. Constructed by hand.
vector<string> const g_buildingPartSynonyms = {
char const * g_buildingPartSynonyms[] = {
"building", "bldg", "bld", "bl", "unit", "block", "blk", "корпус",
"корп", "кор", "литер", "лит", "строение", "стр", "блок", "бл"};
"корп", "кор", "литер", "лит", "строение", "стр", "блок", "бл"
};

// Hand-picked stop words for building queries: Russian "дом", English "house",
// and the short Russian form "д".
UniString const g_stopWords[] = {
    MakeUniString("дом"),
    MakeUniString("house"),
    MakeUniString("д"),
};
Expand Down
48 changes: 45 additions & 3 deletions search/search_quality/search_quality_tests/real_mwm_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,15 @@ class MwmTestsFixture : public search::tests_support::SearchTestBase
}

/// @param[in] street, house May be empty.
static void HasAddress(Range const & results, std::string const & street, std::string const & house)
static void HasAddress(Range const & results, std::string const & street, std::string const & house,
base::StringIL classifType = {"building"})
{
auto const buildingType = classif().GetTypeByPath({"building"});
auto const type = classif().GetTypeByPath(classifType);

bool found = false;
for (auto const & r : results)
{
if (r.GetResultType() == search::Result::Type::Feature && EqualClassifType(r.GetFeatureType(), buildingType))
if (r.GetResultType() == search::Result::Type::Feature && EqualClassifType(r.GetFeatureType(), type))
{
auto const & addr = r.GetAddress();
if ((street.empty() || addr.find(street) != std::string::npos) &&
Expand Down Expand Up @@ -852,4 +853,45 @@ UNIT_CLASS_TEST(MwmTestsFixture, BA_SanMartin)
classif().GetTypeByPath({"railway", "station"})), 2, ());
}
}

/// Checks full postal-address queries (locality, street, house number, postcode, country):
/// for every query the expected street / house number pair must be found by HasAddress
/// inside the leading slice of the results.
/// Each scenario lives in its own scope, so `request` and the `results` reference taken
/// from it share the same lifetime.
UNIT_CLASS_TEST(MwmTestsFixture, Full_Address)
{
{
// Krakow
// The viewport is centered on Krakow, while the query itself names Sucha Beskidzka.
ms::LatLon const center(50.061431, 19.9361584);
SetViewportAndLoadMaps(center);

auto request = MakeRequest("Sucha Beskidzka Armii Krajowej b-1 kozikowka 34-200 Poland");
auto const & results = request->Results();
// Sanity check on the result count (kPopularPoiResultsCount is defined elsewhere in the file).
TEST_GREATER(results.size(), kPopularPoiResultsCount, ());

// The query spells the house number in lower case ("b-1"); both "B-1" and "B-1A" are
// expected within Range(results, 0, 3) — presumably the first three results, TODO confirm
// against the Range definition.
HasAddress(Range(results, 0, 3), "Armii Krajowej", "B-1");
HasAddress(Range(results, 0, 3), "Armii Krajowej", "B-1A");
}

{
// Regensburg (DE)
// The viewport is centered on Regensburg, while both queries below name Wörth an der Donau.
ms::LatLon const center(49.0195332, 12.0974856);
SetViewportAndLoadMaps(center);

{
// The street name itself ends with a letter ("Gewerbepark A") and the house number is "A 1".
auto request = MakeRequest("Wörth an der Donau Gewerbepark A 1 93086 Germany");
auto const & results = request->Results();
TEST_GREATER(results.size(), kPopularPoiResultsCount, ());

// The expected feature is typed {"shop", "car"}, so HasAddress's default {"building"}
// classifier type is overridden here. Range(results, 0, 1) presumably restricts the
// check to the top result — TODO confirm.
HasAddress(Range(results, 0, 1), "Gewerbepark A", "A 1", {"shop", "car"});
}
{
auto request = MakeRequest("Wörth an der Donau Gewerbepark C 1 93086 Germany");
auto const & results = request->Results();
TEST_GREATER(results.size(), kPopularPoiResultsCount, ());

/// @todo There is a tricky neighborhood here, so ranking gets dumb :)
/// 1: "Gewerbepark A", "A 1" near "Gewerbepark C" st
/// 2: "Gewerbepark B", "1" near "Gewerbepark C" st
/// 3: "Gewerbepark C", "1"
// Because of the ranking described above, the expected address is only required to be
// somewhere inside Range(results, 0, 3) rather than at the very top.
HasAddress(Range(results, 0, 3), "Gewerbepark C", "1");
}
}
}
} // namespace real_mwm_tests
3 changes: 3 additions & 0 deletions search/search_tests/house_numbers_matcher_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ UNIT_TEST(HouseNumber_LooksLike)
{
TEST(LooksLikeHouseNumber("1", false /* isPrefix */), ());
TEST(LooksLikeHouseNumber("ev 10", false /* isPrefix */), ());
TEST(LooksLikeHouseNumber("ev.1", false /* isPrefix */), ());

TEST(LooksLikeHouseNumber("14 к", true /* isPrefix */), ());
TEST(LooksLikeHouseNumber("14 кор", true /* isPrefix */), ());
Expand Down Expand Up @@ -243,6 +244,8 @@ UNIT_TEST(HouseNumber_LooksLike)

TEST(LooksLikeHouseNumber("дом ", true /* isPrefix */), ());
TEST(LooksLikeHouseNumber("дом ", false /* isPrefix */), ());
TEST(LooksLikeHouseNumber("house", true /* isPrefix */), ());
TEST(LooksLikeHouseNumber("house ", false /* isPrefix */), ());

TEST(LooksLikeHouseNumber("дом 39 строение 79", false /* isPrefix */), ());

Expand Down

0 comments on commit d5d4d0c

Please sign in to comment.