Skip to content

Commit

Permalink
refactor(Locations): keep only website_url domain (#555)
Browse files Browse the repository at this point in the history
  • Loading branch information
raphodn authored Nov 9, 2024
1 parent a2fc3c4 commit cc5d589
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 14 deletions.
39 changes: 38 additions & 1 deletion open_prices/common/tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from django.test import TestCase

from open_prices.common.utils import is_float, truncate_decimal
from open_prices.common.utils import (
is_float,
truncate_decimal,
url_add_missing_https,
url_keep_only_domain,
)


class UtilsTest(TestCase):
Expand All @@ -20,3 +25,35 @@ def test_truncate_decimal(self):
self.assertEqual(
truncate_decimal("0.123456789", max_decimal_places=9), "0.123456789"
)

def test_url_add_missing_https(self):
    """
    url_add_missing_https leaves http(s) URLs untouched and prefixes
    scheme-less URLs with https://.

    The method name must start with "test" or the unittest/Django runner
    never collects it and the assertions below silently never run.
    """
    # a scheme is already present: returned unchanged
    self.assertEqual(
        url_add_missing_https("http://abc.hostname.com/somethings/anything/"),
        "http://abc.hostname.com/somethings/anything/",
    )
    self.assertEqual(
        url_add_missing_https("https://abc.hostname.com/somethings/anything/"),
        "https://abc.hostname.com/somethings/anything/",
    )
    # no scheme: https:// is added
    self.assertEqual(
        url_add_missing_https("abc.hostname.com/somethings/anything/"),
        "https://abc.hostname.com/somethings/anything/",
    )

def test_url_keep_only_domain(self):
    """
    url_keep_only_domain reduces a URL to "<scheme>://<host>", using
    https:// when the input carries no scheme.
    """
    # (input, expected output), checked in order
    expectations = (
        ("http://abc.hostname.com/somethings/anything/", "http://abc.hostname.com"),
        ("https://abc.hostname.com/somethings/anything/", "https://abc.hostname.com"),
        ("abc.hostname.com/somethings/anything/", "https://abc.hostname.com"),
        ("abc.hostname.com/", "https://abc.hostname.com"),
        ("abc.hostname.com", "https://abc.hostname.com"),
    )
    for raw_url, expected in expectations:
        self.assertEqual(url_keep_only_domain(raw_url), expected)
19 changes: 19 additions & 0 deletions open_prices/common/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import gzip
import json
import os
from urllib.parse import urlparse

import tqdm
from django.core.serializers.json import DjangoJSONEncoder
Expand Down Expand Up @@ -46,3 +47,21 @@ def truncate_decimal(value, max_decimal_places=7):
decimal_part = decimal_part[:max_decimal_places]
value = f"{integer_part}.{decimal_part}"
return value


def url_add_missing_https(url):
    """
    Return *url* with an ``https://`` prefix when it has no http(s) scheme.

    - surrounding whitespace is stripped first, so " example.com " does not
      end up as "https:// example.com "
    - the scheme test is case-insensitive (URL schemes are case-insensitive),
      so "HTTPS://example.com" is recognised and returned as-is instead of
      becoming "https://HTTPS://example.com"
    - an empty string still yields "https://", as before

    - input: abc.hostname.com/somethings/anything/
    - output: https://abc.hostname.com/somethings/anything/
    """
    url = url.strip()
    if not url.lower().startswith(("http://", "https://")):
        url = f"https://{url}"
    return url


def url_keep_only_domain(url):
    """
    Strip path, params, query and fragment from *url*, keeping only
    "<scheme>://<netloc>".

    A URL without an http(s) scheme is first given one through
    url_add_missing_https, so that urlparse reads the host as the netloc.

    Example:
    - input: http://abc.hostname.com/somethings/anything/
    - output: http://abc.hostname.com
    """
    has_scheme = url.startswith("http://") or url.startswith("https://")
    normalized = url if has_scheme else url_add_missing_https(url)
    parts = urlparse(normalized)
    return "://".join((parts.scheme, parts.netloc))
11 changes: 6 additions & 5 deletions open_prices/locations/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
OSM_TYPE_OK_LIST = [OSM_TYPE_NODE, OSM_TYPE_WAY]
OSM_TYPE_NOT_OK_LIST = ["way", "W", "test", None, "None"]

WEBSITE_URL_OK_LIST = [
"https://www.decathlon.fr/",
"https://www.alltricks.fr",
"www.ekosport.fr/",
"www.auvieuxcampeur.fr",
WEBSITE_URL_OK_TUPLE_LIST = [
# Valid website URLs as (input, output) pairs:
# - input: the raw value given as website_url
# - output: the value expected once the URL has been cleaned up
#   (https:// added when missing, everything after the domain dropped)
("https://www.decathlon.fr/", "https://www.decathlon.fr"),
("https://www.alltricks.fr", "https://www.alltricks.fr"),
("www.ekosport.fr/produit/1234", "https://www.ekosport.fr"),
("auvieuxcampeur.fr", "https://auvieuxcampeur.fr"),
]
8 changes: 6 additions & 2 deletions open_prices/locations/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,12 @@ def truncate_lat_lon(self):
def cleanup_url(self):
    """
    Normalize every URL field listed in self.URL_FIELDS, in place.

    Fields whose value is None are left untouched. Any other value gets
    an https:// scheme if it has none, and is then reduced to its
    scheme + domain.
    """
    for field_name in self.URL_FIELDS:
        url = getattr(self, field_name)
        if url is None:
            continue
        # add https:// if missing (replaces the former inline prefixing)
        url = utils.url_add_missing_https(url)
        # keep only the domain
        url = utils.url_keep_only_domain(url)
        setattr(self, field_name, url)

def clean(self, *args, **kwargs):
# dict to store all ValidationErrors
Expand Down
14 changes: 8 additions & 6 deletions open_prices/locations/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,22 +103,24 @@ def test_location_online_validation(self):
ValidationError,
LocationFactory,
type=location_constants.TYPE_ONLINE,
website_url=location_constants.WEBSITE_URL_OK_LIST[0],
website_url=location_constants.WEBSITE_URL_OK_TUPLE_LIST[0][0],
osm_id=6509705997,
osm_type=location_constants.OSM_TYPE_OK_LIST[0],
)
# ok
for WEBSITE_URL in location_constants.WEBSITE_URL_OK_LIST:
with self.subTest(website_url=WEBSITE_URL):
LocationFactory(
type=location_constants.TYPE_ONLINE, website_url=WEBSITE_URL
for WEBSITE_URL_TUPLE in location_constants.WEBSITE_URL_OK_TUPLE_LIST:
with self.subTest(website_url=WEBSITE_URL_TUPLE):
location = LocationFactory(
type=location_constants.TYPE_ONLINE,
website_url=WEBSITE_URL_TUPLE[0],
)
self.assertEqual(location.website_url, WEBSITE_URL_TUPLE[1])
# unique constraint
self.assertRaises(
ValidationError,
LocationFactory,
type=location_constants.TYPE_ONLINE,
website_url=location_constants.WEBSITE_URL_OK_LIST[0],
website_url=location_constants.WEBSITE_URL_OK_TUPLE_LIST[0][0],
)


Expand Down

0 comments on commit cc5d589

Please sign in to comment.