Skip to content

Commit

Permalink
chore: Update urllib.parse imports and handle URL paths consistently
Browse files (browse the repository at this point in the history)
  • Loading branch information
techtanic committed Jul 4, 2024
1 parent 7121a09 commit 6c93de9
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import traceback
from datetime import datetime, timezone
from decimal import Decimal
-from urllib.parse import parse_qs, unquote, urlsplit
+from urllib.parse import parse_qs, unquote, urlsplit, urlunparse, urlparse

import cloudscraper
import requests
Expand Down Expand Up @@ -210,7 +210,7 @@ def rd(self):
r = requests.get(
"https://www.real.discount/api-web/all-courses/?store=Udemy&page=1&per_page=500&orderby=date&free=1&editorschoices=0",
headers=headers,
-timeout=(10,30)
+timeout=(10, 30),
).json()
except requests.exceptions.Timeout:
self.rd_error = "Timeout"
Expand Down Expand Up @@ -576,6 +576,20 @@ def remove_duplicate_courses(self):
for key in data:
new_data[key] = []
for title, link in data[key]:
+parsed_url = urlparse(link)
+path = parsed_url.path
+if not path.endswith("/"):
+path += "/"
+link = urlunparse(
+(
+parsed_url.scheme,
+parsed_url.netloc,
+path,
+parsed_url.params,
+parsed_url.query,
+parsed_url.fragment,
+)
+)
if link not in existing_links:
new_data[key].append((title, link))
existing_links.add(link)
Expand Down

0 comments on commit 6c93de9

Please sign in to comment.