Skip to content

Commit

Permalink
Add: jpg4 module
Browse files Browse the repository at this point in the history
  • Loading branch information
eight04 committed Aug 2, 2024
1 parent 980d624 commit 42398df
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 3 deletions.
3 changes: 1 addition & 2 deletions comiccrawler/grabber.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ def do_request(s, url, proxies, retry, **kwargs):
sleep_time = 5
while True:
with get_request_lock(url):
r = s.request(kwargs.pop("method", "GET"), url, timeout=(22, 60),
proxies=proxies, **kwargs)
r = s.request(kwargs.pop("method", "GET"), url, proxies=proxies, **kwargs)
grabber_log(list((r.status_code, r.url, r.request.headers, r.headers) for r in (r.history + [r])))

if r.status_code in SUCCESS_CODES:
Expand Down
35 changes: 35 additions & 0 deletions comiccrawler/mods/jpg4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#! python3

"""
https://jpg4.su/a/{id}/?sort=date_desc&page=1
"""

import re

from html import unescape

from ..episode import Episode
from ..session_manager import session_manager

# Comic Crawler module metadata: domains handled by this module.
domain = ["jpg4.su"]
# Module display name, used as the "[jpg4]" title prefix.
name = "jpg4"
# Every image is treated as its own episode; skip per-episode subfolders.
noepfolder = True

# Shared HTTP session for jpg4.su, obtained from the project-wide manager.
s = session_manager.get("https://jpg4.su/")
# Raise the (connect, read) timeout to 180s read — presumably because the
# image host responds slowly; overrides the session's default of (22, 60).
s.timeout = (22, 180)

def get_title(html, url):
title = re.search(r'og:title" content="([^"]+)', html).group(1)
return "[jpg4] {}".format(unescape(title).strip())

def get_episodes(html, url):
	"""Build the episode list from an album page.

	Each <a href="https://jpg4.su/img/..."> link wrapping an <img> becomes
	one Episode: the alt text (file extension stripped) is the title, and
	the thumbnail URL with the ".md." marker removed is the full image.
	Episodes are returned oldest-first (page order reversed).
	"""
	# FIXME: multiple pages?
	link_pattern = re.compile(
		r'<a href="(https://jpg4\.su/img/[^"]+)"[^>]*>\s*<img src="([^"]*)" alt="([^"]*)'
	)
	episodes = []
	for m in link_pattern.finditer(html):
		page_url = m.group(1)
		thumb_url = m.group(2)
		clean_title = re.sub(r"\.\w+$", "", m.group(3))
		full_image = thumb_url.replace(".md.", ".")
		episodes.append(Episode(clean_title, page_url, image=full_image))
	episodes.reverse()
	return episodes

10 changes: 9 additions & 1 deletion comiccrawler/session_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from threading import Lock
from typing import Callable, Any

from requests import Session
from requests import Session as RequestsSession

from .util import extract_curl

Expand All @@ -16,6 +16,14 @@ def default_key(url: str) -> tuple:
r = urlparse(url)
return (r.scheme, r.netloc)

class Session(RequestsSession):
	"""requests.Session subclass that injects a default timeout.

	Plain requests sessions have no per-session timeout; this subclass
	applies ``self.timeout`` to every request unless the caller passes
	an explicit ``timeout`` keyword argument.
	"""

	# Default (connect, read) timeout in seconds; instances may override.
	timeout: Any = (22, 60)

	def request(self, *args, **kwargs):
		"""Delegate to requests' Session.request with the default timeout."""
		kwargs.setdefault("timeout", self.timeout)
		return super().request(*args, **kwargs)

class SessionManager:
def __init__(self) -> None:
self.lock = Lock()
Expand Down

0 comments on commit 42398df

Please sign in to comment.