Skip to content

Commit

Permalink
v2.1.11: 更新headers,修复对title过长的本子的解析 (#93)
Browse files Browse the repository at this point in the history
  • Loading branch information
hect0x7 authored Aug 12, 2023
1 parent 8df766e commit 476efc6
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 13 deletions.
2 changes: 1 addition & 1 deletion src/jmcomic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
# 被依赖方 <--- 使用方
# config <--- entity <--- toolkit <--- client <--- option <--- downloader

__version__ = '2.1.10'
__version__ = '2.1.11'

from .api import *
2 changes: 0 additions & 2 deletions src/jmcomic/jm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,7 @@ def headers(cls, domain='18comic.vip'):
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,'
'application/signed-exchange;v=b3;q=0.7',
'accept-language': 'zh-CN,zh;q=0.9',
'cache-control': 'no-cache',
'referer': f'https://{domain}',
'pragma': 'no-cache',
'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
Expand Down
22 changes: 16 additions & 6 deletions src/jmcomic/jm_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,21 @@


class JmBaseEntity:
pass

@staticmethod
def fix_title(title: str, limit=50):
"""
一些过长的标题可能含有 \n,例如album: 360537
该方法会把 \n 去除
"""
if len(title) > limit and '\n' in title:
title = title.replace('\n', '')

return title.strip()

def save_to_file(self, filepath):
    """
    Hand this entity and ``filepath`` to ``PackerUtil.pack``.

    NOTE(review): presumably this writes the entity to ``filepath``; the
    actual format is decided by ``common.PackerUtil`` -- confirm there.

    :param filepath: passed through unchanged to ``PackerUtil.pack``
    """
    # Imported inside the method, as in the original, so the dependency on
    # ``common`` is only resolved when this method is called.
    from common import PackerUtil as packer
    packer.pack(self, filepath)


class DetailEntity(JmBaseEntity, IterableEntity):
Expand All @@ -17,10 +31,6 @@ def id(self) -> str:
def name(self) -> str:
return getattr(self, 'title')

def save_to_file(self, filepath):
from common import PackerUtil
PackerUtil.pack(self, filepath)

@classmethod
def __jm_type__(cls):
# "JmAlbumDetail" -> "album" (本子)
Expand Down Expand Up @@ -130,7 +140,7 @@ def __init__(self,
):
self.photo_id: str = photo_id
self.scramble_id: str = scramble_id
self.title: str = str(title).strip()
self.title: str = self.fix_title(str(title))
self.sort: int = int(sort)
self._keywords: str = keywords
self._series_id: int = int(series_id)
Expand Down
7 changes: 3 additions & 4 deletions src/jmcomic/jm_toolkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class JmcomicText:

pattern_html_photo_photo_id = compile('<meta property="og:url" content=".*?/photo/(\d+)/?.*?">')
pattern_html_photo_scramble_id = compile('var scramble_id = (\d+);')
pattern_html_photo_title = compile('<title>(.*?)\|.*</title>')
pattern_html_photo_title = compile('<title>([\s\S]*?)\|.*</title>')
# pattern_html_photo_data_original_list = compile('data-original="(.*?)" id="album_photo_.+?"')
pattern_html_photo_data_original_domain = compile('src="https://(.*?)/media/albums/blank')
pattern_html_photo_data_original_0 = compile('data-original="(.*?)"[ \n]*?id="album_photo')
Expand All @@ -20,9 +20,9 @@ class JmcomicText:

pattern_html_album_album_id = compile('<span class="number">.*?:JM(\d+)</span>')
pattern_html_album_scramble_id = compile('var scramble_id = (\d+);')
pattern_html_album_title = compile('panel-heading[\s\S]*?<h1>(.*?)</h1>')
pattern_html_album_title = compile('<h1 class="book-name" id="book-name">([\s\S]*?)</h1>')
pattern_html_album_episode_list = compile('data-album="(\d+)">\n *?<li.*?>\n *'
'第(\d+)話\n(.*)\n *'
'第(\d+)話\n([\s\S]*?)\n *'
'<[\s\S]*?>(\d+-\d+-\d+).*?')
pattern_html_album_page_count = compile('<span class="pagecount">.*?:(\d+)</span>')
pattern_html_album_pub_date = compile('>上架日期 : (.*?)</span>')
Expand Down Expand Up @@ -135,7 +135,6 @@ def match_field(field_key: str, pattern: Union[Pattern, List[Pattern]], text):

field_dict = {}
pattern_name: str

for pattern_name, pattern_value in cls.__dict__.items():
if not pattern_name.startswith(cls_field_prefix):
continue
Expand Down

0 comments on commit 476efc6

Please sign in to comment.