diff --git a/README.md b/README.md index 5746fb84..7c1ddd3a 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ jmcomic.download_album('422866') # 传入要下载的album的id,即可下载 # 如果你想要配置,请参考assets/config/和usgae/下的文档和示例. ``` -进一步的使用可以参考usage文件夹下的示例代码: `getting_started.py` `sample_usage.py` +进一步的使用可以参考usage文件夹下的示例代码: `getting_started.py` `usage_simple.py` `usage_feature_filter.py` ## 项目特点 @@ -42,7 +42,7 @@ jmcomic.download_album('422866') # 传入要下载的album的id,即可下载 - **可配置性强** - 不配置也能使用,十分方便 - 配置可以从**配置文件**生成,支持多种文件格式,无需写Python代码 - - 配置点有:`是否使用磁盘缓存` `图片类型转换` `下载路径` `请求元信息(headers,cookies,代理)`等 + - 配置点有:`是否使用磁盘缓存` `图片类型转换` `下载路径` `请求元信息(headers,cookies,代理)`等 - **可扩展性强** - 支持自定义本子/章节/图片下载前后的回调函数 - 支持自定义debug日志的开关/格式 diff --git a/setup.py b/setup.py index 0c9a99fe..47b135ac 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ package_dir={"": "src"}, python_requires=">=3.7", install_requires=[ - 'commonX', + 'commonX>=0.5.3', 'curl_cffi', 'PyYAML', 'Pillow', diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py index 759f0ac7..1b9e0b77 100644 --- a/src/jmcomic/__init__.py +++ b/src/jmcomic/__init__.py @@ -2,6 +2,6 @@ # 被依赖方 <--- 使用方 # config <--- entity <--- toolkit <--- client <--- option <--- downloader -__version__ = '2.1.13' +__version__ = '2.1.14' from .api import * diff --git a/src/jmcomic/jm_client_impl.py b/src/jmcomic/jm_client_impl.py index 2b6fd984..2efa0587 100644 --- a/src/jmcomic/jm_client_impl.py +++ b/src/jmcomic/jm_client_impl.py @@ -420,3 +420,7 @@ def __init__(self, workers=None) -> None: def save_image_resp(self, *args, **kwargs): future = self.executor.submit(lambda: super().save_image_resp(*args, **kwargs)) self.future_list.append(future) + + +JmModuleConfig.CLASS_CLIENT_IMPL['html'] = JmHtmlClient +JmModuleConfig.CLASS_CLIENT_IMPL['api'] = JmApiClient diff --git a/src/jmcomic/jm_client_interface.py b/src/jmcomic/jm_client_interface.py index 4589da29..b528b19f 100644 --- a/src/jmcomic/jm_client_interface.py +++ b/src/jmcomic/jm_client_interface.py @@ 
-221,7 +221,7 @@ class JmImageClient: def download_image(self, img_url: str, img_save_path: str, - scramble_id: str, + scramble_id=None, decode_image=True, ): """ @@ -231,6 +231,10 @@ def download_image(self, @param scramble_id: 图片所在photo的scramble_id @param decode_image: 要保存的是解密后的图还是原图 """ + if scramble_id is None: + # scramble_id was not given: in most cases it equals the photo_id, so parse that id out of img_url + scramble_id = JmcomicText.parse_to_photo_id(img_url) + # 请求图片 resp = self.get_jm_image(img_url) diff --git a/src/jmcomic/jm_config.py b/src/jmcomic/jm_config.py index a8396c1c..90f9d7cd 100644 --- a/src/jmcomic/jm_config.py +++ b/src/jmcomic/jm_config.py @@ -56,6 +56,7 @@ class JmModuleConfig: CLASS_OPTION = None CLASS_ALBUM = None CLASS_PHOTO = None + CLASS_CLIENT_IMPL = {} # 执行debug的函数 debug_executor = default_jm_debug @@ -96,6 +97,16 @@ def photo_class(cls): from .jm_entity import JmPhotoDetail return JmPhotoDetail + @classmethod + def client_impl_class(cls, client_key: str): + client_impl_dict = cls.CLASS_CLIENT_IMPL + + impl_class = client_impl_dict.get(client_key, None) + if impl_class is None: + raise NotImplementedError(f'not found client impl class for key: "{client_key}"') + + return impl_class + @classmethod @field_cache("DOMAIN") def domain(cls, postman=None): diff --git a/src/jmcomic/jm_entity.py b/src/jmcomic/jm_entity.py index f5236f03..15d9eab2 100644 --- a/src/jmcomic/jm_entity.py +++ b/src/jmcomic/jm_entity.py @@ -318,10 +318,12 @@ def __init__(self, # 有的 album 没有章节,则自成一章。 if len(episode_list) == 0: - # photo_id, photo_index_of_album, photo_title, photo_pub_date - episode_list = [(album_id, 0, title, pub_date)] + # photo_id, photo_index, photo_title, photo_pub_date + episode_list = [(album_id, 1, title, pub_date)] + else: + episode_list = self.distinct_episode(episode_list) - self.episode_list: List[Tuple] = self.distinct_episode(episode_list) + self.episode_list: List[Tuple] = episode_list def create_photo_detail(self, index) -> Tuple[JmPhotoDetail, Tuple]: # 校验参数 @@ -332,7 +334,7 @@ def 
create_photo_detail(self, index) -> Tuple[JmPhotoDetail, Tuple]: # episode_info: ('212214', '81', '94 突然打來', '2020-08-29') episode_info: tuple = self.episode_list[index] - photo_id, photo_index_of_album, photo_title, photo_pub_date = episode_info + photo_id, photo_index, photo_title, photo_pub_date = episode_info photo = JmPhotoDetail( photo_id=photo_id, @@ -340,7 +342,7 @@ def create_photo_detail(self, index) -> Tuple[JmPhotoDetail, Tuple]: title=photo_title, keywords='', series_id=self.album_id, - sort=episode_info[1] if len(self) != 1 else 1, + sort=photo_index, author=self.author, from_album=self, page_arr=None, diff --git a/src/jmcomic/jm_option.py b/src/jmcomic/jm_option.py index c87d26cb..84b0d183 100644 --- a/src/jmcomic/jm_option.py +++ b/src/jmcomic/jm_option.py @@ -222,12 +222,6 @@ def to_file(self, filepath=None): 下面是 build 方法 """ - # 缓存 - jm_client_impl_mapping: Dict[str, Type[AbstractJmClient]] = { - 'html': JmHtmlClient, - 'api': JmApiClient, - } - @field_cache("__jm_client_cache__") def build_jm_client(self, **kwargs) -> JmcomicClient: """ @@ -255,7 +249,7 @@ def new_jm_client(self, **kwargs) -> JmcomicClient: domain_list = [JmcomicText.parse_to_jm_domain(JmModuleConfig.get_jmcomic_url(postman))] # client - client = self.jm_client_impl_mapping[self.client.impl]( + client = JmModuleConfig.client_impl_class(self.client.impl)( postman, self.client.retry_times, fallback_domain_list=domain_list, diff --git a/usage/usage_feature_filter.py b/usage/usage_feature_filter.py new file mode 100644 index 00000000..f5536811 --- /dev/null +++ b/usage/usage_feature_filter.py @@ -0,0 +1,72 @@ +""" + +本文件演示 jmcomic 的filter(过滤器) +利用filter,你可以实现下载时过滤本子/章节/图片,完全控制你要下载的内容。 + +使用filter的步骤如下: +1. 自定义class,继承JmDownloader,重写filter_iter_objs方法,即: + class MyDownloader(JmDownloader): + def filter_iter_objs(self, iter_objs: DownloadIterObjs): + # 如何重写?参考JmDownloader.filter_iter_objs和下面的示例 + ... + +2. 让你的class生效,使用如下代码: + JmModuleConfig.CLASS_DOWNLOADER = MyDownloader + +3. 
照常使用下载api: + download_album(xxx, option) + +** 本文件下面的示例只演示步骤1 ** + +本文件包含如下示例: +- 只下载本子的特定章节以后的章节 +- 只下载章节的前三张图 + + +""" + +from jmcomic import * + + +# 示例:只下载本子的特定章节以后的章节 +# 参考:https://github.com/hect0x7/JMComic-Crawler-Python/issues/95 +class FindUpdateDownloader(JmDownloader): + album_after_photo = { + 'xxx': 'yyy' + } + + def filter_iter_objs(self, iter_objs: DownloadIterObjs): + if not isinstance(iter_objs, JmAlbumDetail): + return iter_objs + + return self.find_update(iter_objs) + + # 带入漫画id, 章节id(第x章),寻找该漫画下第x章节後的所有章节Id + def find_update(self, album: JmAlbumDetail): + if album.album_id not in self.album_after_photo: + return album + + photo_ls = [] + photo_begin = self.album_after_photo[album.album_id] + is_new_photo = False + + for photo in album: + if is_new_photo: + photo_ls.append(photo) + + if photo.photo_id == photo_begin: + is_new_photo = True + + return photo_ls + + +# 示例:只下载章节的前三张图 +class First3ImageDownloader(JmDownloader): + + def filter_iter_objs(self, iter_objs: DownloadIterObjs): + if isinstance(iter_objs, JmPhotoDetail): + photo: JmPhotoDetail = iter_objs + # 支持[start,end,step] + return photo[:3] + + return iter_objs diff --git a/usage/sample_usage.py b/usage/usage_simple.py similarity index 86% rename from usage/sample_usage.py rename to usage/usage_simple.py index 637e6ed2..d08882b7 100644 --- a/usage/sample_usage.py +++ b/usage/usage_simple.py @@ -1,3 +1,13 @@ +""" + +本文件仅演示一些简单的api使用,包含以下内容: +1. 下载本子 +2. 获取实体类(本子/章节/图片) +3. 搜索本子 +4. 搜索并下载本子(以下载带有 [無修正] 标签的本子为例) + +""" + from jmcomic import * option = create_option( @@ -6,7 +16,7 @@ client = option.build_jm_client() -@timeit('下载本子集: ') +@timeit('下载本子: ') def download_jm_album(): ls = str_to_list(''' 438696 @@ -54,6 +64,7 @@ def search_jm_album(): @timeit('搜索并下载本子: ') def search_and_download(): tag = '無修正' + # 搜索第一页 search_page: JmSearchPage = client.search_album(tag, main_tag=3) id_list = []