From bc73ca3bdbac1913222ca1a286955802af1ca9c2 Mon Sep 17 00:00:00 2001 From: lc4t <11334449+lc4t@users.noreply.github.com> Date: Sat, 10 Jun 2023 21:09:53 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20=E6=89=B9=E9=87=8F=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E6=8C=89=E7=85=A7=E7=A8=BF=E4=BB=B6=E5=8F=91=E5=B8=83?= =?UTF-8?q?=E6=97=B6=E9=97=B4=E8=BF=87=E6=BB=A4=20(#146)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nyakku Shigure --- README.md | 12 +++++++++++ yutto/__main__.py | 2 ++ yutto/extractor/collection.py | 4 ++++ yutto/extractor/favourites.py | 4 ++++ yutto/extractor/series.py | 4 ++++ yutto/extractor/user_all_favourites.py | 4 ++++ yutto/extractor/user_all_ugc_videos.py | 4 ++++ yutto/extractor/user_watch_later.py | 4 ++++ yutto/utils/filter.py | 28 ++++++++++++++++++++++++++ yutto/validator.py | 7 +++++++ 10 files changed, 73 insertions(+) create mode 100644 yutto/utils/filter.py diff --git a/README.md b/README.md index 2231cdec1..eedcb8566 100644 --- a/README.md +++ b/README.md @@ -509,6 +509,18 @@ yutto -b -p "~3,10,12~14,16,-4~" - 参数 `-s` 或 `--with-section` - 默认值 `False` +#### 指定稿件发布时间范围 + +- 参数 `--batch-filter-start-time` 和 `--batch-filter-end-time` 分别表示`开始`和`结束`时间,该区间**左闭右开** +- 默认 `不限制` +- 支持的格式 + - `%Y-%m-%d` + - `%Y-%m-%d %H:%M:%S` + + 例如仅下载2020年投稿的视频,可以这样: + + `--batch-filter-start-time=2020-01-01 --batch-filter-end-time=2021-01-01` + ## 从 bilili1.x 迁移 diff --git a/yutto/__main__.py b/yutto/__main__.py index af0697778..9a9b6a57c 100644 --- a/yutto/__main__.py +++ b/yutto/__main__.py @@ -181,6 +181,8 @@ def cli() -> argparse.ArgumentParser: group_batch.add_argument("-b", "--batch", action="store_true", help="批量下载") group_batch.add_argument("-p", "--episodes", default="1~-1", help="选集") group_batch.add_argument("-s", "--with-section", action="store_true", help="同时下载附加剧集(PV、预告以及特别篇等专区内容)") + group_batch.add_argument("--batch-filter-start-time", help="只下载该时间之后(包含临界值)发布的稿件") + 
group_batch.add_argument("--batch-filter-end-time", help="只下载该时间之前(不包含临界值)发布的稿件") # 仅任务列表中使用 group_batch_file = parser.add_argument_group("batch file", "批量下载文件参数") diff --git a/yutto/extractor/collection.py b/yutto/extractor/collection.py index 0934c4856..d5c32814f 100644 --- a/yutto/extractor/collection.py +++ b/yutto/extractor/collection.py @@ -17,6 +17,7 @@ from yutto.utils.asynclib import CoroutineWrapper from yutto.utils.console.logger import Badge, Logger from yutto.utils.fetcher import Fetcher +from yutto.utils.filter import Filter class CollectionExtractor(BatchExtractor): @@ -67,6 +68,9 @@ async def extract( try: avid = item["avid"] ugc_video_list = await get_ugc_video_list(session, avid) + if not Filter.verify_timer(ugc_video_list["pubdate"]): + Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}") + continue await Fetcher.touch_url(session, avid.to_url()) if len(ugc_video_list["pages"]) != 1: Logger.error(f"视频合集 {collection_title} 中的视频 {item['avid']} 包含多个视频!") diff --git a/yutto/extractor/favourites.py b/yutto/extractor/favourites.py index fb424c464..d3845193f 100644 --- a/yutto/extractor/favourites.py +++ b/yutto/extractor/favourites.py @@ -15,6 +15,7 @@ from yutto.utils.asynclib import CoroutineWrapper from yutto.utils.console.logger import Badge, Logger from yutto.utils.fetcher import Fetcher +from yutto.utils.filter import Filter class FavouritesExtractor(BatchExtractor): @@ -49,6 +50,9 @@ async def extract( ugc_video_list = await get_ugc_video_list(session, avid) # 在使用 SESSDATA 时,如果不去事先 touch 一下视频链接的话,是无法获取 episode_data 的 # 至于为什么前面那俩(投稿视频页和番剧页)不需要额外 touch,因为在 get_redirected_url 阶段连接过了呀 + if not Filter.verify_timer(ugc_video_list["pubdate"]): + Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}") + continue await Fetcher.touch_url(session, avid.to_url()) for ugc_video_item in ugc_video_list["pages"]: ugc_video_info_list.append( diff --git a/yutto/extractor/series.py b/yutto/extractor/series.py 
index 3f0b8f2c9..053a00b1a 100644 --- a/yutto/extractor/series.py +++ b/yutto/extractor/series.py @@ -15,6 +15,7 @@ from yutto.utils.asynclib import CoroutineWrapper from yutto.utils.console.logger import Badge, Logger from yutto.utils.fetcher import Fetcher +from yutto.utils.filter import Filter class SeriesExtractor(BatchExtractor): @@ -50,6 +51,9 @@ async def extract( for avid in await get_medialist_avids(session, self.series_id, self.mid): try: ugc_video_list = await get_ugc_video_list(session, avid) + if not Filter.verify_timer(ugc_video_list["pubdate"]): + Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}") + continue await Fetcher.touch_url(session, avid.to_url()) for ugc_video_item in ugc_video_list["pages"]: ugc_video_info_list.append( diff --git a/yutto/extractor/user_all_favourites.py b/yutto/extractor/user_all_favourites.py index 82c9127d1..2d6f22008 100644 --- a/yutto/extractor/user_all_favourites.py +++ b/yutto/extractor/user_all_favourites.py @@ -14,6 +14,7 @@ from yutto.utils.asynclib import CoroutineWrapper from yutto.utils.console.logger import Badge, Logger from yutto.utils.fetcher import Fetcher +from yutto.utils.filter import Filter class UserAllFavouritesExtractor(BatchExtractor): @@ -44,6 +45,9 @@ async def extract( for avid in await get_favourite_avids(session, fid): try: ugc_video_list = await get_ugc_video_list(session, avid) + if not Filter.verify_timer(ugc_video_list["pubdate"]): + Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}") + continue await Fetcher.touch_url(session, avid.to_url()) for ugc_video_item in ugc_video_list["pages"]: ugc_video_info_list.append( diff --git a/yutto/extractor/user_all_ugc_videos.py b/yutto/extractor/user_all_ugc_videos.py index 08fadddf8..a4c363862 100644 --- a/yutto/extractor/user_all_ugc_videos.py +++ b/yutto/extractor/user_all_ugc_videos.py @@ -14,6 +14,7 @@ from yutto.utils.asynclib import CoroutineWrapper from yutto.utils.console.logger 
class Filter:
    """Process-wide publish-time filter used by the batch extractors.

    The bounds are stored as naive ``datetime`` objects on the class itself and
    describe a half-open interval ``[batch_filter_start_time,
    batch_filter_end_time)``. The defaults (``datetime.min`` / ``datetime.max``)
    mean "no restriction".
    """

    # Inclusive lower bound of the accepted publish time.
    batch_filter_start_time: datetime.datetime = datetime.datetime.min
    # Exclusive upper bound of the accepted publish time.
    batch_filter_end_time: datetime.datetime = datetime.datetime.max

    @staticmethod
    def set_timer(key: str, user_input: str):
        """Parse ``user_input`` and store it on ``Filter`` under attribute ``key``.

        Accepted formats are ``%Y-%m-%d`` and ``%Y-%m-%d %H:%M:%S``. Input that
        does not match either format, or that matches the shape but is not a
        real date (e.g. ``2020-02-30``), is reported through ``Logger.error``
        and leaves the current bound untouched.

        Args:
            key: Name of the class attribute to set, e.g.
                ``"batch_filter_start_time"`` or ``"batch_filter_end_time"``.
            user_input: The raw time string supplied by the user.
        """
        accepted_formats = (
            (r"^\d{4}-\d{2}-\d{2}$", "%Y-%m-%d"),
            (r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$", "%Y-%m-%d %H:%M:%S"),
        )
        timer: datetime.datetime | None = None
        for pattern, time_format in accepted_formats:
            if not re.match(pattern, user_input):
                continue
            try:
                timer = datetime.datetime.strptime(user_input, time_format)
            except ValueError:
                # Right shape but impossible value (month 13, Feb 30, ...):
                # treat it like any other unparseable input instead of crashing.
                timer = None
            break
        if timer is None:
            Logger.error(f"稿件过滤参数: {user_input} 看不懂呢┭┮﹏┭┮,不会生效哦")
            return
        setattr(Filter, key, timer)

    @staticmethod
    def verify_timer(timestamp: int) -> bool:
        """Return whether ``timestamp`` lies inside the configured interval.

        Args:
            timestamp: Publish time as a Unix timestamp in seconds.

        Returns:
            ``True`` when ``start <= publish time < end``, otherwise ``False``.
        """
        # Convert the timestamp to a naive local datetime and compare datetimes.
        # Calling ``.timestamp()`` on the ``datetime.min`` / ``datetime.max``
        # sentinels is not portable: it can raise ValueError or OSError because
        # the local-time conversion leaves the supported year range.
        published_at = datetime.datetime.fromtimestamp(timestamp)
        return Filter.batch_filter_start_time <= published_at < Filter.batch_filter_end_time