Skip to content

Commit

Permalink
✨ feat: 批量支持按照稿件发布时间过滤 (#146)
Browse files Browse the repository at this point in the history
Co-authored-by: Nyakku Shigure <[email protected]>
  • Loading branch information
lc4t and SigureMo authored Jun 10, 2023
1 parent b5ff4b6 commit bc73ca3
Show file tree
Hide file tree
Showing 10 changed files with 73 additions and 0 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,18 @@ yutto <url> -b -p "~3,10,12~14,16,-4~"
- 参数 `-s` 或 `--with-section`
- 默认值 `False`

#### 指定稿件发布时间范围

- 参数 `--batch-filter-start-time` 和 `--batch-filter-end-time` 分别表示`开始`和`结束`时间,该区间**左闭右开**
- 默认 `不限制`
- 支持的格式
- `%Y-%m-%d`
- `%Y-%m-%d %H:%M:%S`

例如仅下载2020年投稿的视频,可以这样:

`--batch-filter-start-time=2020-01-01 --batch-filter-end-time=2021-01-01`

</details>

## 从 bilili1.x 迁移
Expand Down
2 changes: 2 additions & 0 deletions yutto/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ def cli() -> argparse.ArgumentParser:
group_batch.add_argument("-b", "--batch", action="store_true", help="批量下载")
group_batch.add_argument("-p", "--episodes", default="1~-1", help="选集")
group_batch.add_argument("-s", "--with-section", action="store_true", help="同时下载附加剧集(PV、预告以及特别篇等专区内容)")
group_batch.add_argument("--batch-filter-start-time", help="只下载该时间之后(包含临界值)发布的稿件")
group_batch.add_argument("--batch-filter-end-time", help="只下载该时间之前(不包含临界值)发布的稿件")

# 仅任务列表中使用
group_batch_file = parser.add_argument_group("batch file", "批量下载文件参数")
Expand Down
4 changes: 4 additions & 0 deletions yutto/extractor/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from yutto.utils.asynclib import CoroutineWrapper
from yutto.utils.console.logger import Badge, Logger
from yutto.utils.fetcher import Fetcher
from yutto.utils.filter import Filter


class CollectionExtractor(BatchExtractor):
Expand Down Expand Up @@ -67,6 +68,9 @@ async def extract(
try:
avid = item["avid"]
ugc_video_list = await get_ugc_video_list(session, avid)
if not Filter.verify_timer(ugc_video_list["pubdate"]):
Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}")
continue
await Fetcher.touch_url(session, avid.to_url())
if len(ugc_video_list["pages"]) != 1:
Logger.error(f"视频合集 {collection_title} 中的视频 {item['avid']} 包含多个视频!")
Expand Down
4 changes: 4 additions & 0 deletions yutto/extractor/favourites.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from yutto.utils.asynclib import CoroutineWrapper
from yutto.utils.console.logger import Badge, Logger
from yutto.utils.fetcher import Fetcher
from yutto.utils.filter import Filter


class FavouritesExtractor(BatchExtractor):
Expand Down Expand Up @@ -49,6 +50,9 @@ async def extract(
ugc_video_list = await get_ugc_video_list(session, avid)
# 在使用 SESSDATA 时,如果不去事先 touch 一下视频链接的话,是无法获取 episode_data 的
# 至于为什么前面那俩(投稿视频页和番剧页)不需要额外 touch,因为在 get_redirected_url 阶段连接过了呀
if not Filter.verify_timer(ugc_video_list["pubdate"]):
Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}")
continue
await Fetcher.touch_url(session, avid.to_url())
for ugc_video_item in ugc_video_list["pages"]:
ugc_video_info_list.append(
Expand Down
4 changes: 4 additions & 0 deletions yutto/extractor/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from yutto.utils.asynclib import CoroutineWrapper
from yutto.utils.console.logger import Badge, Logger
from yutto.utils.fetcher import Fetcher
from yutto.utils.filter import Filter


class SeriesExtractor(BatchExtractor):
Expand Down Expand Up @@ -50,6 +51,9 @@ async def extract(
for avid in await get_medialist_avids(session, self.series_id, self.mid):
try:
ugc_video_list = await get_ugc_video_list(session, avid)
if not Filter.verify_timer(ugc_video_list["pubdate"]):
Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}")
continue
await Fetcher.touch_url(session, avid.to_url())
for ugc_video_item in ugc_video_list["pages"]:
ugc_video_info_list.append(
Expand Down
4 changes: 4 additions & 0 deletions yutto/extractor/user_all_favourites.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from yutto.utils.asynclib import CoroutineWrapper
from yutto.utils.console.logger import Badge, Logger
from yutto.utils.fetcher import Fetcher
from yutto.utils.filter import Filter


class UserAllFavouritesExtractor(BatchExtractor):
Expand Down Expand Up @@ -44,6 +45,9 @@ async def extract(
for avid in await get_favourite_avids(session, fid):
try:
ugc_video_list = await get_ugc_video_list(session, avid)
if not Filter.verify_timer(ugc_video_list["pubdate"]):
Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}")
continue
await Fetcher.touch_url(session, avid.to_url())
for ugc_video_item in ugc_video_list["pages"]:
ugc_video_info_list.append(
Expand Down
4 changes: 4 additions & 0 deletions yutto/extractor/user_all_ugc_videos.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from yutto.utils.asynclib import CoroutineWrapper
from yutto.utils.console.logger import Badge, Logger
from yutto.utils.fetcher import Fetcher
from yutto.utils.filter import Filter


class UserAllUgcVideosExtractor(BatchExtractor):
Expand All @@ -40,6 +41,9 @@ async def extract(
for avid in await get_user_space_all_videos_avids(session, self.mid):
try:
ugc_video_list = await get_ugc_video_list(session, avid)
if not Filter.verify_timer(ugc_video_list["pubdate"]):
Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}")
continue
await Fetcher.touch_url(session, avid.to_url())
for ugc_video_item in ugc_video_list["pages"]:
ugc_video_info_list.append(
Expand Down
4 changes: 4 additions & 0 deletions yutto/extractor/user_watch_later.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from yutto.utils.asynclib import CoroutineWrapper
from yutto.utils.console.logger import Badge, Logger
from yutto.utils.fetcher import Fetcher
from yutto.utils.filter import Filter


class UserWatchLaterExtractor(BatchExtractor):
Expand Down Expand Up @@ -43,6 +44,9 @@ async def extract(
for avid in avid_list:
try:
ugc_video_list = await get_ugc_video_list(session, avid)
if not Filter.verify_timer(ugc_video_list["pubdate"]):
Logger.debug(f"因为发布时间为 {ugc_video_list['pubdate']},跳过 {ugc_video_list['title']}")
continue
await Fetcher.touch_url(session, avid.to_url())
for ugc_video_item in ugc_video_list["pages"]:
ugc_video_info_list.append(
Expand Down
28 changes: 28 additions & 0 deletions yutto/utils/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

import datetime
import re

from yutto.utils.console.logger import Logger


class Filter:
    """Process-wide publish-time filter used by the batch extractors.

    The two class attributes hold a half-open interval
    ``[batch_filter_start_time, batch_filter_end_time)`` expressed as naive
    datetimes. They are set through :meth:`set_timer` and checked through
    :meth:`verify_timer`.
    """

    # Inclusive lower bound; ``datetime.min`` stands for "no lower limit".
    batch_filter_start_time: datetime.datetime = datetime.datetime.min
    # Exclusive upper bound; ``datetime.max`` stands for "no upper limit".
    batch_filter_end_time: datetime.datetime = datetime.datetime.max

    @staticmethod
    def set_timer(key: str, user_input: str):
        """Parse ``user_input`` and store it on ``Filter`` under ``key``.

        Accepted formats are ``%Y-%m-%d`` and ``%Y-%m-%d %H:%M:%S``.
        When the input cannot be understood an error is logged and the
        current bound is left untouched.

        Args:
            key: Name of the class attribute to overwrite.
            user_input: Raw value of the command-line option.
        """
        accepted_formats = (
            (r"^\d{4}-\d{2}-\d{2}$", "%Y-%m-%d"),
            (r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$", "%Y-%m-%d %H:%M:%S"),
        )
        timer: datetime.datetime | None = None
        for pattern, time_format in accepted_formats:
            if not re.match(pattern, user_input):
                continue
            try:
                timer = datetime.datetime.strptime(user_input, time_format)
            except ValueError:
                # The digits have the right shape but do not form a real
                # date/time (e.g. 2020-02-30); report it like any other
                # unreadable input instead of crashing.
                timer = None
            break
        if timer is None:
            Logger.error(f"稿件过滤参数: {user_input} 看不懂呢┭┮﹏┭┮,不会生效哦")
            return
        setattr(Filter, key, timer)

    @staticmethod
    def verify_timer(timestamp: int) -> bool:
        """Return whether ``timestamp`` falls inside the configured interval.

        Args:
            timestamp: Publish time as a POSIX timestamp in seconds.

        Returns:
            ``True`` when ``start <= timestamp < end``, otherwise ``False``.
        """
        # Compare as naive local datetimes. Calling ``.timestamp()`` on the
        # ``datetime.min`` / ``datetime.max`` defaults is platform- and
        # timezone-dependent and can raise, so convert the (realistic)
        # incoming timestamp instead of the sentinel bounds.
        published_at = datetime.datetime.fromtimestamp(timestamp)
        return Filter.batch_filter_start_time <= published_at < Filter.batch_filter_end_time
7 changes: 7 additions & 0 deletions yutto/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from yutto.utils.console.logger import Badge, Logger, set_logger_debug
from yutto.utils.fetcher import Fetcher
from yutto.utils.ffmpeg import FFmpeg
from yutto.utils.filter import Filter


def initial_validation(args: argparse.Namespace):
Expand Down Expand Up @@ -56,6 +57,12 @@ def initial_validation(args: argparse.Namespace):
else:
Logger.warning("以非大会员身份登录,注意无法下载会员专享剧集喔~")

# 批量下载时的过滤器设置
if args.batch_filter_start_time:
Filter.set_timer("batch_filter_start_time", args.batch_filter_start_time)
if args.batch_filter_end_time:
Filter.set_timer("batch_filter_end_time", args.batch_filter_end_time)


def validate_basic_arguments(args: argparse.Namespace):
"""检查 argparse 无法检查的选项,并设置某些全局的状态"""
Expand Down

0 comments on commit bc73ca3

Please sign in to comment.