Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

把url 换成了网页版抖音直播间的网址,因为这个网址是永恒不变的。 #380

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Real-Url

## 2023.05 Added support for web live-room URLs in douyin.py
selenium依赖已添加入requirements.txt
手动安装chrome浏览器
手动下载对应版本chromedriver,跟douyin.py放在同一个文件夹中
[CNPM Binaries Mirror (npmmirror.com)](https://registry.npmmirror.com/binary.html?path=chromedriver/)

## 说明

没想到还有这么多朋友发 issue 和邮件咨询问题,感谢大家的支持🎈!因为有时很忙,回复和提交代码的周期会有点长,抱歉哦😋
Expand Down
Binary file added chromedriver.exe
Binary file not shown.
19 changes: 17 additions & 2 deletions douyin.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
# douyin.py — resolve a Douyin live-stream room_id from user input.
# Accepts a share link, a bare 19-digit room_id, or a web live-room URL
# (the latter is scraped with headless Chrome via selenium).
import re
import sys
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from urllib import parse

import requests

DEBUG = False
# NOTE(review): immediately overrides the line above — debug output was left
# enabled in this change; remove before merging.
DEBUG = True

# Mobile UA so v.douyin.com short links redirect the same way the app does.
headers = {
    'authority': 'v.douyin.com',
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
}

url = input('请输入【抖音直播链接】或【19位room_id】或【网页端直播间链接】:')
if re.match(r'\d{19}', url):
    # Input is already the room_id.
    room_id = url

# NOTE(review): this should probably be `elif` — a bare 19-digit room_id
# contains no 'live', so it falls through to the `else` branch below and the
# short-link lookup fails. TODO confirm against the full file.
if 'live' in url:
    # Web live-room URL: load the page headlessly and pull the roomId out of
    # the URL-encoded embedded page state.
    option = Options()
    option.add_argument('--headless')
    browser = webdriver.Chrome(options=option)
    browser.get(url)
    ps = browser.page_source
    ps_parsed = parse.unquote(ps)
    # The text between 'roomId' and the following 'id_str' contains the id.
    id_raw = ps_parsed.split('roomId')[1].split('id_str')[0]
    room_id = re.search(r'\d{19}', id_raw).group(0)

else:
try:
url = re.search(r'(https.*)', url).group(1)
Expand Down
142 changes: 142 additions & 0 deletions douyin_infinite_linux.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""douyin_infinite_linux.py — record a Douyin live stream in a loop.

Resolves the 19-digit room_id either directly from user input or by
scraping the web live-room page, then repeatedly fetches the stream's
real FLV/HLS URLs and downloads the FLV with wget, reconnecting after a
configurable interval.
"""
import re
import os
import sys
import time
import wget
import requests
from progressbar import ProgressBar, Percentage, Bar, Timer, ETA, FileTransferSpeed
from datetime import datetime
from bs4 import BeautifulSoup
from urllib.parse import unquote

# Progress-bar setup. NOTE: this bar cannot observe wget.download's progress
# (wget prints its own bar), so it is cosmetic only.
widgets = ['Progress: ', Percentage(), ' ', Bar('#'), ' ', Timer(), ' ', ETA(), ' ', FileTransferSpeed()]
progress = ProgressBar(widgets=widgets)


# When True, exception details are printed alongside the error messages.
DEBUG = False

# Mobile UA so the webcast endpoints answer like they would for the app.
headers = {
    'authority': 'v.douyin.com',
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
}

url = input('请输入网页版抖音直播房间url或19位room_id:')
if re.match(r'\d{19}', url):
    # Input is already the room_id.
    room_id = url

# 从live.douyin.com160465665562这种永久链接获取room_id
else:
    s_maxchiron = requests.Session()

    def get_Cookies(url):
        """POST the live-room URL once and return its response cookies
        serialized as a single 'name=value;...' Cookie-header string."""
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4515.159 Safari/537.36'
        }
        session = requests.session()
        res = session.post(url, headers=headers)
        return ''.join('{0}={1};'.format(name, value) for name, value in res.cookies.items())

    def find_id(page_text):
        """Return the 19-digit roomId that follows the last 'roomId":"'
        marker in page_text."""
        # Slice instead of the original char-by-char copy loop; also avoids
        # shadowing the builtin `str` that the original parameter name did.
        return page_text.split('roomId":"')[-1][:19]

    try:
        cookie_maxchiron = get_Cookies(url)
    except Exception as e:
        if DEBUG:
            print(e)
        print('获取cookie失败')
        # Without the cookie the request below would crash on an undefined
        # name; exit cleanly instead (matches the original short-link path).
        sys.exit(1)

    headers = {
        'authority': 'v.douyin.com',
        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'cookie': cookie_maxchiron
    }

    try:
        r_maxchiron = s_maxchiron.get(url, headers=headers)
    except Exception as e:
        if DEBUG:
            print(e)
        print('请求url失败')
        # r_maxchiron would be undefined below; exit instead of crashing.
        sys.exit(1)
    soup = BeautifulSoup(r_maxchiron.text, 'html.parser')
    # The roomId sits URL-encoded inside the embedded page state.
    soup_decode = unquote(str(soup))
    room_id = find_id(soup_decode)
    print(room_id)


inter = input('请输入重连间隔(秒):') or '10'
target = input('请输入目标文件夹(path):')

while True:
    try:
        headers.update({
            'authority': 'webcast.amemv.com',
            'cookie': '_tea_utm_cache_1128={%22utm_source%22:%22copy%22%2C%22utm_medium%22:%22android%22%2C%22utm_campaign%22:%22client_share%22}',
        })

        params = (
            ('type_id', '0'),
            ('live_id', '1'),
            ('room_id', room_id),
            ('app_id', '1128'),
        )

        # The reflow endpoint returns the room's real stream URLs.
        response = requests.get('https://webcast.amemv.com/webcast/room/reflow/info/', headers=headers, params=params).json()

        rtmp_pull_url = response['data']['room']['stream_url']['rtmp_pull_url']
        hls_pull_url = response['data']['room']['stream_url']['hls_pull_url']
        print(rtmp_pull_url)
        print(hls_pull_url)

        try:
            # Timestamped filename so reconnects never overwrite a recording.
            filename = str(datetime.now()).replace(' ', '_').replace(':', '_') + '.flv'
            true_target = os.path.join(target, room_id)

            if not os.path.exists(true_target):
                # exist_ok avoids the check/create race on slow filesystems.
                os.makedirs(true_target, exist_ok=True)
                print('已创建:', true_target)
            else:
                print(true_target, '已存在。准备下载中...')
            print(os.path.join(true_target, filename))
            # Single download; wget prints its own progress bar. The original
            # wrapped this call in `for i in progress(range(500))`, which
            # neither tracks download progress nor should repeat the download.
            wget.download(rtmp_pull_url, os.path.join(true_target, filename))

        except Exception as e:
            if DEBUG:
                print(e)
            print('wget下载失败')

    except Exception as e:
        if DEBUG:
            print(e)
        print('获取real url失败')

    print('正在重连......')
    time.sleep(int(inter))
150 changes: 150 additions & 0 deletions douyin_infinite_win.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import re
import os
import sys
import time
import wget
import requests
from progressbar import ProgressBar, Percentage, Bar, Timer, ETA, FileTransferSpeed
from datetime import datetime
from bs4 import BeautifulSoup
from urllib.parse import unquote

# Progress-bar setup. NOTE(review): `progress` is only referenced by
# commented-out code further down — presumably kept for future use.
widgets = ['Progress: ', Percentage(), ' ', Bar('#'), ' ', Timer(), ' ', ETA(), ' ', FileTransferSpeed()]
progress = ProgressBar(widgets=widgets)


# When True, exception details are printed alongside the error messages.
DEBUG = False

# Mobile UA so the webcast endpoints answer like they would for the app.
headers = {
    'authority': 'v.douyin.com',
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
}

# Prompts: web live-room URL, reconnect interval in seconds (default 10),
# and output folder (default D:\ on Windows).
#url = input('请输入抖音直播链接或19位room_id:')
#url = input('请输入网页版抖音直播房间url或19位room_id:')
url = input('请输入网页版抖音直播房间url(不支持真实19位roomId):')
inter = input('请输入重连间隔(秒)默认10s:') or '10'
target = input('请输入目标文件夹(path)默认D盘根目录:') or 'D:\\'


def get_Cookies(url):
    """POST *url* once and return its response cookies serialized as a
    single 'name=value;name=value;' Cookie-header string."""
    ua_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4515.159 Safari/537.36'
    }
    response = requests.session().post(url, headers=ua_headers)
    # Join every cookie pair into one header-style string.
    return ''.join('{0}={1};'.format(name, value) for name, value in response.cookies.items())

def find_id(page_text):
    """Return the 19-digit roomId that follows the last 'roomId":"' marker
    in *page_text* (the URL-decoded live-room page source)."""
    # Slice the first 19 characters instead of the original char-by-char
    # copy loop; also renames the parameter, which shadowed the builtin
    # `str` (all call sites pass it positionally).
    return page_text.split('roomId":"')[-1][:19]


# Decide the input type once: a bare 19-digit string is already a room_id;
# anything else is treated as a web live-room URL and re-scraped on every
# reconnect. In the original, the whole download loop lived inside the URL
# branch, so a direct room_id did nothing (the author's own `# 失效,todo`);
# hoisting the loop fixes that. NOTE(review): the input prompt above still
# says 19-digit roomId is unsupported — it now works.
direct_room_id = bool(re.match(r'\d{19}', url))
if direct_room_id:
    room_id = url

while True:

    if not direct_room_id:
        # 从live.douyin.com160465665562这种永久链接获取room_id
        s_maxchiron = requests.Session()

        try:
            cookie_maxchiron = get_Cookies(url)
        except Exception as e:
            if DEBUG:
                print(e)
            print('获取cookie失败')
            # Without the cookie the page request cannot be built (the
            # original crashed here on an undefined name); wait and retry.
            time.sleep(int(inter))
            continue

        headers = {
            'authority': 'v.douyin.com',
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
            'cookie': cookie_maxchiron
        }

        try:
            r_maxchiron = s_maxchiron.get(url, headers=headers)
        except Exception as e:
            if DEBUG:
                print(e)
            print('请求url失败')
            # r_maxchiron would be undefined below; wait and retry.
            time.sleep(int(inter))
            continue
        soup = BeautifulSoup(r_maxchiron.text, 'html.parser')
        # The roomId sits URL-encoded inside the embedded page state.
        soup_decode = unquote(str(soup))
        room_id = find_id(soup_decode)
        print(room_id)

    try:
        headers.update({
            'authority': 'webcast.amemv.com',
            'cookie': '_tea_utm_cache_1128={%22utm_source%22:%22copy%22%2C%22utm_medium%22:%22android%22%2C%22utm_campaign%22:%22client_share%22}',
        })

        params = (
            ('type_id', '0'),
            ('live_id', '1'),
            ('room_id', room_id),
            ('app_id', '1128'),
        )

        # The reflow endpoint returns the room's real stream URLs.
        response = requests.get('https://webcast.amemv.com/webcast/room/reflow/info/', headers=headers, params=params).json()

        rtmp_pull_url = response['data']['room']['stream_url']['rtmp_pull_url']
        hls_pull_url = response['data']['room']['stream_url']['hls_pull_url']
        print(rtmp_pull_url)
        print(hls_pull_url)

        try:
            # Timestamped filename so reconnects never overwrite a recording.
            filename = str(datetime.now()).replace(' ', '_').replace(':', '_') + '.flv'
            true_target = os.path.join(target, room_id)

            if not os.path.exists(true_target):
                # exist_ok avoids the check/create race on slow filesystems.
                os.makedirs(true_target, exist_ok=True)
                print('已创建:', true_target)
            else:
                print(true_target, '已存在。准备下载中...')
            # Single download; wget prints its own progress bar.
            wget.download(rtmp_pull_url, os.path.join(true_target, filename))

        except Exception as e:
            if DEBUG:
                print(e)
            print('wget下载失败')

    except Exception as e:
        if DEBUG:
            print(e)
        print('获取real url失败')

    print('正在重连......')
    time.sleep(int(inter))
33 changes: 18 additions & 15 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
aiohttp==3.7.4
async-timeout==3.0.1
attrs==20.2.0
certifi==2020.6.20
chardet==3.0.4
idna==2.10
multidict==4.7.6
protobuf==3.12.2
pycryptodome==3.9.8
PyExecJS==1.5.1
requests==2.26.0
six==1.15.0
typing-extensions==3.7.4.3
urllib3==1.26.5
yarl==1.5.1
aiohttp==3.7.4
async-timeout==3.0.1
attrs==20.2.0
certifi==2020.6.20
chardet==3.0.4
idna==2.10
multidict==4.7.6
protobuf==3.12.2
pycryptodome==3.9.8
PyExecJS==1.5.1
requests
six==1.15.0
typing-extensions==3.7.4.3
urllib3==1.26.5
yarl==1.5.1
selenium
progressbar
wget
beautifulsoup4