Skip to content
This repository has been archived by the owner on Jan 9, 2022. It is now read-only.

Commit

Permalink
fix(icourse163): 修复部分课程中富文本意外匹配到附件的情况
Browse files Browse the repository at this point in the history
Fixes #19, #21
  • Loading branch information
Foair committed Dec 8, 2018
1 parent abfcc43 commit 7ae2d38
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 13 deletions.
15 changes: 15 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# EditorConfig
# https://editorconfig.org/

root = true

[*]
indent_style = space
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[*.py]
indent_size = 4
8 changes: 3 additions & 5 deletions .github/ISSUE_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
## 错误简述
## 请在这里填写错误简述

网站:中国大学MOOC(网易云课堂 MOOC、学堂在线)

课程地址:(课程地址
课程地址:(请在这里填写课程地址)

问题描述:(问题描述)

(请将相关内容替换,按照格式填写 issue,方便我快速找到错误)
问题描述:(请在这里填写问题描述)
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,10 @@ ENV/

# course dir
* - */

# Visual Studio Code
.vscode/

# Node
node_modules/
yarn.lock
14 changes: 7 additions & 7 deletions mooc/icourse163.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def get_summary(url):
term_id = re.search(r'termId : "(\d+)"', res).group(1)
names = re.findall(r'name:"(.+)"', res)

dir_name = course_dir(names[0], names[1])
dir_name = course_dir(*names[:2])

print(dir_name)
return term_id, dir_name
Expand All @@ -30,7 +30,7 @@ def parse_resource(resource):
'httpSessionId': '5531d06316b34b9486a6891710115ebc', 'c0-scriptName': 'CourseBean',
'c0-methodName': 'getLessonUnitLearnVo', 'c0-id': '0', 'c0-param0': 'number:' + resource.meta[0],
'c0-param1': 'number:' + resource.meta[1], 'c0-param2': 'number:0',
'c0-param3': 'number:' + resource.meta[2], 'batchId': str(int(time.time() * 1000))}
'c0-param3': 'number:' + resource.meta[2], 'batchId': str(int(time.time()) * 1000)}
res = CANDY.post('https://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr',
data=post_data).text

Expand Down Expand Up @@ -91,7 +91,7 @@ def get_resource(term_id):

post_data = {'callCount': '1', 'scriptSessionId': '${scriptSessionId}190', 'c0-scriptName': 'CourseBean',
'c0-methodName': 'getMocTermDto', 'c0-id': '0', 'c0-param0': 'number:' + term_id,
'c0-param1': 'number:0', 'c0-param2': 'boolean:true', 'batchId': str(int(time.time() * 1000))}
'c0-param1': 'number:0', 'c0-param2': 'boolean:true', 'batchId': str(int(time.time()) * 1000)}
res = CANDY.post('https://www.icourse163.org/dwr/call/plaincall/CourseBean.getMocTermDto.dwr',
data=post_data).text.encode('utf_8').decode('unicode_escape')

Expand Down Expand Up @@ -122,7 +122,7 @@ def get_resource(term_id):
pdf_list.append(Document(counter, pdf[3], pdf))
counter.reset()

rich_text = re.findall(r'contentId=(\d+).+contentType=(4).+id=(\d+).+jsonContent=(.+);.+lessonId=' +
rich_text = re.findall(r'contentId=(\d+).+contentType=(4).+id=(\d+).+jsonContent=(.+?);.+lessonId=' +
lesson[0] + r'.+name="(.+)"', res)
for text in rich_text:
counter.add(2)
Expand Down Expand Up @@ -165,11 +165,11 @@ def start(url, config):
global WORK_DIR

CONFIG.update(config)
course_info = get_summary(url)
term_id, dir_name = get_summary(url)

WORK_DIR = WorkingDir(CONFIG['dir'], course_info[1])
WORK_DIR = WorkingDir(CONFIG['dir'], dir_name)
WORK_DIR.change('Videos')
FILES['renamer'] = Renamer(WORK_DIR.file('Rename.bat'))
FILES['video'] = ClassicFile(WORK_DIR.file('Videos.txt'))

get_resource(course_info[0])
get_resource(term_id)
2 changes: 1 addition & 1 deletion mooc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ class Crawler(requests.Session):
"""

header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/68.0.3440.106 Safari/537.36'}
'Chrome/71.0.3578.80 Safari/537.36'}

def __init__(self):
"""初始化 Session,并更新头部"""
Expand Down

0 comments on commit 7ae2d38

Please sign in to comment.