diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..8ba9cf3 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,15 @@ +# EditorConfig +# https://editorconfig.org/ + +root = true + +[*] +indent_style = space +indent_size = 2 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.py] +indent_size = 4 diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3bcd57d..0c717c1 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,9 +1,7 @@ -## (错误简述) +## (请在这里填写错误简述) 网站:中国大学MOOC(网易云课堂 MOOC、学堂在线) -课程地址:(课程地址) +课程地址:(请在这里填写课程地址) -问题描述:(问题描述) - -(请将相关内容替换,按照格式填写 issue,方便我快速找到错误) +问题描述:(请在这里填写问题描述) diff --git a/.gitignore b/.gitignore index 81d9802..5d0f0bc 100644 --- a/.gitignore +++ b/.gitignore @@ -108,3 +108,10 @@ ENV/ # course dir * - */ + +# Visual Studio Code +.vscode/ + +# Node +node_modules/ +yarn.lock diff --git a/mooc/icourse163.py b/mooc/icourse163.py index f2cd0b0..27c8514 100644 --- a/mooc/icourse163.py +++ b/mooc/icourse163.py @@ -17,7 +17,7 @@ def get_summary(url): term_id = re.search(r'termId : "(\d+)"', res).group(1) names = re.findall(r'name:"(.+)"', res) - dir_name = course_dir(names[0], names[1]) + dir_name = course_dir(*names[:2]) print(dir_name) return term_id, dir_name @@ -30,7 +30,7 @@ def parse_resource(resource): 'httpSessionId': '5531d06316b34b9486a6891710115ebc', 'c0-scriptName': 'CourseBean', 'c0-methodName': 'getLessonUnitLearnVo', 'c0-id': '0', 'c0-param0': 'number:' + resource.meta[0], 'c0-param1': 'number:' + resource.meta[1], 'c0-param2': 'number:0', - 'c0-param3': 'number:' + resource.meta[2], 'batchId': str(int(time.time() * 1000))} + 'c0-param3': 'number:' + resource.meta[2], 'batchId': str(int(time.time()) * 1000)} res = CANDY.post('https://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr', data=post_data).text @@ -91,7 +91,7 @@ def get_resource(term_id): post_data = {'callCount': '1', 'scriptSessionId': '${scriptSessionId}190', 'c0-scriptName': 'CourseBean', 'c0-methodName': 'getMocTermDto', 'c0-id': '0', 'c0-param0': 'number:' + term_id, - 'c0-param1': 'number:0', 'c0-param2': 'boolean:true', 'batchId': str(int(time.time() * 1000))} + 'c0-param1': 'number:0', 'c0-param2': 'boolean:true', 'batchId': str(int(time.time()) * 1000)} res = CANDY.post('https://www.icourse163.org/dwr/call/plaincall/CourseBean.getMocTermDto.dwr', data=post_data).text.encode('utf_8').decode('unicode_escape') @@ -122,7 +122,7 @@ def get_resource(term_id): pdf_list.append(Document(counter, pdf[3], pdf)) counter.reset() - rich_text = re.findall(r'contentId=(\d+).+contentType=(4).+id=(\d+).+jsonContent=(.+);.+lessonId=' + + rich_text = re.findall(r'contentId=(\d+).+contentType=(4).+id=(\d+).+jsonContent=(.+?);.+lessonId=' + lesson[0] + r'.+name="(.+)"', res) for text in rich_text: counter.add(2) @@ -165,11 +165,11 @@ def start(url, config): global WORK_DIR CONFIG.update(config) - course_info = get_summary(url) + term_id, dir_name = get_summary(url) - WORK_DIR = WorkingDir(CONFIG['dir'], course_info[1]) + WORK_DIR = WorkingDir(CONFIG['dir'], dir_name) WORK_DIR.change('Videos') FILES['renamer'] = Renamer(WORK_DIR.file('Rename.bat')) FILES['video'] = ClassicFile(WORK_DIR.file('Videos.txt')) - get_resource(course_info[0]) + get_resource(term_id) diff --git a/mooc/utils.py b/mooc/utils.py index c1797fe..43c545e 100644 --- a/mooc/utils.py +++ b/mooc/utils.py @@ -203,7 +203,7 @@ class Crawler(requests.Session): """ header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/68.0.3440.106 Safari/537.36'} + 'Chrome/71.0.3578.80 Safari/537.36'} def __init__(self): """初始化 Session,并更新头部"""