From 4040308cde8bbd2a9b5b67c5cb7b4b6dbd88e4a7 Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Thu, 17 Aug 2023 16:51:48 -0400 Subject: [PATCH] fix(find-dependencies): http->https redirections weren't followed --- .../find_dependencies/find_dependencies.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/edx_repo_tools/find_dependencies/find_dependencies.py b/edx_repo_tools/find_dependencies/find_dependencies.py index dcfbbf42..93a6cd95 100644 --- a/edx_repo_tools/find_dependencies/find_dependencies.py +++ b/edx_repo_tools/find_dependencies/find_dependencies.py @@ -119,6 +119,8 @@ def find_real_url(url: str) -> Optional[str]: # I didn't know you could get 429 from https://github.com, but you can... wait = int(resp.headers.get("Retry-After", 10)) time.sleep(wait + 1) + elif resp.status_code in {301, 302}: + url = resp.headers.get("Location") else: break @@ -303,6 +305,8 @@ def process_directory(): repo_urls.update(check_py_dependencies()) return repo_urls +FIRST_PARTY_ORGS = ["openedx"] + SECOND_PARTY_ORGS = [ "edx", "edx-unsupported", "edx-solutions", "mitodl", @@ -310,6 +314,16 @@ def process_directory(): "open-craft", "eduNEXT", "raccoongang", ] +def urls_in_orgs(urls, orgs): + """ + Find urls that are in any of the `orgs`. + """ + return sorted( + url for url in urls + if any(f"/{org}/" in url for org in orgs) + ) + + def main(dirs=None): """ Analyze the requirements in all of the directories mentioned on the command line. @@ -331,11 +345,10 @@ def main(dirs=None): write_list(WORK_DIR / "repo_urls.txt", sorted(repo_urls)) - seconds = sorted( - url for url in repo_urls - if any(f"/{org}/" in url for org in SECOND_PARTY_ORGS) - ) - write_list(WORK_DIR / "second_party_urls.txt", sorted(seconds)) + firsts = urls_in_orgs(repo_urls, FIRST_PARTY_ORGS) + write_list(WORK_DIR / "first_party_urls.txt", firsts) + seconds = urls_in_orgs(repo_urls, SECOND_PARTY_ORGS) + write_list(WORK_DIR / "second_party_urls.txt", seconds) print("== DONE ==============") print("Second-party:")