From 8f0d81d1d8efcda165b6136b29df9daca3a9c5af Mon Sep 17 00:00:00 2001 From: Joaquim d'Souza Date: Tue, 17 Sep 2024 19:35:52 +0200 Subject: [PATCH] feat: improve linkcheck command --- wagtaillinkchecker/management/commands/linkcheck.py | 10 ++++++---- wagtaillinkchecker/models.py | 7 +++++-- wagtaillinkchecker/scanner.py | 4 ++-- wagtaillinkchecker/tasks.py | 4 ++++ 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/wagtaillinkchecker/management/commands/linkcheck.py b/wagtaillinkchecker/management/commands/linkcheck.py index a98fde2..5109b40 100644 --- a/wagtaillinkchecker/management/commands/linkcheck.py +++ b/wagtaillinkchecker/management/commands/linkcheck.py @@ -7,7 +7,7 @@ from wagtaillinkchecker.scanner import broken_link_scan from wagtaillinkchecker.models import ScanLink -from wagtail.models import PageRevision, Site +from wagtail.models import Revision, Site class Command(BaseCommand): @@ -21,10 +21,10 @@ def add_arguments(self, parser): def handle(self, *args, **kwargs): site = Site.objects.filter(is_default_site=True).first() pages = site.root_page.get_descendants(inclusive=True).live().public() - verbosity = kwargs.get("verbosity") or 1 + verbosity = 2 print(f"Scanning {len(pages)} pages...") - scan = broken_link_scan(site, verbosity) + scan = broken_link_scan(site, verbosity, sync=True) total_links = ScanLink.objects.filter(scan=scan, crawled=True) broken_links = ScanLink.objects.filter(scan=scan, broken=True) print( @@ -37,7 +37,7 @@ def handle(self, *args, **kwargs): messages = [] for page in pages: - revisions = PageRevision.objects.filter(page=page) + revisions = page.revisions user = None user_email = settings.DEFAULT_FROM_EMAIL if revisions: @@ -48,6 +48,8 @@ def handle(self, *args, **kwargs): for link in broken_links: if link.page == page: page_broken_links.append(link) + if not page_broken_links: + continue email_message = render_to_string( "wagtaillinkchecker/emails/broken_links.html", { diff --git a/wagtaillinkchecker/models.py b/wagtaillinkchecker/models.py index 73681f8..4a8cb49 100644 --- a/wagtaillinkchecker/models.py +++ b/wagtaillinkchecker/models.py @@ -102,8 +102,11 @@ def __str__(self): def page_is_deleted(self): return self.page_deleted and self.page_slug - def check_link(self, verbosity=1): - from wagtaillinkchecker.tasks import check_link + def check_link(self, verbosity=1, sync=False): + from wagtaillinkchecker.tasks import check_link, check_link_sync + + if sync: + return check_link_sync(self.pk, verbosity=verbosity) check_link(self.pk, verbosity=verbosity) diff --git a/wagtaillinkchecker/scanner.py b/wagtaillinkchecker/scanner.py index 00c07df..e17ab0f 100644 --- a/wagtaillinkchecker/scanner.py +++ b/wagtaillinkchecker/scanner.py @@ -109,7 +109,7 @@ def clean_url(url, site): return url -def broken_link_scan(site, verbosity=1): +def broken_link_scan(site, verbosity=1, sync=False): from wagtaillinkchecker.models import Scan, ScanLink pages = site.root_page.get_descendants(inclusive=True).live().public() @@ -123,6 +123,6 @@ def broken_link_scan(site, verbosity=1): ScanLink.objects.get(url=url, scan=scan) except ScanLink.DoesNotExist: link = ScanLink.objects.create(url=page.full_url, page=page, scan=scan) - link.check_link(verbosity=verbosity) + link.check_link(verbosity=verbosity, sync=sync) return scan diff --git a/wagtaillinkchecker/tasks.py b/wagtaillinkchecker/tasks.py index 5949a20..e960d31 100644 --- a/wagtaillinkchecker/tasks.py +++ b/wagtaillinkchecker/tasks.py @@ -13,6 +13,10 @@ def check_link( link_pk, verbosity=1, ): + return check_link_sync(link_pk, verbosity=verbosity) + + +def check_link_sync(link_pk, verbosity=1): link = ScanLink.objects.get(pk=link_pk) site = link.scan.site url = get_url(link.url, link.page, site)