From 4dc6362a523a92ceccc5d0b0a1c4d82dead91ae6 Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Fri, 17 Mar 2023 10:01:57 -0400 Subject: [PATCH] improve logs subquery --- osf/metrics/reporters/spam_count.py | 142 ++++++++++------------------ 1 file changed, 52 insertions(+), 90 deletions(-) diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py index 169635789c73..39b8dab67784 100644 --- a/osf/metrics/reporters/spam_count.py +++ b/osf/metrics/reporters/spam_count.py @@ -12,8 +12,6 @@ from django.db.models import Subquery, OuterRef - - class SpamCountReporter(MonthlyReporter): def report(self, report_yearmonth): @@ -36,7 +34,43 @@ def report(self, report_yearmonth): action=NodeLog.CONFIRM_SPAM ).values('id') ) - ).count() + ) + + registration_flagged_reversed = Registration.objects.filter( + logs__id__in=Subquery( + NodeLog.objects.filter( + node=OuterRef('id'), + action=NodeLog.FLAG_SPAM + ).values('id') + ) + ).filter( + logs__id__in=Subquery( + NodeLog.objects.filter( + node=OuterRef('id'), + created__gt=target_month, + created__lt=next_month, + action=NodeLog.CONFIRM_SPAM + ).values('id') + ) + ) + + preprint_flagged_reversed = Preprint.objects.filter( + logs__id__in=Subquery( + PreprintLog.objects.filter( + node=OuterRef('id'), + action=PreprintLog.FLAG_SPAM + ).values('id') + ) + ).filter( + logs__id__in=Subquery( + PreprintLog.objects.filter( + node=OuterRef('id'), + created__gt=target_month, + created__lt=next_month, + action=PreprintLog.CONFIRM_SPAM + ).values('id') + ) + ) report = SpamSummaryReport( report_yearmonth=str(report_yearmonth), @@ -60,33 +94,9 @@ def report(self, report_yearmonth): node__type='osf.node', ).count(), node_flagged_reversed=node_flagged_reversed, - node_flagged_reversed_akismet=Node.objects.filter( - logs__action=NodeLog.FLAG_SPAM - ).filter( - logs__action=NodeLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).filter( - spam_data__who_flagged='akismet' - ).distinct().count(), - node_flagged_reversed_oopspam=Node.objects.filter( - logs__action=NodeLog.FLAG_SPAM - ).filter( - logs__action=NodeLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).filter( - spam_data__who_flagged='oopspam' - ).distinct().count(), - node_flagged_reversed_both=Node.objects.filter( - logs__action=NodeLog.FLAG_SPAM - ).filter( - logs__action=NodeLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).filter( - spam_data__who_flagged='both' - ).distinct().count(), + node_flagged_reversed_akismet=node_flagged_reversed.filter(spam_data__who_flagged='akismet').count(), + node_flagged_reversed_oopspam=node_flagged_reversed.filter(spam_data__who_flagged='oopspam').count(), + node_flagged_reversed_both=node_flagged_reversed.filter(spam_data__who_flagged='both').count(), # Registration Log entries registration_confirmed_spam=NodeLog.objects.filter( action=NodeLog.CONFIRM_SPAM, @@ -106,40 +116,16 @@ def report(self, report_yearmonth): created__lt=next_month, node__type='osf.registration', ).count(), - registration_flagged_reversed=Registration.objects.filter( - logs__action=NodeLog.FLAG_SPAM - ).filter( - logs__action=NodeLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).distinct().count(), - registration_flagged_reversed_akismet=Registration.objects.filter( - logs__action=NodeLog.FLAG_SPAM - ).filter( - logs__action=NodeLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).filter( + registration_flagged_reversed=registration_flagged_reversed, + registration_flagged_reversed_akismet=registration_flagged_reversed.filter( spam_data__who_flagged='akismet' - ).distinct().count(), + ).count(), registration_flagged_reversed_oopspam=Registration.objects.filter( - logs__action=NodeLog.FLAG_SPAM - ).filter( - logs__action=NodeLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).filter( spam_data__who_flagged='oopspam' - ).distinct().count(), + ).count(), registration_flagged_reversed_both=Registration.objects.filter( - logs__action=NodeLog.FLAG_SPAM - ).filter( - logs__action=NodeLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).filter( spam_data__who_flagged='both' - ).distinct().count(), + ).count(), # Preprint Log entries preprint_confirmed_spam=PreprintLog.objects.filter( action=PreprintLog.CONFIRM_SPAM, @@ -156,40 +142,16 @@ def report(self, report_yearmonth): created__gt=target_month, created__lt=next_month, ).count(), - preprint_flagged_reversed=Preprint.objects.filter( - logs__action=PreprintLog.FLAG_SPAM - ).filter( - logs__action=PreprintLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).distinct().count(), - preprint_flagged_reversed_akismet=Preprint.objects.filter( - logs__action=PreprintLog.FLAG_SPAM - ).filter( - logs__action=PreprintLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).filter( + preprint_flagged_reversed=preprint_flagged_reversed, + preprint_flagged_reversed_akismet=preprint_flagged_reversed.filter( spam_data__who_flagged='akismet' - ).distinct().count(), - preprint_flagged_reversed_oopspam=Preprint.objects.filter( - logs__action=PreprintLog.FLAG_SPAM - ).filter( - logs__action=PreprintLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).filter( + ).count(), + preprint_flagged_reversed_oopspam=preprint_flagged_reversed.filter( spam_data__who_flagged='oopspam' - ).distinct().count(), - preprint_flagged_reversed_both=Preprint.objects.filter( - logs__action=PreprintLog.FLAG_SPAM - ).filter( - logs__action=PreprintLog.CONFIRM_HAM, - logs__created__gt=target_month, - logs__created__lt=next_month, - ).filter( + ).count(), + preprint_flagged_reversed_both=preprint_flagged_reversed.filter( spam_data__who_flagged='both' - ).distinct().count(), + ).count(), # New Users marked as Spam/Ham user_marked_as_spam=OSFUser.objects.filter( spam_status=SpamStatus.SPAM,