Skip to content

Commit

Permalink
allow monthly metrics reports on multiple subjects
Browse files Browse the repository at this point in the history
replace `DailyReport.DAILY_UNIQUE_FIELD` with `UNIQUE_TOGETHER_FIELDS`
on both `DailyReport` and `MonthlyReport`, so we can have (for example)
monthly reports for each institution or each institutional user account
  • Loading branch information
aaxelb committed Aug 23, 2024
1 parent 5bf4ee4 commit 45854c5
Showing 1 changed file with 21 additions and 21 deletions.
42 changes: 21 additions & 21 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class DailyReport(metrics.Metric):
There's something we'd like to know about every so often,
so let's regularly run a report and stash the results here.
"""
DAILY_UNIQUE_FIELD = None # set in subclasses that expect multiple reports per day
UNIQUE_TOGETHER_FIELDS = ('report_date',) # override in subclasses for multiple reports per day

report_date = metrics.Date(format='strict_date', required=True)

Expand Down Expand Up @@ -58,6 +58,7 @@ def serialize(self, data):
class MonthlyReport(metrics.Metric):
"""MonthlyReport (abstract base for report-based metrics that run monthly)
"""
UNIQUE_TOGETHER_FIELDS = ('report_yearmonth',) # override in subclasses for multiple reports per month

report_yearmonth = YearmonthField()

Expand All @@ -69,23 +70,22 @@ class Meta:

@receiver(metrics_pre_save)
def set_report_id(sender, instance, **kwargs):
# Set the document id to a hash of "unique together"
# values (just `report_date` by default) to get
# "ON CONFLICT UPDATE" behavior -- if the document
# already exists, it will be updated rather than duplicated.
# Cannot detect/avoid conflicts this way, but that's ok.

if issubclass(sender, DailyReport):
duf_name = instance.DAILY_UNIQUE_FIELD
if duf_name is None:
instance.meta.id = stable_key(instance.report_date)
else:
duf_value = getattr(instance, duf_name)
if not duf_value or not isinstance(duf_value, str):
raise ReportInvalid(f'{sender.__name__}.{duf_name} MUST have a non-empty string value (got {duf_value})')
instance.meta.id = stable_key(instance.report_date, duf_value)
elif issubclass(sender, MonthlyReport):
instance.meta.id = stable_key(instance.report_yearmonth)
try:
_unique_together_fields = instance.UNIQUE_TOGETHER_FIELDS
except AttributeError:
pass
else:
# Set the document id to a hash of "unique together" fields
# for "ON CONFLICT UPDATE" behavior -- if the document
# already exists, it will be updated rather than duplicated.
# Cannot detect/avoid conflicts this way, but that's ok.
_key_values = []
for _field_name in _unique_together_fields:
_field_value = getattr(instance, _field_name)
if not _field_value or not isinstance(_field_value, str):
raise ReportInvalid(f'{sender.__name__}.{_field_name} MUST have a non-empty string value (got {_field_value})')
_key_values.append(_field_value)
instance.meta.id = stable_key(*_key_values)


#### BEGIN reusable inner objects #####
Expand Down Expand Up @@ -157,7 +157,7 @@ class DownloadCountReport(DailyReport):


class InstitutionSummaryReport(DailyReport):
DAILY_UNIQUE_FIELD = 'institution_id'
UNIQUE_TOGETHER_FIELDS = ('report_date', 'institution_id',)

institution_id = metrics.Keyword()
institution_name = metrics.Keyword()
Expand All @@ -169,7 +169,7 @@ class InstitutionSummaryReport(DailyReport):


class NewUserDomainReport(DailyReport):
DAILY_UNIQUE_FIELD = 'domain_name'
UNIQUE_TOGETHER_FIELDS = ('report_date', 'domain_name',)

domain_name = metrics.Keyword()
new_user_count = metrics.Integer()
Expand All @@ -187,7 +187,7 @@ class OsfstorageFileCountReport(DailyReport):


class PreprintSummaryReport(DailyReport):
DAILY_UNIQUE_FIELD = 'provider_key'
UNIQUE_TOGETHER_FIELDS = ('report_date', 'provider_key',)

provider_key = metrics.Keyword()
preprint_count = metrics.Integer()
Expand Down

0 comments on commit 45854c5

Please sign in to comment.