Commit

add better tests for ignoring pagination on downloadable data
John Tordoff committed Oct 24, 2024
1 parent 62907cf commit 09370fe
Showing 2 changed files with 200 additions and 43 deletions.
2 changes: 1 addition & 1 deletion api/metrics/renderers.py
@@ -67,7 +67,7 @@ def get_filename(self, renderer_context: dict, format_type: str) -> str:
             return USER_INSTITUTION_REPORT_FILENAME.format(
                 date_created=current_date,
                 institution_id=renderer_context['view'].kwargs['institution_id'],
-                format_type=format_type
+                format_type=format_type,
             )
         else:
             raise NotImplementedError('Missing format filename')
241 changes: 199 additions & 42 deletions api_tests/institutions/views/test_institution_user_metric_list.py
@@ -407,18 +407,18 @@ def test_paginate_reports(self, app, url, institutional_admin, institution, repo

     @pytest.mark.parametrize('format_type, delimiter, content_type', [
         ('csv', ',', 'text/csv; charset=utf-8'),
-        ('tsv', '\t', 'text/tab-separated-values; charset=utf-8'),
-        ('json_file', None, 'application/json; charset=utf-8')
+        ('tsv', '\t', 'text/tab-separated-values; charset=utf-8')
     ])
-    def test_get_report_formats(self, app, url, institutional_admin, institution, format_type, delimiter, content_type):
+    def test_get_report_formats_csv_tsv(self, app, url, institutional_admin, institution, format_type, delimiter,
+                                        content_type):
         _report_factory(
             '2024-08',
             institution,
-            user_id=f'u_orcomma',
+            user_id='u_orcomma',
             account_creation_date='2018-02',
-            user_name=f'Jason Kelce',
+            user_name='Jason Kelce',
             orcid_id='4444-3333-2222-1111',
-            department_name='Center \t Greatest Ever',
+            department_name='Center, \t Greatest Ever',
             storage_byte_count=736662999298,
             embargoed_registration_count=1,
             published_preprint_count=1,
@@ -438,47 +438,204 @@ def test_get_report_formats(self, app, url, institutional_admin, institution, fo
         expected_filename = USER_INSTITUTION_REPORT_FILENAME.format(
             date_created=current_date,
             institution_id=institution._id,
-            format_type='json' if format_type == 'json_file' else format_type
+            format_type=format_type
         )
         assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"'

-        if format_type == 'json_file':
-            # Validate JSON structure and content
-            response_data = json.loads(resp.body.decode('utf-8'))
-            expected_data = [
-                {
-                    'account_creation_date': '2018-02',
-                    'department_name': 'Center \t Greatest Ever',
-                    'embargoed_registration_count': 1,
-                    'month_last_active': '2018-02',
-                    'month_last_login': '2018-02',
-                    'orcid_id': '4444-3333-2222-1111',
-                    'private_project_count': 5,
-                    'public_file_count': 4,
-                    'public_project_count': 3,
-                    'public_registration_count': 2,
-                    'published_preprint_count': 1,
-                    'storage_byte_count': 736662999298,
-                    'user_name': 'Jason Kelce'
-                }
-            ]
-            assert response_data == expected_data
-        else:
-            response_body = resp.text
-            expected_response = [
-                ['account_creation_date', 'department_name', 'embargoed_registration_count', 'month_last_active',
-                 'month_last_login', 'orcid_id', 'private_projects', 'public_file_count', 'public_projects',
-                 'public_registration_count', 'published_preprint_count', 'storage_byte_count', 'user_name'],
-                ['2018-02', 'Center \t Greatest Ever', '1', '2018-02', '2018-02', '4444-3333-2222-1111', '5', '4', '3',
-                 '2', '1', '736662999298', 'Jason Kelce'],
+        response_body = resp.text
+        expected_response = [
+            [
+                'report_yearmonth',
+                'institution_id',
+                'user_id',
+                'account_creation_date',
+                'user_name',
+                'orcid_id',
+                'department_name',
+                'storage_byte_count',
+                'embargoed_registration_count',
+                'published_preprint_count',
+                'public_registration_count',
+                'public_project_count',
+                'public_file_count',
+                'private_project_count',
+                'month_last_active',
+                'month_last_login',
+                'timestamp'
+            ],
+            [
+                '2024-08',
+                institution._id,
+                'u_orcomma',
+                '2018-02',
+                'Jason Kelce',
+                '4444-3333-2222-1111',
+                'Center, \t Greatest Ever',
+                '736662999298',
+                '1',
+                '1',
+                '2',
+                '3',
+                '4',
+                '5',
+                '2018-02',
+                '2018-02',
+                response_body.splitlines()[1].split(delimiter)[-1]
             ]
+        ]

+        if delimiter:
+            with StringIO(response_body) as file:
+                reader = csv.reader(file, delimiter=delimiter)
+                response_rows = list(reader)
+                assert response_rows[0] == expected_response[0]
+                assert sorted(response_rows[1:]) == sorted(expected_response[1:])
+
+    def test_get_report_format_json(self, app, url, institutional_admin, institution):
+        _report_factory(
+            '2024-08',
+            institution,
+            user_id='u_orcomma',
+            account_creation_date='2018-02',
+            user_name='Brian Dawkins',
+            orcid_id='4444-3333-2222-1111',
+            department_name='Safety "The Wolverine" Weapon X',
+            storage_byte_count=736662999298,
+            embargoed_registration_count=1,
+            published_preprint_count=1,
+            public_registration_count=2,
+            public_project_count=3,
+            public_file_count=4,
+            private_project_count=5,
+            month_last_active='2018-02',
+            month_last_login='2018-02',
+        )
+
+        resp = app.get(f'{url}?format=json_file', auth=institutional_admin.auth)
+        assert resp.status_code == 200
+        assert resp.headers['Content-Type'] == 'application/json; charset=utf-8'
+
+        current_date = datetime.datetime.now().strftime('%Y-%m')
+        expected_filename = USER_INSTITUTION_REPORT_FILENAME.format(
+            date_created=current_date,
+            institution_id=institution._id,
+            format_type='json'
+        )
+        assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"'
+
-            if delimiter:
-                with StringIO(response_body) as file:
-                    reader = csv.reader(file, delimiter=delimiter)
-                    response_rows = list(reader)
-                    assert response_rows[0] == expected_response[0]
-                    assert sorted(response_rows[1:]) == sorted(expected_response[1:])
+        # Validate JSON structure and content
+        response_data = json.loads(resp.body.decode('utf-8'))
+        expected_data = [
+            {
+                'account_creation_date': '2018-02',
+                'department_name': 'Safety "The Wolverine" Weapon X',
+                'embargoed_registration_count': 1,
+                'month_last_active': '2018-02',
+                'month_last_login': '2018-02',
+                'orcid_id': '4444-3333-2222-1111',
+                'private_project_count': 5,
+                'public_file_count': 4,
+                'public_project_count': 3,
+                'public_registration_count': 2,
+                'published_preprint_count': 1,
+                'storage_byte_count': 736662999298,
+                'user_name': 'Brian Dawkins',
+                'institution_id': institution._id,
+                'report_yearmonth': '2024-08',
+                'user_id': 'u_orcomma',
+                'timestamp': response_data[0]['timestamp']  # dynamically compare timestamp
+            }
+        ]
+        assert response_data == expected_data
+
+    @pytest.mark.parametrize('format_type, delimiter, content_type', [
+        ('csv', ',', 'text/csv; charset=utf-8'),
+        ('tsv', '\t', 'text/tab-separated-values; charset=utf-8')
+    ])
+    def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institution, format_type, delimiter,
+                                        content_type):
+        # Create 15 records, exceeding the default page size of 10
+        num_records = 15
+        expected_data = []
+        for i in range(num_records):
+            _report_factory(
+                '2024-08',
+                institution,
+                user_id=f'u_orcomma_{i}',
+                account_creation_date=f'2018-0{i % 9 + 1}',  # Vary the date
+                user_name=f'Jalen Hurts #{i}',
+                orcid_id=f'4444-3333-2222-111{i}',
+                department_name='QBatman',
+                storage_byte_count=736662999298 + i,
+                embargoed_registration_count=1,
+                published_preprint_count=1,
+                public_registration_count=2,
+                public_project_count=3,
+                public_file_count=4,
+                private_project_count=5,
+                month_last_active=f'2018-0{i % 9 + 1}',
+                month_last_login=f'2018-0{i % 9 + 1}',
+            )
+            expected_data.append([
+                '2024-08',
+                institution._id,
+                f'u_orcomma_{i}',
+                f'2018-0{i % 9 + 1}',
+                f'Jalen Hurts #{i}',
+                f'4444-3333-2222-111{i}',
+                'QBatman',
+                str(736662999298 + i),
+                '1',
+                '1',
+                '2',
+                '3',
+                '4',
+                '5',
+                f'2018-0{i % 9 + 1}',
+                f'2018-0{i % 9 + 1}',
+                None  # Placeholder for the dynamic timestamp
+            ])
+
+        # Request the report in the parametrized download format; the default page size of 10 should be ignored
+        resp = app.get(f'{url}?format={format_type}', auth=institutional_admin.auth)
+        assert resp.status_code == 200
+        assert resp.headers['Content-Type'] == content_type
+
+        current_date = datetime.datetime.now().strftime('%Y-%m')
+        expected_filename = USER_INSTITUTION_REPORT_FILENAME.format(
+            date_created=current_date,
+            institution_id=institution._id,
+            format_type=format_type
+        )
+        assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"'
+
+        # Validate the CSV/TSV content contains all 15 records, ignoring the default pagination of 10
+        response_body = resp.text
+        rows = response_body.splitlines()
+
+        assert len(rows) == num_records + 1 == 16  # 1 header + 15 records
+
+        if delimiter:
+            with StringIO(response_body) as file:
+                reader = csv.reader(file, delimiter=delimiter)
+                response_rows = list(reader)
+
+                # Validate header row
+                expected_header = [
+                    'report_yearmonth', 'institution_id', 'user_id', 'account_creation_date', 'user_name', 'orcid_id',
+                    'department_name', 'storage_byte_count', 'embargoed_registration_count', 'published_preprint_count',
+                    'public_registration_count', 'public_project_count', 'public_file_count', 'private_project_count',
+                    'month_last_active', 'month_last_login', 'timestamp'
+                ]
+                assert response_rows[0] == expected_header
+
+                # Sort both expected and actual rows (ignoring the header) before comparison
+                sorted_response_rows = sorted(response_rows[1:], key=lambda x: x[2])  # Sort by 'user_id'
+                sorted_expected_data = sorted(expected_data, key=lambda x: x[2])  # Sort by 'user_id'
+
+                for i in range(num_records):
+                    sorted_expected_data[i][-1] = sorted_response_rows[i][-1]  # Dynamically compare the timestamp
+                    assert sorted_response_rows[i] == sorted_expected_data[i]


 def _user_ids(api_response):
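For orientation, the behavior these tests pin down can be summarized in a short sketch. This is a hypothetical illustration assuming a Django REST Framework ListAPIView; the class name and format set below are assumptions for illustration, not code from this commit. The idea is that a list view can opt out of pagination whenever a downloadable format is requested, so the csv/tsv renderers receive every record rather than a single page.

# Hypothetical sketch (names assumed, not taken from this commit): a DRF list
# view can skip pagination for downloadable formats, which is the behavior the
# csv/tsv tests above assert (all 15 seeded records returned, not a page of 10).
from rest_framework import generics


class InstitutionUserMetricsList(generics.ListAPIView):
    DOWNLOAD_FORMATS = {'csv', 'tsv', 'json_file'}

    def paginate_queryset(self, queryset):
        # Returning None makes ListAPIView.list() serialize the full queryset
        # instead of a single page, so the default page size is ignored.
        if self.request.query_params.get('format') in self.DOWNLOAD_FORMATS:
            return None
        return super().paginate_queryset(queryset)

The test_csv_tsv_ignores_pagination test exercises that contract from the outside: fifteen seeded reports must come back as sixteen lines (one header plus all fifteen rows) regardless of the default page size.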
