Skip to content

Commit

Permalink
Add "downloads" page to console with link to CSV files (#2270)
Browse files Browse the repository at this point in the history
This pull request adds a new page at "/console/downloads/" that allows
administrators (`@console_permission_required('project.can_view_stats')`)
to download the following CSV files:

- "Users": Contains information on all active users.  Example:
[users_example.csv](https://github.com/user-attachments/files/16530731/users_example.csv)
- "Projects": Contains information on all published projects. Example:
[projects_example.csv](https://github.com/user-attachments/files/16530736/projects_example.csv)

Some additional notes:

- I sneaked in a change to the menu item, renaming it from "Usage stats" to
"Metrics".
- We may want to introduce a new permission for these tools
later (preferably in a new pull request).
- We will want to introduce new variables and datasets later.
- This pull request is needed to generate metrics for our U24
proposal, so a quick review/merge would be appreciated!
  • Loading branch information
Benjamin Moody committed Aug 7, 2024
2 parents a975a6c + 9fc205a commit 73a0a9c
Show file tree
Hide file tree
Showing 7 changed files with 259 additions and 10 deletions.
3 changes: 2 additions & 1 deletion physionet-django/console/navbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,11 @@ def get_menu_items(self, request):
NavLink(_('Project review'), 'guidelines_review'),
]),

NavSubmenu(_('Usage Stats'), 'stats', 'chart-area', [
NavSubmenu(_('Metrics'), 'stats', 'chart-area', [
NavLink(_('Editorial'), 'editorial_stats'),
NavLink(_('Credentialing'), 'credentialing_stats'),
NavLink(_('Submissions'), 'submission_stats'),
NavLink(_('Download data'), 'downloads'),
]),

NavSubmenu(_('Pages'), 'pages', 'window-maximize', [
Expand Down
36 changes: 36 additions & 0 deletions physionet-django/console/templates/console/downloads.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{% extends "console/base_console.html" %}

{% load static %}

{% block title %}Download data{% endblock %}

{% block content %}

<div class="card mb-3">
<div class="card-header">
Download data
</div>
<div class="card-body">
<div class="table-responsive">

<div class="mb-4">
<h6>Users</h6>
<p>Download a complete list of users, including their names, email addresses, and registration dates.</p>
<a href="{% url 'download_users' %}" class="btn btn-primary">
Download users
</a>
</div>

<div class="mb-4">
<h6>Projects</h6>
<p>Download a complete list of published projects, including project names, descriptions, and timelines.</p>
<a href="{% url 'download_projects' %}" class="btn btn-primary">
Download projects
</a>
</div>

</div>
</div>
</div>

{% endblock %}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
{% block content %}
<div class="card mb-3">
<div class="card-header">
Usage stats for reviewers
Time to review
</div>
<div class="card-body">
<div class="table-responsive">
Expand Down
10 changes: 5 additions & 5 deletions physionet-django/console/templates/console/submission_stats.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
{% block content %}
<div class="card mb-3">
<div class="card-header">
Project statistics over previous 18 months
New projects (past 18 months)
</div>
<div class="card-body">
<div class="table-responsive">
Expand All @@ -13,10 +13,10 @@
<tr>
<th>Year</th>
<th>Month</th>
<th>Projects Created</th>
<th>New Submissions</th>
<th>Resubmissions</th>
<th>Publications</th>
<th>Created</th>
<th>Submitted</th>
<th>Resubmitted</th>
<th>Published</th>
</tr>
{% for year, month_list in stats.items%}
{% for month, val in month_list.items%}
Expand Down
3 changes: 3 additions & 0 deletions physionet-django/console/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@
path('usage/editorial/stats/', views.editorial_stats, name='editorial_stats'),
path('usage/credentialing/stats/', views.credentialing_stats, name='credentialing_stats'),
path('usage/submission/stats/', views.submission_stats, name='submission_stats'),
path('downloads/', views.downloads, name='downloads'),
path('download/users/', views.download_users, name='download_users'),
path('download/projects/', views.download_projects, name='download_projects'),

# redirects
path('redirects/', views.view_redirects, name='redirects'),
Expand Down
208 changes: 206 additions & 2 deletions physionet-django/console/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@
from django.contrib.contenttypes.forms import generic_inlineformset_factory
from django.contrib.contenttypes.models import ContentType
from django.contrib.redirects.models import Redirect
from django.db.models import Count, DurationField, F, Q
from django.db.models import Count, DurationField, F, Q, Prefetch
from django.db.models.functions import Cast, TruncDate
from django.forms import Select, Textarea, modelformset_factory
from django.forms.models import model_to_dict
from django.http import Http404, HttpResponse, JsonResponse, HttpResponseRedirect
from django.http import Http404, HttpResponse, JsonResponse, HttpResponseRedirect, StreamingHttpResponse
from django.shortcuts import get_object_or_404, redirect, render
from django.urls import reverse
from django.utils import timezone
from django.core.exceptions import PermissionDenied
from events.forms import EventAgreementForm, EventDatasetForm
from events.models import Event, EventAgreement, EventDataset, EventApplication
from html2text import html2text
from notification.models import News
from physionet.forms import set_saved_fields_cookie
from physionet.middleware.maintenance import ServiceUnavailable
Expand Down Expand Up @@ -2336,6 +2337,209 @@ def submission_stats(request):
{'submenu': 'submission', 'stats': stats})


@console_permission_required('project.can_view_stats')
def downloads(request):
    """
    Render the console page that links to the downloadable CSV files.
    """
    context = {'submenu': 'submission'}
    return render(request, 'console/downloads.html', context)


class Echo:
    """
    Minimal file-like object whose write() hands its argument straight back.

    Given to csv.writer, it makes writerow() return the formatted line
    instead of accumulating it, so rows can be passed one at a time to a
    StreamingHttpResponse.
    """
    def write(self, value):
        """
        Return the value unchanged; nothing is buffered or stored.
        """
        return value


@console_permission_required('user.change_credentialapplication')
def download_users(request):
    """
    Stream a CSV file ("users.csv") containing data on users.

    The rows are produced by generate_user_csv_data() and formatted lazily
    as the StreamingHttpResponse is iterated, so the complete file is never
    assembled in memory by this view.
    """
    # Accepted applications in chronological order, attached to each user as
    # the list attribute ``accepted_credentials``.
    accepted_applications = CredentialApplication.objects.filter(
        status=CredentialApplication.Status.ACCEPTED
    ).order_by('decision_datetime')
    users = User.objects.select_related('profile').prefetch_related(
        Prefetch('credential_applications',
                 queryset=accepted_applications,
                 to_attr='accepted_credentials'))

    # A single writer serves the whole response: Echo.write() returns each
    # formatted line, so writerow() yields the text to stream.  Building the
    # writer once avoids constructing a new writer/Echo pair for every row.
    writer = csv.writer(Echo(), quoting=csv.QUOTE_ALL)
    response = StreamingHttpResponse(
        (writer.writerow(row) for row in generate_user_csv_data(users)),
        content_type='text/csv'
    )

    response['Content-Disposition'] = 'attachment; filename="users.csv"'
    return response


def generate_user_csv_data(users):
    """
    Yield the rows of the users CSV: the header first, then one row per user.

    Args:
        users: iterable of user objects.  If a user carries an
            ``accepted_credentials`` list (accepted credential applications
            ordered by ``decision_datetime``, as prefetched by
            download_users), it is used directly; otherwise the accepted
            applications are queried for that user.

    Yields:
        Lists of column values.  The credentialing_* columns describe the
        user's most recently decided accepted application, and are None
        when the user has none.
    """
    csv_header = ["user_id",
                  "username",
                  "join_date",
                  "last_login",
                  "registration_ip",
                  "is_active_user",
                  "primary_email",
                  "all_emails",
                  "first_names",
                  "last_name",
                  "full_name",
                  "affiliation",
                  "location",
                  "website",
                  "orcid_id",
                  "credentialing_status",
                  "credentialing_organization_name",
                  "credentialing_job_title",
                  "credentialing_city",
                  "credentialing_state_or_province",
                  "credentialing_country",
                  "credentialing_webpage",
                  "credentialing_reference_name",
                  "credentialing_reference_email",
                  "credentialing_reference_org",
                  "credentialing_reference_response",
                  "credentialing_research_summary"]

    yield csv_header

    for user in users:
        # Prefer the prefetched list: filtering the related manager again
        # would bypass the prefetch and cost one extra query per user.
        prefetched = getattr(user, 'accepted_credentials', None)
        if prefetched is not None:
            # Ordered by decision_datetime, so the last entry is the latest.
            credentials = prefetched[-1] if prefetched else None
        else:
            credentials = user.credential_applications.filter(
                status=CredentialApplication.Status.ACCEPTED
            ).order_by('decision_datetime').last()

        profile = user.profile

        yield [user.id,
               user.username,
               user.join_date,
               user.last_login,
               user.registration_ip,
               user.is_active,
               user.email,
               ', '.join(user.get_emails()),
               profile.first_names,
               profile.last_name,
               profile.get_full_name(),
               profile.affiliation,
               profile.location,
               profile.website,
               user.get_orcid_id(),
               user.get_credentialing_status(),
               credentials.organization_name if credentials else None,
               credentials.job_title if credentials else None,
               credentials.city if credentials else None,
               credentials.state_province if credentials else None,
               credentials.country if credentials else None,
               credentials.webpage if credentials else None,
               credentials.reference_name if credentials else None,
               credentials.reference_email if credentials else None,
               credentials.reference_organization if credentials else None,
               credentials.reference_response_text if credentials else None,
               credentials.research_summary if credentials else None,
               ]


@console_permission_required('user.change_credentialapplication')
def download_projects(request):
    """
    Deliver a CSV file ("projects.csv") containing data on published projects.

    One row is written per PublishedProject.  Rich-text fields are converted
    from HTML to plain text and collapsed onto a single line.  The author
    columns hold comma-separated author ids.
    """
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="projects.csv"'

    writer = csv.writer(response, quoting=csv.QUOTE_ALL)
    writer.writerow(["project_id",
                     "core_project_id",
                     "project_slug",
                     "version",
                     "publish_date",
                     "has_other_versions",
                     "version_order",
                     "is_latest_version",
                     "project_doi",
                     "core_project_doi",
                     "full_description",
                     "submitting_author_id",
                     "title",
                     "abstract",
                     "background",
                     "methods",
                     "content_description",
                     "usage_notes",
                     "installation",
                     "acknowledgements",
                     "conflicts_of_interest",
                     "release_notes",
                     "short_description",
                     "access_policy",
                     "license",
                     "data_use_agreement",
                     "project_home_page",
                     "ethics_statement",
                     "corresponding_author_id",
                     "author_ids",
                     "associated_paper",
                     "associated_paper_url",
                     ])

    # core_project is read for every row; fetch it in the same query.
    projects = PublishedProject.objects.select_related('core_project')

    def clean_html(html_content):
        """Convert an HTML field to single-line plain text."""
        if not html_content:
            return ''
        text = html2text(html_content)
        # Quote characters are left alone: csv.writer already doubles them
        # when quoting a field, so doubling them here as well would leave
        # '""' in the value a CSV reader gets back.
        return text.replace('\n', ' ').strip()

    for project in projects:
        # The queryset caches its results after the first pass, so the
        # three author columns below share a single query.
        authors = project.authors.all().order_by('display_order')
        publication = project.publications.first()

        project_data = [project.id,
                        project.core_project.id,
                        project.slug,
                        project.version,
                        project.publish_datetime,
                        project.has_other_versions,
                        project.version_order,
                        project.is_latest_version,
                        project.doi,
                        project.core_project.doi,
                        clean_html(project.full_description),
                        ', '.join(str(author.id) for author in authors if author.is_submitting),
                        project.title,
                        clean_html(project.abstract),
                        clean_html(project.background),
                        clean_html(project.methods),
                        clean_html(project.content_description),
                        clean_html(project.usage_notes),
                        clean_html(project.installation),
                        clean_html(project.acknowledgements),
                        clean_html(project.conflicts_of_interest),
                        clean_html(project.release_notes),
                        project.short_description,
                        project.access_policy,
                        project.license,
                        project.dua,
                        project.project_home_page,
                        clean_html(project.ethics_statement),
                        ', '.join(str(author.id) for author in authors if author.is_corresponding),
                        ', '.join(str(author.id) for author in authors),
                        publication.citation if publication else None,
                        publication.url if publication else None,
                        ]

        writer.writerow(project_data)
    return response


@console_permission_required('project.can_view_access_logs')
def download_credentialed_users(request):
"""
Expand Down
7 changes: 6 additions & 1 deletion physionet-django/physionet/test_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.redirects.models import Redirect
from django.http import StreamingHttpResponse
from django.test import TestCase
from django.urls import URLPattern, URLResolver, get_resolver
from django.utils.regex_helper import normalize
Expand Down Expand Up @@ -204,7 +205,11 @@ def _handle_request(self, url, _user_=None, _query_={}, _skip_=False,
path = os.path.join(self._dump_dir, path)
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'wb') as f:
f.write(response.content)
if isinstance(response, StreamingHttpResponse):
for chunk in response.streaming_content:
f.write(chunk)
else:
f.write(response.content)

def _output_filename(self, url, query, response):
path = url
Expand Down

0 comments on commit 73a0a9c

Please sign in to comment.