Skip to content
This repository has been archived by the owner on Sep 20, 2021. It is now read-only.

export: entry point for impact graph record export #241

Open
wants to merge 1 commit into
base: prod
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 58 additions & 1 deletion modules/websearch/lib/websearch_webinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@
from invenio.bibfield import get_record
from invenio.shellutils import mymkdir

# imports for impact graphs
import json
import re
from invenio.bibauthorid_dbinterface import get_title_of_paper
from invenio.bibrank_citation_searcher import get_refers_to
from invenio.bibrank_citation_searcher import get_cited_by

import invenio.template
# Load the 'websearch' template set through invenio.template.
websearch_templates = invenio.template.load('websearch')

Expand All @@ -119,7 +126,8 @@
get_output_formats(with_attributes=True).values()]
except KeyError:
output_formats = ['xd', 'xm', 'hd', 'hb', 'hs', 'hx']
output_formats.extend(['hm', 't', 'h'])
output_formats.extend(['hm', 't', 'h', 'impact'])


def wash_search_urlargd(form):
"""
Expand Down Expand Up @@ -159,6 +167,48 @@ def wash_search_urlargd(form):

return argd


def get_year(recid):
    """
    Return a four-digit year for record `recid`, or None if none is found.

    The field tags listed below are tried in order.  A tag is only used
    when it holds exactly one value for the record; the first run of four
    consecutive digits in that value is returned as an integer.

    NOTE(review): a reviewer of this change suggested retrieving the year
    from bibrec.earliest_date instead, which is said to be maintained by a
    bibcheck script that looks into these and many more fields -- not
    verified here.
    """
    for tag in ["773__y", "260__c", "269__c", "909C4y", "925__a"]:
        values = get_fieldvalues([recid], tag)

        # Skip tags that are absent or that carry several values.
        if len(values) != 1:
            continue

        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        match_obj = re.search(r"\d\d\d\d", values[0])
        if match_obj is not None:
            return int(match_obj.group())

    return None


def get_impact_graph(root_recid, max_depth=1):
    """
    Collect the citation neighbourhood of a record and return it as JSON.

    Starting from `root_recid`, records are visited breadth-first along
    both their references and their citations.  Records found at depth
    `max_depth` are included in the output but are not expanded further.

    @param root_recid: identifier of the starting record (passed to int())
    @param max_depth: maximum number of hops to follow from the root
    @return: JSON string of a dictionary keyed by recid; every value holds
        the keys "recid", "references", "citations", "title" and "year"
    """
    # Imported locally so the function is self-contained; deque gives O(1)
    # pops from the left, whereas list.pop(0) shifts every element.
    from collections import deque

    records = {}
    queue = deque([(int(root_recid), 0)])

    while queue:
        recid, depth = queue.popleft()

        # The same record can be reached through several neighbours.
        if recid in records:
            continue

        references = list(get_refers_to(recid))
        citations = list(get_cited_by(recid))
        records[recid] = {
            "recid": recid,
            "references": references,
            "citations": citations,
            "title": get_title_of_paper(recid),
            "year": get_year(recid),
        }

        if depth < max_depth:
            for neighbor in references + citations:
                # Do not enqueue records that were already visited.
                if neighbor not in records:
                    queue.append((neighbor, depth + 1))

    return json.dumps(records)


class WebInterfaceUnAPIPages(WebInterfaceDirectory):
""" Handle /unapi set of pages."""
_exports = ['']
Expand Down Expand Up @@ -1193,6 +1243,13 @@ def __call__(self, req, form):
text=auth_msg, \
navmenuid='search')

# hack in impact graphs as an additional export format
if self.format == "impact":
req.content_type = "application/json"
req.send_http_header()
req.write(get_impact_graph(self.recid))
return

# mod_python does not like to return [] when of=id:
out = perform_request_search(req, **argd)
if isinstance(out, intbitset):
Expand Down