Skip to content

Commit

Permalink
Update: Fix OpenAlex schema and add missing fields (#204)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexmassen-hane authored Dec 6, 2023
1 parent a00ce0e commit 3f0bad1
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 9 deletions.
202 changes: 197 additions & 5 deletions academic_observatory_workflows/database/schema/openalex/works.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@
"mode": "NULLABLE",
"description": "APC converted to USD"
},
{
"mode": "NULLABLE",
"name": "value",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "value_usd",
"type": "INTEGER"
},
{
"name": "provenance",
"type": "STRING",
Expand Down Expand Up @@ -69,6 +79,16 @@
"mode": "NULLABLE",
"description": "APC converted to USD"
},
{
"mode": "NULLABLE",
"name": "value",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "value_usd",
"type": "INTEGER"
},
{
"name": "provenance",
"type": "STRING",
Expand All @@ -77,6 +97,16 @@
],
"description": "Object: Information about the paid APC (article processing charge) for this work. You can find the listed APC price (when we know it) for a given work using apc_list. However, authors don\u2019t always pay the listed price; often they get a discounted price from publishers. So it\u2019s useful to know the APC actually paid by authors, as distinct from the list price. This is our effort to provide this. Our best source for the actually paid price is the OpenAPC project. Where available, we use that data, and so apc_paid.provenance is openapc. Where OpenAPC data is unavailable (and unfortunately this is common) we make our best guess by assuming the author paid the APC list price, and apc_paid.provenance will be set to wherever we got the list price from."
},
{
"mode": "NULLABLE",
"name": "authors_count",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "authorships_truncated",
"type": "BOOLEAN"
},
{
"name": "authorships",
"type": "RECORD",
Expand Down Expand Up @@ -114,6 +144,11 @@
"mode": "NULLABLE",
"description": "A summarized description of this author's position in the work's author list. Possible values are first, middle, and last. It's not strictly necessary, because author order is already implicitly recorded by the list order of Authorship objects; however it's useful in some contexts to have this as a categorical value."
},
{
"mode": "REPEATED",
"name": "countries",
"type": "STRING"
},
{
"name": "institutions",
"type": "RECORD",
Expand Down Expand Up @@ -188,12 +223,27 @@
"type": "RECORD",
"mode": "NULLABLE",
"fields": [
{
"mode": "NULLABLE",
"name": "doi",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "is_accepted",
"type": "BOOLEAN"
},
{
"name": "is_oa",
"type": "BOOLEAN",
"mode": "NULLABLE",
"description": "True if this work is Open Access (OA)."
},
{
"mode": "NULLABLE",
"name": "is_published",
"type": "BOOLEAN"
},
{
"name": "landing_page_url",
"type": "STRING",
Expand Down Expand Up @@ -223,6 +273,16 @@
"mode": "NULLABLE",
"description": "The name of the source."
},
{
"mode": "REPEATED",
"name": "host_institution_lineage",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "host_institution_lineage_names",
"type": "STRING"
},
{
"name": "host_organization",
"type": "STRING",
Expand Down Expand Up @@ -259,6 +319,11 @@
"mode": "NULLABLE",
"description": "Whether this is a journal listed in the Directory of Open Access Journals (DOAJ)."
},
{
"name": "is_oa",
"type": "BOOLEAN",
"mode": "NULLABLE"
},
{
"name": "issn",
"type": "STRING",
Expand All @@ -275,13 +340,23 @@
"name": "publisher",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
"description": "The publisher name."
},
{
"name": "publisher_id",
"type": "STRING",
"mode": "NULLABLE",
"description": ""
"description": "The OpenAlex ID of the publisher."
},
{
"mode": "REPEATED",
"name": "publisher_lineage",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "publisher_lineage_names",
"type": "STRING"
},
{
"name": "type",
Expand Down Expand Up @@ -378,6 +453,11 @@
],
"description": "List of dehydrated Concept objects. \nEach Concept object in the list also has one additional property"
},
{
"mode": "NULLABLE",
"name": "concepts_count",
"type": "INTEGER"
},
{
"name": "corresponding_author_ids",
"type": "STRING",
Expand Down Expand Up @@ -440,6 +520,16 @@
"mode": "NULLABLE",
"description": "The DOI for the work. This is the Canonical External ID for works.\nOccasionally, a work has more than one DOI--for example, there might be one DOI for a preprint version hosted on bioRxiv, and another DOI for the published version. However, this field always has just one DOI, the DOI for the published work."
},
{
"mode": "NULLABLE",
"name": "doi_registration_agency",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "fulltext_origin",
"type": "STRING"
},
{
"name": "grants",
"type": "RECORD",
Expand All @@ -463,6 +553,11 @@
],
"description": "List of grant objects, which include the Funder and the award ID, if available. Our grants data comes from Crossref, and is currently fairly limited."
},
{
"mode": "NULLABLE",
"name": "has_fulltext",
"type": "BOOLEAN"
},
{
"name": "id",
"type": "STRING",
Expand All @@ -474,6 +569,11 @@
"type": "RECORD",
"mode": "NULLABLE",
"fields": [
{
"mode": "NULLABLE",
"name": "arxiv_id",
"type": "STRING"
},
{
"name": "doi",
"type": "STRING",
Expand Down Expand Up @@ -542,6 +642,11 @@
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"mode": "NULLABLE",
"name": "doi",
"type": "STRING"
},
{
"name": "is_accepted",
"type": "BOOLEAN",
Expand Down Expand Up @@ -589,6 +694,16 @@
"mode": "NULLABLE",
"description": "The name of the source."
},
{
"mode": "REPEATED",
"name": "host_institution_lineage",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "host_institution_lineage_names",
"type": "STRING"
},
{
"name": "host_organization",
"type": "STRING",
Expand Down Expand Up @@ -625,6 +740,11 @@
"mode": "NULLABLE",
"description": "Whether this is a journal listed in the Directory of Open Access Journals (DOAJ)."
},
{
"name": "is_oa",
"type": "BOOLEAN",
"mode": "NULLABLE"
},
{
"name": "issn",
"type": "STRING",
Expand All @@ -647,7 +767,17 @@
"name": "publisher_id",
"type": "STRING",
"mode": "NULLABLE",
"description": "The OpenAlex publisher ID."
"description": "The OpenAlex ID of the publisher."
},
{
"mode": "REPEATED",
"name": "publisher_lineage",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "publisher_lineage_names",
"type": "STRING"
},
{
"name": "type",
Expand Down Expand Up @@ -741,12 +871,27 @@
"type": "RECORD",
"mode": "NULLABLE",
"fields": [
{
"mode": "NULLABLE",
"name": "doi",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "is_accepted",
"type": "BOOLEAN"
},
{
"name": "is_oa",
"type": "BOOLEAN",
"mode": "NULLABLE",
"description": "True if this work is Open Access (OA)."
},
{
"mode": "NULLABLE",
"name": "is_published",
"type": "BOOLEAN"
},
{
"name": "landing_page_url",
"type": "STRING",
Expand Down Expand Up @@ -776,6 +921,16 @@
"mode": "NULLABLE",
"description": "The name of the source."
},
{
"mode": "REPEATED",
"name": "host_institution_lineage",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "host_institution_lineage_names",
"type": "STRING"
},
{
"name": "host_organization",
"type": "STRING",
Expand Down Expand Up @@ -812,6 +967,11 @@
"mode": "NULLABLE",
"description": "Whether this is a journal listed in the Directory of Open Access Journals (DOAJ)."
},
{
"name": "is_oa",
"type": "BOOLEAN",
"mode": "NULLABLE"
},
{
"name": "issn",
"type": "STRING",
Expand All @@ -836,6 +996,16 @@
"mode": "NULLABLE",
"description": "The OpenAlex ID of the publisher."
},
{
"mode": "REPEATED",
"name": "publisher_lineage",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "publisher_lineage_names",
"type": "STRING"
},
{
"name": "type",
"type": "STRING",
Expand Down Expand Up @@ -871,6 +1041,11 @@
"mode": "REPEATED",
"description": "OpenAlex IDs for works that this work cites. These are citations that go from this work out to another work: This work \u279e Other works."
},
{
"mode": "NULLABLE",
"name": "referenced_works_count",
"type": "INTEGER"
},
{
"name": "related_works",
"type": "STRING",
Expand Down Expand Up @@ -974,6 +1149,11 @@
"mode": "NULLABLE",
"description": "Legacy type information, using Crossref's \"type\" controlled vocabulary."
},
{
"mode": "NULLABLE",
"name": "updated",
"type": "TIMESTAMP"
},
{
"name": "updated_date",
"type": "TIMESTAMP",
Expand All @@ -994,8 +1174,20 @@
},
{
"name": "keywords",
"type": "STRING",
"mode": "REPEATED"
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"mode": "NULLABLE",
"name": "keyword",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "score",
"type": "FLOAT"
}
]
},
{
"name": "cited_by_percentile_year",
Expand Down
Git LFS file not shown
Git LFS file not shown

0 comments on commit 3f0bad1

Please sign in to comment.