From 8398a3573162bb584506a1ce0fa0c88f9db4fd91 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 4 Jan 2018 15:44:17 -0800 Subject: [PATCH] pull: Add JSON-LD markup This should help with forward/backward compatibility, because consumers can use JSON-LD to extract the semantic meaning regardless of how I lay out the data. From [1]: This information allows developers to re-use each other's data without having to agree to how their data will interoperate on a site-by-site basis. There's a framing spec in the works [2] to support "request your own layout" in an API. I had to kludge a bit to support FSF IDs in licenses-full.json. The released JSON-LD 1.0 has index maps, but [3]: Note how the index keys do not appear in the Linked Data below, but would continue to exist if the document were compacted or expanded... which isn't very useful for folks who are parsing the file as JSON-LD and who need access to the FSF IDs. To work around that, I've used the FSF IDs as both the 'licenses' keys and as 'id' entries in the 'licenses' values. We might be able to specify the semantics of the object keys with the unreleased 1.1 [4] and its ID maps [5], but I've left that off for now. There is some background on classifying non-URI identifiers in [6]. Currently I'm not classifying my identifiers. I'm using HTTPS identifiers for schema.org, because that's the long-term target [7]. 
I've tested the output JSON-LD in [8], and the compacted licenses-full.json looks like: { "http://tremily.us/fsf/schema/license.jsonld": [ { "@index": "ACDL", "https://schema.org/identifier": "ACDL", "https://schema.org/keywords": "libre", "https://schema.org/name": "Apple's Common Documentation License, Version 1.0", "https://schema.org/url": { "@list": [ "https://www.gnu.org/licenses/license-list.html#ACDL", "http://fedoraproject.org/wiki/Licensing/Common_Documentation_License" ] } }, { "@index": "AGPLv1.0", "https://schema.org/identifier": [ "AGPLv1.0", { "@index": "spdx", "@value": "AGPL-1.0" } ], "https://schema.org/keywords": "libre", "https://schema.org/name": "Affero General Public License version 1", "https://schema.org/url": { "@list": [ "https://www.gnu.org/licenses/license-list.html#AGPLv1.0", "http://directory.fsf.org/wiki/License:AGPLv1" ] } }, ... ] } [1]: https://www.w3.org/TR/2014/REC-json-ld-20140116/#h3_the-context [2]: https://json-ld.org/spec/latest/json-ld-framing/ [3]: https://www.w3.org/TR/2014/REC-json-ld-20140116/#data-indexing [4]: https://json-ld.org/spec/latest/json-ld/#changes-since-1-0-recommendation-of-16-january-2014 [5]: https://json-ld.org/spec/latest/json-ld/#node-identifier-indexing [6]: http://meta.schema.org/docs/datamodel.html#identifierBg [7]: http://schema.org/docs/faq.html#19 [8]: https://json-ld.org/playground/ --- pull.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/pull.py b/pull.py index 995e2a7..0baab43 100755 --- a/pull.py +++ b/pull.py @@ -266,8 +266,9 @@ def extract(root, base_uri=None): return licenses -def save(licenses, dir=os.curdir): - os.makedirs(dir, exist_ok=True) +def save(licenses, base_uri, dir=os.curdir): + schema_dir = os.path.join(dir, 'schema') + os.makedirs(schema_dir, exist_ok=True) if sys.version_info >= (3, 5): paths = glob.glob(os.path.join(dir, '**', '*.json'), recursive=True) else: @@ -277,17 +278,60 @@ def 
save(licenses, dir=os.curdir): ) for path in paths: os.remove(path) + license_schema = { + '@context': { + 'schema': 'https://schema.org/', + 'id': { + '@id': 'schema:identifier' + }, + 'name': { + '@id': 'schema:name', + }, + 'uris': { + '@container': '@list', + '@id': 'schema:url', + }, + 'tags': { + '@id': 'schema:keywords', + }, + 'identifiers': { + '@container': '@index', + '@id': 'schema:identifier', + }, + }, + } + with open(os.path.join(schema_dir, 'license.jsonld'), 'w') as f: + json.dump(obj=license_schema, fp=f, indent=2) + f.write('\n') + license_schema_uri = urllib.parse.urljoin( + base=base_uri, url='schema/license.jsonld') + licenses_schema = license_schema.copy() + licenses_schema['@context']['licenses'] = { + '@container': '@index', + '@id': license_schema_uri, + } + licenses_schema.update(license_schema) + with open(os.path.join(schema_dir, 'licenses.jsonld'), 'w') as f: + json.dump(obj=licenses_schema, fp=f, indent=2, sort_keys=True) + f.write('\n') + licenses_schema_uri = urllib.parse.urljoin( + base=base_uri, url='schema/licenses.jsonld') index = sorted(licenses.keys()) with open(os.path.join(dir, 'licenses.json'), 'w') as f: - json.dump(obj=index, fp=f, indent=2) + json.dump(obj=index, fp=f, indent=2, sort_keys=True) f.write('\n') - full_index = {} + full_index = { + '@context': licenses_schema_uri, + 'licenses': {}, + } for id, license in licenses.items(): license = license.copy() if 'tags' in license: license['tags'] = sorted(license['tags']) - full_index[id] = license.copy() license['id'] = id + full_index['licenses'][id] = license.copy() + license['@context'] = urllib.parse.urljoin( + base=base_uri, url='schema/license.jsonld') license_path = os.path.join(dir, '{}.json'.format(id)) with open(license_path, 'w') as f: json.dump(obj=license, fp=f, indent=2, sort_keys=True) @@ -317,4 +361,4 @@ def save(licenses, dir=os.curdir): if unused_identifiers: raise ValueError('unused IDENTIFIERS keys: {}'.format( ', '.join(sorted(unused_identifiers)))) 
- save(licenses=licenses, dir=dir) + save(licenses=licenses, base_uri='https://wking.github.io/fsf-api/', dir=dir)