diff --git a/pull.py b/pull.py index 995e2a7..0baab43 100755 --- a/pull.py +++ b/pull.py @@ -266,8 +266,9 @@ def extract(root, base_uri=None): return licenses -def save(licenses, dir=os.curdir): - os.makedirs(dir, exist_ok=True) +def save(licenses, base_uri, dir=os.curdir): + schema_dir = os.path.join(dir, 'schema') + os.makedirs(schema_dir, exist_ok=True) if sys.version_info >= (3, 5): paths = glob.glob(os.path.join(dir, '**', '*.json'), recursive=True) else: @@ -277,17 +278,60 @@ def save(licenses, dir=os.curdir): ) for path in paths: os.remove(path) + license_schema = { + '@context': { + 'schema': 'https://schema.org/', + 'id': { + '@id': 'schema:identifier' + }, + 'name': { + '@id': 'schema:name', + }, + 'uris': { + '@container': '@list', + '@id': 'schema:url', + }, + 'tags': { + '@id': 'schema:keywords', + }, + 'identifiers': { + '@container': '@index', + '@id': 'schema:identifier', + }, + }, + } + with open(os.path.join(schema_dir, 'license.jsonld'), 'w') as f: + json.dump(obj=license_schema, fp=f, indent=2) + f.write('\n') + license_schema_uri = urllib.parse.urljoin( + base=base_uri, url='schema/license.jsonld') + licenses_schema = license_schema.copy() + licenses_schema['@context']['licenses'] = { + '@container': '@index', + '@id': license_schema_uri, + } + licenses_schema.update(license_schema) + with open(os.path.join(schema_dir, 'licenses.jsonld'), 'w') as f: + json.dump(obj=licenses_schema, fp=f, indent=2, sort_keys=True) + f.write('\n') + licenses_schema_uri = urllib.parse.urljoin( + base=base_uri, url='schema/licenses.jsonld') index = sorted(licenses.keys()) with open(os.path.join(dir, 'licenses.json'), 'w') as f: - json.dump(obj=index, fp=f, indent=2) + json.dump(obj=index, fp=f, indent=2, sort_keys=True) f.write('\n') - full_index = {} + full_index = { + '@context': licenses_schema_uri, + 'licenses': {}, + } for id, license in licenses.items(): license = license.copy() if 'tags' in license: license['tags'] = sorted(license['tags']) - full_index[id] = license.copy() license['id'] = id + full_index['licenses'][id] = license.copy() + license['@context'] = urllib.parse.urljoin( + base=base_uri, url='schema/license.jsonld') license_path = os.path.join(dir, '{}.json'.format(id)) with open(license_path, 'w') as f: json.dump(obj=license, fp=f, indent=2, sort_keys=True) @@ -317,4 +361,4 @@ def save(licenses, dir=os.curdir): if unused_identifiers: raise ValueError('unused IDENTIFIERS keys: {}'.format( ', '.join(sorted(unused_identifiers)))) - save(licenses=licenses, dir=dir) + save(licenses=licenses, base_uri='https://wking.github.io/fsf-api/', dir=dir)