Skip to content

Commit

Permalink
pull: Add JSON-LD markup
Browse files Browse the repository at this point in the history
This should help with forward/backward compatibility, because
consumers can use JSON-LD to extract the semantic meaning regardless
of how I lay out the data.  From [1]:

  This information allows developers to re-use each other's data
  without having to agree to how their data will interoperate on a
  site-by-site basis.

There's a framing spec in the works [2] to support "request your own
layout" in an API.

I had to kludge a bit to support FSF IDs in licenses-full.json.  The
released JSON-LD 1.0 has index maps, but [3]:

  Note how the index keys do not appear in the Linked Data below, but
  would continue to exist if the document were compacted or
  expanded...

which isn't very useful for folks who are parsing the file as JSON-LD
and who need access to the FSF IDs.  To work around that, I've used
the FSF IDs as both the 'licenses' keys and as 'id' entries in the
'licenses' values.

We might be able to specify the semantics of the object keys with the
unreleased 1.1 [4] and its ID maps [5], but I've left that off for
now.

There is some background on classifying non-URI identifiers in [6].
Currently I'm not classifying my identifiers.

I'm using HTTPS identifiers for schema.org, because that's the
long-term target [7].

I've tested the output JSON-LD in [8], and the compacted
licenses-full.json looks like:

  {
    "http://tremily.us/fsf/schema/license.jsonld": [
      {
        "@index": "ACDL",
        "https://schema.org/identifier": "ACDL",
        "https://schema.org/keywords": "libre",
        "https://schema.org/name": "Apple's Common Documentation License, Version 1.0",
        "https://schema.org/url": {
          "@list": [
            "https://www.gnu.org/licenses/license-list.html#ACDL",
            "http://fedoraproject.org/wiki/Licensing/Common_Documentation_License"
          ]
        }
      },
      {
        "@index": "AGPLv1.0",
        "https://schema.org/identifier": [
          "AGPLv1.0",
          {
            "@index": "spdx",
            "@value": "AGPL-1.0"
          }
        ],
        "https://schema.org/keywords": "libre",
        "https://schema.org/name": "Affero General Public License version 1",
        "https://schema.org/url": {
          "@list": [
            "https://www.gnu.org/licenses/license-list.html#AGPLv1.0",
            "http://directory.fsf.org/wiki/License:AGPLv1"
          ]
        }
      },
      ...
    ]
  }

[1]: https://www.w3.org/TR/2014/REC-json-ld-20140116/#h3_the-context
[2]: https://json-ld.org/spec/latest/json-ld-framing/
[3]: https://www.w3.org/TR/2014/REC-json-ld-20140116/#data-indexing
[4]: https://json-ld.org/spec/latest/json-ld/#changes-since-1-0-recommendation-of-16-january-2014
[5]: https://json-ld.org/spec/latest/json-ld/#node-identifier-indexing
[6]: http://meta.schema.org/docs/datamodel.html#identifierBg
[7]: http://schema.org/docs/faq.html#19
[8]: https://json-ld.org/playground/
  • Loading branch information
wking committed Mar 26, 2018
1 parent ee338b3 commit 8398a35
Showing 1 changed file with 50 additions and 6 deletions.
56 changes: 50 additions & 6 deletions pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,9 @@ def extract(root, base_uri=None):
return licenses


def save(licenses, dir=os.curdir):
os.makedirs(dir, exist_ok=True)
def save(licenses, base_uri, dir=os.curdir):
schema_dir = os.path.join(dir, 'schema')
os.makedirs(schema_dir, exist_ok=True)
if sys.version_info >= (3, 5):
paths = glob.glob(os.path.join(dir, '**', '*.json'), recursive=True)
else:
Expand All @@ -277,17 +278,60 @@ def save(licenses, dir=os.curdir):
)
for path in paths:
os.remove(path)
license_schema = {
'@context': {
'schema': 'https://schema.org/',
'id': {
'@id': 'schema:identifier'
},
'name': {
'@id': 'schema:name',
},
'uris': {
'@container': '@list',
'@id': 'schema:url',
},
'tags': {
'@id': 'schema:keywords',
},
'identifiers': {
'@container': '@index',
'@id': 'schema:identifier',
},
},
}
with open(os.path.join(schema_dir, 'license.jsonld'), 'w') as f:
json.dump(obj=license_schema, fp=f, indent=2)
f.write('\n')
license_schema_uri = urllib.parse.urljoin(
base=base_uri, url='schema/license.jsonld')
licenses_schema = license_schema.copy()
licenses_schema['@context']['licenses'] = {
'@container': '@index',
'@id': license_schema_uri,
}
licenses_schema.update(license_schema)
with open(os.path.join(schema_dir, 'licenses.jsonld'), 'w') as f:
json.dump(obj=licenses_schema, fp=f, indent=2, sort_keys=True)
f.write('\n')
licenses_schema_uri = urllib.parse.urljoin(
base=base_uri, url='schema/licenses.jsonld')
index = sorted(licenses.keys())
with open(os.path.join(dir, 'licenses.json'), 'w') as f:
json.dump(obj=index, fp=f, indent=2)
json.dump(obj=index, fp=f, indent=2, sort_keys=True)
f.write('\n')
full_index = {}
full_index = {
'@context': licenses_schema_uri,
'licenses': {},
}
for id, license in licenses.items():
license = license.copy()
if 'tags' in license:
license['tags'] = sorted(license['tags'])
full_index[id] = license.copy()
license['id'] = id
full_index['licenses'][id] = license.copy()
license['@context'] = urllib.parse.urljoin(
base=base_uri, url='schema/license.jsonld')
license_path = os.path.join(dir, '{}.json'.format(id))
with open(license_path, 'w') as f:
json.dump(obj=license, fp=f, indent=2, sort_keys=True)
Expand Down Expand Up @@ -317,4 +361,4 @@ def save(licenses, dir=os.curdir):
if unused_identifiers:
raise ValueError('unused IDENTIFIERS keys: {}'.format(
', '.join(sorted(unused_identifiers))))
save(licenses=licenses, dir=dir)
save(licenses=licenses, base_uri='https://wking.github.io/fsf-api/', dir=dir)

0 comments on commit 8398a35

Please sign in to comment.