Skip to content

Commit

Permalink
Expose JSON BinPack's canonicalizer as a canonicalize command
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti committed Sep 27, 2024
1 parent 01e40b4 commit 30d43c5
Show file tree
Hide file tree
Showing 85 changed files with 6,500 additions and 1 deletion.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ option(JSONSCHEMA_CONTINUOUS "Perform a continuous JSON Schema CLI release" ON)

find_package(JSONToolkit REQUIRED)
find_package(AlterSchema REQUIRED)
find_package(JSONBinPack REQUIRED)
find_package(Hydra REQUIRED)
add_subdirectory(src)

Expand Down
1 change: 1 addition & 0 deletions DEPENDENCIES
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ noa https://github.com/sourcemeta/noa 7e26abce7a4e31e86a16ef2851702a56773ca527
jsontoolkit https://github.com/sourcemeta/jsontoolkit 3ef19daf7ca042544239111c701a51232f3f5576
hydra https://github.com/sourcemeta/hydra 3c53d3fdef79e9ba603d48470a508cc45472a0dc
alterschema https://github.com/sourcemeta/alterschema 744cf03a950b681a61f1f4cf6a7bb55bc52836c9
jsonbinpack https://github.com/sourcemeta/jsonbinpack 43d53dd32c432333deb1aea147095ed8707b5f11
1 change: 1 addition & 0 deletions README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ documentation:
- [`jsonschema frame`](./docs/frame.markdown) (for debugging references)
- [`jsonschema compile`](./docs/compile.markdown) (for internal debugging)
- [`jsonschema identify`](./docs/identify.markdown)
- [`jsonschema canonicalize`](./docs/canonicalize.markdown) (for static analysis)

Installation
------------
Expand Down
7 changes: 7 additions & 0 deletions cmake/FindJSONBinPack.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Find module for JSON BinPack. Rather than locating an installed package, it
# adds the vendored copy under vendor/jsonbinpack to the build. The
# JSONBinPack_FOUND guard skips everything once that variable is already set.
if(NOT JSONBinPack_FOUND)
# Switch off the parts of JSON BinPack named in each description string. These
# must be set before add_subdirectory() processes the vendored project.
# Without FORCE, set(... CACHE ...) leaves an existing cache value untouched.
set(JSONBINPACK_INSTALL OFF CACHE BOOL "disable installation")
set(JSONBINPACK_CLI OFF CACHE BOOL "disable the JSON BinPack CLI module")
set(JSONBINPACK_RUNTIME OFF CACHE BOOL "disable the JSON BinPack runtime module")
# Build the vendored sources as part of this project
add_subdirectory("${PROJECT_SOURCE_DIR}/vendor/jsonbinpack")
# Report success to the find_package() caller
set(JSONBinPack_FOUND ON)
endif()
83 changes: 83 additions & 0 deletions docs/canonicalize.markdown
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
Canonicalize
============

```sh
jsonschema canonicalize <schema.json>
```

JSON Schema is an extremely expressive schema language. As such, schema authors
can express the same constraints in a variety of ways, making the process of
statically analyzing schemas complex. This command attempts to tackle the
problem by transforming a given JSON Schema into a simpler (but more verbose)
normalized form referred to as _canonical_.

> Refer to [Juan Cruz Viotti's dissertation on JSON
> BinPack](https://www.jviotti.com/dissertation.pdf) for how JSON Schema
> canonicalization was originally defined.

Examples
--------

For example, consider the following simple schema:

```json
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"properties": {
"foo": { "type": "string" }
}
}
```

The canonicalization process will result in something like this:

```json
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"anyOf": [
{
"enum": [
null
]
},
{
"enum": [
false,
true
]
},
{
"type": "object",
"minProperties": 0,
"properties": {
"foo": {
"type": "string",
"minLength": 0
}
}
},
{
"type": "array",
"minItems": 0
},
{
"type": "string",
"minLength": 0
},
{
"type": "number",
"multipleOf": 1
},
{
"type": "integer",
"multipleOf": 1
}
]
}
```

### Canonicalize a JSON Schema

```sh
jsonschema canonicalize path/to/my/schema.json
```
4 changes: 3 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ add_executable(jsonschema_cli
command_lint.cc
command_metaschema.cc
command_validate.cc
command_identify.cc)
command_identify.cc
command_canonicalize.cc)

noa_add_default_options(PRIVATE jsonschema_cli)
set_target_properties(jsonschema_cli PROPERTIES OUTPUT_NAME jsonschema)
Expand All @@ -20,6 +21,7 @@ target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsontoolkit::jsonschema
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::alterschema::engine)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::alterschema::linter)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::hydra::httpclient)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsonbinpack::compiler)

configure_file(configure.h.in configure.h @ONLY)
target_include_directories(jsonschema_cli PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
Expand Down
1 change: 1 addition & 0 deletions src/command.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ auto lint(const std::span<const std::string> &arguments) -> int;
auto validate(const std::span<const std::string> &arguments) -> int;
auto metaschema(const std::span<const std::string> &arguments) -> int;
auto identify(const std::span<const std::string> &arguments) -> int;
auto canonicalize(const std::span<const std::string> &arguments) -> int;
} // namespace sourcemeta::jsonschema::cli

#endif
30 changes: 30 additions & 0 deletions src/command_canonicalize.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include <sourcemeta/jsonbinpack/compiler.h>
#include <sourcemeta/jsontoolkit/json.h>
#include <sourcemeta/jsontoolkit/jsonschema.h>

#include <cstdlib> // EXIT_SUCCESS, EXIT_FAILURE
#include <iostream> // std::cerr, std::cout, std::endl

#include "command.h"
#include "utils.h"

// Entry point for `jsonschema canonicalize <schema.json>`.
//
// Reads the schema named by the first positional argument, passes it through
// JSON BinPack's canonicalizer, and pretty-prints the resulting document to
// standard output followed by a newline.
//
// Returns EXIT_FAILURE (after printing a usage hint to standard error) when
// no schema path was supplied, and EXIT_SUCCESS otherwise.
auto sourcemeta::jsonschema::cli::canonicalize(
    const std::span<const std::string> &arguments) -> int {
  const auto options{parse_options(arguments, {})};
  // Positional (non-option) arguments are stored under the empty key
  const auto &positional{options.at("")};

  if (positional.empty()) {
    std::cerr
        << "error: This command expects a path to a schema. For example:\n\n"
        << " jsonschema canonicalize path/to/schema.json\n";
    return EXIT_FAILURE;
  }

  auto schema{sourcemeta::jsontoolkit::from_file(positional.front())};

  // Either spelling of the flag is forwarded to the resolver factory.
  // NOTE(review): presumably this enables resolving remote schemas over
  // HTTP; confirm against `resolver` in utils.h
  const bool http_requested{options.contains("h") ||
                            options.contains("http")};
  sourcemeta::jsonbinpack::canonicalize(
      schema, sourcemeta::jsontoolkit::default_schema_walker,
      resolver(options, http_requested));

  sourcemeta::jsontoolkit::prettify(
      schema, std::cout, sourcemeta::jsontoolkit::schema_format_compare);
  std::cout << std::endl;
  return EXIT_SUCCESS;
}
7 changes: 7 additions & 0 deletions src/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ Global Options:
Print the URI of the given schema to standard output, optionally
relative to a given base URI.
canonicalize <schema.json>
Pre-process a JSON Schema into JSON BinPack's canonical form
for static analysis.
For more documentation, visit https://github.com/sourcemeta/jsonschema
)EOF"};

Expand All @@ -92,6 +97,8 @@ auto jsonschema_main(const std::string &program, const std::string &command,
return sourcemeta::jsonschema::cli::test(arguments);
} else if (command == "identify") {
return sourcemeta::jsonschema::cli::identify(arguments);
} else if (command == "canonicalize") {
return sourcemeta::jsonschema::cli::canonicalize(arguments);
} else {
std::cout << "JSON Schema CLI - v"
<< sourcemeta::jsonschema::cli::PROJECT_VERSION << "\n";
Expand Down
6 changes: 6 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,12 @@ add_jsonschema_test_unix(lint/pass_lint_fix)
add_jsonschema_test_unix(lint/pass_lint_no_fix)
add_jsonschema_test_unix(lint/fail_lint)

# Canonicalize
add_jsonschema_test_unix(canonicalize/pass_1)
add_jsonschema_test_unix(canonicalize/fail_no_schema)
add_jsonschema_test_unix(canonicalize/fail_schema_invalid_json)
add_jsonschema_test_unix(canonicalize/fail_unknown_metaschema)

# CI specific tests
add_jsonschema_test_unix_ci(pass_bundle_http)
add_jsonschema_test_unix_ci(fail_bundle_http_non_200)
Expand Down
19 changes: 19 additions & 0 deletions test/canonicalize/fail_no_schema.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/sh
# Checks that `canonicalize` invoked without a schema path exits with status 1
# and prints a usage hint on standard error.
# $1 is the executable under test.

# Abort on any unhandled command failure and on use of an unset variable
set -o errexit
set -o nounset

# Scratch directory, removed when the script exits
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Record the exit status on both the success and the failure path; being part
# of an `&&`/`||` list keeps errexit from aborting when the command fails
"$1" canonicalize 2>"$TMP/stderr.txt" && CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# NOTE(review): the message emitted by src/command_canonicalize.cc has an empty
# line after the first sentence and leading whitespace before `jsonschema`;
# neither appears in the expected text below. Confirm the heredoc whitespace
# is intact.
cat << 'EOF' > "$TMP/expected.txt"
error: This command expects a path to a schema. For example:
jsonschema canonicalize path/to/schema.json
EOF

# diff exits non-zero on any mismatch, which fails the script under errexit
diff "$TMP/stderr.txt" "$TMP/expected.txt"
25 changes: 25 additions & 0 deletions test/canonicalize/fail_schema_invalid_json.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/sh
# Checks that `canonicalize` exits with status 1 and reports a parse error on
# standard error when the schema file is not valid JSON.
# $1 is the executable under test.

# Abort on any unhandled command failure and on use of an unset variable
set -o errexit
set -o nounset

# Scratch directory, removed when the script exits
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Deliberately malformed input: no colon after the key and an unquoted value
cat << 'EOF' > "$TMP/schema.json"
{
"type" string
}
EOF

# Record the exit status on both the success and the failure path; being part
# of an `&&`/`||` list keeps errexit from aborting when the command fails
"$1" canonicalize "$TMP/schema.json" 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# Unquoted delimiter so that $(realpath ...) is expanded inside the heredoc
cat << EOF > "$TMP/expected.txt"
error: Failed to parse the JSON document at line 2 and column 10
$(realpath "$TMP")/schema.json
EOF

# diff exits non-zero on any mismatch, which fails the script under errexit
diff "$TMP/stderr.txt" "$TMP/expected.txt"
26 changes: 26 additions & 0 deletions test/canonicalize/fail_unknown_metaschema.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/sh
# Checks that `canonicalize` exits with status 1 and reports a resolution
# error on standard error when the schema declares a `$schema` URI
# (https://example.com/unknown) that the expected output reports as
# unresolvable.
# $1 is the executable under test.

# Abort on any unhandled command failure and on use of an unset variable
set -o errexit
set -o nounset

# Scratch directory, removed when the script exits
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Quoted delimiter: `$schema`, `$id` and `$ref` must be written literally
# rather than expanded (which would also trip nounset)
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "https://example.com/unknown",
"$id": "https://example.com",
"$ref": "nested"
}
EOF

# Record the exit status on both the success and the failure path; being part
# of an `&&`/`||` list keeps errexit from aborting when the command fails
"$1" canonicalize "$TMP/schema.json" 2>"$TMP/stderr.txt" && CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# Expected diagnostic: the error text followed by the offending URI
cat << EOF > "$TMP/expected.txt"
error: Could not resolve the requested schema
at https://example.com/unknown
EOF

# diff exits non-zero on any mismatch, which fails the script under errexit
diff "$TMP/stderr.txt" "$TMP/expected.txt"
68 changes: 68 additions & 0 deletions test/canonicalize/pass_1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/sh
# Checks that `canonicalize` succeeds on a small 2020-12 schema and that its
# standard output matches the expected canonical document exactly.
# $1 is the executable under test.

# Abort on any unhandled command failure and on use of an unset variable
set -o errexit
set -o nounset

# Scratch directory, removed when the script exits
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Quoted delimiter: `$schema` must be written literally rather than expanded
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"properties": {
"foo": { "type": "string" }
}
}
EOF

# No status capture here: under errexit a non-zero exit fails the test at once
"$1" canonicalize "$TMP/schema.json" > "$TMP/result.json"

# Echo the actual output to this script's standard output
cat "$TMP/result.json"

# Expected canonical form: an `anyOf` with one branch for null, booleans,
# object, array, string, number and integer
cat << 'EOF' > "$TMP/expected.json"
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"anyOf": [
{
"enum": [
null
]
},
{
"enum": [
false,
true
]
},
{
"type": "object",
"minProperties": 0,
"properties": {
"foo": {
"type": "string",
"minLength": 0
}
}
},
{
"type": "array",
"minItems": 0
},
{
"type": "string",
"minLength": 0
},
{
"type": "number",
"multipleOf": 1
},
{
"type": "integer",
"multipleOf": 1
}
]
}
EOF

# diff exits non-zero on any mismatch, which fails the script under errexit
diff "$TMP/result.json" "$TMP/expected.json"
Loading

0 comments on commit 30d43c5

Please sign in to comment.