Integrate get_shacl_results into CLI
adamkarvonen authored Oct 25, 2023
1 parent 5122371 · commit 4030979
Showing 2 changed files with 71 additions and 1 deletion.
cli/pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -23,7 +23,7 @@ dependencies = [
     "requests==2.31.0",
     "Pillow==10.0.1",
     "plotly==5.15.0",
-    "semtk-python3 @ git+https://github.com/ge-semtk/semtk-python3@e9a16cf52ca8838cf989f268e62fb288282bb9e6",
+    "semtk-python3 @ git+https://github.com/ge-semtk/semtk-python3@15e9a0646f7ced2e89deae5664e5349087bd7fd4",
     "six==1.16.0",
     "tabulate==0.9.0",
     "urllib3==2.0.7",
cli/rack/__init__.py (70 changes: 70 additions & 0 deletions)
@@ -29,6 +29,7 @@
 from types import SimpleNamespace
 from tempfile import TemporaryDirectory
 import shutil
+import json
 
 # library imports
 from colorama import Fore, Style
@@ -587,6 +588,59 @@ def cardinality_driver(
     semtk_table = semtk3.get_cardinality_violations(conn, max_rows=max_rows, concise_format=concise)
     print(format_semtk_table(semtk_table, export_format=export_format, headers=headers))
 
+def shacl_results_driver(
+    base_url: Url,
+    model_graphs: Optional[List[Url]],
+    data_graphs: Optional[List[Url]],
+    triple_store: Optional[Url],
+    triple_store_type: Optional[str],
+    headers: bool,
+    export_format: ExportFormat,
+    shacl_ttl_path: str,
+    severity: str
+) -> None:
+    """Evaluate SHACL constraints and return results as pretty printed json (format=text) or as an output table (format=csv)"""
+
+    def json_to_semtk_table(json_data: dict) -> SemtkTable:
+        """Converts the 'reportEntries' in the get_shacl_results() JSON data to a SemtkTable."""
+
+        entries = json_data["reportEntries"]
+
+        # Given that all entries share the same structure, extract column names and types from the first entry.
+        col_names = list(entries[0].keys())
+        col_types = ['string'] * len(col_names)
+
+        rows = [list(entry.values()) for entry in entries]
+
+        table_dict = SemtkTable.create_table_dict(col_names, col_types, rows)
+        return SemtkTable(table_dict)
+
+    legal_severity_values = {"Info", "Warning", "Violation"}
+    if severity not in legal_severity_values:
+        print(str_bad(f"Error: Invalid severity value '{severity}'. Allowed values are: {', '.join(legal_severity_values)}"))
+        sys.exit(1)
+
+    if data_graphs is not None:
+        data_graph = data_graphs[0]
+    else:
+        logger.warning("Defaulting data-graph to %s", DEFAULT_DATA_GRAPH)
+        data_graph = DEFAULT_DATA_GRAPH
+
+    if data_graphs is not None:
+        extra_data_graphs = data_graphs[1:]
+    else:
+        extra_data_graphs = []
+
+    conn = sparql_connection(base_url, model_graphs, data_graph, extra_data_graphs, triple_store, triple_store_type)
+
+    json_result = semtk3.get_shacl_results(conn, shacl_ttl_path=shacl_ttl_path, severity=severity)
+
+    if export_format == ExportFormat.TEXT:
+        print(json.dumps(json_result, indent=4))
+    elif export_format == ExportFormat.CSV:
+        semtk_table = json_to_semtk_table(json_result)
+        print(format_semtk_table(semtk_table, export_format=export_format, headers=headers))
+
 def ingest_data_driver(config_path: Path, base_url: Url, model_graphs: Optional[List[Url]], data_graphs: Optional[List[Url]], triple_store: Optional[Url], triple_store_type: Optional[str], clear: bool) -> None:
     """Use an import.yaml file to ingest multiple CSV files into the data graph."""
     with open(config_path, mode='r', encoding='utf-8-sig') as config_file:
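Note: the conversion in json_to_semtk_table relies on get_shacl_results() returning a dict whose "reportEntries" list holds uniformly keyed objects. Below is a minimal sketch of that flattening step; the entry keys and values are made up for illustration and are not taken from this commit.

    # Illustrative only: hypothetical report entries showing how json_to_semtk_table
    # derives column names, column types, and rows from the first entry onward.
    sample = {
        "reportEntries": [
            {"severity": "Warning", "focusNode": "http://example.org/data#n1", "message": "too few values"},
            {"severity": "Violation", "focusNode": "http://example.org/data#n2", "message": "too many values"},
        ]
    }
    col_names = list(sample["reportEntries"][0].keys())  # ['severity', 'focusNode', 'message']
    col_types = ['string'] * len(col_names)               # every column is treated as a string
    rows = [list(entry.values()) for entry in sample["reportEntries"]]
    # rows == [['Warning', 'http://example.org/data#n1', 'too few values'],
    #          ['Violation', 'http://example.org/data#n2', 'too many values']]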
@@ -879,6 +933,13 @@ def dispatch_data_cardinality(args: SimpleNamespace) -> None:
     cardinality_driver(args.base_url, args.model_graph, args.data_graph, args.triple_store, args.triple_store_type,
                        export_format=args.format, headers=not args.no_headers, concise=args.concise, max_rows=args.max_rows)
 
+def dispatch_data_shacl_results(args: SimpleNamespace) -> None:
+    """Implementation of the data SHACL results subcommand"""
+    global cliMethod
+    cliMethod = CLIMethod.DATA_IMPORT
+    shacl_results_driver(args.base_url, args.model_graph, args.data_graph, args.triple_store, args.triple_store_type,
+                         export_format=args.format, headers=not args.no_headers, shacl_ttl_path=args.shacl_ttl_path, severity=args.severity)
+
 def dispatch_model_import(args: SimpleNamespace) -> None:
     """Implementation of the plumbing model subcommand"""
     global cliMethod
@@ -937,6 +998,7 @@ def get_argument_parser() -> argparse.ArgumentParser:
     data_subparsers = data_parser.add_subparsers(dest='command')
     data_import_parser = data_subparsers.add_parser('import', help='Import CSV data')
     data_cardinality_parser = data_subparsers.add_parser('cardinality', help='Check data cardinality')
+    data_shacl_results_parser = data_subparsers.add_parser('shacl_results', help='Evaluate SHACL constraints and get results')
     data_export_parser = data_subparsers.add_parser('export', help='Export query results')
     data_count_parser = data_subparsers.add_parser('count', help='Count matched query rows')
     data_clear_parser = data_subparsers.add_parser('clear', help='Clear data graph')
@@ -989,6 +1051,14 @@ def get_argument_parser() -> argparse.ArgumentParser:
     data_cardinality_parser.add_argument('--concise', default=False, action='store_true', help='Use concise output')
     data_cardinality_parser.set_defaults(func=dispatch_data_cardinality)
 
+    data_shacl_results_parser.add_argument('--model-graph', type=str, action='append', help='Model graph URL')
+    data_shacl_results_parser.add_argument('--data-graph', type=str, action='append', help='Data graph URL')
+    data_shacl_results_parser.add_argument('--format', type=ExportFormat, help='Export format. text is pretty printed json, csv is SemtkTable.', choices=list(ExportFormat), default=ExportFormat.TEXT)
+    data_shacl_results_parser.add_argument('--no-headers', action='store_true', help='Omit header row')
+    data_shacl_results_parser.add_argument('--severity', default='Info', type=str, help='Minimum severity filter: Info, Warning, or Violation')
+    data_shacl_results_parser.add_argument('shacl_ttl_path', type=str, help='Path to a SHACL file in TTL format')
+    data_shacl_results_parser.set_defaults(func=dispatch_data_shacl_results)
+
     data_export_parser.add_argument('nodegroup', type=str, help='ID of nodegroup')
     data_export_parser.add_argument('--model-graph', type=str, action='append', help='Model graph URL')
     data_export_parser.add_argument('--data-graph', type=str, required=True, action='append', help='Data graph URL')
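For reference, a minimal sketch of exercising the new subcommand through the parser wiring added above. The top-level 'data' command name, the shapes.ttl path, and the graph URL are placeholder assumptions for illustration, not values taken from this diff; running the dispatch end-to-end would also need the CLI's usual connection options (base URL, triple store) to resolve.

    # Sketch only (not part of this commit): parse the new subcommand and confirm
    # it dispatches to the SHACL handler. File name and graph URL are placeholders.
    parser = get_argument_parser()
    args = parser.parse_args([
        "data", "shacl_results", "shapes.ttl",
        "--data-graph", "http://rack001/data",
        "--severity", "Violation",
        "--format", "csv",
    ])
    assert args.func is dispatch_data_shacl_results
    assert args.shacl_ttl_path == "shapes.ttl"
    # args.format is converted by argparse via type=ExportFormat, so it is ExportFormat.CSV;
    # calling args.func(args) would then run shacl_results_driver against the triple store.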
