Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JsonImporter #22

Merged
merged 1 commit into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 125 additions & 2 deletions hunter/importer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
import json
from collections import OrderedDict
from contextlib import contextmanager
from dataclasses import dataclass
Expand All @@ -16,6 +17,7 @@
CsvTestConfig,
GraphiteTestConfig,
HistoStatTestConfig,
JsonTestConfig,
PostgresMetric,
PostgresTestConfig,
TestConfig,
Expand Down Expand Up @@ -268,7 +270,7 @@ def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelecto
# Read metric values. Note we can still fail on conversion to float,
# because the user is free to override the column selection and thus
# they may select a column that contains non-numeric data:
for (name, i) in zip(metric_names, metric_indexes):
for name, i in zip(metric_names, metric_indexes):
try:
data[name].append(float(row[i]))
except ValueError as err:
Expand Down Expand Up @@ -498,7 +500,7 @@ def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelecto
# Read metric values. Note we can still fail on conversion to float,
# because the user is free to override the column selection and thus
# they may select a column that contains non-numeric data:
for (name, i) in zip(metric_names, metric_indexes):
for name, i in zip(metric_names, metric_indexes):
try:
data[name].append(float(row[i]))
except ValueError as err:
Expand Down Expand Up @@ -537,19 +539,133 @@ def fetch_all_metric_names(self, test_conf: PostgresTestConfig) -> List[str]:
return [m for m in test_conf.metrics.keys()]


class JsonImporter(Importer):
    """Imports test results from a JSON file.

    The input file must contain a list of result objects, each with a
    ``timestamp``, a ``metrics`` list of ``{"name": ..., "value": ...}``
    entries, and an ``attributes`` mapping that includes a ``branch`` key.
    Parsed files are cached per path so repeated fetches for the same test
    do not re-read the file.
    """

    def __init__(self):
        # Cache of parsed JSON content, keyed by input file path.
        self._data = {}

    @staticmethod
    def _read_json_file(filename: str):
        """Parse *filename* as JSON, mapping a missing file to DataImportError."""
        try:
            # Use a context manager so the file handle is always closed
            # (the original leaked the handle returned by open()).
            with open(filename) as f:
                return json.load(f)
        except FileNotFoundError:
            # Include the offending path in the message (the original
            # f-string had no placeholder and printed a literal).
            raise DataImportError(f"Input file not found: {filename}")

    def inputfile(self, test_conf: JsonTestConfig):
        """Return the parsed JSON content for the test's input file (cached)."""
        if test_conf.file not in self._data:
            self._data[test_conf.file] = self._read_json_file(test_conf.file)
        return self._data[test_conf.file]

    def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelector()) -> Series:
        """Load a Series of metric values and attributes from the JSON input file.

        Results are optionally filtered by branch (selector takes precedence
        over the test config) and trimmed to ``selector.last_n_points``.
        Raises DataImportError on an invalid time range or a missing file.
        """
        if not isinstance(test_conf, JsonTestConfig):
            raise ValueError("Expected JsonTestConfig")

        # TODO: refactor. This is copy pasted from CSV importer
        since_time = selector.since_time
        until_time = selector.until_time

        if since_time.timestamp() > until_time.timestamp():
            raise DataImportError(
                f"Invalid time range: ["
                f"{format_timestamp(int(since_time.timestamp()))}, "
                f"{format_timestamp(int(until_time.timestamp()))}]"
            )

        time = []
        data = OrderedDict()
        metrics = OrderedDict()
        attributes = OrderedDict()

        for name in self.fetch_all_metric_names(test_conf):
            # Honor the metric selection from the selector, if any.
            if selector.metrics is not None and name not in selector.metrics:
                continue
            data[name] = []

        attr_names = self.fetch_all_attribute_names(test_conf)
        for name in attr_names:
            attributes[name] = []

        # Branch precedence: the user-specified selector branch wins over the
        # test config's base branch; with neither set, all results are kept.
        branch = None
        if selector.branch:
            branch = selector.branch
        elif test_conf.base_branch:
            branch = test_conf.base_branch

        list_of_json_obj = [
            o
            for o in self.inputfile(test_conf)
            if not branch or o["attributes"]["branch"] == branch
        ]

        for result in list_of_json_obj:
            time.append(result["timestamp"])
            for metric in result["metrics"]:
                # Skip metrics excluded by the selector (not seeded into `data`).
                if metric["name"] not in data:
                    continue
                data[metric["name"]].append(metric["value"])
                metrics[metric["name"]] = Metric(1, 1.0)

        # Build each attribute column exactly once (the original rebuilt all
        # columns inside the per-result loop — same final value, O(n*m) work).
        for a in attr_names:
            attributes[a] = [o["attributes"][a] for o in list_of_json_obj]

        # Leave last n points:
        time = time[-selector.last_n_points :]
        data = {k: v[-selector.last_n_points :] for k, v in data.items()}
        attributes = {k: v[-selector.last_n_points :] for k, v in attributes.items()}

        return Series(
            test_conf.name,
            # NOTE(review): the branch filter above is applied but not recorded
            # on the Series — confirm that branch=None is intentional here.
            branch=None,
            time=time,
            metrics=metrics,
            data=data,
            attributes=attributes,
        )

    def fetch_all_metric_names(self, test_conf: JsonTestConfig) -> List[str]:
        """Return the distinct metric names found in the input file (unordered)."""
        metric_names = set()
        for result in self.inputfile(test_conf):
            metric_names.update(m["name"] for m in result["metrics"])
        return list(metric_names)

    def fetch_all_attribute_names(self, test_conf: JsonTestConfig) -> List[str]:
        """Return the distinct attribute names found in the input file (unordered)."""
        attr_names = set()
        for result in self.inputfile(test_conf):
            attr_names.update(result["attributes"].keys())
        return list(attr_names)


class Importers:
__config: Config
__csv_importer: Optional[CsvImporter]
__graphite_importer: Optional[GraphiteImporter]
__histostat_importer: Optional[HistoStatImporter]
__postgres_importer: Optional[PostgresImporter]
__json_importer: Optional[JsonImporter]

def __init__(self, config: Config):
self.__config = config
self.__csv_importer = None
self.__graphite_importer = None
self.__histostat_importer = None
self.__postgres_importer = None
self.__json_importer = None

def csv_importer(self) -> CsvImporter:
if self.__csv_importer is None:
Expand All @@ -571,6 +687,11 @@ def postgres_importer(self) -> PostgresImporter:
self.__postgres_importer = PostgresImporter(Postgres(self.__config.postgres))
return self.__postgres_importer

def json_importer(self) -> JsonImporter:
    """Return the shared JsonImporter, creating it on first use."""
    importer = self.__json_importer
    if importer is None:
        importer = JsonImporter()
        self.__json_importer = importer
    return importer

def get(self, test: TestConfig) -> Importer:
if isinstance(test, CsvTestConfig):
return self.csv_importer()
Expand All @@ -580,5 +701,7 @@ def get(self, test: TestConfig) -> Importer:
return self.histostat_importer()
elif isinstance(test, PostgresTestConfig):
return self.postgres_importer()
elif isinstance(test, JsonTestConfig):
return self.json_importer()
else:
raise ValueError(f"Unsupported test type {type(test)}")
35 changes: 31 additions & 4 deletions hunter/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def create_test_config(name: str, config: Dict) -> TestConfig:
Loads properties of a test from a dictionary read from hunter's config file
This dictionary must have the `type` property to determine the type of the test.
Other properties depend on the type.
Currently supported test types are `fallout`, `graphite`, `csv`, and `psql`.
Currently supported test types are `fallout`, `graphite`, `csv`, `json`, and `psql`.
"""
test_type = config.get("type")
if test_type == "csv":
Expand All @@ -173,6 +173,8 @@ def create_test_config(name: str, config: Dict) -> TestConfig:
return create_histostat_test_config(name, config)
elif test_type == "postgres":
return create_postgres_test_config(name, config)
elif test_type == "json":
return create_json_test_config(name, config)
elif test_type is None:
raise TestConfigError(f"Test type not set for test {name}")
else:
Expand All @@ -192,7 +194,7 @@ def create_csv_test_config(test_name: str, test_info: Dict) -> CsvTestConfig:
for name in metrics_info:
metrics.append(CsvMetric(name, 1, 1.0, name))
elif isinstance(metrics_info, Dict):
for (metric_name, metric_conf) in metrics_info.items():
for metric_name, metric_conf in metrics_info.items():
metrics.append(
CsvMetric(
name=metric_name,
Expand Down Expand Up @@ -231,7 +233,7 @@ def create_graphite_test_config(name: str, test_info: Dict) -> GraphiteTestConfi

metrics = []
try:
for (metric_name, metric_conf) in metrics_info.items():
for metric_name, metric_conf in metrics_info.items():
metrics.append(
GraphiteMetric(
name=metric_name,
Expand Down Expand Up @@ -279,7 +281,7 @@ def create_postgres_test_config(test_name: str, test_info: Dict) -> PostgresTest
for name in metrics_info:
metrics.append(CsvMetric(name, 1, 1.0))
elif isinstance(metrics_info, Dict):
for (metric_name, metric_conf) in metrics_info.items():
for metric_name, metric_conf in metrics_info.items():
metrics.append(
PostgresMetric(
name=metric_name,
Expand All @@ -294,3 +296,28 @@ def create_postgres_test_config(test_name: str, test_info: Dict) -> PostgresTest
return PostgresTestConfig(test_name, query, update_stmt, time_column, metrics, attributes)
except KeyError as e:
raise TestConfigError(f"Configuration key not found in test {test_name}: {e.args[0]}")


@dataclass
class JsonTestConfig(TestConfig):
    """Test configuration backed by a JSON results file."""

    # Unique name of the test; used to qualify metric names.
    name: str
    # Path to the JSON input file containing the results.
    file: str
    # Branch whose results are imported by default. May be None when not
    # configured — NOTE(review): annotation should arguably be Optional[str].
    base_branch: str

    # TODO: This should return the list defined in the config file hunter.yaml
    def fully_qualified_metric_names(self):
        """Return the file's metric names qualified as ``<test_name>.<metric>``."""
        # Imported locally to avoid a circular dependency with hunter.importer,
        # which itself imports from this module.
        from hunter.importer import JsonImporter

        metric_names = JsonImporter().fetch_all_metric_names(self)
        return [f"{self.name}.{m}" for m in metric_names]


def create_json_test_config(name: str, test_info: Dict) -> JsonTestConfig:
    """Build a JsonTestConfig from a raw test dictionary.

    Requires a ``file`` key pointing at an existing JSON results file;
    ``base_branch`` is optional. Raises TestConfigError when the key is
    missing or the file does not exist.
    """
    if "file" not in test_info:
        raise TestConfigError(f"Configuration key not found in test {name}: file")
    file = test_info["file"]
    if not os.path.exists(file):
        raise TestConfigError(f"Configuration file not found: {file}")
    return JsonTestConfig(name, file, test_info.get("base_branch", None))
Loading