CI quality workflows (#1423)
* Add inference tests

* Clean up

* Rename test graph file

* Add readme for tests

* Separate server fixture

* Test file name change

* Assert images are generated

* Clean up comments

* Add __init__.py so tests can run with command line `pytest`

* Fix command line args for pytest

* Loop all samplers/schedulers in test_inference.py

* CI quality workflows compare (#1)

* Add image comparison tests

* Comparison tests do not pass with empty metadata

* Ensure tests are run in correct order

* Save image files with test name

* Update tests readme

* Reduce step counts in tests to ~halve runtime

* CI quality workflows build (#2)

* Add build test github workflow
enzymezoo-code committed Sep 19, 2023
1 parent b92bf81 commit 26cd840
Showing 10 changed files with 728 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/test-build.yml
@@ -0,0 +1,31 @@
name: Build package

#
# This workflow is a test of the Python package build.
# It installs the Python dependencies across several Python versions.
#

on:
  push:
    paths:
      - "requirements.txt"
      - ".github/workflows/test-build.yml"

jobs:
  build:
    name: Build Test
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
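The job's install step can be reproduced locally before pushing; a minimal sketch, assuming a clean virtual environment on one of the matrix Python versions:
```
python -m pip install --upgrade pip
pip install -r requirements.txt
```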
5 changes: 5 additions & 0 deletions pytest.ini
@@ -0,0 +1,5 @@
[pytest]
markers =
    inference: mark as inference test (deselect with '-m "not inference"')
testpaths = tests
addopts = -s
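Registering the `inference` marker lets the slow inference tests be deselected from the command line, as the comment suggests:
```
pytest -m "not inference"
```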
29 changes: 29 additions & 0 deletions tests/README.md
@@ -0,0 +1,29 @@
# Automated Testing

## Running tests locally

Additional requirements for running tests:
```
pip install pytest
pip install websocket-client==1.6.1 opencv-python==4.6.0.66 scikit-image==0.21.0
```
Run inference tests:
```
pytest tests/inference
```

## Quality regression test
Compares the images in two directories (matched by their prompt metadata) to ensure they are structurally similar; each pair must score above the SSIM pass threshold.

1) Run an inference test to save a directory of "ground truth" images
```
pytest tests/inference --output_dir tests/inference/baseline
```
2) Make code edits

3) Run inference and quality comparison tests
```
pytest
```
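The comparison step reads its directories from the options defined in `tests/compare/conftest.py`; to run only the comparison against explicit paths (these values are the defaults):
```
pytest tests/compare --baseline_dir tests/inference/baseline --test_dir tests/inference/samples
```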
Empty file added tests/__init__.py
Empty file.
41 changes: 41 additions & 0 deletions tests/compare/conftest.py
@@ -0,0 +1,41 @@
import os
import pytest

# Command line arguments for pytest
def pytest_addoption(parser):
    parser.addoption('--baseline_dir', action="store", default='tests/inference/baseline', help='Directory for ground-truth images')
    parser.addoption('--test_dir', action="store", default='tests/inference/samples', help='Directory for images to test')
    parser.addoption('--metrics_file', action="store", default='tests/metrics.md', help='Output file for metrics')
    parser.addoption('--img_output_dir', action="store", default='tests/compare/samples', help='Output directory for diff metric images')

# This initializes args at the beginning of the test session
@pytest.fixture(scope="session", autouse=True)
def args_pytest(pytestconfig):
    args = {}
    args['baseline_dir'] = pytestconfig.getoption('baseline_dir')
    args['test_dir'] = pytestconfig.getoption('test_dir')
    args['metrics_file'] = pytestconfig.getoption('metrics_file')
    args['img_output_dir'] = pytestconfig.getoption('img_output_dir')

    # Initialize metrics file
    with open(args['metrics_file'], 'a') as f:
        # if file is empty, write header (columns match the rows written in test_quality.py)
        if os.stat(args['metrics_file']).st_size == 0:
            f.write("| date | file | metric | status | value | \n")
            f.write("| --- | --- | --- | --- | --- | \n")

    return args


def gather_file_basenames(directory: str):
    files = []
    for file in os.listdir(directory):
        if file.endswith(".png"):
            files.append(file)
    return files

# Creates the list of baseline file names to use as a fixture
def pytest_generate_tests(metafunc):
    if "baseline_fname" in metafunc.fixturenames:
        baseline_fnames = gather_file_basenames(metafunc.config.getoption("baseline_dir"))
        metafunc.parametrize("baseline_fname", baseline_fnames)
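For reference, any test that declares a `baseline_fname` argument is collected once per baseline image by `pytest_generate_tests` above; a hypothetical sketch (not part of this commit):
```
import os

# Hypothetical example: collected once for every .png in --baseline_dir.
def test_baseline_file_exists(args_pytest, baseline_fname):
    path = os.path.join(args_pytest['baseline_dir'], baseline_fname)
    assert os.path.isfile(path)
```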
195 changes: 195 additions & 0 deletions tests/compare/test_quality.py
@@ -0,0 +1,195 @@
import datetime
import numpy as np
import os
from PIL import Image
import pytest
from pytest import fixture
from typing import Tuple, List

from cv2 import imread, cvtColor, COLOR_BGR2RGB
from skimage.metrics import structural_similarity as ssim


"""
This test suite compares images in two directories, matching files by their prompt metadata.
The directories are specified by the command line arguments --baseline_dir and --test_dir.
"""
# ssim: Structural Similarity Index
# Returns a tuple of (ssim, diff_image)
def ssim_score(img0: np.ndarray, img1: np.ndarray) -> Tuple[float, np.ndarray]:
    score, diff = ssim(img0, img1, channel_axis=-1, full=True)
    # rescale the difference image to 0-255 range
    diff = (diff * 255).astype("uint8")
    return score, diff

# Metrics must return a tuple of (score, diff_image)
METRICS = {"ssim": ssim_score}
METRICS_PASS_THRESHOLD = {"ssim": 0.95}


class TestCompareImageMetrics:
    @fixture(scope="class")
    def test_file_names(self, args_pytest):
        test_dir = args_pytest['test_dir']
        fnames = self.gather_file_basenames(test_dir)
        yield fnames
        del fnames

    @fixture(scope="class", autouse=True)
    def teardown(self, args_pytest):
        yield
        # Runs after all tests are complete
        # Aggregate output files into a grid of images
        baseline_dir = args_pytest['baseline_dir']
        test_dir = args_pytest['test_dir']
        img_output_dir = args_pytest['img_output_dir']
        metrics_file = args_pytest['metrics_file']

        grid_dir = os.path.join(img_output_dir, "grid")
        os.makedirs(grid_dir, exist_ok=True)

        for metric_dir in METRICS.keys():
            metric_path = os.path.join(img_output_dir, metric_dir)
            for file in os.listdir(metric_path):
                if file.endswith(".png"):
                    score = self.lookup_score_from_fname(file, metrics_file)
                    image_file_list = []
                    image_file_list.append([
                        os.path.join(baseline_dir, file),
                        os.path.join(test_dir, file),
                        os.path.join(metric_path, file)
                    ])
                    # Create grid
                    image_list = [[Image.open(file) for file in files] for files in image_file_list]
                    grid = self.image_grid(image_list)
                    grid.save(os.path.join(grid_dir, f"{metric_dir}_{score:.3f}_{file}"))

    # Tests run for each baseline file name
    @fixture()
    def fname(self, baseline_fname):
        yield baseline_fname
        del baseline_fname

    def test_directories_not_empty(self, args_pytest):
        baseline_dir = args_pytest['baseline_dir']
        test_dir = args_pytest['test_dir']
        assert len(os.listdir(baseline_dir)) != 0, f"Baseline directory {baseline_dir} is empty"
        assert len(os.listdir(test_dir)) != 0, f"Test directory {test_dir} is empty"

    def test_dir_has_all_matching_metadata(self, fname, test_file_names, args_pytest):
        # Check that all files in baseline_dir have a file in test_dir with matching metadata
        baseline_file_path = os.path.join(args_pytest['baseline_dir'], fname)
        file_paths = [os.path.join(args_pytest['test_dir'], f) for f in test_file_names]
        file_match = self.find_file_match(baseline_file_path, file_paths)
        assert file_match is not None, f"Could not find a file in {args_pytest['test_dir']} with matching metadata to {baseline_file_path}"

    # For a baseline image file, finds the corresponding file name in test_dir and
    # compares the images using the metrics in METRICS
    @pytest.mark.parametrize("metric", METRICS.keys())
    def test_pipeline_compare(
        self,
        args_pytest,
        fname,
        test_file_names,
        metric,
    ):
        baseline_dir = args_pytest['baseline_dir']
        test_dir = args_pytest['test_dir']
        metrics_output_file = args_pytest['metrics_file']
        img_output_dir = args_pytest['img_output_dir']

        baseline_file_path = os.path.join(baseline_dir, fname)

        # Find file match
        file_paths = [os.path.join(test_dir, f) for f in test_file_names]
        test_file = self.find_file_match(baseline_file_path, file_paths)

        # Run metrics
        sample_baseline = self.read_img(baseline_file_path)
        sample_secondary = self.read_img(test_file)

        score, metric_img = METRICS[metric](sample_baseline, sample_secondary)
        metric_status = score > METRICS_PASS_THRESHOLD[metric]

        # Save metric values
        with open(metrics_output_file, 'a') as f:
            run_info = os.path.splitext(fname)[0]
            metric_status_str = "PASS ✅" if metric_status else "FAIL ❌"
            date_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            f.write(f"| {date_str} | {run_info} | {metric} | {metric_status_str} | {score} | \n")

        # Save metric image
        metric_img_dir = os.path.join(img_output_dir, metric)
        os.makedirs(metric_img_dir, exist_ok=True)
        Image.fromarray(metric_img).save(os.path.join(metric_img_dir, fname))

        assert score > METRICS_PASS_THRESHOLD[metric]

    def read_img(self, filename: str) -> np.ndarray:
        cvImg = imread(filename)
        cvImg = cvtColor(cvImg, COLOR_BGR2RGB)
        return cvImg

    def image_grid(self, img_list: List[List[Image.Image]]):
        # img_list is a 2D list of images
        # Assumes the input images are a rectangular grid of equal sized images
        rows = len(img_list)
        cols = len(img_list[0])

        w, h = img_list[0][0].size
        grid = Image.new('RGB', size=(cols*w, rows*h))

        for i, row in enumerate(img_list):
            for j, img in enumerate(row):
                grid.paste(img, box=(j*w, i*h))
        return grid

    def lookup_score_from_fname(self,
                                fname: str,
                                metrics_output_file: str
                                ) -> float:
        fname_basestr = os.path.splitext(fname)[0]
        with open(metrics_output_file, 'r') as f:
            for line in f:
                if fname_basestr in line:
                    score = float(line.split('|')[5])
                    return score
        raise ValueError(f"Could not find score for {fname} in {metrics_output_file}")

    def gather_file_basenames(self, directory: str):
        files = []
        for file in os.listdir(directory):
            if file.endswith(".png"):
                files.append(file)
        return files

    def read_file_prompt(self, fname: str) -> str:
        # Read prompt from image file metadata
        img = Image.open(fname)
        img.load()
        return img.info['prompt']

    def find_file_match(self, baseline_file: str, file_paths: List[str]):
        # Find a file in file_paths with matching metadata to baseline_file
        baseline_prompt = self.read_file_prompt(baseline_file)

        # Do not match empty prompts
        if baseline_prompt is None or baseline_prompt == "":
            return None

        # Find file match
        # Reorder test_file_names so that the file with matching name is first
        # This is an optimization because matching file names are more likely
        # to have matching metadata if they were generated with the same script
        basename = os.path.basename(baseline_file)
        file_path_basenames = [os.path.basename(f) for f in file_paths]
        if basename in file_path_basenames:
            match_index = file_path_basenames.index(basename)
            file_paths.insert(0, file_paths.pop(match_index))

        for f in file_paths:
            test_file_prompt = self.read_file_prompt(f)
            if baseline_prompt == test_file_prompt:
                return f
        # No file with matching metadata found
        return None
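`METRICS` doubles as the extension point: any callable returning a `(score, diff_image)` tuple can be registered next to `ssim_score`, and `test_pipeline_compare` picks it up through its parametrize over `METRICS.keys()`. A sketch of a hypothetical extra metric (name and threshold are illustrative, not part of this commit):
```
import numpy as np
from typing import Tuple

# Hypothetical metric: mean absolute pixel difference, rescaled so that
# a score of 1.0 means the images are identical. The diff image is
# inverted so bright pixels mean "similar", matching the SSIM convention.
def pixel_diff_score(img0: np.ndarray, img1: np.ndarray) -> Tuple[float, np.ndarray]:
    diff = np.abs(img0.astype(np.int16) - img1.astype(np.int16))
    score = 1.0 - diff.mean() / 255.0
    return score, (255 - diff).astype("uint8")

METRICS["pixel_diff"] = pixel_diff_score
METRICS_PASS_THRESHOLD["pixel_diff"] = 0.99  # illustrative threshold
```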
36 changes: 36 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,36 @@
import os
import pytest

# Command line arguments for pytest
def pytest_addoption(parser):
    parser.addoption('--output_dir', action="store", default='tests/inference/samples', help='Output directory for generated images')
    parser.addoption("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0", help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)")
    parser.addoption("--port", type=int, default=8188, help="Set the listen port.")

# This initializes args at the beginning of the test session
@pytest.fixture(scope="session", autouse=True)
def args_pytest(pytestconfig):
    args = {}
    args['output_dir'] = pytestconfig.getoption('output_dir')
    args['listen'] = pytestconfig.getoption('listen')
    args['port'] = pytestconfig.getoption('port')

    os.makedirs(args['output_dir'], exist_ok=True)

    return args

def pytest_collection_modifyitems(items):
    # Modifies items so tests run in the correct order

    LAST_TESTS = ['test_quality']

    # Move the last items to the end
    last_items = []
    for test_name in LAST_TESTS:
        for item in items.copy():
            if item.module.__name__ == test_name:
                last_items.append(item)
                items.remove(item)

    items.extend(last_items)
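The `--listen` and `--port` options mirror the server's own launch flags, so the tests can target a server on a non-default address; for example (port value illustrative):
```
pytest tests/inference --port 8189
```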
Empty file added tests/inference/__init__.py
Empty file.