diff --git a/config/beanstalk.clean.yml b/config/beanstalk.clean.yml index 3a22bb4ba..443e4aa81 100644 --- a/config/beanstalk.clean.yml +++ b/config/beanstalk.clean.yml @@ -17,3 +17,6 @@ beanstalkd: ancestry: submission: ancestry events: ancestry_events + proteomics: + submission: proteomics + events: proteomics_events \ No newline at end of file diff --git a/python/pyproject.toml b/python/pyproject.toml index 3e6430f0f..cc443c85a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin>=0.14,<0.15", "setuptools", "wheel", "Cython"] +requires = ["maturin>=1.3.0,<1.4.0", "setuptools", "wheel", "Cython"] build-backend = "maturin" [project] diff --git a/python/python/bystro/search/index/bystro_file.pyx b/python/python/bystro/search/index/bystro_file.pyx index 9b8f202df..26017005a 100644 --- a/python/python/bystro/search/index/bystro_file.pyx +++ b/python/python/bystro/search/index/bystro_file.pyx @@ -30,14 +30,14 @@ cdef class ReadAnnotationTarball: list paths int id - def __cinit__(self, str index_name, dict delimiters, str tar_path, str annotation_name = 'annotation.tsv.gz', int chunk_size=500): + def __cinit__(self, str index_name, object delimiters, str tar_path, str annotation_name = 'annotation.tsv.gz', int chunk_size=500): self.index_name = index_name self.chunk_size = chunk_size - self.field_separator = delimiters['field'] - self.position_delimiter = delimiters['position'] - self.overlap_delimiter = delimiters['overlap'] - self.value_delimiter = delimiters['value'] - self.empty_field_char = delimiters['empty_field'] + self.field_separator = delimiters.field + self.position_delimiter = delimiters.position + self.overlap_delimiter = delimiters.overlap + self.value_delimiter = delimiters.value + self.empty_field_char = delimiters.empty_field t = tarfile.open(tar_path) for member in t.getmembers(): @@ -117,5 +117,5 @@ cdef class ReadAnnotationTarball: def get_header_fields(self): return self.header_fields -cpdef ReadAnnotationTarball read_annotation_tarball(str index_name, dict delimiters, str tar_path, str annotation_name = 'annotation.tsv.gz', int chunk_size=500): +cpdef ReadAnnotationTarball read_annotation_tarball(str index_name, object delimiters, str tar_path, str annotation_name = 'annotation.tsv.gz', int chunk_size=500): return ReadAnnotationTarball(index_name, delimiters, tar_path, annotation_name, chunk_size) \ No newline at end of file diff --git a/python/python/bystro/search/index/handler.py b/python/python/bystro/search/index/handler.py index a9b1e5ff0..624080807 100644 --- a/python/python/bystro/search/index/handler.py +++ b/python/python/bystro/search/index/handler.py @@ -14,9 +14,9 @@ from bystro.beanstalkd.worker import ProgressPublisher, get_progress_reporter from bystro.search.index.bystro_file import ( # type: ignore # pylint: disable=no-name-in-module,import-error # noqa: E501 - read_annotation_tarball, + read_annotation_tarball, ) -from bystro.search.utils.annotation import get_delimiters +from bystro.search.utils.annotation import DelimitersConfig from bystro.search.utils.opensearch import gather_opensearch_args ray.init(ignore_reinit_error=True, address="auto") @@ -47,7 +47,9 @@ async def index(self, data): self.counter += resp[0] if self.counter >= self.reporter_batch: - await asyncio.to_thread(self.progress_tracker.increment.remote, self.counter) + await asyncio.to_thread( + self.progress_tracker.increment.remote, self.counter + ) self.counter = 0 return resp @@ -96,7 +98,9 @@ async def go( if not 
index_body["settings"].get("number_of_shards"): file_size = os.path.getsize(tar_path) - index_body["settings"]["number_of_shards"] = ceil(float(file_size) / float(1e10)) + index_body["settings"]["number_of_shards"] = ceil( + float(file_size) / float(1e10) + ) try: await client.indices.create(index_name, body=index_body) @@ -106,7 +110,7 @@ async def go( data = read_annotation_tarball( index_name=index_name, tar_path=tar_path, - delimiters=get_delimiters(), + delimiters=DelimitersConfig(), chunk_size=paralleleism_chunk_size, ) @@ -154,7 +158,9 @@ async def go( if __name__ == "__main__": parser = argparse.ArgumentParser(description="Process some config files.") - parser.add_argument("--tar", type=str, help="Path to the tarball containing the annotation") + parser.add_argument( + "--tar", type=str, help="Path to the tarball containing the annotation" + ) parser.add_argument( "--search_conf", diff --git a/python/python/bystro/search/index/tests/test_bystro_file.py b/python/python/bystro/search/index/tests/test_bystro_file.py index b31571f65..97f490171 100644 --- a/python/python/bystro/search/index/tests/test_bystro_file.py +++ b/python/python/bystro/search/index/tests/test_bystro_file.py @@ -6,7 +6,7 @@ from bystro.search.index.bystro_file import ( # type: ignore # pylint: disable=no-name-in-module,import-error # noqa: E501 read_annotation_tarball, ) -from bystro.search.utils.annotation import get_delimiters +from bystro.search.utils.annotation import DelimitersConfig def create_mock_tarball(annotation_content): @@ -27,11 +27,11 @@ def create_mock_tarball(annotation_content): def test_read_annotation_tarball(): - delims = get_delimiters() - delim_v = delims["value"] # e.g. ; - delim_f = delims["field"] # e.g. \t - delim_o = delims["overlap"] # e.g. chr(31) - delim_p = delims["position"] # e.g. | + delims = DelimitersConfig() + delim_v = delims.value # e.g. ; + delim_f = delims.field # e.g. \t + delim_o = delims.overlap # e.g. chr(31) + delim_p = delims.position # e.g. 
| header = f"field1{delim_f}field2{delim_f}field3\n" field1val = f"value1a{delim_v}value1b{delim_p}value2aa{delim_o}value2ab{delim_v}value2b{delim_f}" @@ -70,9 +70,12 @@ def test_read_annotation_tarball(): }, } ] - + import numpy as np + import json result_data = next(reader) assert result_data == expected_data + np.testing.assert_equal(result_data, expected_data) + print(json.dumps(expected_data, indent=4)) # Test the end of the data with pytest.raises(StopIteration): diff --git a/python/python/bystro/search/save/handler.py b/python/python/bystro/search/save/handler.py index 9489e7e05..c67bb0e0e 100644 --- a/python/python/bystro/search/save/handler.py +++ b/python/python/bystro/search/save/handler.py @@ -7,11 +7,13 @@ # TODO 2023-05-08: concatenate chunks in a different ray worker import gzip +import logging import math import os import pathlib import subprocess import traceback +from typing import Any import numpy as np import ray @@ -19,9 +21,16 @@ from opensearchpy import OpenSearch from bystro.beanstalkd.worker import ProgressPublisher, get_progress_reporter -from bystro.search.utils.annotation import AnnotationOutputs, get_delimiters +from bystro.search.utils.annotation import ( + AnnotationOutputs, + DelimitersConfig, +) from bystro.search.utils.messages import SaveJobData from bystro.search.utils.opensearch import gather_opensearch_args +from bystro.utils.compress import GZIP_EXECUTABLE +from bystro.utils.tar import GNU_TAR_EXECUTABLE_NAME + +logger = logging.getLogger(__name__) ray.init(ignore_reinit_error=True, address="auto") @@ -57,8 +66,8 @@ def _get_header(field_names): return parents, children -def _populate_data(field_path, data_for_end_of_path): - if not isinstance(field_path, list) or data_for_end_of_path is None: +def _populate_data(field_path: list[str] | str, data_for_end_of_path: Any): + if not isinstance(data_for_end_of_path, dict): return data_for_end_of_path for child_field in field_path: @@ -70,8 +79,8 @@ def _populate_data(field_path, data_for_end_of_path): return data_for_end_of_path -def _make_output_string(rows: list, delims: dict): - empty_field_char = delims["empty_field"] +def _make_output_string(rows: list, delims: DelimitersConfig): + empty_field_char = delims.empty_field for row_idx, row in enumerate(rows): # pylint:disable=too-many-nested-blocks # Some fields may just be missing; we won't store even the alt/pos [[]] structure for those for i, column in enumerate(row): @@ -99,18 +108,23 @@ def _make_output_string(rows: list, delims: dict): if isinstance(sub, list): inner_values.append( - delims["overlap"].join( - map(lambda x: str(x) if x is not None else empty_field_char, sub) + delims.overlap.join( + map( + lambda x: str(x) + if x is not None + else empty_field_char, + sub, + ) ) ) else: inner_values.append(str(sub)) - column[j] = delims["value"].join(inner_values) + column[j] = delims.value.join(inner_values) - row[i] = delims["position"].join(column) + row[i] = delims.position.join(column) - rows[row_idx] = delims["field"].join(row) + rows[row_idx] = delims.field.join(row) return bytes("\n".join(rows) + "\n", encoding="utf-8") @@ -143,7 +157,9 @@ def _process_query( for doc in resp["hits"]["hits"]: row = np.empty(len(field_names), dtype=object) for y in range(len(field_names)): - row[y] = _populate_data(child_fields[y], doc["_source"].get(parent_fields[y])) + row[y] = _populate_data( + child_fields[y], doc["_source"].get(parent_fields[y]) + ) if row[discordant_idx][0][0] is False: row[discordant_idx][0][0] = 0 @@ -191,7 +207,7 @@ def go( # 
pylint:disable=invalid-name output_dir = os.path.dirname(job_data.outputBasePath) basename = os.path.basename(job_data.outputBasePath) pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) - outputs = AnnotationOutputs.from_path(output_dir, basename, True) + outputs, stats = AnnotationOutputs.from_path(output_dir, basename, compress=True) written_chunks = [os.path.join(output_dir, f"{job_data.indexName}_header")] @@ -203,7 +219,9 @@ def go( # pylint:disable=invalid-name client = OpenSearch(**search_client_args) query = _clean_query(job_data.queryBody) - num_slices = _get_num_slices(client, job_data.indexName, max_query_size, max_slices, query) + num_slices = _get_num_slices( + client, job_data.indexName, max_query_size, max_slices, query + ) pit_id = client.create_point_in_time(index=job_data.indexName, params={"keep_alive": keep_alive})["pit_id"] # type: ignore # noqa: E501 try: reporter = get_progress_reporter(publisher) @@ -212,7 +230,9 @@ def go( # pylint:disable=invalid-name reqs = [] for slice_id in range(num_slices): - written_chunks.append(os.path.join(output_dir, f"{job_data.indexName}_{slice_id}")) + written_chunks.append( + os.path.join(output_dir, f"{job_data.indexName}_{slice_id}") + ) body = query.copy() if num_slices > 1: # Slice queries require max > 1 @@ -223,7 +243,7 @@ def go( # pylint:disable=invalid-name job_data.fieldNames, written_chunks[-1], reporter, - get_delimiters(), + DelimitersConfig(), ) reqs.append(res) results_processed = ray.get(reqs) @@ -234,14 +254,24 @@ def go( # pylint:disable=invalid-name all_chunks = " ".join(written_chunks) annotation_path = os.path.join(output_dir, outputs.annotation) - ret = subprocess.call(f"cat {all_chunks} > {annotation_path}; rm {all_chunks}", shell=True) + ret = subprocess.call( + f"cat {all_chunks} > {annotation_path}; rm {all_chunks}", shell=True + ) if ret != 0: raise IOError(f"Failed to write {annotation_path}") - tarball_name = os.path.basename(outputs.archived) + ret = subprocess.call( + f"{GZIP_EXECUTABLE} -d -c {annotation_path} | {stats.stdin_cli_stats_command}", + shell=True, + ) + if ret != 0: + raise IOError(f"Failed to write statistics for {annotation_path}") + tarball_name = os.path.basename(outputs.archived) + # The web server requires the statistics files at the top level of the output directory, + # but the annotation is too large to justify storing it both inside and outside the archive ret = subprocess.call( - f'cd {output_dir}; tar --exclude ".*" --exclude={tarball_name} -cf {tarball_name} * --remove-files', # noqa: E501 + f'cd {output_dir}; {GNU_TAR_EXECUTABLE_NAME} --exclude ".*" --exclude={tarball_name} -cf {tarball_name} * && rm {annotation_path}', # noqa: E501 shell=True, ) if ret != 0: diff --git a/python/python/bystro/search/save/tests/test_make_output_string.py b/python/python/bystro/search/save/tests/test_make_output_string.py index 64d676185..8f0dc65b5 100644 --- a/python/python/bystro/search/save/tests/test_make_output_string.py +++ b/python/python/bystro/search/save/tests/test_make_output_string.py @@ -1,12 +1,68 @@ -from bystro.search.save.handler import _make_output_string +from bystro.search.save.handler import _make_output_string, _populate_data +from bystro.search.utils.annotation import DelimitersConfig + +delims = DelimitersConfig( + empty_field="!", + overlap="/", + value=";", + position="|", + field="\t", +) + +def test_populate_data(): + data = {"a": {"b": {"c": "value"}}} + + # Positive cases + assert _populate_data(["a", "b", "c"], data) == "value" + assert _populate_data(["a"], data) == {"b": {"c": "value"}} + + # 
Negative cases + assert _populate_data(["a", "b", "d"], data) is None + assert _populate_data("a.b.c", data) is None + assert _populate_data(["a", "b", "c"], None) is None + assert _populate_data(["a.exact"], data) is None + assert _populate_data(["a", "b.exact"], data) is None + + +def test_populate_data_real_example_nested(): + _real_example_nested = { + "nearest": { + "refSeq": { + "name2": [[["GCFC2"]]], + "name": [[["NM_001201334"], ["NM_003203"]]], + "dist": [[["0"]]], + } + } + } + + assert _populate_data(["nearest", "refSeq", "name2"], _real_example_nested) == [ + [["GCFC2"]] + ] + assert _populate_data(["nearest", "refSeq", "name"], _real_example_nested) == [ + [["NM_001201334"], ["NM_003203"]] + ] + assert _populate_data(["nearest", "refSeq", "dist"], _real_example_nested) == [ + [["0"]] + ] + + +def test_populate_data_real_example_scalar(): + _real_example_scalar = { + "chrom": [[["chr2"]]], + "pos": [[["75928300"]]], + "type": [[["SNP"]]] + } + + assert _populate_data(["chrom"], _real_example_scalar) == [[["chr2"]]] + assert _populate_data(["pos"], _real_example_scalar) == [[["75928300"]]] + assert _populate_data(["type"], _real_example_scalar) == [[["SNP"]]] + + # _populate_data considers any non-dict value a leaf value and will return it as is + # and the provided "path" is a label of the leaf's data, a no-op + assert _populate_data("chrom", [[["chr2"]]]) == [[["chr2"]]] + assert _populate_data("pos", [[["75928300"]]]) == [[["75928300"]]] + assert _populate_data("type", [[["SNP"]]]) == [[["SNP"]]] -delims = { - "empty_field": "!", - "overlap": "/", - "value": ";", - "position": "|", - "field": "\t", -} def test_basic_functionality(): rows = [ @@ -15,9 +71,9 @@ def test_basic_functionality(): # column1 [ # position values [ # value values - ["gene1_mrna1", None, "gene1_mrna2"], # overlap values + ["gene1_mrna1", None, "gene1_mrna2"], # overlap values # gene1 has 3 transcripts, 1 of which is non-coding and doens't have an mrna record - ["gene1"] + ["gene1"], ], # 2 value delimited values at the next position in the indel ["position2a", "position2b"], @@ -32,22 +88,19 @@ def test_basic_functionality(): # row2 [ # column1 - [ + [ # Retain backwards compat with scalar values in 2nd dimension - [ - "row2_scalar" - ] + ["row2_scalar"] ] - ] + ], ] - expected = ( - b"gene1_mrna1/!/gene1_mrna2;gene1|position2a;position2b\tcol2_scalar\nrow2_scalar\n" - ) + expected = b"gene1_mrna1/!/gene1_mrna2;gene1|position2a;position2b\tcol2_scalar\nrow2_scalar\n" assert _make_output_string(rows, delims) == expected + def test_empty_list(): rows = [] expected = b"\n" - assert _make_output_string(rows, delims) == expected \ No newline at end of file + assert _make_output_string(rows, delims) == expected diff --git a/python/python/bystro/search/utils/annotation.py b/python/python/bystro/search/utils/annotation.py index 7973617f6..ad9dc90e1 100644 --- a/python/python/bystro/search/utils/annotation.py +++ b/python/python/bystro/search/utils/annotation.py @@ -1,56 +1,107 @@ import os from glob import glob +import logging from os import path +import shutil +from typing import Optional, Any from msgspec import Struct +logger = logging.getLogger(__name__) -class StatisticsOutputs(Struct, frozen=True): + +class FileProcessorsConfig(Struct, frozen=True, forbid_unknown_fields=True): + args: str + program: str + + +class StatisticsOutputExtensions(Struct, frozen=True, forbid_unknown_fields=True): + json: str = "statistics.json" + tsv: str = "statistics.tsv" + qc: str = "statistics.qc.tsv" + + +class 
StatisticsConfig(Struct, frozen=True, forbid_unknown_fields=True): + dbSNPnameField: str = "dbSNP.name" + siteTypeField: str = "refSeq.siteType" + exonicAlleleFunctionField: str = "refSeq.exonicAlleleFunction" + refField: str = "ref" + homozygotesField: str = "homozygotes" + heterozygotesField: str = "heterozygotes" + altField: str = "alt" + programPath: str = "bystro-stats" + outputExtensions: StatisticsOutputExtensions = StatisticsOutputExtensions() + + @staticmethod + def from_dict(annotation_config: dict[str, Any]): + """Get statistics config from a dictionary""" + stats_config: Optional[dict[str, Any]] = annotation_config.get("statistics") + + if stats_config is None: + logger.warning( + "No 'statistics' config found in supplied annotation config, using defaults" + ) + return StatisticsConfig() + + if "outputExtensions" in stats_config: + stats_config["outputExtensions"] = StatisticsOutputExtensions( + **stats_config["outputExtensions"] + ) + + return StatisticsConfig(**stats_config) + + +class StatisticsOutputs(Struct, frozen=True, forbid_unknown_fields=True): """ - Paths to all possible Bystro statistics outputs - - Attributes: - json: str - Basename of the JSON statistics file - tab: str - Basename of the TSV statistics file - qc: str - Basename of the QC statistics file + Paths to all possible Bystro statistics outputs + + Attributes: + json: str + Basename of the JSON statistics file + tab: str + Basename of the TSV statistics file + qc: str + Basename of the QC statistics file """ + json: str tab: str qc: str -class AnnotationOutputs(Struct, frozen=True): +class AnnotationOutputs(Struct, frozen=True, forbid_unknown_fields=True): """ - Paths to all possible Bystro annotation outputs - - Attributes: - output_dir: str - Output directory - archived: str - Basename of the archive - annotation: str - Basename of the annotation TSV file, inside the archive - sampleList: Optional[str] - Basename of the sample list file, inside the archive - log: Basename of the log file, inside the archive - statistics: Optional[StatisticsOutputs] - Basenames of the statistics files, inside the archive + Paths to all possible Bystro annotation outputs + + Attributes: + output_dir: str + Output directory + archived: str + Basename of the archive + annotation: str + Basename of the annotation TSV file, found inside the archive only + sampleList: Optional[str] + Basename of the sample list file, in the archive and output directory + log: str + Basename of the log file, in the archive and output directory + statistics: StatisticsOutputs + Basenames of the statistics files, in the archive and output directory + header: Optional[str] + Basename of the header file, in the archive and output directory """ + archived: str annotation: str sampleList: str log: str - statistics: StatisticsOutputs | None = None + statistics: StatisticsOutputs + header: str | None = None @staticmethod def from_path( output_dir: str, basename: str, compress: bool, - generate_statistics: bool = True, make_dir: bool = True, make_dir_mode: int = 511, ): @@ -71,46 +122,111 @@ def from_path( archived = f"{basename}.tar" - statistics = None - if generate_statistics: - statistics = StatisticsOutputs( - json=f"{basename}.statistics.json", - tab=f"{basename}.statistics.tsv", - qc=f"{basename}.statistics.qc.tsv", - ) + stats = Statistics(output_base_path=os.path.join(output_dir, basename)) + statistics_tarball_members = StatisticsOutputs( + json=f"{os.path.basename(stats.json_output_path)}", + tab=f"{os.path.basename(stats.tsv_output_path)}", + 
qc=f"{os.path.basename(stats.qc_output_path)}", + ) - return AnnotationOutputs( - annotation=annotation, - sampleList=sampleList, - statistics=statistics, - archived=archived, - log=log, + return ( + AnnotationOutputs( + annotation=annotation, + sampleList=sampleList, + statistics=statistics_tarball_members, + archived=archived, + log=log, + ), + stats, ) -_default_delimiters = { - "field": "\t", - "position": "|", - "overlap": chr(31), - "value": ";", - "empty_field": "!", -} +class DelimitersConfig(Struct, frozen=True, forbid_unknown_fields=True): + field: str = "\t" + position: str = "|" + overlap: str = chr(31) + value: str = ";" + empty_field: str = "!" + @staticmethod + def from_dict(annotation_config: dict[str, Any]): + """Get delimiters from a dictionary""" + delim_config: Optional[dict[str, str]] = annotation_config.get("delimiters") -def get_delimiters(annotation_conf: dict | None = None): - if annotation_conf: - return annotation_conf.get("delimiters", _default_delimiters) - return _default_delimiters + if delim_config is None: + logger.warning( + "No 'delimiters' key found in supplied annotation config, using defaults" + ) + return DelimitersConfig() + + return DelimitersConfig(**delim_config) -def get_config_file_path(config_path_base_dir: str, assembly: str, suffix: str = ".y*ml"): +def get_config_file_path( + config_path_base_dir: str, assembly: str, suffix: str = ".y*ml" +): """Get config file path""" paths = glob(path.join(config_path_base_dir, assembly + suffix)) if not paths: - raise ValueError(f"\n\nNo config path found for the assembly {assembly}. Exiting\n\n") + raise ValueError( + f"\n\nNo config path found for the assembly {assembly}. Exiting\n\n" + ) if len(paths) > 1: print("\n\nMore than 1 config path found, choosing first") return paths[0] + + +class Statistics: + def __init__( + self, output_base_path: str, annotation_config: dict[str, Any] | None = None + ): + if annotation_config is None: + self._config = StatisticsConfig() + self._delimiters = DelimitersConfig() + else: + self._config = StatisticsConfig.from_dict(annotation_config) + self._delimiters = DelimitersConfig.from_dict(annotation_config) + + program_path = shutil.which(self._config.programPath) + if not program_path: + raise ValueError( + f"Couldn't find statistics program {self._config.programPath}" + ) + + self.program_path = program_path + self.json_output_path = ( + f"{output_base_path}.{self._config.outputExtensions.json}" + ) + self.tsv_output_path = f"{output_base_path}.{self._config.outputExtensions.tsv}" + self.qc_output_path = f"{output_base_path}.{self._config.outputExtensions.qc}" + + @property + def stdin_cli_stats_command(self) -> str: + value_delim = self._delimiters.value + field_delim = self._delimiters.field + empty_field = self._delimiters.empty_field + + het_field = self._config.heterozygotesField + hom_field = self._config.homozygotesField + site_type_field = self._config.siteTypeField + ea_fun_field = self._config.exonicAlleleFunctionField + ref_field = self._config.refField + alt_field = self._config.altField + dbSNP_field = self._config.dbSNPnameField + + statsProg = self.program_path + + dbSNPpart = f"-dbSnpNameColumn {dbSNP_field}" if dbSNP_field else "" + + return ( + f"{statsProg} -outJsonPath {self.json_output_path} -outTabPath {self.tsv_output_path} " + f"-outQcTabPath {self.qc_output_path} -refColumn {ref_field} " + f"-altColumn {alt_field} -homozygotesColumn {hom_field} " + f"-heterozygotesColumn {het_field} -siteTypeColumn {site_type_field} " + f"{dbSNPpart} 
-emptyField '{empty_field}' " + f"-exonicAlleleFunctionColumn {ea_fun_field} " + f"-primaryDelimiter '{value_delim}' -fieldSeparator '{field_delim}'" + ) diff --git a/python/python/bystro/search/utils/tests/test_annotation.py b/python/python/bystro/search/utils/tests/test_annotation.py new file mode 100644 index 000000000..524a40999 --- /dev/null +++ b/python/python/bystro/search/utils/tests/test_annotation.py @@ -0,0 +1,218 @@ +import pytest + +from bystro.search.utils.annotation import ( + DelimitersConfig, + get_config_file_path, + StatisticsConfig, + StatisticsOutputExtensions, + Statistics, +) + + +def assert_defaults(config: DelimitersConfig): + assert config.field == "\t" + assert config.position == "|" + assert config.overlap == chr(31) + assert config.value == ";" + assert config.empty_field == "!" + + +def test_delimiters_default_values(): + config = DelimitersConfig() + assert_defaults(config) + + +def test_delimiters_from_dict_no_arg(): + with pytest.raises( + TypeError, match=r"missing 1 required positional argument: 'annotation_config'" + ): + # Ignoring type checking because we're testing the error + DelimitersConfig.from_dict() # type: ignore + + +def test_delimiters_from_dict_no_delimiters_key(): + config_dict = {"random_key": "random_value"} + config = DelimitersConfig.from_dict(config_dict) + assert_defaults(config) + + +def test_delimiters_from_dict_unexpected_key(): + annotation_config = {"delimiters": {"random_delim_key": "random_value"}} + with pytest.raises( + TypeError, match=r"Unexpected keyword argument 'random_delim_key'" + ): + DelimitersConfig.from_dict(annotation_config) + + +def test_delimiters_from_dict_with_delimiters_key(): + config_dict = { + "delimiters": { + "field": "x", + "position": "y", + "overlap": "z", + "value": "w", + "empty_field": "v", + } + } + config = DelimitersConfig.from_dict(config_dict) + assert config.field == "x" + assert config.position == "y" + assert config.overlap == "z" + assert config.value == "w" + assert config.empty_field == "v" + + +def test_delimiters_from_dict_partial_delimiters_key(): + config_dict = {"delimiters": {"field": "x", "position": "y"}} + config = DelimitersConfig.from_dict(config_dict) + assert config.field == "x" + assert config.position == "y" + assert config.overlap == chr(31) # default value + assert config.value == ";" # default value + assert config.empty_field == "!" 
# default value + + +def test_delimiters_unexpected_key(): + with pytest.raises( + TypeError, match=r"Unexpected keyword argument 'random_delim_key2'" + ): + # Ignoring type checking because we're testing the error + DelimitersConfig(random_delim_key2= "random_value") # type: ignore + + +def test_get_config_file_path_no_path_found(mocker): + mocker.patch("bystro.search.utils.annotation.glob", return_value=[]) + + with pytest.raises(ValueError, match=r"No config path found for the assembly"): + get_config_file_path("/dummy/path", "dummy_assembly") + + +def test_get_config_file_path_single_path_found(mocker): + mocked_path = "/dummy/path/dummy_assembly.yml" + mocker.patch("bystro.search.utils.annotation.glob", return_value=[mocked_path]) + result = get_config_file_path("/dummy/path", "dummy_assembly") + assert result == mocked_path + + +def test_get_config_file_path_single_path_found_with_extension(mocker): + mocked_path = "/dummy/path/dummy_assembly.blargh" + mocker.patch("bystro.search.utils.annotation.glob", return_value=[mocked_path]) + result = get_config_file_path("/dummy/path", "dummy_assembly", suffix=".blargh") + assert result == mocked_path + + +def test_get_config_file_path_single_path_found_with_extension_wildcard(mocker): + mocked_path = "/dummy/path/dummy_assembly.blargh" + mocker.patch("bystro.search.utils.annotation.glob", return_value=[mocked_path]) + result = get_config_file_path("/dummy/path", "dummy_assembly", suffix=".bl*rgh") + assert result == mocked_path + + +def test_get_config_file_path_multiple_paths_found(mocker, capsys): + mocked_paths = [ + "/dummy/path/dummy_assembly_1.yml", + "/dummy/path/dummy_assembly_2.yml", + ] + mocker.patch("bystro.search.utils.annotation.glob", return_value=mocked_paths) + result = get_config_file_path("/dummy/path", "dummy_assembly") + captured = capsys.readouterr() # Capture the standard output + assert "More than 1 config path found, choosing first" in captured.out + assert result == mocked_paths[0] + + +def test_statistics_output_extensions_defaults(): + extensions = StatisticsOutputExtensions() + assert extensions.json == "statistics.json" + assert extensions.tsv == "statistics.tsv" + assert extensions.qc == "statistics.qc.tsv" + + +def test_statistics_config_defaults(): + config = StatisticsConfig() + assert config.dbSNPnameField == "dbSNP.name" + assert config.siteTypeField == "refSeq.siteType" + assert config.exonicAlleleFunctionField == "refSeq.exonicAlleleFunction" + assert config.refField == "ref" + assert config.homozygotesField == "homozygotes" + assert config.heterozygotesField == "heterozygotes" + assert config.altField == "alt" + assert config.programPath == "bystro-stats" + assert isinstance(config.outputExtensions, StatisticsOutputExtensions) + + +def test_statistics_config_from_dict_none(): + with pytest.raises( + TypeError, match=r"missing 1 required positional argument: 'annotation_config'" + ): + # Ignoring type checking because we're testing the error + StatisticsConfig.from_dict() # type: ignore + + +def test_statistics_config_from_dict_no_statistics_key(): + annotation_config = {"random_key": "random_value"} + config = StatisticsConfig.from_dict(annotation_config) + + assert ( + config.dbSNPnameField == "dbSNP.name" + ) # Again, just checking one representative default value + + +def test_statistics_config_no_statistics_key(): + with pytest.raises( + TypeError, match=r"Unexpected keyword argument 'random_stats_key2'" + ): + StatisticsConfig(random_stats_key2="random_value") # type: ignore + + +def 
test_statistics_config_from_dict_unexpected_key(): + annotation_config = {"statistics": {"random_key": "random_value"}} + with pytest.raises(TypeError, match=r"Unexpected keyword argument 'random_key'"): + StatisticsConfig.from_dict(annotation_config) + + +def test_statistics_config_from_dict_with_statistics_key(): + annotation_config = { + "statistics": { + "dbSNPnameField": "new.dbSNP.name", + "siteTypeField": "new.refSeq.siteType", + "outputExtensions": { + "json": ".new.json", + "tsv": ".new.tsv", + "qc": ".new.qc.tsv", + }, + } + } + config = StatisticsConfig.from_dict(annotation_config) + assert config.dbSNPnameField == "new.dbSNP.name" + assert config.siteTypeField == "new.refSeq.siteType" + assert config.outputExtensions.json == ".new.json" + assert config.outputExtensions.tsv == ".new.tsv" + assert config.outputExtensions.qc == ".new.qc.tsv" + + +def test_statistics_init_program_not_found(mocker): + mocker.patch("shutil.which", return_value=None) + with pytest.raises(ValueError, match=r"Couldn't find statistics program"): + Statistics("/dummy/path", None) + + +def test_statistics_get_stats_arguments(mocker): + # Mocking which to return a valid program path + mocker.patch("shutil.which", return_value="/path/to/bystro-stats") + + statistics = Statistics("/dummy/output_base_path", None) + + # Note: Your actual values might differ based on the defaults + # in StatisticsConfig and DelimitersConfig + expected_args = ( + "/path/to/bystro-stats -outJsonPath /dummy/output_base_path.statistics.json " + "-outTabPath /dummy/output_base_path.statistics.tsv " + "-outQcTabPath /dummy/output_base_path.statistics.qc.tsv " + "-refColumn ref -altColumn alt -homozygotesColumn homozygotes " + "-heterozygotesColumn heterozygotes -siteTypeColumn refSeq.siteType " + "-dbSnpNameColumn dbSNP.name -emptyField '!' 
" + "-exonicAlleleFunctionColumn refSeq.exonicAlleleFunction " + "-primaryDelimiter ';' -fieldSeparator '\t'" + ) + + assert statistics.stdin_cli_stats_command == expected_args diff --git a/python/python/bystro/utils/compress.py b/python/python/bystro/utils/compress.py new file mode 100644 index 000000000..08921f7d4 --- /dev/null +++ b/python/python/bystro/utils/compress.py @@ -0,0 +1,19 @@ +"""Utility functions for safely invoking tar in cross-OS manner.""" +import shutil + + +def _get_gzip_program_path() -> str: + pigz = shutil.which("pigz") + + if pigz: + return pigz + + gzip = shutil.which("gzip") + + if gzip: + return gzip + + raise OSError("Neither gzip nor pigz not found on system") + + +GZIP_EXECUTABLE = _get_gzip_program_path() diff --git a/python/python/bystro/utils/tests/test_compress.py b/python/python/bystro/utils/tests/test_compress.py new file mode 100644 index 000000000..331db1a24 --- /dev/null +++ b/python/python/bystro/utils/tests/test_compress.py @@ -0,0 +1,23 @@ +import pytest +import shutil +from bystro.utils.compress import _get_gzip_program_path + + +def test_pigz_found(monkeypatch): + monkeypatch.setattr( + shutil, "which", lambda x: "/path/to/pigz" if x == "pigz" else None + ) + assert _get_gzip_program_path() == "/path/to/pigz" + + +def test_pigz_not_found_gzip_found(monkeypatch): + monkeypatch.setattr( + shutil, "which", lambda x: "/path/to/gzip" if x == "gzip" else None + ) + assert _get_gzip_program_path() == "/path/to/gzip" + + +def test_neither_found(monkeypatch): + monkeypatch.setattr(shutil, "which", lambda _: None) + with pytest.raises(OSError, match="Neither gzip nor pigz not found on system"): + _get_gzip_program_path() diff --git a/startup.yml b/startup.yml index ec4be407d..43777a2dd 100644 --- a/startup.yml +++ b/startup.yml @@ -1,6 +1,6 @@ apps: - name: BystroAnnotationServer - script: bin/bystro-server.pl + script: perl/bin/bystro-server.pl interpreter: 'perl' args: -q config/beanstalk.yml --debug - name: BystroSaveServer