fix ValueError: No objects to concatenate in summary

bactopia · Aug 21, 2023 · 1c8d4a9 · 1c8d4a9
1 parent 493479d
commit 1c8d4a9
Show file tree

Hide file tree

Showing 4 changed files with 107 additions and 100 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+## 1.0.6
+
+- Fixed `bactopia-summary` handling of empty searches
+
 ## 1.0.5
 
 - Fixed `bactopia-download` not building prokka and bakta conda envs

diff --git a/bactopia/cli/summary.py b/bactopia/cli/summary.py
@@ -404,110 +404,114 @@ def summary(
                         dfs.append(df)
                         logging.debug(f"\tRank: {rank} ({reason})")
                     else:
-                        missing_files = ";".join(parsable_files)
                         logging.debug(
-                            f"Skipping {sample['id']} ({sample['path']}) due to missing files: {missing_files}"
+                            f"Skipping {sample['id']} ({sample['path']}) due to missing files. Missing:"
                         )
+                        for missing_file in parsable_files:
+                            logging.debug(f"\t{missing_file}")
                         increment_and_append("ignore-unknown", sample["id"])
             else:
                 logging.debug(
                     f"Skipping {sample['id']} ({sample['path']}), incomplete or not a Bactopia directory"
                 )
                 increment_and_append("ignore-unknown", sample["id"])
-    final_df = pd.concat(dfs)
-    for col in EXCLUDE_COLUMNS:
-        if col in final_df.columns:
-            final_df.drop(col, axis=1, inplace=True)
-
-    # Reorder the columns
-    col_order = [
-        "sample",
-        "rank",
-        "reason",
-        "genome_size",
-        "species",
-        "runtype",
-        "original_runtype",
-        "mlst_scheme",
-        "mlst_st",
-    ]
-    for col in final_df.columns:
-        if col not in col_order:
-            col_order.append(col)
-    final_df = final_df[col_order]
-
-    # Tab-delimited report
-    logging.info(f"Writing report: {txt_report}")
-    final_df.to_csv(txt_report, sep="\t", index=False)
-
-    # Exclusion report
-    logging.info(f"Writing exclusion report: {exclusion_report}")
-    cutoff_counts = defaultdict(int)
-    with open(exclusion_report, "w") as exclude_fh:
-        exclude_fh.write("sample\tstatus\treason\n")
-        for name, reason in CATEGORIES["failed"]:
-            if name in processed_samples:
-                reasons = reason.split(":")[1].split(";")
-                cutoffs = []
-                for r in reasons:
-                    cutoffs.append(r.split("(")[0].strip().title())
-                cutoff_counts[";".join(sorted(cutoffs))] += 1
-                exclude_fh.write(f"{name}\texclude\t{reason}\n")
-            else:
-                exclude_fh.write(f"{name}\tqc-fail\t{reason}\n")
-
-    # Screen report
-    logging.info(f"Writing summary report: {summary_report}")
-    with open(summary_report, "w") as summary_fh:
-        summary_fh.write("Bactopia Summary Report\n")
-        summary_fh.write(
-            textwrap.dedent(
-                f"""
-            Total Samples: {COUNTS['total']}
-
-            Passed: {COUNTS["pass"]}
-                Gold: {COUNTS["gold"]}
-                Silver: {COUNTS["silver"]}
-                Bronze: {COUNTS["bronze"]}
-
-            Excluded: {COUNTS["total-excluded"]}
-                Failed Cutoff: {COUNTS["exclude"]}\n"""
+    if dfs:
+        final_df = pd.concat(dfs)
+        for col in EXCLUDE_COLUMNS:
+            if col in final_df.columns:
+                final_df.drop(col, axis=1, inplace=True)
+
+        # Reorder the columns
+        col_order = [
+            "sample",
+            "rank",
+            "reason",
+            "genome_size",
+            "species",
+            "runtype",
+            "original_runtype",
+            "mlst_scheme",
+            "mlst_st",
+        ]
+        for col in final_df.columns:
+            if col not in col_order:
+                col_order.append(col)
+        final_df = final_df[col_order]
+
+        # Tab-delimited report
+        logging.info(f"Writing report: {txt_report}")
+        final_df.to_csv(txt_report, sep="\t", index=False)
+
+        # Exclusion report
+        logging.info(f"Writing exclusion report: {exclusion_report}")
+        cutoff_counts = defaultdict(int)
+        with open(exclusion_report, "w") as exclude_fh:
+            exclude_fh.write("sample\tstatus\treason\n")
+            for name, reason in CATEGORIES["failed"]:
+                if name in processed_samples:
+                    reasons = reason.split(":")[1].split(";")
+                    cutoffs = []
+                    for r in reasons:
+                        cutoffs.append(r.split("(")[0].strip().title())
+                    cutoff_counts[";".join(sorted(cutoffs))] += 1
+                    exclude_fh.write(f"{name}\texclude\t{reason}\n")
+                else:
+                    exclude_fh.write(f"{name}\tqc-fail\t{reason}\n")
+
+        # Screen report
+        logging.info(f"Writing summary report: {summary_report}")
+        with open(summary_report, "w") as summary_fh:
+            summary_fh.write("Bactopia Summary Report\n")
+            summary_fh.write(
+                textwrap.dedent(
+                    f"""
+                Total Samples: {COUNTS['total']}
+
+                Passed: {COUNTS["pass"]}
+                    Gold: {COUNTS["gold"]}
+                    Silver: {COUNTS["silver"]}
+                    Bronze: {COUNTS["bronze"]}
+
+                Excluded: {COUNTS["total-excluded"]}
+                    Failed Cutoff: {COUNTS["exclude"]}\n"""
+                )
             )
-        )
-        summary_fh.write(f"{print_cutoffs(cutoff_counts)}\n")
-        summary_fh.write(f'    QC Failure: {COUNTS["qc-failure"]}\n')
-        summary_fh.write(f"{print_failed(FAILED)}\n")
-        summary_fh.write(
-            textwrap.dedent(
-                f"""
-            Reports:
-                Full Report (txt): {txt_report}
-                Exclusion: {exclusion_report}
-                Summary: {summary_report}
-
-            Rank Cutoffs:
-                Gold:
-                    Coverage >= {RANK_CUTOFF['gold']['coverage']}x
-                    Quality >= Q{RANK_CUTOFF['gold']['quality']}
-                    Read Length >= {RANK_CUTOFF['gold']['length']}bp
-                    Total Contigs < {RANK_CUTOFF['gold']['contigs']}
-                Silver:
-                    Coverage >= {RANK_CUTOFF['silver']['coverage']}x
-                    Quality >= Q{RANK_CUTOFF['silver']['quality']}
-                    Read Length >= {RANK_CUTOFF['silver']['length']}bp
-                    Total Contigs < {RANK_CUTOFF['silver']['contigs']}
-                Bronze:
-                    Coverage >= {RANK_CUTOFF['bronze']['coverage']}x
-                    Quality >= Q{RANK_CUTOFF['bronze']['quality']}
-                    Read Length >= {RANK_CUTOFF['bronze']['length']}bp
-                    Total Contigs < {RANK_CUTOFF['bronze']['contigs']}
-
-            Assembly Length Exclusions:
-                Minimum: {RANK_CUTOFF['min-assembled-size']}
-                Maximum: {RANK_CUTOFF['min-assembled-size']}
-        """
+            summary_fh.write(f"{print_cutoffs(cutoff_counts)}\n")
+            summary_fh.write(f'    QC Failure: {COUNTS["qc-failure"]}\n')
+            summary_fh.write(f"{print_failed(FAILED)}\n")
+            summary_fh.write(
+                textwrap.dedent(
+                    f"""
+                Reports:
+                    Full Report (txt): {txt_report}
+                    Exclusion: {exclusion_report}
+                    Summary: {summary_report}
+
+                Rank Cutoffs:
+                    Gold:
+                        Coverage >= {RANK_CUTOFF['gold']['coverage']}x
+                        Quality >= Q{RANK_CUTOFF['gold']['quality']}
+                        Read Length >= {RANK_CUTOFF['gold']['length']}bp
+                        Total Contigs < {RANK_CUTOFF['gold']['contigs']}
+                    Silver:
+                        Coverage >= {RANK_CUTOFF['silver']['coverage']}x
+                        Quality >= Q{RANK_CUTOFF['silver']['quality']}
+                        Read Length >= {RANK_CUTOFF['silver']['length']}bp
+                        Total Contigs < {RANK_CUTOFF['silver']['contigs']}
+                    Bronze:
+                        Coverage >= {RANK_CUTOFF['bronze']['coverage']}x
+                        Quality >= Q{RANK_CUTOFF['bronze']['quality']}
+                        Read Length >= {RANK_CUTOFF['bronze']['length']}bp
+                        Total Contigs < {RANK_CUTOFF['bronze']['contigs']}
+
+                Assembly Length Exclusions:
+                    Minimum: {RANK_CUTOFF['min-assembled-size']}
+                    Maximum: {RANK_CUTOFF['min-assembled-size']}
+            """
+                )
             )
-        )
+    else:
+        logging.warning("No samples found to process!")
 
 
 def main():

diff --git a/bactopia/parsers/parsables.py b/bactopia/parsers/parsables.py
@@ -31,12 +31,9 @@ def get_parsable_files(path: str, name: str) -> list:
         f"{path}/main/assembler/{name}.tsv": "assembler",
         # gather
         f"{path}/main/gather/{name}-meta.tsv": "gather",
-        # qc
-        f"{path}/main/qc/summary/{name}-final.json": "qc",
-        f"{path}/main/qc/summary/{name}-original.json": "qc",
         # sketcher
-        f"{path}/main/sketcher/summary/{name}-mash-refseq88-k21.txt": "sketcher",
-        f"{path}/main/sketcher/summary/{name}-sourmash-gtdb-rs207-k31.txt": "sketcher",
+        f"{path}/main/sketcher/{name}-mash-refseq88-k21.txt": "sketcher",
+        f"{path}/main/sketcher/{name}-sourmash-gtdb-rs207-k31.txt": "sketcher",
         # bactopia-tools
         # amrfinderplus
         f"{path}/tools/amrfinderplus/{name}-genes.tsv": "amrfinderplus",
@@ -53,6 +50,8 @@ def get_parsable_files(path: str, name: str) -> list:
             missing_files.append(output_file)
 
     if is_complete:
+        parsable_files[f"{path}/main/qc/summary/{name}-original.json"] = "qc"
+        parsable_files[f"{path}/main/qc/summary/{name}-final.json"] = "qc"
         return [is_complete, parsable_files]
     else:
         return [is_complete, missing_files]
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bactopia"
-version = "1.0.5"
+version = "1.0.6"
 description = "A Python package for working with Bactopia"
 authors = [
     "Robert A. Petit III <[email protected]>",