Skip to content

Commit

Permalink
Merge branch 'main' of github.com:sunbeam-labs/sbx_kraken into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Scott G. Daniel committed Sep 12, 2023
2 parents a4f6cc3 + a21b558 commit fab71a4
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 14 deletions.
Binary file added .tests/data/reads/EMPTY_R1.fastq.gz
Binary file not shown.
Binary file added .tests/data/reads/EMPTY_R2.fastq.gz
Binary file not shown.
16 changes: 10 additions & 6 deletions .tests/e2e/test_full_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,23 +80,27 @@ def run_sunbeam(setup):
shutil.copytree(os.path.join(output_fp, "logs/"), "logs/")
shutil.copytree(os.path.join(project_dir, "stats/"), "stats/")

all_samples_fp = os.path.join(output_fp, "classify/kraken/all_samples.tsv")

benchmarks_fp = os.path.join(project_dir, "stats/")

yield all_samples_fp, benchmarks_fp
yield output_fp, benchmarks_fp


def test_full_run(run_sunbeam):
all_samples_fp, benchmarks_fp = run_sunbeam
output_fp, benchmarks_fp = run_sunbeam

all_samples_fp = os.path.join(output_fp, "classify/kraken/all_samples.tsv")

# Check output
assert os.path.exists(all_samples_fp)

with open(all_samples_fp) as f:
f.readline()
f.readline() # Headers
lines = f.readlines()
print(lines)
assert (
f.readline().strip()
== "2\t200.0\tk__Bacteria; p__; c__; o__; f__; g__; s__"
any(["2\t0.0\t200.0\tk__Bacteria; p__; c__; o__; f__; g__; s__" in x.strip() for x in lines])
)

with open(os.path.join(output_fp, "logs/kraken2_classify_report_EMPTY.log")) as f:
assert f.readline() == "Empty reads files"
1 change: 1 addition & 0 deletions sbx_kraken_env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ channels:
dependencies:
- biom-format
- kraken2
- python>=3
- pip:
- git+http://github.com/smdabdoub/kraken-biom.git
9 changes: 1 addition & 8 deletions biom_to_tsv.py → scripts/biom_to_tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,13 @@ def parse_kraken(kraken_file):
dic_kraken = {}

with open(kraken_file) as f:

for line in f:

if (
line[0] == "C"
): # if first character is 'C', then this is a comment line and can be skipped.

continue

else: # otherwise, this is a data line and should be parsed.

row = re.split(
"\t|\n", line
) # split the line into its components using tab or newline as delimiter.
Expand All @@ -47,7 +43,6 @@ def parse_kraken(kraken_file):
if (
sample_id not in dic_kraken
): # if this is the first time seeing this sample id...

dic_kraken[
sample_id
] = {} # create a new dictionary entry for this sample id...
Expand All @@ -57,11 +52,9 @@ def parse_kraken(kraken_file):
] = 1 # and initialize count for this taxon to 1 (since we've seen it once).

else: # otherwise, we've already seen this sample id before...

if (
taxon not in dic_kraken[sample_id]
): # if we haven't seen this particular taxon before...

dic_kraken[sample_id][
taxon
] = 1 # initialize count for that particular taxon to 1 (since we've seen it once).
Expand Down Expand Up @@ -136,10 +129,10 @@ def makeTableFromKrakOutAndTaxDict(dKrakOut, dTaxDict):
# Function to convert biom files to tsv files.
import skbio as skb


# Input: biom file
# Output: tsv file with the following columns: OTU ID, Kingdom, Phylum, Class, Order, Family, Genus, Species
def biom_to_tsv_skbio(biom_file, tsv_file):

table = skb.Table.read(biom_file)

table.to_tsv(tsv_file)
Expand Down
37 changes: 37 additions & 0 deletions scripts/kraken2_classify_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import gzip
import subprocess as sp
from pathlib import Path

def not_empty():
with gzip.open(snakemake.input[0], "rb") as f:
try:
file_content = f.read(1)
return len(file_content) > 0
except:
return False


if not_empty():
args = [
"kraken2",
"--gzip-compressed",
"--db",
f"{snakemake.params.db}",
"--report",
f"{snakemake.output.report}",
"--output",
f"{snakemake.output.raw}",
f"{snakemake.params.paired_end}",
]
[args.append(x) for x in snakemake.input]
output = sp.check_output(args)

with open(snakemake.log[0], "wb") as f:
f.write(output)
else:
with open(snakemake.log[0], "w") as f_log, open(
snakemake.output.report, "w"
) as f_out:
f_log.write("Empty reads files")
f_out.write("0\t0.0\tk__Bacteria; p__; c__; o__; f__; g__; s__")
Path(snakemake.output.raw).touch()

0 comments on commit fab71a4

Please sign in to comment.