diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/simreaduntil/simulator/readpool.py b/src/simreaduntil/simulator/readpool.py index 6292786..cbef976 100644 --- a/src/simreaduntil/simulator/readpool.py +++ b/src/simreaduntil/simulator/readpool.py @@ -52,7 +52,7 @@ class ReadPool: Do not forget to call finish() when done. Args: - reads_per_channel: whether reads are channel-specific + reads_per_channel: whether reads are channel-specific or any read can be assigned to any channel """ def __init__(self, reads_per_channel): self.lock = threading.Lock() @@ -86,18 +86,19 @@ def _get_new_read(self, channel=None) -> Tuple[str, Any]: """ raise NotImplementedError() - """ - Stop the read pool - - For example, if it is threaded, stop the thread - """ def finish(self): + """ + Stop the read pool + + For example, if it is threaded, stop the thread + """ pass def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): self.finish() + class ReadPoolFromIterable(ReadPool): """ Read pool that requests reads from generator @@ -166,11 +167,14 @@ def read_gen(): self.shuffled = shuffle_rand_state is not None self.reads_file_or_dir = reads_file_or_dir - """ - Check if the read pool can open the file/directory - """ @staticmethod def can_handle(file: Path) -> bool: + """ + Check if the read pool can open the file/directory + + Args: + file: file or directory + """ file = Path(file) return ( (file.is_dir() and any(file.glob("**/*.fasta"))) or @@ -180,12 +184,13 @@ def can_handle(file: Path) -> bool: def __repr__(self): return f"ReadPool(file = {self.reads_file_or_dir}, shuffled = {self.shuffled})" -""" -Threaded ReadPool that wraps another ReadPool and reads from it in another thread using a queue - -Note: Using a rng with ThreadedPoolWrapper is not thread-safe if rng is accessed from multiple threads -""" class ThreadedReadPoolWrapper(ReadPool): + """ + Threaded ReadPool that wraps another ReadPool and reads 
from it in another thread using a queue + + Note: Using a rng with ThreadedReadPoolWrapper is not thread-safe if rng is accessed from multiple threads + """ + def __init__(self, read_pool: ReadPool, queue_size: int): super().__init__(reads_per_channel=read_pool.reads_per_channel) self._read_pool = read_pool @@ -195,13 +200,17 @@ def __init__(self, read_pool: ReadPool, queue_size: int): self._reader_thread.start() self.definitely_empty = False - """ - Check if the read pool can open the file/directory - """ def can_handle(self, *args, **kwargs) -> bool: + """ + Check if the read pool can open the file/directory + + Args: + args, kwargs: passed to wrapped read pool + """ return self._read_pool.can_handle(*args, **kwargs) def _fill_queue(self): + """Keep the queue filled""" try: while True: read = self._read_pool.get_new_read() diff --git a/src/simreaduntil/usecase_helpers/simulator_with_readfish.py b/src/simreaduntil/usecase_helpers/simulator_with_readfish.py index 63bd33c..f6df996 100644 --- a/src/simreaduntil/usecase_helpers/simulator_with_readfish.py +++ b/src/simreaduntil/usecase_helpers/simulator_with_readfish.py @@ -87,7 +87,7 @@ def get_sim_params(sim_params_file, n_channels) -> SimParams: if n_channels != sim_params.n_channels: logger.warning(f"Using sim_params.n_channels={sim_params.n_channels} instead of {n_channels} because it was saved in the sim_params_file") - assert sorted(list(sim_params.gap_samplers.keys())) == {f"ch{i+1}" for i in range(n_channels)} # assumed by downstream plotting scripts + assert sorted(list(sim_params.gap_samplers.keys())) == sorted([f"ch{i+1}" for i in range(n_channels)]) # assumed by downstream plotting scripts return sim_params diff --git a/src/simreaduntil/usecase_helpers/utils.py b/src/simreaduntil/usecase_helpers/utils.py index 9c712c4..fcc2ee9 100644 --- a/src/simreaduntil/usecase_helpers/utils.py +++ b/src/simreaduntil/usecase_helpers/utils.py @@ -8,7 +8,7 @@ import os from pathlib import Path from textwrap import dedent 
-from typing import List, Optional +from typing import List, Optional, Tuple import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -74,7 +74,7 @@ def random_nanosim_reads_gen(random_state=np.random.default_rng(2), length_range # to load the FASTA file when the function is called rather than when the first read is requested (which may delay the simulation if an index has to be built first) @force_eval_generator_function -def perfect_reads_gen(fasta_filename: Path, read_lens_range: tuple[int], random_state=np.random.default_rng(1), nanosim_read_id=True): +def perfect_reads_gen(fasta_filename: Path, read_lens_range: Tuple[int], random_state=np.random.default_rng(1), nanosim_read_id=True): """ Generate perfect reads that align to the reference genome diff --git a/usecases/compute_absolute_enrichment.ipynb b/usecases/compute_absolute_enrichment.ipynb index 19a0d24..0b45eb4 100644 --- a/usecases/compute_absolute_enrichment.ipynb +++ b/usecases/compute_absolute_enrichment.ipynb @@ -400,14 +400,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(1.41, 1.4929629629629628, 1.4478169014084508)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# yield-corrected\n", - "1.49 * 1.23/1.24,\\\n", - "2.2 * 1.44/1.89, \\\n", - "3.04 * 1.7/2.84" + "# # yield-corrected\n", + "# 1.49 * 1.23/1.24,\\\n", + "# 2.2 * 1.44/1.89, \\\n", + "# 3.04 * 1.7/2.84\n", + "1.41 * 1.24/1.24,\\\n", + "2.03 * 1.39/1.89, \\\n", + "2.67 * 1.54/2.84" ] }, {