Skip to content

Commit

Permalink
cosmetic changes
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewprzh committed May 8, 2024
1 parent 113281d commit e19b169
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 6 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ Reads must be provided in FASTQ or FASTA format (can be gzipped). If you have al

IsoQuant expects reads to contain polyA tails. For more reliable transcript model construction, do not trim polyA tails.

IsoQuant can also take aligned Illumina reads to correct long-read spliced alignments. However, short reads are _not_
used to discover transcript models or compute abundances.

<a name="sec1.2"></a>
## Supported reference data
Expand All @@ -116,6 +118,8 @@ Reference genome is mandatory even when BAM files are provided.
Reference gene annotation is not mandatory, but is likely to increase precision and recall.
It can be provided in GFF/GTF format (can be gzipped).
In this case it will be converted to a [gffutils](https://pythonhosted.org/gffutils/installation.html) database. Information on converted databases will be stored in your `~/.config/IsoQuant/db_config.json` to increase speed of future runs. You can also provide a gffutils database manually. Make sure that chromosome/scaffold names are identical in the FASTA file and the gene annotation.
Note that gffutils databases may not work correctly on NFS shares. It is possible to set a designated folder for
the database with `--genedb_output` (different from the output directory).

A pre-constructed aligner index can also be provided to reduce mapping time.

Expand Down
8 changes: 4 additions & 4 deletions src/dataset_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,15 +410,15 @@ def __del__(self):
os.remove(self.args.gunzipped_reference)

def process_all_samples(self, input_data):
logger.info("Processing " + proper_plural_form("sample", len(input_data.samples)))
logger.info("Processing " + proper_plural_form("experiment", len(input_data.samples)))
for sample in input_data.samples:
self.process_sample(sample)
logger.info("Processed " + proper_plural_form("sample", len(input_data.samples)))
logger.info("Processed " + proper_plural_form("experiment", len(input_data.samples)))

# Run through all genes in db and count stats according to alignments given in bamfile_name
def process_sample(self, sample):
logger.info("Processing sample " + sample.prefix)
logger.info("Sample has " + proper_plural_form("BAM file", len(sample.file_list)) + ": " + ", ".join(
logger.info("Processing experiment " + sample.prefix)
logger.info("Experiment has " + proper_plural_form("BAM file", len(sample.file_list)) + ": " + ", ".join(
map(lambda x: x[0], sample.file_list)))
self.args.use_technical_replicas = self.args.read_group == "file_name" and len(sample.file_list) > 1

Expand Down
1 change: 1 addition & 0 deletions src/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def merge_counts(counter, label, chr_ids, unaligned_reads=0):
merged_file_handler.write("%s\t%d\n" % (v, stat_dict[v]))
counter.reads_for_tpm = stat_dict[ "__usable"]


def normalize_path(config_path, file_path):
if os.path.isabs(file_path):
return os.path.normpath(file_path)
Expand Down
2 changes: 1 addition & 1 deletion src/gene_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def set_introns_and_exons(self):
def set_feature_properties(self, isoforms_to_feature_map, feature_profiles):
similar_features = set()
contained_features = set()
# FIXME: change to interval tree instead of brute force
# TODO: change to interval tree instead of brute force
for f1 in feature_profiles.features:
for f2 in feature_profiles.features:
if f1 == f2:
Expand Down
1 change: 0 additions & 1 deletion src/input_data_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ def has_replicas(self):
return any(len(sample.file_list) > 1 for sample in self.samples)

def get_samples_from_yaml(self, yaml_file_path):
# TODO: allow relative paths, i.e. introduce a "path fixer" for non-absolute paths (relative to the YAML file)
sample_files = []
experiment_names = []
illumina_bam = []
Expand Down

0 comments on commit e19b169

Please sign in to comment.