Skip to content

Commit

Permalink
introduce automatic filtering selection
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewprzh committed Apr 9, 2024
1 parent 666b79b commit 0293d7a
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 27 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -525,15 +525,16 @@ for ONT reads.
`--report_canonical`
Strategy for reporting novel transcripts based on canonical splice sites, should be one of:

* `auto` - automatic selection based on the data type and model construction strategy (default);
* `only_canonical` - report novel transcripts, which contain only canonical splice sites;
* `only_stranded` - report novel transcripts, for which the strand can be unambiguously derived using splice sites and
presence of a polyA tail, allowing some splice sites to be non-canonical (default);
presence of a polyA tail, allowing some splice sites to be non-canonical
* `all` -- report all transcript model regardless of their splice sites.


`--polya_requirement` Strategy for using polyA tails during transcript model construction, should be one of:

* `default` - polyA tails are required if at least 70% of the reads have polyA tail;
* `auto` - default behaviour: polyA tails are required if at least 70% of the reads have polyA tail;
polyA tails are always required for 1/2-exon transcripts when using ONT data (this is caused by elevated number of false 1/2-exonic alignments reported by minimap2);
* `never` - polyA tails are never required; use this option **at your own risk** as it may noticeably increase false discovery rate, especially for ONT data;
* `always` - reported transcripts are always required to have polyA support in the reads.
Expand Down
43 changes: 22 additions & 21 deletions isoquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ def bool_str(s):

def parse_args(cmd_args=None, namespace=None):
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
ref_args_group = parser.add_argument_group('Reference data:')
input_args_group = parser.add_argument_group('Input data:')
output_args_group = parser.add_argument_group('Output naming:')
pipeline_args_group = parser.add_argument_group('Pipeline options:')
ref_args_group = parser.add_argument_group('Reference data')
input_args_group = parser.add_argument_group('Input data')
output_args_group = parser.add_argument_group('Output naming')
pipeline_args_group = parser.add_argument_group('Pipeline options')

other_options = parser.add_argument_group("Additional options:")
show_full_help = '--full_help' in cmd_args
Expand Down Expand Up @@ -153,15 +153,13 @@ def add_hidden_option(*args, **kwargs): # show command only with --full-help
help="report novel monoexonic transcripts (true/false), "
"default: false for ONT, true for other data types")
add_additional_option("--report_canonical", type=str, choices=[e.name for e in StrandnessReportingLevel],
help="reporting level for novel transcripts based on canonical splice sites " +
"/".join([e.name for e in StrandnessReportingLevel]) +
"default: " + StrandnessReportingLevel.only_canonical.name,
default=StrandnessReportingLevel.only_canonical.name)
help="reporting level for novel transcripts based on canonical splice sites;"
" default: " + StrandnessReportingLevel.auto.name,
default=StrandnessReportingLevel.only_stranded.name)
add_additional_option("--polya_requirement", type=str, choices=[e.name for e in PolyAUsageStrategies],
help="require polyA tails to be present when reporting transcripts " +
"/".join([e.name for e in StrandnessReportingLevel]) +
"default: require polyA only when polyA percentage is >= 70%%",
default=PolyAUsageStrategies.default.name)
help="require polyA tails to be present when reporting transcripts; "
"default: auto (requires polyA only when polyA percentage is >= 70%%)",
default=PolyAUsageStrategies.auto.name)
# OUTPUT PROPERTIES
pipeline_args_group.add_argument("--threads", "-t", help="number of threads to use", type=int,
default="16")
Expand Down Expand Up @@ -600,24 +598,25 @@ def set_model_construction_options(args):
'min_novel_count', 'min_novel_count_rel',
'min_mono_count_rel', 'singleton_adjacent_cov',
'fl_only', 'novel_monoexonic',
'require_monointronic_polya', 'require_monoexonic_polya'))
'require_monointronic_polya', 'require_monoexonic_polya',
'report_canonical'))
strategies = {
'reliable': ModelConstructionStrategy(2, 0.5, 20, 5, 0.05, 1, 0.1, 0.1, 2, 4, 8, 0.05, 0.05, 50,
True, False, True, True),
True, False, True, True, StrandnessReportingLevel.only_canonical),
'default_pacbio': ModelConstructionStrategy(1, 0.5, 10, 2, 0.02, 1, 0.05, 0.05, 1, 2, 2, 0.02, 0.005, 100,
False, True, False, True),
False, True, False, True, StrandnessReportingLevel.only_stranded),
'sensitive_pacbio':ModelConstructionStrategy(1, 0.5, 5, 2, 0.005, 1, 0.01, 0.02, 1, 2, 2, 0.005, 0.001, 100,
False, True, False, False),
False, True, False, False, StrandnessReportingLevel.only_stranded),
'default_ont': ModelConstructionStrategy(1, 0.5, 20, 3, 0.02, 1, 0.05, 0.05, 1, 3, 3, 0.02, 0.02, 10,
False, False, True, True),
False, False, True, True, StrandnessReportingLevel.only_stranded),
'sensitive_ont': ModelConstructionStrategy(1, 0.5, 20, 3, 0.005, 1, 0.01, 0.02, 1, 2, 3, 0.005, 0.005, 10,
False, True, False, False),
False, True, False, False, StrandnessReportingLevel.only_stranded),
'fl_pacbio': ModelConstructionStrategy(1, 0.5, 10, 2, 0.02, 1, 0.05, 0.01, 1, 2, 3, 0.02, 0.005, 100,
True, True, False, False),
True, True, False, False, StrandnessReportingLevel.only_stranded),
'all': ModelConstructionStrategy(0, 0.3, 5, 1, 0.002, 1, 0.01, 0.01, 1, 1, 1, 0.002, 0.001, 500,
False, True, False, False),
False, True, False, False, StrandnessReportingLevel.all),
'assembly': ModelConstructionStrategy(0, 0.3, 5, 1, 0.05, 1, 0.01, 0.02, 1, 1, 1, 0.05, 0.01, 50,
False, True, False, False)
False, True, False, False, StrandnessReportingLevel.only_stranded)
}
strategy = strategies[args.model_construction_strategy]

Expand Down Expand Up @@ -654,6 +653,8 @@ def set_model_construction_options(args):
args.require_monoexonic_polya = strategy.require_monoexonic_polya
args.polya_requirement_strategy = PolyAUsageStrategies[args.polya_requirement]
args.report_canonical_strategy = StrandnessReportingLevel[args.report_canonical]
if args.report_canonical_strategy == StrandnessReportingLevel.auto:
args.report_canonical_strategy = strategy.report_canonical


def set_configs_directory(args):
Expand Down
7 changes: 3 additions & 4 deletions src/dataset_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ def clean_locks(chr_ids, base_name, fname_function):

@unique
class PolyAUsageStrategies(Enum):
default = 1
auto = 1
never = 2
always = 3


def set_polya_requirement_strategy(flag, polya_requirement_strategy):
if polya_requirement_strategy == PolyAUsageStrategies.default:
if polya_requirement_strategy == PolyAUsageStrategies.auto:
return flag
elif polya_requirement_strategy == PolyAUsageStrategies.never:
return False
Expand Down Expand Up @@ -558,8 +558,7 @@ def process_assigned_reads(self, sample, dump_filename):
logger.info(" PolyA tails are required for known monoexon transcripts to be reported: %s"
% ("yes" if self.args.require_monoexonic_polya else "no"))
logger.info(" PolyA tails are required for novel monoexon transcripts to be reported: %s" % "yes")
logger.info(" Report transcript for which the strand cannot be detected using canonical splice sites: %s"
% ("yes" if self.args.report_unstranded else "no"))
logger.info(" Splice site reporting level: %s" % self.args.report_canonical_strategy.name)

gff_printer = GFFPrinter(
sample.out_dir, sample.prefix, self.io_support, header=self.common_header
Expand Down
1 change: 1 addition & 0 deletions src/graph_based_model_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class StrandnessReportingLevel(Enum):
only_canonical = 1
only_stranded = 2
all = 3
auto = 10


class GraphBasedModelConstructor:
Expand Down

0 comments on commit 0293d7a

Please sign in to comment.