Skip to content

Commit

Permalink
do not check GTF if .db already exists
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewprzh committed Apr 3, 2024
1 parent 5706cae commit fdb32bf
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 14 deletions.
12 changes: 0 additions & 12 deletions isoquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,18 +683,6 @@ def run_pipeline(args):

# convert GTF/GFF if needed
if args.genedb and not args.genedb.lower().endswith('db'):
logger.info("Checking input gene annotation")
gtf_is_correct, corrected_gtf, out_fname = check_gtf_duplicates(args.genedb)
if not gtf_is_correct:
new_gtf_path = os.path.join(args.output, out_fname)
with open(new_gtf_path, "w") as out_gtf:
out_gtf.write(corrected_gtf)
logger.error("Input GTF seems to be corrupted (see warnings above). "
"An attempt to correct this GTF was made, the result is written to %s" % new_gtf_path)
logger.error("Provide a correct GTF by fixing the original input GTF or checking the corrected one.")
exit(-3)
else:
logger.info("Gene annotation seems to be correct")
args.genedb = convert_gtf_to_db(args)

# map reads if fastqs are provided
Expand Down
23 changes: 21 additions & 2 deletions src/gtf2db.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,21 @@ def get_color(transcript_kind):


def gtf2db(gtf, db, complete_db=False):
logger.info("Checking input gene annotation")
gtf_is_correct, corrected_gtf, out_fname = check_gtf_duplicates(gtf)
if not gtf_is_correct:
outdir = os.path.dirname(db)
new_gtf_path = os.path.join(outdir, out_fname)
with open(new_gtf_path, "w") as out_gtf:
out_gtf.write(corrected_gtf)
logger.error("Input GTF seems to be corrupted (see warnings above).")
logger.error("An attempt to correct this GTF was made, the result is written to %s" % new_gtf_path)
logger.error("NB! some transcript / gene ids in the corrected annotation are modified.")
logger.error("Provide a correct GTF by fixing the original input GTF or checking the corrected one.")
exit(-3)
else:
logger.info("Gene annotation seems to be correct")

logger.info("Converting gene annotation file to .db format (takes a while)...")
gffutils.create_db(gtf, db, force=True, keep_order=True, merge_strategy='error',
sort_attribute_values=True, disable_infer_transcripts=complete_db,
Expand Down Expand Up @@ -128,8 +143,12 @@ def check_gtf_duplicates(gtf):

gtf_name = os.path.basename(gtf)
gtf_name, outer_ext = os.path.splitext(gtf_name)
gtf_name, inner_ext = os.path.splitext(gtf_name)
handle = gzip.open(gtf, "rt") if outer_ext.lower() in ['.gz', '.gzip', '.bgz'] else open(gtf, "rt")
if outer_ext.lower() in ['.gz', '.gzip', '.bgz']:
handle = gzip.open(gtf, "rt")
gtf_name, inner_ext = os.path.splitext(gtf_name)
else:
handle = open(gtf, "rt")
inner_ext = outer_ext

for l in handle.readlines():
line_count += 1
Expand Down

0 comments on commit fdb32bf

Please sign in to comment.