Skip to content

Commit

Permalink
clean up scripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
dlaehnemann committed Aug 17, 2023
1 parent 969a768 commit 060b095
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 17 deletions.
3 changes: 1 addition & 2 deletions workflow/rules/quant_3prime.smk
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,7 @@ use rule samtools_index as samtools_index_canonical with:

rule get_closest_3prime_aligned_pos:
input:
canonical_mapped_bam="results/canonical_mapped_bam/{sample}-{unit}.sorted.canonical.bam",
canonical_mapped_bam_index="results/canonical_mapped_bam/{sample}-{unit}.sorted.canonical.bam.bai",
canonical_ids="resources/canonical_ids.bed",
canonical_mapped_pos="results/canonical_mapped_bam/{sample}-{unit}.sorted.canonical.position.txt",
output:
canonical_mapped_3prime_pos=temp(
Expand Down
17 changes: 8 additions & 9 deletions workflow/scripts/get-3prime-max-positions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,21 @@

sys.stderr = open(snakemake.log[0], "w")

bam_file = snakemake.input["canonical_mapped_bam"]
sample_name = bam_file.split(".canonical.mapped.sorted.bam")[0]
# Bam file reading
bam_file = pysam.AlignmentFile(snakemake.input["canonical_mapped_bam"], "rb")
bam_header = bam_file.header.to_dict()
trans_length_data = pd.DataFrame(bam_header.get("SQ"))
trans_length_data.rename(columns={"SN": "transcript"}, inplace=True)
sample_name = f"{snakemake.wildcards['sample']}-{snakemake.wildcards['unit']}"

# BED file reading: load the tab-separated table passed as the
# `canonical_ids` input. The file is read with explicit column names and no
# `header` argument, so pandas treats the first line as data rather than as a
# header row. Only `transcript`, `transcript_length` and `strand` are kept;
# `transcript_start` is dropped right after loading.
# NOTE(review): the column names assume a 4-column layout of
# (transcript, start, length, strand) -- confirm against the rule that
# creates resources/canonical_ids.bed.
trans_length_data = pd.read_csv(
snakemake.input["canonical_ids"],
sep="\t",
names=["transcript", "transcript_start", "transcript_length", "strand"],
).drop(columns = ["transcript_start"])

# Aligned text file reading: load the tab-separated per-read table passed as
# the `canonical_mapped_pos` input, again with explicit column names and no
# header row. Its `transcript` column shares its name with the `transcript`
# column of `trans_length_data`.
# NOTE(review): presumably one row per aligned read, with `start` being the
# alignment start position on the transcript -- confirm against the rule that
# writes the *.position.txt file.
align_bam_txt = pd.read_csv(
snakemake.input["canonical_mapped_pos"],
sep="\t",
names=["read_name", "transcript", "start", "read", "quality"],
)
align_bam_txt["strand"] = align_bam_txt["transcript"].str.split("_", 1).str[1]
align_bam_txt["Transcript"] = align_bam_txt["transcript"].str.split("_", 1).str[0]
merge_data = align_bam_txt.merge(trans_length_data, on="transcript")

# reads aligned to forward strand
Expand Down
6 changes: 0 additions & 6 deletions workflow/scripts/get-sample-hist-bins.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,6 @@
# Get the sample names
sample_name = snakemake.params["samples"]

# Bam file reading
bam_file = pysam.AlignmentFile(snakemake.input["samtools_sort"], "rb")
bam_header = bam_file.header.to_dict()
trans_length_data = pd.DataFrame(bam_header.get("SQ"))
trans_length_data.rename(columns={"SN": "transcript"}, inplace=True)

# BED file reading
trans_length_data = pd.read_csv(
snakemake.input["canonical_ids"],
Expand Down

0 comments on commit 060b095

Please sign in to comment.