forked from jacahill/AR_Tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
classify_VLAR.pc.py
90 lines (81 loc) · 2.58 KB
/
classify_VLAR.pc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import sys,os
import subprocess
#callme="intersectBed -a " + GFF + " -b " + line
bedtools="~/bedtools2-master/bin/intersectBed"
geneGFF="/ru-auth/local/home/jcahill/scratch/pubdata/Galga5/GCF_000002315.4_Gallus_gallus-5.0_genomic.gene.pc.gff"
exonGFF="/ru-auth/local/home/jcahill/scratch/pubdata/Galga5/GCF_000002315.4_Gallus_gallus-5.0_genomic.exon.mtrRNA.gff"
for line in sys.stdin:
assoc="none"
genename="none"
data=line.split("\t")
# exoninter=intersectGFF(data,exonGFF)
# geneinter=intersectGFF(data,exonGFF)
ggff=open(geneGFF)
for gene in ggff:
gdat=gene.split("\t")
if data[0]==gdat[0] and int(data[2])>=int(gdat[3]) and int(data[1])<=int(gdat[4]):
assoc="intron"
egff=open(exonGFF)
gname=gene.split("Name=")
gnameb=gname[1].split(";")
genename=gnameb[0]
for exon in egff:
edat=exon.split("\t")
if data[0]==edat[0] and int(data[2])>=int(edat[3]) and int(data[1])<=int(edat[4]):
assoc="exon"
break
egff.close()
break
ggff.close()
if assoc=="none":
ggff=open(geneGFF)
mindist="none"
side="none"
for gene in ggff:
gdat=gene.split("\t")
if data[0]==gdat[0]:
if gdat[6]=="+":
delta_a=abs(int(data[1])-int(gdat[3]))
delta_b=abs(int(data[2])-int(gdat[3]))
delta=min([delta_a,delta_b])
if delta_a<=delta_b:
direction="up"
else:
direction="down"
elif gdat[6]=="-":
delta_b=abs(int(data[1])-int(gdat[4]))
delta_a=abs(int(data[2])-int(gdat[4]))
delta=min([delta_a,delta_b])
if delta_a<=delta_b:
direction="up"
else:
direction="down"
if mindist=="none" or delta<mindist:
mindist=delta
gname=gene.split("Name=")
gnameb=gname[1].split(";")
genename=gnameb[0]
if mindist<=10000 and direction=="up":
assoc="5p_10k"
elif mindist<=10000 and direction=="down":
assoc="3p_10k"
else:
assoc="intergenic"
if assoc=="exon":
out=assoc+"\t"+genename+"\t.\t"+line[:-1]
print out
elif assoc=="intron":
out=assoc+"\t"+genename+"\t.\t"+line[:-1]
print out
elif assoc=="intergenic":
out=assoc+"\t"+genename+"\t"+str(mindist)+"\t"+line[:-1]
print out
else:
out=assoc+"\t"+genename+"\t.\t"+line[:-1]
print out
# file=open("tmp.txt", "w")
# file.write(line)
# callme=bedtools+" -a " + GFF + " -b tmp.txt"
# print callme
# subprocess.call(bedtools, "-a", GFF, "-b", "tmp.txt")
# file.close()