Skip to content

Commit

Permalink
add speciesstats output
Browse files Browse the repository at this point in the history
  • Loading branch information
cdoorenweerd committed Jun 22, 2020
1 parent 076e18a commit 8cc41f3
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 3 deletions.
54 changes: 51 additions & 3 deletions pdistancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import argparse
import os
from statistics import mean
from Bio import SeqIO
from Bio.Alphabet import IUPAC
from basefunctions import IUPACdistance
Expand All @@ -23,6 +24,15 @@
inputfileformat = args.inputfileformat
inputfileclean = os.path.splitext(inputfile)[0]
outputfile = str(str(inputfileclean) + "_pdistances.csv")
speciesstatsfile = str(str(inputfileclean) + "_speciesstats.csv")


def average(list):
    """Return the arithmetic mean of ``list``, or ``'N/A'`` when it is empty.

    The parameter name shadows the builtin ``list``; it is kept as-is so
    existing callers are unaffected.

    Args:
        list: A sized collection of numbers.

    Returns:
        ``sum(list) / len(list)`` for a non-empty collection, otherwise the
        string ``'N/A'`` (used as a placeholder in the CSV output).
    """
    # Use an explicit length test rather than truthiness so that any object
    # supporting len() is handled exactly as before.
    if len(list) == 0:
        return 'N/A'
    return sum(list) / len(list)


pdistdict = []
Expand All @@ -48,9 +58,11 @@
listofspecies = createlistofspecies(inputfile, inputfileformat)
dmaxvalues = []
dmin_nnvalues = []
sp_avg = {}
for speciesname in listofspecies:
intraperspecies = []
interperspecies = []
neighbors = {}
for pair in pdistdict:
species1 = str(pair).split(".")[1]
species2 = str(pair).split(".")[3]
Expand All @@ -59,18 +71,54 @@
intraperspecies.append(pdist)
elif speciesname == species1 != species2:
interperspecies.append(pdist)
neighbors.update({species2: [pdist]})
# Build the summary row for the current species. Every statistic starts as the
# placeholder 'N/A' and is overwritten further down only when data exists.
d_max = 'N/A'
dmin_nn = 'N/A'
nearestneighbor = 'N/A'
d_nearestneighbor = 'N/A'
# Row layout by position: 0 average intra, 1 d_max, 2 number of intra values,
# 3 average inter, 4 dmin_nn, 5 number of inter values, 6 nearest neighbor,
# 7 nearest-neighbor distance. Positions 1, 4, 6 and 7 are patched below.
sp_avg.update({speciesname: [average(intraperspecies),
d_max,
len(intraperspecies),
average(interperspecies),
dmin_nn,
len(interperspecies),
nearestneighbor,
d_nearestneighbor]})
if len(intraperspecies) > 0:
# NOTE(review): the same maximum is appended to dmaxvalues here and again two
# lines below — this looks like a removed and an added diff line shown
# together; confirm against the actual file.
dmaxvalues.append(max(intraperspecies))
d_max = max(intraperspecies)
dmaxvalues.append(d_max)
sp_avg[speciesname][1] = d_max
if len(interperspecies) > 0:
# NOTE(review): same doubled append as for dmaxvalues above — confirm.
dmin_nnvalues.append(min(interperspecies))
dmin_nn = min(interperspecies)
dmin_nnvalues.append(dmin_nn)
sp_avg[speciesname][4] = dmin_nn
if len(neighbors) > 0:
# NOTE(review): each value in neighbors is a one-element list [pdist], and the
# neighbors.update(...) call earlier in this loop replaces any previous entry
# for the same species2. min() therefore compares the last-stored distance per
# species, not each species' smallest distance, so the result can differ from
# dmin_nn and is stored as a list rather than a number — confirm intent.
d_nearestneighbor = min(neighbors.values())
sp_avg[speciesname][7] = d_nearestneighbor
# Reverse lookup: take the first key whose stored value equals the minimum.
nearestneighbor = list(neighbors.keys())[list(neighbors.values()).index(d_nearestneighbor)]
sp_avg[speciesname][6] = nearestneighbor



# Report how many per-species extremes were collected by the loop above.
print(str(len(dmaxvalues)) + " intraspecific Dmax values.")
print(str(len(dmin_nnvalues)) + " interspecific Dmin_NN values.")

# One CSV row per species: the keys of sp_avg become the index, and the column
# names are listed in the same order as the values stored in each sp_avg row.
df_sp_avg = pd.DataFrame.from_dict(
    sp_avg,
    orient='index',
    columns=[
        'avg_intra',
        'intra_d_max',
        'n_intra',
        'avg_inter',
        'inter_dmin_nn',
        'n_inter',
        'nearest_neighbor',
        'd_nearestneighbor',
    ],
)
df_sp_avg.to_csv(speciesstatsfile)
# Name the file that was actually written (speciesstatsfile, not outputfile).
print("P-distance averages per species written to " + str(speciesstatsfile))

# Wrap each collected series in its own single-column frame.
df_intra = pd.DataFrame(data={'all_intra': intravalues})
df_intradmax = pd.DataFrame(data={'intra_dmax': dmaxvalues})
df_inter = pd.DataFrame(data={'all_inter': intervalues})
df_interdmin_nn = pd.DataFrame(data={'inter_dmin_nn': dmin_nnvalues})

# Place the four columns side by side and write them to the p-distance CSV.
distance_frames = [df_intra, df_intradmax, df_inter, df_interdmin_nn]
df_distances = pd.concat(distance_frames, axis=1, ignore_index=False)
df_distances.to_csv(outputfile)
for message in ("Results written to ", "All p-distances written to "):
    print(message + str(outputfile))
31 changes: 31 additions & 0 deletions test_speciesstats.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
,avg_intra,intra_d_max,n_intra,avg_inter,inter_dmin_nn,n_inter,nearest_neighbor,d_nearestneighbor
Phyllonorycter_issikii,N/A,N/A,0,0.1359183795019451,0.08149779735682819,45,Phyllonorycter_muelleriella,[0.0881057268722467]
Phyllonorycter_ulmifoliella,N/A,N/A,0,0.1396930425102001,0.07692307692307693,44,Phyllonorycter_AlnuscordataItaly,[0.08228980322003578]
Phyllonorycter_spinicolella,N/A,N/A,0,0.12489627910089698,0.0653950953678474,43,Phyllonorycter_muelleriella,[0.06828193832599119]
Cameraria_ohridella,N/A,N/A,0,0.13106852310428532,0.0918918918918919,42,Parornix_loganella,[0.0918918918918919]
Caloptilia_staintoni,0.008259587020648967,0.012389380530973451,3,0.13334313126946137,0.07871198568872988,117,Povolnya_leucapennella,[0.08228980322003578]
Phyllonorycter_corylifoliella,0.03318901547472215,0.057245080500894455,15,0.13227002808500105,0.051771117166212535,138,Phyllonorycter_kuhlweiniella,[0.06618962432915922]
Caloptilia_braccatella,0.0021367521367521365,0.003205128205128205,3,0.15876771170827772,0.08653846153846154,99,Phyllonorycter_trifasciella,[0.09513274336283185]
Caloptilia_flava,0.0,0.0,1,0.1364973079068558,0.0871559633027523,70,Caloptilia_elongella,[0.0871559633027523]
Povolnya_leucapennella,N/A,N/A,0,0.1352055609898113,0.08944543828264759,34,Caloptilia_cuculipennella,[0.08944543828264759]
Aristaea_pavoniella,N/A,N/A,0,0.13186419834468582,0.10352422907488987,29,Phyllonorycter_muelleriella,[0.10352422907488987]
Phyllonorycter_kuhlweiniella,0.0,0.0,1,0.11391711368167647,0.05722070844686648,54,Phyllonorycter_muelleriella,[0.05947136563876652]
Phyllonorycter_muelleriella,N/A,N/A,0,0.11395594873217242,0.06637168141592921,26,Phyllonorycter_trifasciella,[0.06637168141592921]
Phyllonorycter_acerifoliella,N/A,N/A,0,0.12663792484550707,0.07465618860510806,25,Phyllonorycter_trifasciella,[0.07964601769911504]
Triberta_helianthemella,0.0033085194375516956,0.004962779156327543,3,0.15635373376409445,0.09429280397022333,66,Phyllonorycter_trifasciella,[0.12168141592920353]
Phyllonorycter_alpina,0.0017699115044247787,0.0017699115044247787,1,0.12345442836980369,0.037444933920704845,40,Phyllonorycter_AlnuscordataItaly,[0.04070796460176991]
Phyllonorycter_AlnuscordataItaly,0.0,0.0,1,0.13908556975136513,0.07079646017699115,30,Phyllonorycter_trifasciella,[0.08849557522123894]
Phyllocnistis_xenia,N/A,N/A,0,0.13755304101838756,0.054455445544554455,14,Phyllocnistis_unipunctella,[0.054455445544554455]
Parornix_torquillella,N/A,N/A,0,0.12278909649110797,0.0672782874617737,13,Parornix_polygrammella,[0.0672782874617737]
Phyllonorycter_abrasella,N/A,N/A,0,0.12389605657593228,0.07186544342507645,12,Phyllonorycter_nicellii,[0.07186544342507645]
Caloptilia_cuculipennella,N/A,N/A,0,0.12866669234928138,0.10091743119266056,11,Caloptilia_elongella,[0.10091743119266056]
Aspilapteryx_tringipennella,N/A,N/A,0,0.14517696521850926,0.11061946902654868,10,Phyllonorycter_trifasciella,[0.11061946902654868]
Leucospilapteryx_omissella,0.0015290519877675841,0.0015290519877675841,1,0.1497676704133809,0.1172566371681416,9,Phyllonorycter_trifasciella,[0.1172566371681416]
Dialectica_imperialella,N/A,N/A,0,0.13633073225827458,0.1084070796460177,8,Phyllonorycter_trifasciella,[0.1084070796460177]
Caloptilia_elongella,N/A,N/A,0,0.1409972277363622,0.1172566371681416,7,Phyllonorycter_trifasciella,[0.1172566371681416]
Phyllonorycter_hilarella,N/A,N/A,0,0.1215300434884941,0.08661417322834646,6,Phyllonorycter_nicellii,[0.08661417322834646]
Phyllocnistis_unipunctella,N/A,N/A,0,0.15231240089647172,0.13053097345132744,5,Phyllonorycter_trifasciella,[0.13053097345132744]
Phyllonorycter_trifasciella,N/A,N/A,0,0.10121681415929204,0.08628318584070796,4,Phyllonorycter_nicellii,[0.08628318584070796]
Parornix_polygrammella,N/A,N/A,0,0.11600407747196738,0.06666666666666667,3,Parornix_loganella,[0.06666666666666667]
Phyllonorycter_nicellii,N/A,N/A,0,0.13924291263740807,0.12252252252252252,2,Parornix_loganella,[0.12252252252252252]
Parornix_loganella,N/A,N/A,0,N/A,N/A,0,N/A,N/A

0 comments on commit 8cc41f3

Please sign in to comment.