def average(list):
    """Return the arithmetic mean of the numbers in ``list``.

    Parameters
    ----------
    list : iterable of numbers
        Any iterable is accepted.  Sized containers (list, tuple, ...) are
        used as-is; unsized iterables such as generators are materialised
        once so that they can be both counted and summed.

    Returns
    -------
    number or str
        ``sum(values) / len(values)``, or the string ``'N/A'`` when there
        are no values to average.
    """
    # NOTE: the parameter name shadows the builtin ``list``; it is kept so
    # that existing keyword callers keep working.  ``tuple`` is used below
    # because the builtin ``list`` is not reachable under that name here.
    values = list if hasattr(list, '__len__') else tuple(list)

    if len(values) == 0:
        # Placeholder instead of a ZeroDivisionError for an empty input.
        return 'N/A'
    return sum(values) / len(values)
# Summary of how many per-species extreme values were collected.
print(str(len(dmaxvalues)) + " intraspecific Dmax values.")
print(str(len(dmin_nnvalues)) + " interspecific Dmin_NN values.")

# One row per species (the dict key becomes the index); each value of
# sp_avg is an 8-element list whose positions match the column names below.
species_stat_columns = [
    'avg_intra',
    'intra_d_max',
    'n_intra',
    'avg_inter',
    'inter_dmin_nn',
    'n_inter',
    'nearest_neighbor',
    'd_nearestneighbor',
]
df_sp_avg = pd.DataFrame.from_dict(sp_avg, orient='index',
                                   columns=species_stat_columns)
df_sp_avg.to_csv(speciesstatsfile)
# Report the file that was actually written (speciesstatsfile), not the
# all-distances output file.
print("P-distance averages per species written to " + str(speciesstatsfile))
+Phyllonorycter_corylifoliella,0.03318901547472215,0.057245080500894455,15,0.13227002808500105,0.051771117166212535,138,Phyllonorycter_kuhlweiniella,[0.06618962432915922] +Caloptilia_braccatella,0.0021367521367521365,0.003205128205128205,3,0.15876771170827772,0.08653846153846154,99,Phyllonorycter_trifasciella,[0.09513274336283185] +Caloptilia_flava,0.0,0.0,1,0.1364973079068558,0.0871559633027523,70,Caloptilia_elongella,[0.0871559633027523] +Povolnya_leucapennella,N/A,N/A,0,0.1352055609898113,0.08944543828264759,34,Caloptilia_cuculipennella,[0.08944543828264759] +Aristaea_pavoniella,N/A,N/A,0,0.13186419834468582,0.10352422907488987,29,Phyllonorycter_muelleriella,[0.10352422907488987] +Phyllonorycter_kuhlweiniella,0.0,0.0,1,0.11391711368167647,0.05722070844686648,54,Phyllonorycter_muelleriella,[0.05947136563876652] +Phyllonorycter_muelleriella,N/A,N/A,0,0.11395594873217242,0.06637168141592921,26,Phyllonorycter_trifasciella,[0.06637168141592921] +Phyllonorycter_acerifoliella,N/A,N/A,0,0.12663792484550707,0.07465618860510806,25,Phyllonorycter_trifasciella,[0.07964601769911504] +Triberta_helianthemella,0.0033085194375516956,0.004962779156327543,3,0.15635373376409445,0.09429280397022333,66,Phyllonorycter_trifasciella,[0.12168141592920353] +Phyllonorycter_alpina,0.0017699115044247787,0.0017699115044247787,1,0.12345442836980369,0.037444933920704845,40,Phyllonorycter_AlnuscordataItaly,[0.04070796460176991] +Phyllonorycter_AlnuscordataItaly,0.0,0.0,1,0.13908556975136513,0.07079646017699115,30,Phyllonorycter_trifasciella,[0.08849557522123894] +Phyllocnistis_xenia,N/A,N/A,0,0.13755304101838756,0.054455445544554455,14,Phyllocnistis_unipunctella,[0.054455445544554455] +Parornix_torquillella,N/A,N/A,0,0.12278909649110797,0.0672782874617737,13,Parornix_polygrammella,[0.0672782874617737] +Phyllonorycter_abrasella,N/A,N/A,0,0.12389605657593228,0.07186544342507645,12,Phyllonorycter_nicellii,[0.07186544342507645] 
+Caloptilia_cuculipennella,N/A,N/A,0,0.12866669234928138,0.10091743119266056,11,Caloptilia_elongella,[0.10091743119266056] +Aspilapteryx_tringipennella,N/A,N/A,0,0.14517696521850926,0.11061946902654868,10,Phyllonorycter_trifasciella,[0.11061946902654868] +Leucospilapteryx_omissella,0.0015290519877675841,0.0015290519877675841,1,0.1497676704133809,0.1172566371681416,9,Phyllonorycter_trifasciella,[0.1172566371681416] +Dialectica_imperialella,N/A,N/A,0,0.13633073225827458,0.1084070796460177,8,Phyllonorycter_trifasciella,[0.1084070796460177] +Caloptilia_elongella,N/A,N/A,0,0.1409972277363622,0.1172566371681416,7,Phyllonorycter_trifasciella,[0.1172566371681416] +Phyllonorycter_hilarella,N/A,N/A,0,0.1215300434884941,0.08661417322834646,6,Phyllonorycter_nicellii,[0.08661417322834646] +Phyllocnistis_unipunctella,N/A,N/A,0,0.15231240089647172,0.13053097345132744,5,Phyllonorycter_trifasciella,[0.13053097345132744] +Phyllonorycter_trifasciella,N/A,N/A,0,0.10121681415929204,0.08628318584070796,4,Phyllonorycter_nicellii,[0.08628318584070796] +Parornix_polygrammella,N/A,N/A,0,0.11600407747196738,0.06666666666666667,3,Parornix_loganella,[0.06666666666666667] +Phyllonorycter_nicellii,N/A,N/A,0,0.13924291263740807,0.12252252252252252,2,Parornix_loganella,[0.12252252252252252] +Parornix_loganella,N/A,N/A,0,N/A,N/A,0,N/A,N/A