-
Notifications
You must be signed in to change notification settings - Fork 3
/
null_models.py
227 lines (199 loc) · 7.09 KB
/
null_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 4 13:22:11 2021
@author: mariapalazzi
"""
import random
import numpy as np
from scipy.optimize import root
from required_functions_null_models import avg_degrees_expoRG
#%%
def exponentialRG_proportional_proportional(M):
'''
Solve the system of equations of the maximum log-likelihood problem.
The system of equations is solved using scipy's root function with
the solver defined by 'method'.
The solutions correspond to the Lagrange multipliers. We used the Least-squares
method='lm' for the Levenberg-Marquardt approach.
It can happen that the solver ``method`` used by ``scipy.root``
does not converge to a solution.
Payrato et al (PRX 2019) and F. Saracco et al (Sci Rep 2015)
Parameters
----------
M : array
the binary biadjacency matrix.
Returns
-------
Pij : array, floats
The probability matrix.
Given the biadjacency matrix M, which describes the probabilities of
row-node r and column-node c being linked.
'''
R,C=M.shape #rows and cols
#degrees of "real" matrix
cols_degr = M.sum(axis=0)
rows_degr = M.sum(axis=1)
x0=np.random.uniform(0,1,size=(R+C))
Pij = np.zeros((R,C));
solution_vector = root(avg_degrees_expoRG, x0,args=(rows_degr,cols_degr),method='lm')
print("Solver successful for lagrange multipliers?", solution_vector.success)
print(solution_vector.message)
if solution_vector.success==False:
Pij=Pij*np.nan
print('Solution did not converge, returning P_ij with NAN')
return Pij
x = solution_vector.x[0:R]
y = solution_vector.x[R:R+C]
for p_index in range(len(rows_degr)):
for a_index in range(len(cols_degr)):
Pij[p_index,a_index] = (x[p_index]*y[a_index])/(1+x[p_index]*y[a_index])
return Pij
#%%
def bascompte_probabilistic_proportional(M):
'''
Bascopmte's probabilistic null model. Bacompte PNAS 2003.
Parameters
----------
M : array
the binary biadjacency matrix.
Returns
-------
p_ij : array
The probability matrix.
Given the biadjacency matrix M, which describes the probabilities of
row-node r and column-node c being linked.
'''
rows,cols=M.shape
# dregrees of the rows and cols nodes
cols_degr = M.sum(axis=0)
rows_degr = M.sum(axis=1)
# normalized degrees
rows_degr_norm = rows_degr/cols
cols_degr_norm = cols_degr/rows
# M_null=np.zeros((rows,cols),dtype=int)
M_n=np.zeros((rows,cols))
# M_rand_ij=np.random.uniform(0,1,size=(rows,cols))
#obtaining the matrix of probabilities
for i in range(rows):
for j in range(cols):
p_ij=0.5*(cols_degr_norm[j] + rows_degr_norm[i])
M_n[i,j]=p_ij
#null matrix
#M_null=(M_n>=M_rand_ij).astype(int)
return M_n
#%%
def corrected_probabilistic_proportional(M):
'''
corrected probabilistic null model, a variation of Bascompte's model
Parameters
----------
M : array
the binary biadjacency matrix.
Returns
-------
p_ij : array
The probability matrix.
Given the biadjacency matrix M, which describes the probabilities of
row-node r and column-node c being linked.
'''
rows,cols=M.shape
# dregrees of the rows and cols nodes
cols_degr = M.sum(axis=0)
rows_degr = M.sum(axis=1)
# normalized dregrees
rows_degr_norm = rows_degr/cols
cols_degr_norm = cols_degr/rows
degr_rows_from_cols_sampling = (cols_degr_norm.sum())/cols
degr_cols_from_rows_sampling = (rows_degr_norm.sum())/rows
#M_null=np.zeros((rows,cols),dtype=int)
M_n=np.zeros((rows,cols))
#M_rand_ij=np.random.uniform(0,1,size=(rows,cols))
#obtaining the matrix of probabilities
for i in range(rows):
for j in range(cols):
p_ij=0.5*(cols_degr_norm[j] +
((1 - cols_degr_norm[j])*(rows_degr_norm[i] - degr_rows_from_cols_sampling)) +
rows_degr_norm[i] +
((1 - rows_degr_norm[i])*(cols_degr_norm[j] - degr_cols_from_rows_sampling)) )
if p_ij<0:
p_ij=0
M_n[i,j]=p_ij
#null matrix
# M_null=(M_n>=M_rand_ij).astype(int)
# if symmetric==True:
# np.fill_diagonal(M_null, 0)
# M_=np.triu(M_null,k=1)+(np.triu(M_null,k=1)).T
# else:
# M_=M_null
return M_n
#%%
def equiprobable_equiprobable(matrix):
'''
Function to generate randomization with equiprobable degrees.
Inputs:
----------
input_matrix: array
the binary biadjacency matrix
output:
----------
result: array
randomized version of the original matrix with equiprobable degrees
'''
matrix=np.array(matrix)
R,C=matrix.shape
occs=matrix.sum()
fill=occs/float(R*C)
rm=np.zeros([R,C])
while rm.sum()<occs:
rr,rc=random.randrange(R),random.randrange(C)
if random.random()<=fill:
rm[rr][rc]=1
return rm.astype(int)
#%%
def curve_ball(m,presences,num_iterations=-1):
'''
Function to generate randomization with fixed degrees.
FF null model, Curveball (Strona et al. 2014)
Inputs:
----------
input_matrix: array
the binary biadjacency matrix
num_iterations: int
Number of pair comparisons for each matrix generation, if empty, it takes
the smallets dimension times five.
presences: list of lists (int)
a list of list containing the indices of column (or rows) nodes to
wich each row (or column) node has a link
output:
----------
result: array
randomized version of the original matrix with fixed degrees
'''
r_hp=presences.copy()
num_rows, num_cols = m.shape
num_iters = 5 * min(num_rows, num_cols) if num_iterations == -1 else num_iterations
for rep in range(num_iters):
ab = random.sample(range(len(r_hp)), 2) #randomly select two nodes
a = ab[0]
b = ab[1]
ab = set(r_hp[a]) & set(r_hp[b])# overlap between the two nodes
a_ba = set(r_hp[a]) - ab #other links of node a
if len(a_ba) != 0:
b_aa = set(r_hp[b]) - ab #other links of node b
if len(b_aa) != 0:
ab = list(ab)
a_ba = list(a_ba)
b_aa = list(b_aa)
random.shuffle(a_ba)
random.shuffle(b_aa)
max_swap_extent=min(len(a_ba), len(b_aa)) #max value for pair extractions
swap_extent = random.randint(1,max_swap_extent)
#pair extractions
r_hp[a] = ab+a_ba[:-swap_extent]+b_aa[-swap_extent:]
r_hp[b] = ab+b_aa[:-swap_extent]+a_ba[-swap_extent:]
out_mat = np.zeros([num_rows, num_cols], dtype='int8') if num_cols >= num_rows else np.zeros([num_cols,num_rows], dtype='int8')
for r in range(min(num_rows, num_cols)):
out_mat[r, r_hp[r]] = 1
result = out_mat if num_cols >= num_rows else out_mat.T
return result