microsoft / graspologic
1
# Copyright 2019 NeuroData (http://neurodata.io)
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4 2
# you may not use this file except in compliance with the License.
5 2
# You may obtain a copy of the License at
6
#
7
#     http://www.apache.org/licenses/LICENSE-2.0
8 2
#
9 2
# Unless required by applicable law or agreed to in writing, software
10 2
# distributed under the License is distributed on an "AS IS" BASIS,
11 2
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 2
# See the License for the specific language governing permissions and
13
# limitations under the License.
14

15 2
import numpy as np
16 2
from graspy.simulations import sample_edges
17 0

18 2

19 2
def check_dirloop(directed, loops):
20 2
    if type(directed) is not bool:
21
        raise TypeError("directed is not of type bool.")
22
    if type(loops) is not bool:
23 2
        raise TypeError("loops is not of type bool.")
24 2

25 0

26 0
def check_r(r):
27
    if not np.issubdtype(type(r), np.floating):
28 2
        raise TypeError("r is not of type float.")
29 0
    elif r < -1 or r > 1:
30 0
        msg = "r must between -1 and 1."
31
        raise ValueError(msg)
32

33 2

34 2
def check_rel_er(p, r):
35 2
    if p + r * (1 - p) < 0:
36 2
        msg = "p + r * (1 - p) should be bigger than 0"
37 0
        raise ValueError(msg)
38 0

39
    if p * (1 - r) < 0:
40 2
        msg = "p * (1 - r) should be bigger than 0"
41 0
        raise ValueError(msg)
42 0

43

44
def check_rel_sbm(p, r):
45 2
    for i in range(np.array(p).shape[0]):
46
        for j in range(np.array(p).shape[1]):
47
            if p[i][j] + r * (1 - p[i][j]) < 0:
48
                msg = "p + r * (1 - p) should be bigger than 0"
49
                raise ValueError(msg)
50

51
            elif p[i][j] * (1 - r) < 0:
52
                msg = "p * (1 - r) should be bigger than 0"
53
                raise ValueError(msg)
54

55

56
def sample_edges_corr(P, R, directed=False, loops=False):
57
    """
58
    Generate a pair of correlated graphs with Bernoulli distribution.
59
    Both G1 and G2 are binary matrices.
60

61
    Parameters
62
    ----------
63
    P: np.ndarray, shape (n_vertices, n_vertices)
64
        Matrix of probabilities (between 0 and 1) for a random graph.
65

66
    R: np.ndarray, shape (n_vertices, n_vertices)
67
        Matrix of correlation (between 0 and 1) between graph pairs.
68

69
    directed: boolean, optional (default=False)
70
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
71
        matrix will be asymmetric.
72

73
    loops: boolean, optional (default=False)
74
        If False, no edges will be sampled in the diagonal. Otherwise, edges
75
        are sampled in the diagonal.
76

77
    References
78
    ----------
79
    .. [1] Vince Lyzinski, et al. "Seeded Graph Matching for Correlated Erdos-Renyi Graphs",
80
       Journal of Machine Learning Research 15, 2014
81

82
    Returns
83
    -------
84
    G1: ndarray (n_vertices, n_vertices)
85
        Adjacency matrix the same size as P representing a random graph.
86

87
    G2: ndarray (n_vertices, n_vertices)
88
        Adjacency matrix the same size as P representing a random graph.
89

90
    Examples
91
    --------
92
    >>> np.random.seed(1)
93
    >>> p = 0.5
94
    >>> r = 0.3
95
    >>> R = r * np.ones((5, 5))
96
    >>> P = p * np.ones((5, 5))
97

98
    To sample a correlated graph pair based on P and R matrices:
99

100
    >>> sample_edges_corr(P, R, directed = False, loops = False)
101
    (array([[0., 1., 0., 0., 0.],
102 2
            [1., 0., 0., 0., 0.],
103 2
            [0., 0., 0., 0., 1.],
104 2
            [0., 0., 0., 0., 1.],
105 0
            [0., 0., 1., 1., 0.]]), array([[0., 1., 0., 0., 0.],
106 2
            [1., 0., 1., 0., 1.],
107 0
            [0., 1., 0., 1., 1.],
108
            [0., 0., 1., 0., 1.],
109
            [0., 1., 1., 1., 0.]]))
110 2
    """
111 2
    # test input
112 2
    # check P
113 0
    if type(P) is not np.ndarray:
114 2
        raise TypeError("P must be numpy.ndarray")
115 0
    if len(P.shape) != 2:
116
        raise ValueError("P must have dimension 2 (n_vertices, n_vertices)")
117
    if P.shape[0] != P.shape[1]:
118 2
        raise ValueError("P must be a square matrix")
119

120 2
    # check R
121 2
    if type(R) is not np.ndarray:
122 2
        raise TypeError("R must be numpy.ndarray")
123 2
    if len(R.shape) != 2:
124 2
        raise ValueError("R must have dimension 2 (n_vertices, n_vertices)")
125
    if R.shape[0] != P.shape[1]:
126
        raise ValueError("R must be a square matrix")
127 2

128
    # check directed and loops
129
    check_dirloop(directed, loops)
130

131
    G1 = sample_edges(P, directed=directed, loops=loops)
132
    P2 = G1.copy()
133
    P2 = np.where(P2 == 1, P + R * (1 - P), P * (1 - R))
134
    G2 = sample_edges(P2, directed=directed, loops=loops)
135
    return G1, G2
136

137

138
def er_corr(n, p, r, directed=False, loops=False):
139
    """
140
    Generate a pair of correlated graphs with specified edge probability
141
    Both G1 and G2 are binary matrices.
142

143
    Parameters
144
    ----------
145
    n: int
146
       Number of vertices
147

148
    p: float
149
        Probability of an edge existing between two vertices, between 0 and 1.
150

151
    r: float
152
        The value of the correlation between the same vertices in two graphs.
153

154
    directed: boolean, optional (default=False)
155
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
156
        matrix will be asymmetric.
157

158
    loops: boolean, optional (default=False)
159
        If False, no edges will be sampled in the diagonal. Otherwise, edges
160
        are sampled in the diagonal.
161

162
    Returns
163
    -------
164
    G1: ndarray (n_vertices, n_vertices)
165
        Adjacency matrix the same size as P representing a random graph.
166

167
    G2: ndarray (n_vertices, n_vertices)
168
        Adjacency matrix the same size as P representing a random graph.
169

170
    Examples
171
    --------
172
    >>> np.random.seed(2)
173
    >>> p = 0.5
174
    >>> r = 0.3
175
    >>> n = 5
176

177
    To sample a correlated ER graph pair based on n, p and r:
178

179
    >>> er_corr(n, p, r, directed=False, loops=False)
180
    (array([[0., 0., 1., 0., 0.],
181 2
        [0., 0., 0., 1., 0.],
182 2
        [1., 0., 0., 1., 1.],
183 2
        [0., 1., 1., 0., 1.],
184 2
        [0., 0., 1., 1., 0.]]), array([[0., 1., 1., 1., 0.],
185 2
        [1., 0., 0., 1., 0.],
186
        [1., 0., 0., 1., 1.],
187
        [1., 1., 1., 0., 1.],
188 2
        [0., 0., 1., 1., 0.]]))
189 2
    """
190 2
    # test input
191 2
    # check n
192 2
    if not np.issubdtype(type(n), np.integer):
193
        raise TypeError("n is not of type int.")
194
    elif n <= 0:
195 2
        msg = "n must be > 0."
196
        raise ValueError(msg)
197

198 2
    # check p
199
    if not np.issubdtype(type(p), np.floating):
200
        raise TypeError("r is not of type float.")
201 2
    elif p < 0 or p > 1:
202
        msg = "p must between 0 and 1."
203 2
        raise ValueError(msg)
204 2

205 2
    # check r
206 2
    check_r(r)
207

208
    # check the relation between r and p
209 2
    check_rel_er(p, r)
210

211
    # check directed and loops
212
    check_dirloop(directed, loops)
213

214
    P = p * np.ones((n, n))
215
    R = r * np.ones((n, n))
216
    G1, G2 = sample_edges_corr(P, R, directed=directed, loops=loops)
217
    return G1, G2
218

219

220
def sbm_corr(n, p, r, directed=False, loops=False):
221
    """
222
    Generate a pair of correlated graphs with specified edge probability
223
    Both G1 and G2 are binary matrices.
224

225
    Parameters
226
    ----------
227
    n: list of int, shape (n_communities)
228
        Number of vertices in each community. Communities are assigned n[0], n[1], ...
229

230
    p: array-like, shape (n_communities, n_communities)
231
        Probability of an edge between each of the communities, where p[i, j] indicates
232
        the probability of a connection between edges in communities [i, j].
233
        0 < p[i, j] < 1 for all i, j.
234

235
    r: float
236
        Probability of the correlation between the same vertices in two graphs.
237

238
    directed: boolean, optional (default=False)
239
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
240
        matrix will be asymmetric.
241

242
    loops: boolean, optional (default=False)
243
        If False, no edges will be sampled in the diagonal. Otherwise, edges
244
        are sampled in the diagonal.
245

246
    Returns
247
    -------
248
    G1: ndarray (n_vertices, n_vertices)
249
        Adjacency matrix the same size as P representing a random graph.
250

251
    G2: ndarray (n_vertices, n_vertices)
252
        Adjacency matrix the same size as P representing a random graph.
253

254
    Examples
255
    --------
256
    >>> np.random.seed(3)
257
    >>> n = [3, 3]
258
    >>> p = [[0.5, 0.1], [0.1, 0.5]]
259
    >>> r = 0.3
260

261
    To sample a correlated SBM graph pair based on n, p and r:
262

263
    >>> sbm_corr(n, p, r, directed=False, loops=False)
264
    (array([[0., 1., 0., 0., 0., 0.],
265
        [1., 0., 0., 0., 0., 0.],
266
        [0., 0., 0., 0., 0., 0.],
267 2
        [0., 0., 0., 0., 0., 1.],
268 2
        [0., 0., 0., 0., 0., 0.],
269 2
        [0., 0., 0., 1., 0., 0.]]), array([[0., 1., 0., 0., 0., 0.],
270
        [1., 0., 0., 1., 1., 0.],
271 2
        [0., 0., 0., 0., 0., 0.],
272 2
        [0., 1., 0., 0., 0., 1.],
273 2
        [0., 1., 0., 0., 0., 0.],
274 2
        [0., 0., 0., 1., 0., 0.]]))
275
    """
276
    # test input
277 2
    # Check n
278 2
    if not isinstance(n, (list, np.ndarray)):
279 2
        msg = "n must be a list or np.array, not {}.".format(type(n))
280
        raise TypeError(msg)
281 2
    else:
282 2
        n = np.array(n)
283 2
        if not np.issubdtype(n.dtype, np.integer):
284 2
            msg = "There are non-integer elements in n"
285 2
            raise ValueError(msg)
286 2

287 2
    # Check p
288 2
    if not isinstance(p, (list, np.ndarray)):
289 2
        msg = "p must be a list or np.array, not {}.".format(type(p))
290 2
        raise TypeError(msg)
291
    else:
292
        p = np.array(p)
293 2
        if not np.issubdtype(p.dtype, np.number):
294
            msg = "There are non-numeric elements in p"
295
            raise ValueError(msg)
296 2
        elif p.shape != (n.size, n.size):
297
            msg = "p is must have shape len(n) x len(n), not {}".format(p.shape)
298
            raise ValueError(msg)
299 2
        elif np.any(p < 0) or np.any(p > 1):
300
            msg = "Values in p must be in between 0 and 1."
301 2
            raise ValueError(msg)
302 2

303 2
    # check r
304 2
    check_r(r)
305 2

306
    # check the relation between r and p
307
    check_rel_sbm(p, r)
308

309 2
    # check directed and loops
310 2
    check_dirloop(directed, loops)
311 2

312
    P = np.zeros((np.sum(n), np.sum(n)))
313
    block_indices = np.insert(np.cumsum(np.array(n)), 0, 0)
314
    for i in range(np.array(p).shape[0]):  # for each row
315
        for j in range(np.array(p).shape[1]):  # for each column
316
            P[
317
                block_indices[i] : block_indices[i + 1],
318
                block_indices[j] : block_indices[j + 1],
319
            ] = p[i][j]
320
    R = r * np.ones((np.sum(n), np.sum(n)))
321
    G1, G2 = sample_edges_corr(P, R, directed=directed, loops=loops)
322
    return G1, G2

Read our documentation on viewing source code .

Loading