microsoft / graspologic
1
# Copyright 2019 NeuroData (http://neurodata.io)
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4 2
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6 2
#
7 2
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9
# Unless required by applicable law or agreed to in writing, software
10 2
# distributed under the License is distributed on an "AS IS" BASIS,
11 2
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 2
# See the License for the specific language governing permissions and
13 2
# limitations under the License.
14 2

15 2
import numpy as np
16

17
from ..utils import symmetrize, cartprod
18 2
import warnings
19

20

21
def _n_to_labels(n):
22
    n_cumsum = n.cumsum()
23
    labels = np.zeros(n.sum(), dtype=np.int64)
24
    for i in range(1, len(n)):
25
        labels[n_cumsum[i - 1] : n_cumsum[i]] = i
26
    return labels
27

28

29
def sample_edges(P, directed=False, loops=False):
30
    """
31
    Gemerates a binary random graph based on the P matrix provided
32

33
    Each element in P represents the probability of a connection between
34
    a vertex indexed by the row i and the column j.
35

36
    Parameters
37
    ----------
38
    P: np.ndarray, shape (n_vertices, n_vertices)
39
        Matrix of probabilities (between 0 and 1) for a random graph
40
    directed: boolean, optional (default=False)
41
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
42
        matrix will be asymmetric.
43
    loops: boolean, optional (default=False)
44
        If False, no edges will be sampled in the diagonal. Otherwise, edges
45
        are sampled in the diagonal.
46

47
    Returns
48 2
    -------
49 2
    A: ndarray (n_vertices, n_vertices)
50 2
        Binary adjacency matrix the same size as P representing a random
51 2
        graph
52 2

53 2
    References
54 2
    ----------
55
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
56 2
       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
57 2
       Journal of the American Statistical Association, Vol. 107(499), 2012
58 2
    """
59 2
    if type(P) is not np.ndarray:
60 2
        raise TypeError("P must be numpy.ndarray")
61
    if len(P.shape) != 2:
62 2
        raise ValueError("P must have dimension 2 (n_vertices, n_dimensions)")
63
    if P.shape[0] != P.shape[1]:
64 2
        raise ValueError("P must be a square matrix")
65 2
    if not directed:
66
        # can cut down on sampling by ~half
67 2
        triu_inds = np.triu_indices(P.shape[0])
68
        samples = np.random.binomial(1, P[triu_inds])
69
        A = np.zeros_like(P)
70 2
        A[triu_inds] = samples
71
        A = symmetrize(A, method="triu")
72
    else:
73
        A = np.random.binomial(1, P)
74

75
    if loops:
76
        return A
77
    else:
78
        return A - np.diag(np.diag(A))
79

80

81
def er_np(n, p, directed=False, loops=False, wt=1, wtargs=None, dc=None, dc_kws={}):
82
    r"""
83
    Samples a Erdos Renyi (n, p) graph with specified edge probability.
84

85
    Erdos Renyi (n, p) graph is a simple graph with n vertices and a probability
86
    p of edges being connected.
87

88
    Read more in the :ref:`tutorials <simulations_tutorials>`
89

90
    Parameters
91
    ----------
92
    n: int
93
       Number of vertices
94

95
    p: float
96
        Probability of an edge existing between two vertices, between 0 and 1.
97

98
    directed: boolean, optional (default=False)
99
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
100
        matrix will be asymmetric.
101

102
    loops: boolean, optional (default=False)
103
        If False, no edges will be sampled in the diagonal. Otherwise, edges
104
        are sampled in the diagonal.
105

106
    wt: object, optional (default=1)
107
        Weight function for each of the edges, taking only a size argument.
108
        This weight function will be randomly assigned for selected edges.
109
        If 1, graph produced is binary.
110

111
    wtargs: dictionary, optional (default=None)
112
        Optional arguments for parameters that can be passed
113
        to weight function ``wt``.
114

115
    dc: function or array-like, shape (n_vertices)
116
        `dc` is used to generate a degree-corrected Erdos Renyi Model in
117
        which each node in the graph has a parameter to specify its expected degree
118
        relative to other nodes.
119

120
        - function:
121
            should generate a non-negative number to be used as a degree correction to
122
            create a heterogenous degree distribution. A weight will be generated for
123
            each vertex, normalized so that the sum of weights is 1.
124
        - array-like of scalars, shape (n_vertices):
125
            The weights should sum to 1; otherwise, they will be
126
            normalized and a warning will be thrown. The scalar associated with each
127
            vertex is the node's relative expected degree.
128

129
    dc_kws: dictionary
130
        Ignored if `dc` is none or array of scalar.
131
        If `dc` is a function, `dc_kws` corresponds to its named arguments.
132
        If not specified, in either case all functions will assume their default
133
        parameters.
134

135
    Returns
136
    -------
137
    A : ndarray, shape (n, n)
138
        Sampled adjacency matrix
139

140
    Examples
141
    --------
142
    >>> np.random.seed(1)
143
    >>> n = 4
144
    >>> p = 0.25
145

146
    To sample a binary Erdos Renyi (n, p) graph:
147

148
    >>> er_np(n, p)
149
    array([[0., 0., 1., 0.],
150
           [0., 0., 1., 0.],
151
           [1., 1., 0., 0.],
152
           [0., 0., 0., 0.]])
153 2

154 0
    To sample a weighted Erdos Renyi (n, p) graph with Uniform(0, 1) distribution:
155 2

156 2
    >>> wt = np.random.uniform
157 2
    >>> wtargs = dict(low=0, high=1)
158 2
    >>> er_np(n, p, wt=wt, wtargs=wtargs)
159 2
    array([[0.        , 0.        , 0.95788953, 0.53316528],
160 2
           [0.        , 0.        , 0.        , 0.        ],
161 2
           [0.95788953, 0.        , 0.        , 0.31551563],
162 2
           [0.53316528, 0.        , 0.31551563, 0.        ]])
163 2
    """
164 2
    if isinstance(dc, (list, np.ndarray)) and all(callable(f) for f in dc):
165 2
        raise TypeError("dc is not of type function or array-like of scalars")
166 2
    if not np.issubdtype(type(n), np.integer):
167
        raise TypeError("n is not of type int.")
168
    if not np.issubdtype(type(p), np.floating):
169 2
        raise TypeError("p is not of type float.")
170
    if type(loops) is not bool:
171
        raise TypeError("loops is not of type bool.")
172
    if type(directed) is not bool:
173
        raise TypeError("directed is not of type bool.")
174
    n_sbm = np.array([n])
175
    p_sbm = np.array([[p]])
176
    g = sbm(n_sbm, p_sbm, directed, loops, wt, wtargs, dc, dc_kws)
177
    return g
178

179

180
def er_nm(n, m, directed=False, loops=False, wt=1, wtargs=None):
181
    r"""
182
    Samples an Erdos Renyi (n, m) graph with specified number of edges.
183

184
    Erdos Renyi (n, m) graph is a simple graph with n vertices and exactly m
185
    number of total edges.
186

187
    Read more in the :ref:`tutorials <simulations_tutorials>`
188

189
    Parameters
190
    ----------
191
    n: int
192
        Number of vertices
193

194
    m: int
195
        Number of edges, a value between 1 and :math:`n^2`.
196

197
    directed: boolean, optional (default=False)
198
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
199
        matrix will be asymmetric.
200

201
    loops: boolean, optional (default=False)
202
        If False, no edges will be sampled in the diagonal. Otherwise, edges
203
        are sampled in the diagonal.
204

205
    wt: object, optional (default=1)
206
        Weight function for each of the edges, taking only a size argument.
207
        This weight function will be randomly assigned for selected edges.
208
        If 1, graph produced is binary.
209

210
    wtargs: dictionary, optional (default=None)
211
        Optional arguments for parameters that can be passed
212
        to weight function ``wt``.
213

214
    Returns
215
    -------
216
    A: ndarray, shape (n, n)
217
        Sampled adjacency matrix
218

219
    Examples
220
    --------
221
    >>> np.random.seed(1)
222
    >>> n = 4
223
    >>> m = 4
224

225
    To sample a binary Erdos Renyi (n, m) graph:
226

227
    >>> er_nm(n, m)
228
    array([[0., 1., 1., 1.],
229
           [1., 0., 0., 1.],
230
           [1., 0., 0., 0.],
231
           [1., 1., 0., 0.]])
232 2

233 2
    To sample a weighted Erdos Renyi (n, m) graph with Uniform(0, 1) distribution:
234 2

235 2
    >>> wt = np.random.uniform
236 2
    >>> wtargs = dict(low=0, high=1)
237 2
    >>> er_nm(n, m, wt=wt, wtargs=wtargs)
238 2
    array([[0.        , 0.66974604, 0.        , 0.38791074],
239 2
           [0.66974604, 0.        , 0.        , 0.39658073],
240 2
           [0.        , 0.        , 0.        , 0.93553907],
241 2
           [0.38791074, 0.39658073, 0.93553907, 0.        ]])
242 2
    """
243 2
    if not np.issubdtype(type(m), np.integer):
244 2
        raise TypeError("m is not of type int.")
245 2
    elif m <= 0:
246
        msg = "m must be > 0."
247
        raise ValueError(msg)
248 2
    if not np.issubdtype(type(n), np.integer):
249 2
        raise TypeError("n is not of type int.")
250 2
    elif n <= 0:
251
        msg = "n must be > 0."
252
        raise ValueError(msg)
253 2
    if type(directed) is not bool:
254 2
        raise TypeError("directed is not of type bool.")
255 2
    if type(loops) is not bool:
256 2
        raise TypeError("loops is not of type bool.")
257

258 2
    # check weight function
259 2
    if not np.issubdtype(type(wt), np.integer):
260
        if not callable(wt):
261 2
            raise TypeError("You have not passed a function for wt.")
262 2

263 2
    # compute max number of edges to sample
264
    if loops:
265 2
        if directed:
266 2
            max_edges = n ** 2
267 2
            msg = "n^2"
268 2
        else:
269 2
            max_edges = n * (n + 1) // 2
270 2
            msg = "n(n+1)/2"
271
    else:
272 2
        if directed:
273
            max_edges = n * (n - 1)
274 2
            msg = "n(n-1)"
275 2
        else:
276
            max_edges = n * (n - 1) // 2
277 2
            msg = "n(n-1)/2"
278
    if m > max_edges:
279
        msg = "You have passed a number of edges, {}, exceeding {}, {}."
280 2
        msg = msg.format(m, msg, max_edges)
281
        raise ValueError(msg)
282

283
    A = np.zeros((n, n))
284 2
    # check if directedness is desired
285
    if directed:
286
        if loops:
287 2
            # use all of the indices
288
            idx = np.where(np.logical_not(A))
289 2
        else:
290
            # use only the off-diagonal indices
291 2
            idx = np.where(~np.eye(n, dtype=bool))
292
    else:
293 2
        # use upper-triangle indices, and ignore diagonal according
294 2
        # to loops argument
295 2
        idx = np.triu_indices(n, k=int(loops is False))
296

297 2
    # get idx in 1d coordinates by ravelling
298 2
    triu = np.ravel_multi_index(idx, A.shape)
299
    # choose M of them
300 2
    triu = np.random.choice(triu, size=m, replace=False)
301
    # unravel back
302
    triu = np.unravel_index(triu, A.shape)
303 2
    # check weight function
304
    if not np.issubdtype(type(wt), np.number):
305
        wt = wt(size=m, **wtargs)
306
    A[triu] = wt
307

308
    if not directed:
309
        A = symmetrize(A, method="triu")
310

311
    return A
312

313

314
def sbm(
315
    n,
316
    p,
317
    directed=False,
318
    loops=False,
319
    wt=1,
320
    wtargs=None,
321
    dc=None,
322
    dc_kws={},
323
    return_labels=False,
324
):
325
    """
326
    Samples a graph from the stochastic block model (SBM).
327

328
    SBM produces a graph with specified communities, in which each community can
329
    have different sizes and edge probabilities.
330

331
    Read more in the :ref:`tutorials <simulations_tutorials>`
332

333
    Parameters
334
    ----------
335
    n: list of int, shape (n_communities)
336
        Number of vertices in each community. Communities are assigned n[0], n[1], ...
337

338
    p: array-like, shape (n_communities, n_communities)
339
        Probability of an edge between each of the communities, where p[i, j] indicates
340
        the probability of a connection between edges in communities [i, j].
341
        0 < p[i, j] < 1 for all i, j.
342

343
    directed: boolean, optional (default=False)
344
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
345
        matrix will be asymmetric.
346

347
    loops: boolean, optional (default=False)
348
        If False, no edges will be sampled in the diagonal. Otherwise, edges
349
        are sampled in the diagonal.
350

351
    wt: object or array-like, shape (n_communities, n_communities)
352
        if Wt is an object, a weight function to use globally over
353
        the sbm for assigning weights. 1 indicates to produce a binary
354
        graph. If Wt is an array-like, a weight function for each of
355
        the edge communities. Wt[i, j] corresponds to the weight function
356
        between communities i and j. If the entry is a function, should
357
        accept an argument for size. An entry of Wt[i, j] = 1 will produce a
358
        binary subgraph over the i, j community.
359

360
    wtargs: dictionary or array-like, shape (n_communities, n_communities)
361
        if Wt is an object, Wtargs corresponds to the trailing arguments
362
        to pass to the weight function. If Wt is an array-like, Wtargs[i, j]
363
        corresponds to trailing arguments to pass to Wt[i, j].
364

365
    dc: function or array-like, shape (n_vertices) or (n_communities), optional
366
        `dc` is used to generate a degree-corrected stochastic block model [1] in
367
        which each node in the graph has a parameter to specify its expected degree
368
        relative to other nodes within its community.
369

370
        - function:
371
            should generate a non-negative number to be used as a degree correction to
372
            create a heterogenous degree distribution. A weight will be generated for
373
            each vertex, normalized so that the sum of weights in each block is 1.
374
        - array-like of functions, shape (n_communities):
375
            Each function will generate the degree distribution for its respective
376
            community.
377
        - array-like of scalars, shape (n_vertices):
378
            The weights in each block should sum to 1; otherwise, they will be
379
            normalized and a warning will be thrown. The scalar associated with each
380
            vertex is the node's relative expected degree within its community.
381

382
    dc_kws: dictionary or array-like, shape (n_communities), optional
383
        Ignored if `dc` is none or array of scalar.
384
        If `dc` is a function, `dc_kws` corresponds to its named arguments.
385
        If `dc` is an array-like of functions, `dc_kws` should be an array-like, shape
386
        (n_communities), of dictionary. Each dictionary is the named arguments
387
        for the corresponding function for that community.
388
        If not specified, in either case all functions will assume their default
389
        parameters.
390

391
    return_labels: boolean, optional (default=False)
392
        If False, only output is adjacency matrix. Otherwise, an additional output will
393
        be an array with length equal to the number of vertices in the graph, where each
394
        entry in the array labels which block a vertex in the graph is in.
395

396
    References
397
    ----------
398
    .. [1] Tai Qin and Karl Rohe. "Regularized spectral clustering under the
399
        Degree-Corrected Stochastic Blockmodel," Advances in Neural Information
400
        Processing Systems 26, 2013
401

402
    Returns
403
    -------
404
    A: ndarray, shape (sum(n), sum(n))
405
        Sampled adjacency matrix
406
    labels: ndarray, shape (sum(n))
407
        Label vector
408

409
    Examples
410
    --------
411
    >>> np.random.seed(1)
412
    >>> n = [3, 3]
413
    >>> p = [[0.5, 0.1], [0.1, 0.5]]
414

415
    To sample a binary 2-block SBM graph:
416

417
    >>> sbm(n, p)
418
    array([[0., 0., 1., 0., 0., 0.],
419
           [0., 0., 1., 0., 0., 1.],
420
           [1., 1., 0., 0., 0., 0.],
421
           [0., 0., 0., 0., 1., 0.],
422
           [0., 0., 0., 1., 0., 0.],
423
           [0., 1., 0., 0., 0., 0.]])
424

425
    To sample a weighted 2-block SBM graph with Poisson(2) distribution:
426

427 2
    >>> wt = np.random.poisson
428 2
    >>> wtargs = dict(lam=2)
429 2
    >>> sbm(n, p, wt=wt, wtargs=wtargs)
430
    array([[0., 4., 0., 1., 0., 0.],
431 2
           [4., 0., 0., 0., 0., 2.],
432 2
           [0., 0., 0., 0., 0., 0.],
433 2
           [1., 0., 0., 0., 0., 0.],
434 2
           [0., 0., 0., 0., 0., 0.],
435
           [0., 2., 0., 0., 0., 0.]])
436
    """
437 2
    # Check n
438 2
    if not isinstance(n, (list, np.ndarray)):
439 2
        msg = "n must be a list or np.array, not {}.".format(type(n))
440
        raise TypeError(msg)
441 2
    else:
442 2
        n = np.array(n)
443 2
        if not np.issubdtype(n.dtype, np.integer):
444 2
            msg = "There are non-integer elements in n"
445 2
            raise ValueError(msg)
446 2

447 2
    # Check p
448 2
    if not isinstance(p, (list, np.ndarray)):
449 2
        msg = "p must be a list or np.array, not {}.".format(type(p))
450 2
        raise TypeError(msg)
451
    else:
452
        p = np.array(p)
453 2
        if not np.issubdtype(p.dtype, np.number):
454 2
            msg = "There are non-numeric elements in p"
455 2
            raise ValueError(msg)
456 2
        elif p.shape != (n.size, n.size):
457 2
            msg = "p is must have shape len(n) x len(n), not {}".format(p.shape)
458 2
            raise ValueError(msg)
459
        elif np.any(p < 0) or np.any(p > 1):
460
            msg = "Values in p must be in between 0 and 1."
461 2
            raise ValueError(msg)
462

463 2
    # Check wt and wtargs
464 2
    if not np.issubdtype(type(wt), np.number) and not callable(wt):
465
        if not isinstance(wt, (list, np.ndarray)):
466 2
            msg = "wt must be a numeric, list, or np.array, not {}".format(type(wt))
467 2
            raise TypeError(msg)
468 2
        if not isinstance(wtargs, (list, np.ndarray)):
469 2
            msg = "wtargs must be a numeric, list, or np.array, not {}".format(
470 2
                type(wtargs)
471 2
            )
472
            raise TypeError(msg)
473 2

474 2
        wt = np.array(wt, dtype=object)
475 2
        wtargs = np.array(wtargs, dtype=object)
476 2
        # if not number, check dimensions
477
        if wt.shape != (n.size, n.size):
478 2
            msg = "wt must have size len(n) x len(n), not {}".format(wt.shape)
479 2
            raise ValueError(msg)
480
        if wtargs.shape != (n.size, n.size):
481
            msg = "wtargs must have size len(n) x len(n), not {}".format(wtargs.shape)
482 2
            raise ValueError(msg)
483 2
        # check if each element is a function
484 2
        for element in wt.ravel():
485 2
            if not callable(element):
486 2
                msg = "{} is not a callable function.".format(element)
487 2
                raise TypeError(msg)
488 2
    else:
489
        wt = np.full(p.shape, wt, dtype=object)
490 2
        wtargs = np.full(p.shape, wtargs, dtype=object)
491 2

492
    # Check directed
493 2
    if not directed:
494 2
        if np.any(p != p.T):
495 2
            raise ValueError("Specified undirected, but P is directed.")
496 2
        if np.any(wt != wt.T):
497
            raise ValueError("Specified undirected, but Wt is directed.")
498
        if np.any(wtargs != wtargs.T):
499 2
            raise ValueError("Specified undirected, but Wtargs is directed.")
500

501 2
    K = len(n)  # the number of communities
502 2
    counter = 0
503 2
    # get a list of community indices
504
    cmties = []
505 2
    for i in range(0, K):
506 2
        cmties.append(range(counter, counter + n[i]))
507 2
        counter += n[i]
508 2

509
    # Check degree-corrected input parameters
510
    if callable(dc):
511 2
        # Check that the paramters are a dict
512
        if not isinstance(dc_kws, dict):
513 2
            msg = "dc_kws must be of type dict not{}".format(type(dc_kws))
514 0
            raise TypeError(msg)
515 0
        # Create the probability matrix for each vertex
516 2
        dcProbs = np.array([dc(**dc_kws) for _ in range(0, sum(n))], dtype="float")
517 2
        for indices in cmties:
518 2
            dcProbs[indices] /= sum(dcProbs[indices])
519 2
    elif isinstance(dc, (list, np.ndarray)) and np.issubdtype(
520 2
        np.array(dc).dtype, np.number
521 2
    ):
522 2
        dcProbs = np.array(dc, dtype=float)
523
        # Check size and element types
524 2
        if not np.issubdtype(dcProbs.dtype, np.number):
525 2
            msg = "There are non-numeric elements in dc, {}".format(dcProbs.dtype)
526 2
            raise ValueError(msg)
527 2
        elif dcProbs.shape != (sum(n),):
528 2
            msg = "dc must have size equal to the number of"
529 2
            msg += " vertices {0}, not {1}".format(sum(n), dcProbs.shape)
530 2
            raise ValueError(msg)
531 2
        elif np.any(dcProbs < 0):
532 2
            msg = "Values in dc cannot be negative."
533
            raise ValueError(msg)
534
        # Check that probabilities sum to 1 in each block
535 2
        for i in range(0, K):
536
            if not np.isclose(sum(dcProbs[cmties[i]]), 1, atol=1.0e-8):
537 2
                msg = "Block {} probabilities should sum to 1, normalizing...".format(i)
538
                warnings.warn(msg, UserWarning)
539 2
                dcProbs[cmties[i]] /= sum(dcProbs[cmties[i]])
540 2
    elif isinstance(dc, (list, np.ndarray)) and all(callable(f) for f in dc):
541
        dcFuncs = np.array(dc)
542 2
        if dcFuncs.shape != (len(n),):
543
            msg = "dc must have size equal to the number of blocks {0}, not {1}".format(
544
                len(n), dcFuncs.shape
545 2
            )
546 2
            raise ValueError(msg)
547 2
        # Check that the parameters type, length, and type
548 2
        if not isinstance(dc_kws, (list, np.ndarray)):
549 2
            # Allows for nonspecification of default parameters for all functions
550 2
            if dc_kws == {}:
551 2
                dc_kws = [{} for _ in range(0, len(n))]
552 2
            else:
553
                msg = "dc_kws must be of type list or np.ndarray, not {}".format(
554 2
                    type(dc_kws)
555
                )
556
                raise TypeError(msg)
557
        elif not len(dc_kws) == len(n):
558
            msg = "dc_kws must have size equal to"
559
            msg += " the number of blocks {0}, not {1}".format(len(n), len(dc_kws))
560
            raise ValueError(msg)
561
        elif not all(type(kw) == dict for kw in dc_kws):
562
            msg = "dc_kws elements must all be of type dict"
563 2
            raise TypeError(msg)
564 2
        # Create the probability matrix for each vertex
565
        dcProbs = np.array(
566 2
            [
567 2
                dcFunc(**kws)
568 2
                for dcFunc, kws, size in zip(dcFuncs, dc_kws, n)
569 2
                for _ in range(0, size)
570
            ],
571
            dtype="float",
572 2
        )
573
        # dcProbs = dcProbs.astype(float)
574 2
        for indices in cmties:
575 2
            dcProbs[indices] /= sum(dcProbs[indices])
576 2
            # dcProbs[indices] = dcProbs / dcProbs[indices].sum()
577
    elif dc is not None:
578 2
        msg = "dc must be a function or a list or np.array of numbers or callable"
579 2
        msg += " functions, not {}".format(type(dc))
580 2
        raise ValueError(msg)
581 2

582 2
    # End Checks, begin simulation
583
    A = np.zeros((sum(n), sum(n)))
584

585 2
    for i in range(0, K):
586
        if directed:
587 2
            jrange = range(0, K)
588 2
        else:
589 2
            jrange = range(i, K)
590
        for j in jrange:
591 2
            block_wt = wt[i, j]
592 2
            block_wtargs = wtargs[i, j]
593
            block_p = p[i, j]
594 2
            # identify submatrix for community i, j
595 0
            # cartesian product to identify edges for community i,j pair
596 0
            cprod = cartprod(cmties[i], cmties[j])
597 0
            # get idx in 1d coordinates by ravelling
598 0
            triu = np.ravel_multi_index((cprod[:, 0], cprod[:, 1]), A.shape)
599 2
            pchoice = np.random.uniform(size=len(triu))
600
            if dc is not None:
601
                # (v1,v2) connected with probability p*k_i*k_j*dcP[v1]*dcP[v2]
602
                num_edges = sum(pchoice < block_p)
603
                edge_dist = dcProbs[cprod[:, 0]] * dcProbs[cprod[:, 1]]
604 2
                # If n_edges greater than support of dc distribution, pick fewer edges
605 2
                if num_edges > sum(edge_dist > 0):
606 2
                    msg = "More edges sampled than nonzero pairwise dc entries."
607 2
                    msg += " Picking fewer edges"
608 2
                    warnings.warn(msg, UserWarning)
609
                    num_edges = sum(edge_dist > 0)
610 2
                triu = np.random.choice(
611 2
                    triu, size=num_edges, replace=False, p=edge_dist
612 2
                )
613 2
            else:
614 2
                # connected with probability p
615 2
                triu = triu[pchoice < block_p]
616 2
            if type(block_wt) is not int:
617 2
                block_wt = block_wt(size=len(triu), **block_wtargs)
618
            triu = np.unravel_index(triu, A.shape)
619
            A[triu] = block_wt
620 2

621
    if not loops:
622
        A = A - np.diag(np.diag(A))
623
    if not directed:
624
        A = symmetrize(A, method="triu")
625
    if return_labels:
626
        labels = _n_to_labels(n)
627
        return A, labels
628
    return A
629

630

631
def rdpg(X, Y=None, rescale=False, directed=False, loops=False, wt=1, wtargs=None):
632
    r"""
633
    Samples a random graph based on the latent positions in X (and
634
    optionally in Y)
635

636
    If only X :math:`\in\mathbb{R}^{n\times d}` is given, the P matrix is calculated as
637
    :math:`P = XX^T`. If X, Y :math:`\in\mathbb{R}^{n\times d}` is given, then
638
    :math:`P = XY^T`. These operations correspond to the dot products between a set of
639
    latent positions, so each row in X or Y represents the latent positions in
640
    :math:`\mathbb{R}^{d}` for a single vertex in the random graph
641
    Note that this function may also rescale or clip the resulting P
642
    matrix to get probabilities between 0 and 1, or remove loops.
643
    A binary random graph is then sampled from the P matrix described
644
    by X (and possibly Y).
645

646
    Read more in the :ref:`tutorials <simulations_tutorials>`
647

648
    Parameters
649
    ----------
650
    X: np.ndarray, shape (n_vertices, n_dimensions)
651
        latent position from which to generate a P matrix
652
        if Y is given, interpreted as the left latent position
653

654
    Y: np.ndarray, shape (n_vertices, n_dimensions) or None, optional
655
        right latent position from which to generate a P matrix
656

657
    rescale: boolean, optional (default=False)
658
        when rescale is True, will subtract the minimum value in
659
        P (if it is below 0) and divide by the maximum (if it is
660
        above 1) to ensure that P has entries between 0 and 1. If
661
        False, elements of P outside of [0, 1] will be clipped
662

663
    directed: boolean, optional (default=False)
664
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
665
        matrix will be asymmetric.
666

667
    loops: boolean, optional (default=False)
668
        If False, no edges will be sampled in the diagonal. Diagonal elements in P
669
        matrix are removed prior to rescaling (see above) which may affect behavior.
670
        Otherwise, edges are sampled in the diagonal.
671

672
    wt: object, optional (default=1)
673
        Weight function for each of the edges, taking only a size argument.
674
        This weight function will be randomly assigned for selected edges.
675
        If 1, graph produced is binary.
676

677
    wtargs: dictionary, optional (default=None)
678
        Optional arguments for parameters that can be passed
679
        to weight function ``wt``.
680

681
    Returns
682
    -------
683
    A: ndarray (n_vertices, n_vertices)
684
        A matrix representing the probabilities of connections between
685
        vertices in a random graph based on their latent positions
686

687
    References
688
    ----------
689
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
690
       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
691
       Journal of the American Statistical Association, Vol. 107(499), 2012
692

693
    Examples
694
    --------
695
    >>> np.random.seed(1)
696

697
    Generate random latent positions using 2-dimensional Dirichlet distribution.
698

699
    >>> X = np.random.dirichlet([1, 1], size=5)
700

701
    Sample a binary RDPG using sampled latent positions.
702

703
    >>> rdpg(X, loops=False)
704
    array([[0., 1., 0., 0., 1.],
705
           [1., 0., 0., 1., 1.],
706
           [0., 0., 0., 1., 1.],
707
           [0., 1., 1., 0., 0.],
708
           [1., 1., 1., 0., 0.]])
709

710 2
    Sample a weighted RDPG with Poisson(2) weight distribution
711 2

712
    >>> wt = np.random.poisson
713
    >>> wtargs = dict(lam=2)
714 2
    >>> rdpg(X, loops=False, wt=wt, wtargs=wtargs)
715
    array([[0., 4., 0., 2., 0.],
716
           [1., 0., 0., 0., 0.],
717 2
           [0., 0., 0., 0., 2.],
718 0
           [1., 0., 0., 0., 1.],
719
           [0., 2., 2., 0., 0.]])
720 2
    """
721 2
    P = p_from_latent(X, Y, rescale=rescale, loops=loops)
722 2
    A = sample_edges(P, directed=directed, loops=loops)
723

724 2
    # check weight function
725 2
    if (not np.issubdtype(type(wt), np.integer)) and (
726
        not np.issubdtype(type(wt), np.floating)
727
    ):
728 2
        if not callable(wt):
729
            raise TypeError("You have not passed a function for wt.")
730

731
    if not np.issubdtype(type(wt), np.number):
732
        wts = wt(size=(np.count_nonzero(A)), **wtargs)
733
        A[A > 0] = wts
734
    else:
735
        A *= wt
736
    return A
737

738

739
def p_from_latent(X, Y=None, rescale=False, loops=True):
740
    r"""
741
    Gemerates a matrix of connection probabilities for a random graph
742
    based on a set of latent positions
743

744
    If only X is given, the P matrix is calculated as :math:`P = XX^T`
745
    If X and Y is given, then :math:`P = XY^T`
746
    These operations correspond to the dot products between a set of latent
747
    positions, so each row in X or Y represents the latent positions in
748
    :math:`\mathbb{R}^{num-columns}` for a single vertex in the random graph
749
    Note that this function may also rescale or clip the resulting P
750
    matrix to get probabilities between 0 and 1, or remove loops
751

752
    Parameters
753
    ----------
754
    X: np.ndarray, shape (n_vertices, n_dimensions)
755
        latent position from which to generate a P matrix
756
        if Y is given, interpreted as the left latent position
757

758
    Y: np.ndarray, shape (n_vertices, n_dimensions) or None, optional
759
        right latent position from which to generate a P matrix
760

761
    rescale: boolean, optional (default=False)
762
        when rescale is True, will subtract the minimum value in
763
        P (if it is below 0) and divide by the maximum (if it is
764
        above 1) to ensure that P has entries between 0 and 1. If
765
        False, elements of P outside of [0, 1] will be clipped
766

767
    loops: boolean, optional (default=True)
768
        whether to allow elements on the diagonal (corresponding
769
        to self connections in a graph) in the returned P matrix.
770
        If loops is False, these elements are removed prior to
771
        rescaling (see above) which may affect behavior
772

773
    Returns
774
    -------
775 2
    P: ndarray (n_vertices, n_vertices)
776 2
        A matrix representing the probabilities of connections between
777 2
        vertices in a random graph based on their latent positions
778 2

779 2
    References
780 2
    ----------
781
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
782
       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
783 2
       Journal of the American Statistical Association, Vol. 107(499), 2012
784 2

785 2
    """
786
    if Y is None:
787 2
        Y = X
788 2
    if type(X) is not np.ndarray or type(Y) is not np.ndarray:
789 2
        raise TypeError("Latent positions must be numpy.ndarray")
790 2
    if X.ndim != 2 or Y.ndim != 2:
791 0
        raise ValueError(
792 2
            "Latent positions must have dimension 2 (n_vertices, n_dimensions)"
793 0
        )
794
    if X.shape != Y.shape:
795 2
        raise ValueError("Dimensions of latent positions X and Y must be the same")
796 2
    P = X @ Y.T
797 2
    # should this be before or after the rescaling, could give diff answers
798
    if not loops:
799
        P = P - np.diag(np.diag(P))
800
    if rescale:
801
        if P.min() < 0:
802
            P = P - P.min()
803
        if P.max() > 1:
804
            P = P / P.max()
805
    else:
806
        P[P < 0] = 0
807
        P[P > 1] = 1
808
    return P

Read our documentation on viewing source code .

Loading