microsoft / graspologic
1
# Copyright 2019 NeuroData (http://neurodata.io)
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4 2
# you may not use this file except in compliance with the License.
5 2
# You may obtain a copy of the License at
6 2
#
7 2
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9 2
# Unless required by applicable law or agreed to in writing, software
10 2
# distributed under the License is distributed on an "AS IS" BASIS,
11 2
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14 2

15
import warnings
16
from collections import Iterable
17
from functools import reduce
18
from pathlib import Path
19

20
import networkx as nx
21
import numpy as np
22
from sklearn.utils import check_array
23

24

25
def import_graph(graph, copy=True):
26
    """
27
    A function for reading a graph and returning a shared data type.
28

29
    Parameters
30
    ----------
31
    graph: object
32
        Either array-like, shape (n_vertices, n_vertices) numpy array,
33
        or an object of type networkx.Graph.
34

35
    copy: bool, (default=True)
36
        Whether to return a copied version of array. If False and input is np.array,
37 2
        the output returns the original input.
38 2

39 2
    Returns
40 2
    -------
41 2
    out: array-like, shape (n_vertices, n_vertices)
42 0
        A graph.
43

44
    See Also
45 0
    --------
46 2
    networkx.Graph, numpy.array
47 2
    """
48 2
    if isinstance(graph, (nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph)):
49 2
        out = nx.to_numpy_array(graph, nodelist=sorted(graph.nodes), dtype=np.float)
50 2
    elif isinstance(graph, (np.ndarray, np.memmap)):
51 2
        shape = graph.shape
52
        if len(shape) > 3:
53 2
            msg = "Input tensor must have at most 3 dimensions, not {}.".format(
54 2
                len(shape)
55 2
            )
56
            raise ValueError(msg)
57
        elif len(shape) == 3:
58
            if shape[1] != shape[2]:
59
                msg = "Input tensor must have same number of vertices."
60
                raise ValueError(msg)
61
            min_features = shape[1]
62
            min_samples = 2
63
        else:
64
            min_features = np.max(shape)
65 2
            min_samples = min_features
66 2
        out = check_array(
67 2
            graph,
68
            dtype=[np.float64, np.float32],
69
            ensure_2d=True,
70 2
            allow_nd=True,  # For omni tensor input
71
            ensure_min_features=min_features,
72
            ensure_min_samples=min_samples,
73
            copy=copy,
74
        )
75
    else:
76
        msg = "Input must be networkx.Graph or np.array, not {}.".format(type(graph))
77
        raise TypeError(msg)
78
    return out
79

80

81
def import_edgelist(
82
    path, extension="edgelist", delimiter=None, nodetype=int, return_vertices=False
83
):
84
    """
85
    Function for reading a single or multiple edgelists. When importing multiple
86
    edgelists, the union of vertices from all graphs is computed so that each output
87
    graph have matched vertex set. The order of nodes are sorted by node values.
88

89
    Parameters
90
    ----------
91
    path : str, Path object, or iterable
92
        If ``path`` is a directory, then the importing order will be sorted in
93
        alphabetical order.
94

95
    extension : str, optional
96
        If ``path`` is a directory, then the function will convert all files
97
        with matching extension.
98

99
    delimiter : str or None, default=None, optional
100
        Delimiter of edgelist. If None, the delimiter is whitespace.
101

102
    nodetype : int (default), float, str, Python type, optional
103
       Convert node data from strings to specified type.
104

105
    return_vertices : bool, default=False, optional
106
        Returns the union of all ind
107

108 2
    Returns
109 2
    -------
110 2
    out : list of array-like, or array-like, shape (n_vertices, n_vertices)
111
        If ``path`` is a directory, a list of arrays is returned. If ``path`` is a file,
112
        an array is returned.
113 2

114 2
    vertices : array-like, shape (n_vertices, )
115 2
        If ``return_vertices`` == True, then returns an array of all vertices that were
116 2
        included in the output graphs.
117 2
    """
118 2
    # p = Path(path)
119
    if not isinstance(path, (str, Path, Iterable)):
120 2
        msg = "path must be a string or Iterable, not {}".format(type(path))
121
        raise TypeError(msg)
122 0

123
    # get a list of files to import
124 2
    if isinstance(path, (str, Path)):
125 0
        p = Path(path)
126 0
        if p.is_dir():
127
            files = sorted(p.glob("*" + extension))
128 2
        elif p.is_file():
129
            files = [p]
130
        else:
131
            raise ValueError("No graphs founds to import.")
132
    else:  # path is an iterable
133 2
        files = [Path(f) for f in path]
134 2

135
    if len(files) == 0:
136
        msg = "No files found with '{}' extension found.".format(extension)
137
        raise ValueError(msg)
138 2

139
    graphs = [
140
        nx.read_weighted_edgelist(f, nodetype=nodetype, delimiter=delimiter)
141 2
        for f in files
142 2
    ]
143

144
    if all(len(G.nodes) == 0 for G in graphs):
145 2
        msg = (
146 2
            "All graphs have 0 vertices. Please double check if proper "
147
            + "'delimiter' is given."
148 2
        )
149 2
        warnings.warn(msg, UserWarning)
150

151 2
    # Compute union of all vertices
152
    vertices = np.sort(reduce(np.union1d, [G.nodes for G in graphs]))
153
    out = [nx.to_numpy_array(G, nodelist=vertices, dtype=np.float) for G in graphs]
154 2

155 2
    # only return adjacency matrix if input is only 1 graph
156
    if len(out) == 1:
157
        out = out[0]
158 2

159 2
    if return_vertices:
160
        return out, vertices
161
    else:
162 2
        return out
163 2

164

165
def is_symmetric(X):
166 2
    return np.array_equal(X, X.T)
167 2

168

169
def is_loopless(X):
170 2
    return not np.any(np.diag(X) != 0)
171

172

173
def is_unweighted(X):
174
    return ((X == 0) | (X == 1)).all()
175

176

177
def is_almost_symmetric(X, atol=1e-15):
178
    return np.allclose(X, X.T, atol=atol)
179

180

181
def symmetrize(graph, method="avg"):
182
    """
183
    A function for forcing symmetry upon a graph.
184

185
    Parameters
186
    ----------
187
    graph: object
188
        Either array-like, (n_vertices, n_vertices) numpy matrix,
189
        or an object of type networkx.Graph.
190

191
    method: {'avg' (default), 'triu', 'tril',}, optional
192
        An option indicating which half of the edges to
193
        retain when symmetrizing.
194

195
            - 'avg'
196
                Retain the average weight between the upper and lower
197
                right triangle, of the adjacency matrix.
198
            - 'triu'
199
                Retain the upper right triangle.
200
            - 'tril'
201
                Retain the lower left triangle.
202

203
    Returns
204
    -------
205
    graph: array-like, shape (n_vertices, n_vertices)
206
        Graph with asymmetries removed.
207

208
    Examples
209 2
    --------
210 2
    >>> a = np.array([
211 2
    ...    [0, 1, 1],
212 0
    ...    [0, 0, 1],
213 2
    ...    [0, 0, 1]])
214 2
    >>> symmetrize(a, method="triu")
215
    array([[0, 1, 1],
216 0
           [1, 0, 1],
217 0
           [1, 1, 1]])
218
    """
219 2
    # graph = import_graph(graph)
220 2
    if method == "triu":
221
        graph = np.triu(graph)
222
    elif method == "tril":
223 2
        graph = np.tril(graph)
224
    elif method == "avg":
225
        graph = (np.triu(graph) + np.tril(graph)) / 2
226
    else:
227
        msg = "You have not passed a valid parameter for the method."
228
        raise ValueError(msg)
229
    # A = A + A' - diag(A)
230
    graph = graph + graph.T - np.diag(np.diag(graph))
231
    return graph
232

233

234
def remove_loops(graph):
235
    """
236
    A function to remove loops from a graph.
237

238 2
    Parameters
239 2
    ----------
240
    graph: object
241 2
        Either array-like, (n_vertices, n_vertices) numpy matrix,
242
        or an object of type networkx.Graph.
243

244 2
    Returns
245
    -------
246
    graph: array-like, shape(n_vertices, n_vertices)
247
        the graph with self-loops (edges between the same node) removed.
248
    """
249
    graph = import_graph(graph)
250
    graph = graph - np.diag(np.diag(graph))
251

252
    return graph
253

254

255
def to_laplace(graph, form="DAD", regularizer=None):
256
    r"""
257
    A function to convert graph adjacency matrix to graph Laplacian.
258

259
    Currently supports I-DAD, DAD, and R-DAD Laplacians, where D is the diagonal
260
    matrix of degrees of each node raised to the -1/2 power, I is the
261
    identity matrix, and A is the adjacency matrix.
262

263
    R-DAD is regularized Laplacian: where :math:`D_t = D + regularizer*I`.
264

265
    Parameters
266
    ----------
267
    graph: object
268
        Either array-like, (n_vertices, n_vertices) numpy array,
269
        or an object of type networkx.Graph.
270

271
    form: {'I-DAD' (default), 'DAD', 'R-DAD'}, string, optional
272

273
        - 'I-DAD'
274
            Computes :math:`L = I - D_i*A*D_i`
275
        - 'DAD'
276
            Computes :math:`L = D_o*A*D_i`
277
        - 'R-DAD'
278
            Computes :math:`L = D_o^r*A*D_i^r`
279
            where :math:`D_o^r = D_o + regularizer * I` and likewise for :math:`D_i`
280

281
    regularizer: int, float or None, optional (default=None)
282
        Constant to add to the degree vector(s). If None, average node degree is added.
283
        If int or float, must be >= 0. Only used when ``form`` == 'R-DAD'.
284

285
    Returns
286
    -------
287
    L : numpy.ndarray
288
        2D (n_vertices, n_vertices) array representing graph
289
        Laplacian of specified form
290

291
    References
292
    ----------
293
    .. [1] Qin, Tai, and Karl Rohe. "Regularized spectral clustering
294
           under the degree-corrected stochastic blockmodel." In Advances
295
           in Neural Information Processing Systems, pp. 3120-3128. 2013
296

297
    .. [2] Rohe, Karl, Tai Qin, and Bin Yu. "Co-clustering directed graphs to discover
298
           asymmetries and directional communities." Proceedings of the National Academy
299
           of Sciences 113.45 (2016): 12679-12684.
300

301
    Examples
302 2
    --------
303 2
    >>> a = np.array([
304 2
    ...    [0, 1, 1],
305
    ...    [1, 0, 0],
306 2
    ...    [1, 0, 0]])
307
    >>> to_laplace(a, "DAD")
308 2
    array([[0.        , 0.70710678, 0.70710678],
309 2
           [0.70710678, 0.        , 0.        ],
310
           [0.70710678, 0.        , 0.        ]])
311

312
    """
313 2
    valid_inputs = ["I-DAD", "DAD", "R-DAD"]
314 2
    if form not in valid_inputs:
315 2
        raise TypeError("Unsuported Laplacian normalization")
316 2

317 2
    A = import_graph(graph)
318

319
    in_degree = np.sum(A, axis=0)
320 2
    out_degree = np.sum(A, axis=1)
321 2

322
    # regularize laplacian with parameter
323 2
    # set to average degree
324 2
    if form == "R-DAD":
325
        if regularizer is None:
326 2
            regularizer = np.mean(out_degree)
327 2
        elif not isinstance(regularizer, (int, float)):
328 2
            raise TypeError(
329
                "Regularizer must be a int or float, not {}".format(type(regularizer))
330 2
            )
331 2
        elif regularizer < 0:
332
            raise ValueError("Regularizer must be greater than or equal to 0")
333 2

334 2
        in_degree += regularizer
335
        out_degree += regularizer
336 2

337 2
    with np.errstate(divide="ignore"):
338 2
        in_root = 1 / np.sqrt(in_degree)  # this is 10x faster than ** -0.5
339 2
        out_root = 1 / np.sqrt(out_degree)
340 2

341 2
    in_root[np.isinf(in_root)] = 0
342 2
    out_root[np.isinf(out_root)] = 0
343

344
    in_root = np.diag(in_root)  # just change to sparse diag for sparse support
345 2
    out_root = np.diag(out_root)
346

347
    if form == "I-DAD":
348 2
        L = np.diag(in_degree) - A
349
        L = in_root @ L @ in_root
350
    elif form == "DAD" or form == "R-DAD":
351
        L = out_root @ A @ in_root
352
    if is_symmetric(A):
353
        return symmetrize(
354
            L, method="avg"
355
        )  # sometimes machine prec. makes this necessary
356
    return L
357

358

359
def is_fully_connected(graph):
360
    r"""
361
    Checks whether the input graph is fully connected in the undirected case
362
    or weakly connected in the directed case.
363

364
    Connected means one can get from any vertex u to vertex v by traversing
365
    the graph. For a directed graph, weakly connected means that the graph
366
    is connected after it is converted to an unweighted graph (ignore the
367
    direction of each edge)
368

369
    Parameters
370
    ----------
371
    graph: nx.Graph, nx.DiGraph, nx.MultiDiGraph, nx.MultiGraph, np.ndarray
372
        Input graph in any of the above specified formats. If np.ndarray,
373
        interpreted as an :math:`n \times n` adjacency matrix
374

375
    Returns
376
    -------
377
    boolean: True if the entire input graph is connected
378

379
    References
380
    ----------
381
    http://mathworld.wolfram.com/ConnectedGraph.html
382 2
    http://mathworld.wolfram.com/WeaklyConnectedDigraph.html
383 2

384 2
    Examples
385
    --------
386 2
    >>> a = np.array([
387 2
    ...    [0, 1, 0],
388 2
    ...    [1, 0, 0],
389 2
    ...    [0, 0, 0]])
390 2
    >>> is_fully_connected(a)
391 2
    False
392
    """
393
    if type(graph) is np.ndarray:
394 2
        if is_symmetric(graph):
395
            g_object = nx.Graph()
396
        else:
397
            g_object = nx.DiGraph()
398
        graph = nx.from_numpy_array(graph, create_using=g_object)
399
    if type(graph) in [nx.Graph, nx.MultiGraph]:
400
        return nx.is_connected(graph)
401
    elif type(graph) in [nx.DiGraph, nx.MultiDiGraph]:
402
        return nx.is_weakly_connected(graph)
403

404

405
def get_lcc(graph, return_inds=False):
406
    r"""
407
    Finds the largest connected component for the input graph.
408

409
    The largest connected component is the fully connected subgraph
410
    which has the most nodes.
411

412
    Parameters
413
    ----------
414
    graph: nx.Graph, nx.DiGraph, nx.MultiDiGraph, nx.MultiGraph, np.ndarray
415
        Input graph in any of the above specified formats. If np.ndarray,
416
        interpreted as an :math:`n \times n` adjacency matrix
417

418
    return_inds: boolean, default: False
419
        Whether to return a np.ndarray containing the indices in the original
420
        adjacency matrix that were kept and are now in the returned graph.
421 2
        Ignored when input is networkx object
422 2

423 2
    Returns
424 2
    -------
425 2
    graph: nx.Graph, nx.DiGraph, nx.MultiDiGraph, nx.MultiGraph, np.ndarray
426
        New graph of the largest connected component of the input parameter.
427 2

428 2
    inds: (optional)
429 2
        Indices from the original adjacency matrix that were kept after taking
430 2
        the largest connected component.
431 2
    """
432 2
    input_ndarray = False
433 2
    if type(graph) is np.ndarray:
434 2
        input_ndarray = True
435 2
        if is_symmetric(graph):
436 2
            g_object = nx.Graph()
437 2
        else:
438 2
            g_object = nx.DiGraph()
439 2
        graph = nx.from_numpy_array(graph, create_using=g_object)
440 2
    if type(graph) in [nx.Graph, nx.MultiGraph]:
441 2
        lcc_nodes = max(nx.connected_components(graph), key=len)
442
    elif type(graph) in [nx.DiGraph, nx.MultiDiGraph]:
443
        lcc_nodes = max(nx.weakly_connected_components(graph), key=len)
444 2
    lcc = graph.subgraph(lcc_nodes).copy()
445
    lcc.remove_nodes_from([n for n in lcc if n not in lcc_nodes])
446
    if return_inds:
447
        nodelist = np.array(list(lcc_nodes))
448
    if input_ndarray:
449
        lcc = nx.to_numpy_array(lcc)
450
    if return_inds:
451
        return lcc, nodelist
452
    return lcc
453

454

455
def get_multigraph_union_lcc(graphs, return_inds=False):
456
    r"""
457
    Finds the union of all multiple graphs, then compute the largest connected
458
    component.
459

460
    Parameters
461
    ----------
462
    graphs: list or np.ndarray
463
        List of array-like, (n_vertices, n_vertices), or list of np.ndarray
464
        nx.Graph, nx.DiGraph, nx.MultiDiGraph, nx.MultiGraph.
465 2

466 2
    return_inds: boolean, default: False
467
        Whether to return a np.ndarray containing the indices in the original
468
        adjacency matrix that were kept and are now in the returned graph.
469 2
        Ignored when input is networkx object
470 2

471 0
    Returns
472 0
    -------
473 2
    out : list or np.ndarray
474 2
        If input was a list
475 2
    """
476 2
    if isinstance(graphs, list):
477 0
        if not isinstance(graphs[0], np.ndarray):
478 0
            raise NotImplementedError
479 2

480
        out = [import_graph(g) for g in graphs]
481 0
        if len(set(map(np.shape, out))) != 1:
482 0
            msg = "All input graphs must have the same size"
483
            raise ValueError(msg)
484 2
        bar = np.stack(out).mean(axis=0)
485 2
    elif isinstance(graphs, np.ndarray):
486
        shape = graphs.shape
487 2
        if shape[1] != shape[2]:
488 2
            msg = "Input graphs must be square"
489 2
            raise ValueError(msg)
490 2
        bar = graphs.mean(axis=0)
491 2
    else:
492 2
        msg = "Expected list or np.ndarray, but got {} instead.".format(type(graphs))
493 0
        raise ValueError(msg)
494 2

495
    _, idx = get_lcc(bar, return_inds=True)
496
    idx = np.array(idx)
497 2

498
    if isinstance(graphs, np.ndarray):
499
        graphs[:, idx[:, None], idx]
500
    elif isinstance(graphs, list):
501
        if isinstance(graphs[0], np.ndarray):
502
            graphs = [g[idx[:, None], idx] for g in graphs]
503
    if return_inds:
504
        return graphs, idx
505
    return graphs
506

507

508
def get_multigraph_intersect_lcc(graphs, return_inds=False):
509
    r"""
510
    Finds the intersection of multiple graphs's largest connected components.
511

512
    Computes the largest connected component for each graph that was input, and
513
    takes the intersection over all of these resulting graphs. Note that this
514
    does not guarantee finding the largest graph where every node is shared among
515
    all of the input graphs.
516

517
    Parameters
518
    ----------
519
    graphs: list or np.ndarray
520
        if list, each element must be an :math:`n \times n` np.ndarray adjacency matrix
521

522
    return_inds: boolean, default: False
523
        Whether to return a np.ndarray containing the indices in the original
524
        adjacency matrix that were kept and are now in the returned graph.
525 2
        Ignored when input is networkx object
526 2

527 2
    Returns
528 2
    -------
529 2
    graph: nx.Graph, nx.DiGraph, nx.MultiDiGraph, nx.MultiGraph, np.ndarray
530 2
        New graph of the largest connected component of the input parameter.
531 2

532 2
    inds: (optional)
533 2
        Indices from the original adjacency matrix that were kept after taking
534 2
        the largest connected component
535 2
    """
536
    lcc_by_graph = []
537 2
    inds_by_graph = []
538 2
    for graph in graphs:
539 2
        lcc, inds = get_lcc(graph, return_inds=True)
540
        lcc_by_graph.append(lcc)
541
        inds_by_graph.append(inds)
542
    inds_intersection = reduce(np.intersect1d, inds_by_graph)
543 2
    new_graphs = []
544 2
    for graph in graphs:
545 2
        if type(graph) is np.ndarray:
546 2
            lcc = graph[inds_intersection, :][:, inds_intersection]
547 2
        else:
548 2
            lcc = graph.subgraph(inds_intersection).copy()
549 2
            lcc.remove_nodes_from([n for n in lcc if n not in inds_intersection])
550
        new_graphs.append(lcc)
551
    # this is not guaranteed be connected after one iteration because taking the
552
    # intersection of nodes among graphs can cause some components to become
553
    # disconnected, so, we check for this and run again if necessary
554 2
    recurse = False
555 2
    for new_graph in new_graphs:
556
        if not is_fully_connected(new_graph):
557 0
            recurse = True
558 2
            break
559 2
    if recurse:
560 2
        new_graphs, new_inds_intersection = get_multigraph_intersect_lcc(
561 2
            new_graphs, return_inds=True
562
        )
563 2
        # new inds intersection are the indices of new_graph that were kept on recurse
564
        # need to do this because indices could have shifted during recursion
565
        if type(graphs[0]) is np.ndarray:
566 2
            inds_intersection = inds_intersection[new_inds_intersection]
567
        else:
568
            inds_intersection = new_inds_intersection
569
    if type(graphs) != list:
570
        new_graphs = np.stack(new_graphs)
571
    if return_inds:
572
        return new_graphs, inds_intersection
573
    else:
574
        return new_graphs
575

576

577
def augment_diagonal(graph, weight=1):
578
    r"""
579
    Replaces the diagonal of an adjacency matrix with :math:`\frac{d}{nverts - 1}` where
580
    :math:`d` is the degree vector for an unweighted graph and the sum of magnitude of
581
    edge weights for each node for a weighted graph. For a directed graph the in/out
582
    :math:`d` is averaged.
583

584
    Parameters
585
    ----------
586
    graph: nx.Graph, nx.DiGraph, nx.MultiDiGraph, nx.MultiGraph, np.ndarray
587
        Input graph in any of the above specified formats. If np.ndarray,
588
        interpreted as an :math:`n \times n` adjacency matrix
589
    weight: float/int
590
        scalar value to multiply the new diagonal vector by
591

592
    Returns
593
    -------
594
    graph : np.array
595
        Adjacency matrix with average degrees added to the diagonal.
596

597 2
    Examples
598 2
    --------
599
    >>> a = np.array([
600 2
    ...    [0, 1, 1],
601
    ...    [1, 0, 0],
602 2
    ...    [1, 0, 0]])
603 2
    >>> augment_diagonal(a)
604 2
    array([[1. , 1. , 1. ],
605
           [1. , 0.5, 0. ],
606 2
           [1. , 0. , 0.5]])
607 2
    """
608
    graph = import_graph(graph)
609 2
    graph = remove_loops(graph)
610

611
    divisor = graph.shape[0] - 1
612 2

613
    in_degrees = np.sum(np.abs(graph), axis=0)
614
    out_degrees = np.sum(np.abs(graph), axis=1)
615
    degrees = (in_degrees + out_degrees) / 2
616

617
    diag = weight * degrees / divisor
618
    graph += np.diag(diag)
619

620
    return graph
621

622

623
def binarize(graph):
624
    """
625
    Binarize the input adjacency matrix.
626

627
    Parameters
628
    ----------
629
    graph: nx.Graph, nx.DiGraph, nx.MultiDiGraph, nx.MultiGraph, np.ndarray
630
        Input graph in any of the above specified formats. If np.ndarray,
631
        interpreted as an :math:`n \times n` adjacency matrix
632

633
    Returns
634
    -------
635 2
    graph : np.array
636 2
        Adjacency matrix with all nonzero values transformed to one.
637 2

638
    Examples
639
    --------
640 2
    >>> a = np.array([[0, 1, 2], [1, 0, 3], [2, 3, 0]])
641
    >>> binarize(a)
642
    array([[0., 1., 1.],
643
           [1., 0., 1.],
644 2
           [1., 1., 0.]])
645 2
    """
646
    graph = import_graph(graph)
647
    graph[graph != 0] = 1
648
    return graph
649

650

651
def cartprod(*arrays):
652
    """
653
    Compute the cartesian product of multiple arrays
654
    """
655
    N = len(arrays)
656
    return np.transpose(
657
        np.meshgrid(*arrays, indexing="ij"), np.roll(np.arange(N + 1), -1)
658
    ).reshape(-1, N)

Read our documentation on viewing source code .

Loading