microsoft / graspologic
1
# Copyright 2019 NeuroData (http://neurodata.io)
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4 2
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6 2
#
7 2
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14

15
import warnings
16 2

17
from .base import BaseEmbed
18
from ..utils import (
19
    import_graph,
20
    is_fully_connected,
21
    augment_diagonal,
22
    pass_to_ranks,
23
    is_unweighted,
24
)
25

26

27
class AdjacencySpectralEmbed(BaseEmbed):
28
    r"""
29
    Class for computing the adjacency spectral embedding of a graph.
30

31
    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean representation
32
    of the graph based on its adjacency matrix. It relies on an SVD to reduce
33
    the dimensionality to the specified k, or if k is unspecified, can find a number of
34
    dimensions automatically (see :class:`~graspy.embed.selectSVD`).
35

36
    Read more in the :ref:`tutorials <embed_tutorials>`
37

38
    Parameters
39
    ----------
40
    n_components : int or None, default = None
41
        Desired dimensionality of output data. If "full",
42
        n_components must be <= min(X.shape). Otherwise, n_components must be
43
        < min(X.shape). If None, then optimal dimensions will be chosen by
44
        :func:`~graspy.embed.select_dimension` using ``n_elbows`` argument.
45

46
    n_elbows : int, optional, default: 2
47
        If ``n_components=None``, then compute the optimal embedding dimension using
48
        :func:`~graspy.embed.select_dimension`. Otherwise, ignored.
49

50
    algorithm : {'randomized' (default), 'full', 'truncated'}, optional
51
        SVD solver to use:
52

53
        - 'randomized'
54
            Computes randomized svd using
55
            :func:`sklearn.utils.extmath.randomized_svd`
56
        - 'full'
57
            Computes full svd using :func:`scipy.linalg.svd`
58
        - 'truncated'
59
            Computes truncated svd using :func:`scipy.sparse.linalg.svds`
60

61
    n_iter : int, optional (default = 5)
62
        Number of iterations for randomized SVD solver. Not used by 'full' or
63
        'truncated'. The default is larger than the default in randomized_svd
64
        to handle sparse matrices that may have large slowly decaying spectrum.
65

66
    check_lcc : bool , optional (default = True)
67
        Whether to check if input graph is connected. May result in non-optimal
68
        results if the graph is unconnected. If True and input is unconnected,
69
        a UserWarning is thrown. Not checking for connectedness may result in
70
        faster computation.
71

72
    diag_aug : bool, optional (default = True)
73
        Whether to replace the main diagonal of the adjacency matrix with a vector
74
        corresponding to the degree (or sum of edge weights for a weighted network)
75
        before embedding. Empirically, this produces latent position estimates closer
76
        to the ground truth.
77

78

79
    Attributes
80
    ----------
81
    latent_left_ : array, shape (n_samples, n_components)
82
        Estimated left latent positions of the graph.
83
    latent_right_ : array, shape (n_samples, n_components), or None
84
        Only computed when the graph is directed, or adjacency matrix is assymetric.
85
        Estimated right latent positions of the graph. Otherwise, None.
86
    singular_values_ : array, shape (n_components)
87
        Singular values associated with the latent position matrices.
88

89
    See Also
90
    --------
91
    graspy.embed.selectSVD
92
    graspy.embed.select_dimension
93

94
    Notes
95
    -----
96
    The singular value decomposition:
97

98
    .. math:: A = U \Sigma V^T
99

100
    is used to find an orthonormal basis for a matrix, which in our case is the
101
    adjacency matrix of the graph. These basis vectors (in the matrices U or V) are
102 2
    ordered according to the amount of variance they explain in the original matrix.
103
    By selecting a subset of these basis vectors (through our choice of dimensionality
104
    reduction) we can find a lower dimensional space in which to represent the graph.
105

106
    References
107
    ----------
108
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
109
       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
110
       Journal of the American Statistical Association, Vol. 107(499), 2012
111 2
    """
112

113
    def __init__(
114
        self,
115
        n_components=None,
116
        n_elbows=2,
117
        algorithm="randomized",
118
        n_iter=5,
119 2
        check_lcc=True,
120 2
        diag_aug=True,
121 2
    ):
122
        super().__init__(
123 2
            n_components=n_components,
124
            n_elbows=n_elbows,
125
            algorithm=algorithm,
126
            n_iter=n_iter,
127
            check_lcc=check_lcc,
128
        )
129

130
        if not isinstance(diag_aug, bool):
131
            raise TypeError("`diag_aug` must be of type bool")
132
        self.diag_aug = diag_aug
133

134
    def fit(self, graph, y=None):
135
        """
136
        Fit ASE model to input graph
137 2

138
        Parameters
139 2
        ----------
140 2
        graph : array_like or networkx.Graph
141 2
            Input graph to embed.
142

143
        Returns
144
        -------
145
        self : object
146 2
            Returns an instance of self.
147
        """
148 2
        A = import_graph(graph)
149 2

150
        if self.check_lcc:
151 2
            if not is_fully_connected(A):
152 2
                msg = (
153
                    "Input graph is not fully connected. Results may not"
154
                    + "be optimal. You can compute the largest connected component by"
155
                    + "using ``graspy.utils.get_lcc``."
156
                )
157
                warnings.warn(msg, UserWarning)
158

159
        if self.diag_aug:
160
            A = augment_diagonal(A)
161

162
        self._reduce_dim(A)
163
        return self

Read our documentation on viewing source code .

Loading