# Copyright 2019 NeuroData (http://neurodata.io)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from scipy.linalg import orthogonal_procrustes

from ..embed import AdjacencySpectralEmbed, OmnibusEmbed, select_dimension
from ..simulations import rdpg
from ..utils import import_graph, is_symmetric
from .base import BaseInference


class LatentPositionTest(BaseInference):
    r"""
    Two-sample hypothesis test for the problem of determining whether two random
    dot product graphs have the same latent positions.

    This test assumes that the two input graphs are vertex aligned, that is,
    there is a known mapping between vertices in the two graphs and the input graphs
    have their vertices sorted in the same order. Currently, the function only
    supports undirected graphs.

    Read more in the :ref:`tutorials <inference_tutorials>`

    Parameters
    ----------
    embedding : string, { 'ase' (default), 'omnibus'}
        String describing the embedding method to use:

        - 'ase'
            Embed each graph separately using adjacency spectral embedding
            and use Procrustes to align the embeddings.
        - 'omnibus'
            Embed all graphs simultaneously using omnibus embedding.

    n_components : None (default), or int
        Number of embedding dimensions. If None, the optimal embedding
        dimensions are found by the Zhu and Ghodsi algorithm.

    test_case : string, {'rotation' (default), 'scalar-rotation', 'diagonal-rotation'}
        describes the exact form of the hypothesis to test when using 'ase' or 'lse'
        as an embedding method. Ignored if using 'omnibus'. Given two latent positions,
        :math:`X_1` and :math:`X_2`, and an orthogonal rotation matrix :math:`R` that
        minimizes :math:`||X_1 - X_2 R||_F`:

        - 'rotation'
            .. math:: H_o: X_1 = X_2 R
        - 'scalar-rotation'
            .. math:: H_o: X_1 = c X_2 R
            where :math:`c` is a scalar, :math:`c > 0`
        - 'diagonal-rotation'
            .. math:: H_o: X_1 = D X_2 R
            where :math:`D` is an arbitrary diagonal matrix

    n_bootstraps : int, optional (default 500)
        Number of bootstrap simulations to run to generate the null distribution

    Attributes
    ----------
    null_distribution_1_, null_distribution_2_ : np.ndarray (n_bootstraps,)
        The distribution of T statistics generated under the null, using the first and
        and second input graph, respectively. The latent positions of each sample graph
        are used independently to sample random dot product graphs, so two null
        distributions are generated

    sample_T_statistic_ : float
        The observed difference between the embedded positions of the two input graphs
        after an alignment (the type of alignment depends on ``test_case``)

    p_value_1_, p_value_2_ : float
        The p value estimated from the null distributions from sample 1 and sample 2.

    p_value_ : float
        The overall p value from the test; this is the max of p_value_1_ and p_value_2_

    See also
    --------
    graspy.embed.AdjacencySpectralEmbed
    graspy.embed.OmnibusEmbed
    graspy.embed.selectSVD

    References
    ----------
    .. [1] Tang, M., A. Athreya, D. Sussman, V. Lyzinski, Y. Park, Priebe, C.E.
       "A Semiparametric Two-Sample Hypothesis Testing Problem for Random Graphs"
       Journal of Computational and Graphical Statistics, Vol. 26(2), 2017
    """

    def __init__(
        self, embedding="ase", n_components=None, n_bootstraps=500, test_case="rotation"
    ):
        # Validate types first, then values, so callers get precise errors.
        if type(embedding) is not str:
            raise TypeError("embedding must be str")
        if type(n_bootstraps) is not int:
            raise TypeError("n_bootstraps must be int")
        if type(test_case) is not str:
            raise TypeError("test_case must be str")
        if n_bootstraps < 1:
            # the guard accepts n_bootstraps == 1, so the message says "at least 1"
            raise ValueError(
                "{} is invalid number of bootstraps, must be at least 1".format(
                    n_bootstraps
                )
            )
        if embedding not in ["ase", "omnibus"]:
            raise ValueError("{} is not a valid embedding method.".format(embedding))
        if test_case not in ["rotation", "scalar-rotation", "diagonal-rotation"]:
            raise ValueError(
                "test_case must be one of 'rotation', 'scalar-rotation',"
                + "'diagonal-rotation'"
            )

        super().__init__(n_components=n_components)

        self.embedding = embedding
        self.n_bootstraps = n_bootstraps
        self.test_case = test_case
        # paper uses these always, but could be kwargs eventually. need to test
        self.rescale = False
        self.loops = False

    def _bootstrap(self, X_hat):
        """Generate a null distribution of T statistics.

        Repeatedly samples pairs of RDPGs from the estimated latent positions
        ``X_hat``, re-embeds each pair, and records the aligned difference norm.
        Returns an np.ndarray of shape (n_bootstraps,).
        """
        t_bootstrap = np.zeros(self.n_bootstraps)
        for i in range(self.n_bootstraps):
            A1_simulated = rdpg(X_hat, rescale=self.rescale, loops=self.loops)
            A2_simulated = rdpg(X_hat, rescale=self.rescale, loops=self.loops)
            # check_lcc=False: simulated graphs need not be connectivity-checked
            X1_hat_simulated, X2_hat_simulated = self._embed(
                A1_simulated, A2_simulated, check_lcc=False
            )
            t_bootstrap[i] = self._difference_norm(X1_hat_simulated, X2_hat_simulated)
        return t_bootstrap

    def _difference_norm(self, X1, X2):
        """Frobenius norm of the difference between the two embeddings.

        For 'ase' the embeddings are first aligned according to ``test_case``;
        in the omnibus case the embeddings are already in a common space.
        """
        if self.embedding in ["ase"]:
            if self.test_case == "rotation":
                R = orthogonal_procrustes(X1, X2)[0]
                return np.linalg.norm(X1 @ R - X2)
            elif self.test_case == "scalar-rotation":
                R, s = orthogonal_procrustes(X1, X2)
                # s / ||X1||_F^2 is the optimal scalar multiple of the rotation
                return np.linalg.norm(s / np.sum(X1 ** 2) * X1 @ R - X2)
            elif self.test_case == "diagonal-rotation":
                # row-normalize both embeddings before aligning; rows with
                # (near-)zero norm are left untouched to avoid division by zero
                normX1 = np.sum(X1 ** 2, axis=1)
                normX2 = np.sum(X2 ** 2, axis=1)
                normX1[normX1 <= 1e-15] = 1
                normX2[normX2 <= 1e-15] = 1
                X1 = X1 / np.sqrt(normX1[:, None])
                X2 = X2 / np.sqrt(normX2[:, None])
                R = orthogonal_procrustes(X1, X2)[0]
                return np.linalg.norm(X1 @ R - X2)
        else:
            # in the omni case we don't need to align
            return np.linalg.norm(X1 - X2)

    def _embed(self, A1, A2, check_lcc=True):
        """Embed both graphs into ``self.n_components`` dimensions.

        Returns a tuple ``(X1_hat, X2_hat)`` of latent position estimates.
        ``self.embedding`` is validated in ``__init__``, so exactly one branch runs.
        """
        if self.embedding == "ase":
            X1_hat = AdjacencySpectralEmbed(
                n_components=self.n_components, check_lcc=check_lcc
            ).fit_transform(A1)
            X2_hat = AdjacencySpectralEmbed(
                n_components=self.n_components, check_lcc=check_lcc
            ).fit_transform(A2)
        elif self.embedding == "omnibus":
            # omnibus embeds both graphs jointly into a shared space
            X_hat_compound = OmnibusEmbed(
                n_components=self.n_components, check_lcc=check_lcc
            ).fit_transform((A1, A2))
            X1_hat = X_hat_compound[0]
            X2_hat = X_hat_compound[1]
        return (X1_hat, X2_hat)

    def fit(self, A1, A2):
        """
        Fits the test to the two input graphs

        Parameters
        ----------
        A1, A2 : nx.Graph, nx.DiGraph, nx.MultiDiGraph, nx.MultiGraph, np.ndarray
            The two graphs to run a hypothesis test on.
            If np.ndarray, shape must be ``(n_vertices, n_vertices)`` for both graphs,
            where ``n_vertices`` is the same for both

        Returns
        -------
        self

        Raises
        ------
        NotImplementedError
            If either input graph is directed (asymmetric adjacency matrix).
        ValueError
            If the two graphs do not have matching dimensions.
        """
        A1 = import_graph(A1)
        A2 = import_graph(A2)
        if not is_symmetric(A1) or not is_symmetric(A2):
            raise NotImplementedError()  # TODO asymmetric case
        if A1.shape != A2.shape:
            raise ValueError("Input matrices do not have matching dimensions")
        if self.n_components is None:
            # get the last elbow from ZG for each and take the maximum
            num_dims1 = select_dimension(A1)[0][-1]
            num_dims2 = select_dimension(A2)[0][-1]
            self.n_components = max(num_dims1, num_dims2)
        X_hats = self._embed(A1, A2)
        sample_T_statistic = self._difference_norm(X_hats[0], X_hats[1])
        null_distribution_1 = self._bootstrap(X_hats[0])
        null_distribution_2 = self._bootstrap(X_hats[1])

        # using exact mc p-values (see, for example, Phipson and Smyth, 2010)
        p_value_1 = (
            len(null_distribution_1[null_distribution_1 >= sample_T_statistic]) + 1
        ) / (self.n_bootstraps + 1)
        p_value_2 = (
            len(null_distribution_2[null_distribution_2 >= sample_T_statistic]) + 1
        ) / (self.n_bootstraps + 1)

        # conservative: reject only if both null distributions agree
        p_value = max(p_value_1, p_value_2)

        self.null_distribution_1_ = null_distribution_1
        self.null_distribution_2_ = null_distribution_2
        self.sample_T_statistic_ = sample_T_statistic
        self.p_value_1_ = p_value_1
        self.p_value_2_ = p_value_2
        self.p_value_ = p_value

        return self
