microsoft / graspologic
1
from abc import abstractmethod
2

3
import numpy as np
4 2
from sklearn.base import BaseEstimator
5
from sklearn.utils.validation import check_is_fitted
6 2

7 2
from ..utils import import_graph, is_unweighted
8 2
from ..simulations import sample_edges
9

10 2

11 2
def _calculate_p(block):
12
    n_edges = np.count_nonzero(block)
13
    return n_edges / block.size
14 2

15 2

16 2
def _check_n_samples(n_samples):
17
    if not isinstance(n_samples, (int, float)):
18
        raise TypeError("n_samples must be a scalar value")
19 2
    if n_samples < 1:
20 2
        raise ValueError(
21 2
            "Invalid value for 'n_samples': %d . The sampling requires at "
22 2
            "least one sample." % (n_samples)
23 2
        )
24

25

26
def _n_to_labels(n):
27
    n_cumsum = n.cumsum()
28
    labels = np.zeros(n.sum(), dtype=np.int64)
29 2
    for i in range(1, len(n)):
30 0
        labels[n_cumsum[i - 1] : n_cumsum[i]] = i
31 0
    return labels
32 0

33 0

34 0
class BaseGraphEstimator(BaseEstimator):
35
    def __init__(self, directed=True, loops=False):
36
        if not isinstance(directed, bool):
37 2
            raise TypeError("`directed` must be of type bool")
38 2
        if not isinstance(loops, bool):
39 2
            raise TypeError("`loops` must be of type bool")
40 2
        self.directed = directed
41 2
        self.loops = loops
42 2

43 2
    def bic(self, graph):
44 2
        """
45
        Bayesian information criterion for the current model on the input graph.
46 2

47
        Note that this implicitly assumes the input graph is indexed like the
48
        fit model.
49

50
        Parameters
51
        ----------
52
        graph : np.ndarray
53
            Input graph
54

55
        Returns
56
        -------
57
        bic : float
58
            The lower the better
59
        """
60
        check_is_fitted(self, "p_mat_")
61
        return 2 * np.log(self.n_verts) * self._n_parameters() - 2 * self.score(graph)
62

63 0
    def mse(self, graph):
64 0
        """
65
        Compute mean square error for the current model on the input graph
66 2

67
        Note that this implicitly assumes the input graph is indexed like the
68
        fit model.
69

70
        Parameters
71
        ----------
72
        graph : np.ndarray
73
            Input graph
74

75
        Returns
76
        -------
77
        mse : float
78
            Mean square error for the model's fit P matrix
79
        """
80
        check_is_fitted(self, "p_mat_")
81
        return np.linalg.norm(graph - self.p_mat_) ** 2
82

83 0
    def score_samples(self, graph, clip=None):
84 0
        """
85
        Compute the weighted log probabilities for each potential edge.
86 2

87
        Note that this implicitly assumes the input graph is indexed like the
88
        fit model.
89

90
        Parameters
91
        ----------
92
        graph : np.ndarray
93
            Input graph. Must be same shape as model's ``p_mat_`` attribute
94

95
        clip : scalar or None, optional (default=None)
96
            Values for which to clip probability matrix, entries less than c or more
97
            than 1 - c are set to c or 1 - c, respectively.
98
            If None, values will not be clipped in the likelihood calculation, which may
99
            result in poorly behaved likelihoods depending on the model.
100

101
        Returns
102
        -------
103
        sample_scores : np.ndarray (size of ``graph``)
104
            log-likelihood per potential edge in the graph
105
        """
106
        check_is_fitted(self, "p_mat_")
107
        # P.ravel() <dot> graph * (1 - P.ravel()) <dot> (1 - graph)
108
        graph = import_graph(graph)
109 2
        if not is_unweighted(graph):
110
            raise ValueError("Model only implemented for unweighted graphs")
111 2
        p_mat = self.p_mat_.copy()
112 2

113 0
        if np.shape(p_mat) != np.shape(graph):
114 2
            raise ValueError("Input graph size must be the same size as P matrix")
115

116 2
        inds = None
117 2
        if not self.directed and self.loops:
118
            inds = np.triu_indices_from(graph)  # ignore lower half of graph, symmetric
119 2
        elif not self.directed and not self.loops:
120 2
            inds = np.triu_indices_from(graph, k=1)  # ignore the diagonal
121 0
        elif self.directed and not self.loops:
122 2
            xu, yu = np.triu_indices_from(graph, k=1)
123 2
            xl, yl = np.tril_indices_from(graph, k=-1)
124 2
            x = np.concatenate((xl, xu))
125 2
            y = np.concatenate((yl, yu))
126 2
            inds = (x, y)
127 2
        if inds is not None:
128 2
            p_mat = p_mat[inds]
129 2
            graph = graph[inds]
130 2

131 2
        # clip the probabilities that are degenerate
132 2
        if clip is not None:
133
            p_mat[p_mat < clip] = clip
134
            p_mat[p_mat > 1 - clip] = 1 - clip
135 2

136 0
        # TODO: use nonzero inds here will be faster
137 0
        successes = np.multiply(p_mat, graph)
138
        failures = np.multiply((1 - p_mat), (1 - graph))
139
        likelihood = successes + failures
140 2
        return np.log(likelihood)
141 2

142 2
    def score(self, graph):
143 2
        """
144
        Compute the average log-likelihood over each potential edge of the
145 2
        given graph.
146

147
        Note that this implicitly assumes the input graph is indexed like the
148
        fit model.
149

150
        Parameters
151
        ----------
152
        graph : np.ndarray
153
            Input graph. Must be same shape as model's ``p_mat_`` attribute
154

155
        Returns
156
        -------
157
        score : float
158
            sum of log-loglikelihoods for each potential edge in input graph
159
        """
160
        check_is_fitted(self, "p_mat_")
161
        return np.sum(self.score_samples(graph))
162

163 2
    @property
164 2
    def _pairwise(self):
165
        """This is for sklearn compliance."""
166 2
        return True
167

168
    @abstractmethod
169 0
    def fit(self, graph, y=None):
170
        """
171
        Calculate the parameters for the given graph model
172
        """
173
        return self
174

175
    def sample(self, n_samples=1):
176
        """
177
        Sample graphs (realizations) from the fitted model
178 2

179
        Can only be called after the the model has been fit
180

181
        Parameters
182
        ----------
183
        n_samples : int (default 1), optional
184
            The number of graphs to sample
185

186
        Returns
187
        -------
188
        graphs : np.array (n_samples, n_verts, n_verts)
189
            Array of sampled graphs, where the first dimension
190
            indexes each sample, and the other dimensions represent
191
            (n_verts x n_verts) adjacency matrices for the sampled graphs.
192

193
            Note that if only one sample is drawn, a (1, n_verts, n_verts)
194
            array will still be returned.
195
        """
196
        check_is_fitted(self, "p_mat_")
197
        _check_n_samples(n_samples)
198
        n_verts = self.p_mat_.shape[0]
199 2
        graphs = np.zeros((n_samples, n_verts, n_verts))
200 2
        p_mat = self.p_mat_.copy()
201 2
        p_mat[p_mat > 1] = 1
202 2
        p_mat[p_mat < 0] = 0
203 2
        for i in range(n_samples):
204 2
            graphs[i, :, :] = sample_edges(
205 2
                p_mat, directed=self.directed, loops=self.loops
206 2
            )
207 2
        return graphs
208

209
    @abstractmethod
210 2
    def _n_parameters(self):
211
        n_parameters = 1
212
        return n_parameters

Read our documentation on viewing source code .

Loading