microsoft / graspologic
1
import numpy as np
2
from scipy.stats import fisher_exact
3

4 2

5 2
class SignalSubgraph:
6
    """
7
    Estimate the signal-subgraph of a set of labeled graph samples.
8 2

9
    The incoherent estimator finds the signal-subgraph, constrained by the number of edges.
10
    The coherent estimator finds the signal-subgraph, constrained by the number of edges and by the number of vertices that the edges in the signal-subgraph may be incident to.
11

12
    Parameters
13
    ----------
14
    graphs: array-like, shape (n_vertices, n_vertices, s_samples)
15
        A series of labeled (n_vertices, n_vertices) unweighted graph samples. If undirected, the upper or lower triangle matrices should be used.
16
    labels: vector, length (s_samples)
17
        A vector of class labels. There must be a maximum of two classes.
18

19
    Attributes
20
    ----------
21
    contmat_: array-like, shape (n_vertices, n_vertices, 2, 2)
22
        An array that stores the 2-by-2 contingency matrix for each point in the graph samples.
23
    sigsub_: tuple, shape (2, n_edges)
24
        A tuple of a row index array and column index array, where n_edges is the size of the signal-subgraph determined by *constraints*.
25
    mask_: array-like, shape (n_vertices, n_vertices)
26
        An array of boolean values. Entries are true for edges that are in the signal subgraph.
27

28
    References
29
    ----------
30
    .. [1] J. T. Vogelstein, W. R. Gray, R. J. Vogelstein, and C. E. Priebe, "Graph Classification using Signal-Subgraphs: Applications in Statistical Connectomics," arXiv:1108.1427v2 [stat.AP], 2012.
31

32
    """
33

34
    def __construct_contingency(self):
35
        nverts = np.shape(self.graphs)[0]
36
        out = np.zeros((nverts, nverts, 2, 2))
37 2
        rowsum1 = sum(self.labels)
38 2
        rowsum0 = len(self.labels) - rowsum1
39 2
        for i in range(nverts):
40 2
            for j in range(nverts):
41 2
                a = sum(self.graphs[i, j, self.labels == 0])
42 2
                b = sum(self.graphs[i, j, :]) - a
43 2
                out[i, j, :, :] = [[a, rowsum0 - a], [b, rowsum1 - b]]
44 2
        self.contmat_ = out
45 2

46 2
    def fit(self, graphs, labels, constraints):
47 2
        """
48
        Fit the signal-subgraph estimator according to the constraints given.
49 2

50
        Parameters
51
        ----------
52
        graphs: array-like, shape (n_vertices, n_vertices, s_samples)
53
            A series of labeled (n_vertices, n_vertices) unweighted graph samples. If undirected, the upper or lower triangle matrices should be used.
54
        labels: vector, length (s_samples)
55
            A vector of class labels. There must be a maximum of two classes.
56
        constraints: int or vector
57
            The constraints that will be imposed onto the estimated signal-subgraph.
58

59
            If *constraints* is an int, *constraints* is the number of edges in the signal-subgraph.
60
            If *constraints* is a vector, the first element of *constraints* is the number of edges in the signal-subgraph, and the second element of *constraints* is the number of vertices that the signal-subgraph must be incident to.
61

62
        Returns
63
        -------
64
        self: returns an instance of self
65
        """
66
        if not isinstance(graphs, np.ndarray):
67
            msg = "Input array 'graphs' must be np.ndarray, not {}.".format(
68
                type(graphs)
69 2
            )
70 2
            raise TypeError(msg)
71
        if not isinstance(labels, (list, np.ndarray)):
72
            msg = "Input vector 'labels' must be list or np.ndarray, not {}.".format(
73 2
                type(labels)
74 2
            )
75 0
            raise TypeError(msg)
76

77
        shape = np.shape(graphs)
78 0
        if len(shape) != 3:
79
            msg = "Input array 'graphs' must be 3-dimensional with shape (n_vertices, n_vertices, s_samples)."
80 2
            raise ValueError(msg)
81 2
        if shape[0] != shape[1]:
82 2
            msg = "Input array 'graphs' must have matching number of vertices."
83 2
            raise ValueError(msg)
84 2

85 2
        if len(np.shape(labels)) != 1:
86 2
            msg = "Input vector 'labels' must be 1-dimensional."
87
            raise ValueError(msg)
88 2
        if len(np.unique(labels)) > 2:
89 2
            msg = "Input arrays must have a maximum of two classes, not {}.".format(
90 2
                len(np.unique(labels))
91 2
            )
92 2
            raise ValueError(msg)
93
        if len(labels) != shape[2]:
94
            msg = "Input vector length must match the number of graph samples."
95 2
            raise ValueError(msg)
96 2
        else:
97 2
            self.graphs = graphs
98 2
            self.labels = labels
99

100 2
        self.__construct_contingency()
101 2
        verts = np.shape(self.graphs)[0]
102
        sigmat = np.array(
103 2
            [
104 2
                [fisher_exact(self.contmat_[i, j, :, :])[1] for j in range(verts)]
105 2
                for i in range(verts)
106
            ]
107
        )
108

109
        if isinstance(constraints, (int)):  # incoherent
110
            nedges = constraints
111
            sigsub = np.dstack(
112 2
                np.unravel_index(np.argsort(sigmat.ravel()), np.shape(sigmat))
113 2
            )
114 2
            sigsub = sigsub[0, :nedges, :]
115
            sigsub = tuple(np.transpose(sigsub))
116

117 2
        elif len(constraints) == 2:  # coherent
118 2
            nedges = constraints[0]
119
            nverts = constraints[1]
120 2

121 2
            wset = np.unique(sigmat, axis=None)
122 2
            wcounter = 0
123
            wconv = 0
124 2

125 2
            while wconv == 0:
126 2
                w = wset[wcounter]
127
                blank = sigmat
128 2
                blank = blank > w
129 2

130 2
                score = 2 * verts - (np.sum(blank, axis=1) + np.sum(blank, axis=0))
131 2
                vscore = np.sort(score)[::-1]
132
                vstars = np.argsort(score)[::-1]
133 2

134 2
                if (vscore[:nverts].sum()) >= nedges:
135 2
                    blank = np.ones(np.shape(sigmat))
136
                    nstars = np.amin([len(vscore[vscore > 0]), nverts])
137 2
                    vstars = vstars[:nstars]
138 2

139 2
                    blank[vstars, :] = sigmat[vstars, :]
140 2
                    blank[:, vstars] = sigmat[:, vstars]
141

142 2
                    indsp = np.dstack(
143 2
                        np.unravel_index(np.argsort(blank.ravel()), np.shape(blank))
144
                    )
145 2
                    sigsub = indsp[0, :nedges, :]
146
                    sigsub = tuple(np.transpose(sigsub))
147
                    wconv = 1
148 2
                else:
149 2
                    wcounter = wcounter + 1
150 2
                    if wcounter > len(wset):
151
                        sigsub = []
152 0
                        wconv = 1
153 0
        else:
154 0
            msg = "Input constraints must be an int for the incoherent signal-subgraph estimator, or a vector of length 2 for the coherent subgraph estimator."
155 0
            raise TypeError(msg)
156
        self.sigsub_ = sigsub
157 2
        return self
158 2

159 2
    def fit_transform(self, graphs, labels, constraints):
160 2
        """
161
        A function to return the indices of the signal-subgraph. If *return_mask* is True, also returns a mask for the signal-subgraph.
162 2

163
        Parameters
164
        ----------
165
        graphs: array-like, shape (n_vertices, n_vertices, s_samples)
166
            A series of labeled (n_vertices, n_vertices) unweighted graph samples. If undirected, the upper or lower triangle matrices should be used.
167
        labels: vector, length (s_samples)
168
            A vector of class labels. There must be a maximum of two classes.
169
        constraints: int or vector
170
            The constraints that will be imposed onto the estimated signal-subgraph.
171

172
            If *constraints* is an int, *constraints* is the number of edges in the signal-subgraph.
173
            If *constraints* is a vector, the first element of *constraints* is the number of edges in the signal-subgraph, and the second element of *constraints* is the number of vertices that the signal-subgraph must be incident to.
174

175
        Returns
176
        -------
177
        sigsub: tuple
178
            Contains an array of row indices and an array of column indices.
179
        """
180
        self.fit(graphs, labels, constraints)
181
        verts = np.shape(self.graphs)[0]
182
        mask = np.full((verts, verts), False)
183 2
        mask[self.sigsub_] = True
184 2
        self.mask_ = mask
185 2
        return self.sigsub_

Read our documentation on viewing source code .

Loading