microsoft / graspologic
1
import numpy as np
2

3
from .base import BaseGraphEstimator
4 2
from ..embed import AdjacencySpectralEmbed
5
from ..simulations import p_from_latent
6 2
from ..utils import import_graph, augment_diagonal, is_unweighted
7 2

8 2

9 2
class RDPGEstimator(BaseGraphEstimator):
10
    r"""
11
    Random Dot Product Graph
12 2

13
    Under the random dot product graph model, each node is assumed to have a
14
    "latent position" in some :math:`d`-dimensional Euclidian space. This vector
15
    dictates that node's probability of connection to other nodes. For a given pair
16
    of nodes :math:`i` and :math:`j`, the probability of connection is the dot
17
    product between their latent positions:
18

19
    :math:`P_{ij} = \langle x_i, y_j \rangle`
20

21
    where :math:`x_i` is the left latent position of node :math:`i`, and :math:`y_j` is
22
    the right latent position of node :math:`j`. If the graph being modeled is
23
    is undirected, then :math:`x_i = y_i`. Latent positions can be estimated via
24
    :class:`~graspy.embed.AdjacencySpectralEmbed`.
25

26
    Read more in the :ref:`tutorials <models_tutorials>`
27

28
    Parameters
29
    ----------
30
    loops : boolean, optional (default=False)
31
        Whether to allow entries on the diagonal of the adjacency matrix, i.e. loops in
32
        the graph where a node connects to itself.
33

34
    n_components : int, optional (default=None)
35
        The dimensionality of the latent space used to model the graph. If None, the
36
        method of Zhu and Godsie will be used to select an embedding dimension.
37

38
    ase_kws : dict, optional (default={})
39
        Dictionary of keyword arguments passed down to
40
        :class:`~graspy.embed.AdjacencySpectralEmbed`, which is used to fit the model.
41

42
    diag_aug_weight : int or float, optional (default=1)
43
        Weighting used for diagonal augmentation, which is a form of regularization for
44
        fitting the RDPG model.
45

46
    plus_c_weight : int or float, optional (default=1)
47
        Weighting used for a constant scalar added to the adjacency matrix before
48
        embedding as a form of regularization.
49

50
    Attributes
51
    ----------
52
    latent_ : tuple, length 2, or np.ndarray, shape (n_verts, n_components)
53
        The fit latent positions for the RDPG model. If a tuple, then the graph that was
54
        input to fit was directed, and the first and second elements of the tuple are
55
        the left and right latent positions, respectively. The left and right latent
56
        positions will both be of shape (n_verts, n_components). If `latent_` is an
57
        array, then the graph that was input to fit was undirected and the left and
58
        right latent positions are the same.
59

60
    p_mat_ : np.ndarray, shape (n_verts, n_verts)
61
        Probability matrix :math:`P` for the fit model, from which graphs could be
62
        sampled.
63

64
    See also
65
    --------
66
    graspy.simulations.rdpg
67
    graspy.embed.AdjacencySpectralEmbed
68
    graspy.utils.augment_diagonal
69

70
    References
71
    ----------
72
    .. [1] Athreya, A., Fishkind, D. E., Tang, M., Priebe, C. E., Park, Y.,
73
           Vogelstein, J. T., ... & Sussman, D. L. (2018). Statistical inference
74
           on random dot product graphs: a survey. Journal of Machine Learning
75
           Research, 18(226), 1-92.
76

77
    .. [2] Zhu, M. and Ghodsi, A. (2006).
78
           Automatic dimensionality selection from the scree plot via the use of
79
           profile likelihood. Computational Statistics & Data Analysis, 51(2),
80
           pp.918-930.
81
    """
82

83
    def __init__(
84
        self,
85
        loops=False,
86 2
        n_components=None,
87
        ase_kws={},
88
        diag_aug_weight=1,
89
        plus_c_weight=1,
90
    ):
91
        super().__init__(loops=loops)
92

93
        if not isinstance(ase_kws, dict):
94 2
            raise TypeError("ase_kws must be a dict")
95
        if not isinstance(diag_aug_weight, (int, float)):
96 2
            raise TypeError("diag_aug_weight must be a scalar")
97 2
        if not isinstance(plus_c_weight, (int, float)):
98 2
            raise TypeError("plus_c_weight must be a scalar")
99 2
        if diag_aug_weight < 0:
100 2
            raise ValueError("diag_aug_weight must be at least 0")
101 2
        if plus_c_weight < 0:
102 2
            raise ValueError("plus_c_weight must be at least 0")
103 2

104 2
        self.n_components = n_components
105 2
        self.ase_kws = ase_kws
106
        self.diag_aug_weight = diag_aug_weight
107 2
        self.plus_c_weight = plus_c_weight
108 2

109 2
    def fit(self, graph, y=None):
110 2
        graph = import_graph(graph)
111
        if not is_unweighted(graph):
112 2
            raise NotImplementedError(
113 2
                "Graph model is currently only implemented for unweighted graphs."
114 2
            )
115
        graph = augment_diagonal(graph, weight=self.diag_aug_weight)
116
        graph += self.plus_c_weight / graph.size
117
        ase = AdjacencySpectralEmbed(
118 2
            n_components=self.n_components, diag_aug=False, **self.ase_kws
119 2
        )
120 2
        latent = ase.fit_transform(graph)
121
        self.latent_ = latent
122
        if type(self.latent_) == tuple:
123 2
            X = self.latent_[0]
124 2
            Y = self.latent_[1]
125 2
            self.directed = True
126 2
        else:
127 2
            X = self.latent_
128 2
            Y = self.latent_
129
            self.directed = False
130 2
        p_mat = X @ Y.T
131 2
        if not self.loops:
132 2
            p_mat -= np.diag(np.diag(p_mat))
133 2
        self.p_mat_ = p_mat
134 2
        return self
135 2

136 2
    def _n_parameters(self):
137 2
        if type(self.latent_) == tuple:
138
            return 2 * self.latent_[0].size
139 2
        else:
140 2
            return self.latent_.size

Read our documentation on viewing source code .

Loading