snorkel-team / snorkel
1 2
from typing import Tuple
2

3 2
import numpy as np
4

5

6 2
def generate_simple_label_matrix(
    n: int, m: int, cardinality: int, abstain_multiplier: float = 1.0
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a synthetic label matrix with true parameters and labels.

    This function generates a set of labeling function conditional probability tables,
    P(LF=l | Y=y), stored as a matrix P, and true labels Y, and then generates the
    resulting label matrix L.

    All randomness is drawn from NumPy's global random state (``np.random``), so
    callers can make the output reproducible with ``np.random.seed``.

    Parameters
    ----------
    n
        Number of data points
    m
        Number of labeling functions
    cardinality
        Cardinality of true labels (i.e. not including abstains)
    abstain_multiplier
        Factor to multiply the probability of abstaining by; must be non-negative

    Returns
    -------
    Tuple[np.ndarray, np.ndarray, np.ndarray]
        A tuple containing the LF conditional probabilities P with shape
        ``(m, cardinality + 1, cardinality)``, the true labels Y with shape ``(n,)``
        and values in ``[0, cardinality)``, and the output label matrix L with shape
        ``(n, m)`` where ``-1`` denotes an abstain and ``0 .. cardinality - 1`` are
        class votes

    Raises
    ------
    ValueError
        If ``abstain_multiplier`` is negative or NaN
    """
    # Fail fast: a negative (or NaN) multiplier would put invalid entries in P, which
    # is otherwise only noticed - with an unrelated message - once sampling starts,
    # and not at all when n or m is zero. Written as "not >=" so that NaN is rejected.
    if not abstain_multiplier >= 0:
        raise ValueError(
            f"abstain_multiplier must be non-negative, got {abstain_multiplier}"
        )

    # LF output space is the class labels plus one extra slot (index 0) for abstain
    n_outputs = cardinality + 1

    # Generate the conditional probability tables for the LFs
    # The first axis is LF, second is LF output label, third is true class label
    # Note that we include abstains in the LF output space, and that we bias the
    # conditional probabilities towards being non-adversarial
    P = np.empty((m, n_outputs, cardinality))
    for j in range(m):
        p = np.random.rand(n_outputs, cardinality)

        # Bias the LFs to being non-adversarial: boost the weight of voting for the
        # true class (rows 1.. are the non-abstain outputs, so the diagonal of
        # p[1:, :] is "output == true label")
        p[1:, :] += (cardinality - 1) * np.eye(cardinality)

        # Optionally increase the abstain probability by some multiplier; note this is
        # to simulate the common setting where LFs label very sparsely
        p[0, :] *= abstain_multiplier

        # Normalize each column (one per true class) to sum to one. Broadcasting the
        # reciprocal column sums scales the columns directly, without building a
        # dense diagonal matrix and running a full matrix product.
        P[j] = p * (1 / p.sum(axis=0))

    # Generate the true datapoint labels
    # Note: Assuming balanced classes to start
    Y = np.random.choice(cardinality, n)

    # Generate the label matrix L
    # Entries are drawn one at a time, data point by data point, so the order in
    # which values are consumed from the global random state stays fixed.
    L = np.empty((n, m), dtype=int)
    for i in range(n):
        for j in range(m):
            # Shift by one so that output index 0 (abstain) becomes -1 and the
            # class votes become 0 .. cardinality - 1
            L[i, j] = np.random.choice(n_outputs, p=P[j, :, Y[i]]) - 1
    return P, Y, L

Read our documentation on viewing source code.

Loading