Compare 86a21a2 ... +0 ... ed77718

Coverage Reach
classification/training/trainer.py classification/training/loggers/checkpointer.py classification/training/loggers/log_manager.py classification/training/loggers/log_writer.py classification/training/loggers/tensorboard_writer.py classification/training/loggers/__init__.py classification/training/schedulers/shuffled_scheduler.py classification/training/schedulers/scheduler.py classification/training/schedulers/sequential_scheduler.py classification/training/schedulers/__init__.py classification/multitask_classifier.py classification/data.py classification/utils.py classification/task.py classification/loss.py classification/__init__.py labeling/model/label_model.py labeling/model/logger.py labeling/model/base_labeler.py labeling/model/baselines.py labeling/model/graph_utils.py labeling/model/__init__.py labeling/apply/core.py labeling/apply/pandas.py labeling/apply/dask.py labeling/lf/nlp.py labeling/lf/core.py labeling/lf/__init__.py labeling/analysis.py labeling/utils.py labeling/__init__.py slicing/utils.py slicing/sliceaware_classifier.py slicing/modules/slice_combiner.py slicing/sf/core.py slicing/sf/nlp.py slicing/sf/__init__.py slicing/monitor.py slicing/__init__.py slicing/apply/core.py augmentation/apply/core.py augmentation/apply/pandas.py augmentation/policy/core.py augmentation/policy/sampling.py augmentation/tf.py augmentation/__init__.py analysis/scorer.py analysis/metrics.py analysis/error_analysis.py analysis/__init__.py map/core.py map/__init__.py utils/core.py utils/optimizers.py utils/lr_schedulers.py utils/config_utils.py utils/data_operators.py utils/__init__.py preprocess/nlp.py preprocess/core.py preprocess/__init__.py synthetic/synthetic_data.py types/data.py types/__init__.py types/hashing.py types/classifier.py version.py __init__.py

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.


@@ -1,3 +1,4 @@
Loading
1 +
import logging
1 2
from collections import defaultdict
2 3
from typing import DefaultDict, Dict, List, Tuple
3 4
@@ -55,3 +56,63 @@
Loading
55 56
    for i, labels in enumerate(zip(*y_flat)):
56 57
        buckets[labels].append(i)
57 58
    return {k: np.array(v) for k, v in buckets.items()}
59 +
60 +
61 +
def get_label_instances(
    bucket: Tuple[int, ...], x: np.ndarray, *y: np.ndarray
) -> np.ndarray:
    """Return instances in x with the specified combination of labels.

    Parameters
    ----------
    bucket
        A tuple of label values corresponding to which instances from x are returned
    x
        NumPy array of data instances to be returned
    *y
        A list of np.ndarray of (int) labels

    Returns
    -------
    np.ndarray
        NumPy array of instances from x with the specified combination of labels.
        If no instance has that combination, a warning is logged and an empty
        array (``np.array([])``) is returned.

    Raises
    ------
    ValueError
        If the number of label arrays differs from the number of labels in
        ``bucket``, if no label arrays are given, or if the number of rows in
        ``x`` differs from the number of elements in the first label array

    Example
    -------
    A common use case is calling ``get_label_instances(bucket, x.to_numpy(), Y_gold, Y_pred)``
    where ``x`` is a NumPy array of data instances that the labels correspond to,
    ``Y_gold`` is a list of gold (i.e. ground truth) labels, and
    ``Y_pred`` is a corresponding list of predicted labels.

    >>> import pandas as pd
    >>> x = pd.DataFrame(data={'col1': ["this is a string", "a second string", "a third string"], 'col2': ["1", "2", "3"]})
    >>> Y_gold = np.array([1, 1, 1])
    >>> Y_pred = np.array([1, 0, 0])
    >>> bucket = (1, 0)

    The returned NumPy array of data instances from ``x`` will correspond to
    the rows where the first list had a 1 and the second list had a 0.
    >>> get_label_instances(bucket, x.to_numpy(), Y_gold, Y_pred)
    array([['a second string', '2'],
           ['a third string', '3']], dtype=object)

    More generally, given bucket ``(i, j, ...)`` and lists ``y1, y2, ...``
    the returned data instances from ``x`` will correspond to the rows where
    y1 had label i, y2 had label j, and so on. Note that ``x`` and ``y``
    must all be the same length.
    """
    if len(y) != len(bucket):
        raise ValueError("Number of lists must match the amount of labels in bucket")
    if not y:
        # An empty bucket with no label arrays passes the arity check above;
        # fail explicitly rather than with an IndexError on y[0] below.
        raise ValueError("At least one label list is required")
    if x.shape[0] != len(y[0]):
        # Note: the check for all y having the same number of elements occurs in get_label_buckets
        raise ValueError(
            "Number of rows in x does not match number of elements in at least one label list"
        )
    buckets = get_label_buckets(*y)
    try:
        indices = buckets[bucket]
    except KeyError:
        # Lazy %-formatting; also restores the space missing between
        # "Bucket" and the tuple in the logged message.
        logging.warning("Bucket %s does not exist.", bucket)
        return np.array([])
    return x[indices]

@@ -1,5 +1,5 @@
Loading
1 1
"""Generic model analysis utilities shared across Snorkel."""
2 2
3 -
from .error_analysis import get_label_buckets  # noqa: F401
3 +
from .error_analysis import get_label_buckets, get_label_instances  # noqa: F401
4 4
from .metrics import metric_score  # noqa: F401
5 5
from .scorer import Scorer  # noqa: F401

Everything is accounted for!

No changes detected that need to be reviewed.
What changes does Codecov check for?
Lines, not adjusted in diff, that have changed coverage data.
Files that introduced coverage data that had none before.
Files that have missing coverage data that once were tracked.
Files Coverage
snorkel 0.02% 97.21%
Project Totals (68 files) 97.21%
Loading