No flags found
Use flags to group coverage reports by test type, project and/or folders.
Then setup custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
86a21a2
... +0 ...
ed77718
Use flags to group coverage reports by test type, project and/or folders.
Then setup custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
1 | + | import logging |
|
1 | 2 | from collections import defaultdict |
|
2 | 3 | from typing import DefaultDict, Dict, List, Tuple |
|
3 | 4 |
55 | 56 | for i, labels in enumerate(zip(*y_flat)): |
|
56 | 57 | buckets[labels].append(i) |
|
57 | 58 | return {k: np.array(v) for k, v in buckets.items()} |
|
59 | + | ||
60 | + | ||
61 | + | def get_label_instances( |
|
62 | + | bucket: Tuple[int, ...], x: np.ndarray, *y: np.ndarray |
|
63 | + | ) -> np.ndarray: |
|
64 | + | """Return instances in x with the specified combination of labels. |
|
65 | + | ||
66 | + | Parameters |
|
67 | + | ---------- |
|
68 | + | bucket |
|
69 | + | A tuple of label values corresponding to which instances from x are returned |
|
70 | + | x |
|
71 | + | NumPy array of data instances to be returned |
|
72 | + | *y |
|
73 | + | A list of np.ndarray of (int) labels |
|
74 | + | ||
75 | + | Returns |
|
76 | + | ------- |
|
77 | + | np.ndarray |
|
78 | + | NumPy array of instances from x with the specified combination of labels |
|
79 | + | ||
80 | + | Example |
|
81 | + | ------- |
|
82 | + | A common use case is calling ``get_label_instances(bucket, x.to_numpy(), Y_gold, Y_pred)`` |
|
83 | + | where ``x`` is a NumPy array of data instances that the labels correspond to, |
|
84 | + | ``Y_gold`` is a list of gold (i.e. ground truth) labels, and |
|
85 | + | ``Y_pred`` is a corresponding list of predicted labels. |
|
86 | + | ||
87 | + | >>> import pandas as pd |
|
88 | + | >>> x = pd.DataFrame(data={'col1': ["this is a string", "a second string", "a third string"], 'col2': ["1", "2", "3"]}) |
|
89 | + | >>> Y_gold = np.array([1, 1, 1]) |
|
90 | + | >>> Y_pred = np.array([1, 0, 0]) |
|
91 | + | >>> bucket = (1, 0) |
|
92 | + | ||
93 | + | The returned NumPy array of data instances from ``x`` will correspond to |
|
94 | + | the rows where the first list had a 1 and the second list had a 0. |
|
95 | + | >>> get_label_instances(bucket, x.to_numpy(), Y_gold, Y_pred) |
|
96 | + | array([['a second string', '2'], |
|
97 | + | ['a third string', '3']], dtype=object) |
|
98 | + | ||
99 | + | More generally, given bucket ``(i, j, ...)`` and lists ``y1, y2, ...`` |
|
100 | + | the returned data instances from ``x`` will correspond to the rows where |
|
101 | + | y1 had label i, y2 had label j, and so on. Note that ``x`` and ``y`` |
|
102 | + | must all be the same length. |
|
103 | + | """ |
|
104 | + | if len(y) != len(bucket): |
|
105 | + | raise ValueError("Number of lists must match the amount of labels in bucket") |
|
106 | + | if x.shape[0] != len(y[0]): |
|
107 | + | # Note: the check for all y having the same number of elements occurs in get_label_buckets |
|
108 | + | raise ValueError( |
|
109 | + | "Number of rows in x does not match number of elements in at least one label list" |
|
110 | + | ) |
|
111 | + | buckets = get_label_buckets(*y) |
|
112 | + | try: |
|
113 | + | indices = buckets[bucket] |
|
114 | + | except KeyError: |
|
115 | + | logging.warning("Bucket" + str(bucket) + " does not exist.") |
|
116 | + | return np.array([]) |
|
117 | + | instances = x[indices] |
|
118 | + | return instances |
1 | 1 | """Generic model analysis utilities shared across Snorkel.""" |
|
2 | 2 | ||
3 | - | from .error_analysis import get_label_buckets # noqa: F401 |
|
3 | + | from .error_analysis import get_label_buckets, get_label_instances # noqa: F401 |
|
4 | 4 | from .metrics import metric_score # noqa: F401 |
|
5 | 5 | from .scorer import Scorer # noqa: F401 |
Files | Coverage |
---|---|
snorkel | 0.02% 97.21% |
Project Totals (68 files) | 97.21% |
ed77718
86a21a2