scikit-learn / scikit-learn

Compare b8ef9a9...280c487 (+249 commits)

Coverage Reach
[Interactive treemap of the files reached by coverage, spanning the sklearn packages: linear_model, utils, ensemble (including the new _hist_gradient_boosting subpackage), metrics, preprocessing, tests, decomposition, model_selection, cluster, datasets, neighbors, tree, feature_extraction, feature_selection, manifold, gaussian_process, mixture, svm, neural_network, covariance, compose, inspection, cross_decomposition, semi_supervised, experimental, and the top-level modules.]

No flags found


Showing 13 of 43 files from the diff.
Other files ignored by Codecov
.coveragerc has changed.
doc/conf.py has changed.

New file sklearn/ensemble/_hist_gradient_boosting/loss.py (@@ -0,0 +1,247 @@):

"""
This module contains the loss classes.

Specific losses are used for regression, binary classification or multiclass
classification.
"""
# Author: Nicolas Hug

from abc import ABC, abstractmethod

import numpy as np
from scipy.special import expit
try:  # logsumexp was moved from scipy.misc to scipy.special in 0.19
    from scipy.special import logsumexp
except ImportError:
    from scipy.misc import logsumexp

from .types import Y_DTYPE
from .types import G_H_DTYPE
from ._loss import _update_gradients_least_squares
from ._loss import _update_gradients_hessians_binary_crossentropy
from ._loss import _update_gradients_hessians_categorical_crossentropy


class BaseLoss(ABC):
    """Base class for a loss."""

    def init_gradients_and_hessians(self, n_samples, prediction_dim):
        """Return initial gradients and hessians.

        Unless hessians are constant, arrays are initialized with undefined
        values.

        Parameters
        ----------
        n_samples : int
            The number of samples passed to `fit()`.
        prediction_dim : int
            The dimension of a raw prediction, i.e. the number of trees
            built at each iteration. Equals 1 for regression and binary
            classification, or K where K is the number of classes for
            multiclass classification.

        Returns
        -------
        gradients : ndarray, shape (prediction_dim, n_samples)
            The initial gradients. The array is not initialized.
        hessians : ndarray, shape (prediction_dim, n_samples)
            If hessians are constant (e.g. for `LeastSquares` loss), the
            array is initialized to ``1``. Otherwise, the array is allocated
            without being initialized.
        """
        shape = (prediction_dim, n_samples)
        gradients = np.empty(shape=shape, dtype=G_H_DTYPE)
        if self.hessians_are_constant:
            # If the hessians are constant, we consider they are equal to 1.
            # This is correct as long as we adjust the gradients. See e.g. LS
            # loss.
            hessians = np.ones(shape=(1, 1), dtype=G_H_DTYPE)
        else:
            hessians = np.empty(shape=shape, dtype=G_H_DTYPE)

        return gradients, hessians

    @abstractmethod
    def get_baseline_prediction(self, y_train, prediction_dim):
        """Return initial predictions (before the first iteration).

        Parameters
        ----------
        y_train : ndarray, shape (n_samples,)
            The target training values.
        prediction_dim : int
            The dimension of one prediction: 1 for binary classification and
            regression, n_classes for multiclass classification.

        Returns
        -------
        baseline_prediction : float or ndarray, shape (1, prediction_dim)
            The baseline prediction.
        """

    @abstractmethod
    def update_gradients_and_hessians(self, gradients, hessians, y_true,
                                      raw_predictions):
        """Update gradients and hessians arrays, in place.

        The gradients (resp. hessians) are the first (resp. second) order
        derivatives of the loss for each sample with respect to the
        predictions of the model, evaluated at iteration ``i - 1``.

        Parameters
        ----------
        gradients : ndarray, shape (prediction_dim, n_samples)
            The gradients (treated as OUT array).
        hessians : ndarray, shape (prediction_dim, n_samples) or (1,)
            The hessians (treated as OUT array).
        y_true : ndarray, shape (n_samples,)
            The true target values of each training sample.
        raw_predictions : ndarray, shape (prediction_dim, n_samples)
            The raw_predictions (i.e. values from the trees) of the tree
            ensemble at iteration ``i - 1``.
        """


class LeastSquares(BaseLoss):
    """Least squares loss, for regression.

    For a given sample x_i, least squares loss is defined as::

        loss(x_i) = 0.5 * (y_true_i - raw_pred_i)**2

    This actually computes the half least squares loss to simplify the
    computation of the gradients and get a unit hessian (and to be consistent
    with what is done in LightGBM).
    """

    hessians_are_constant = True

    def __call__(self, y_true, raw_predictions, average=True):
        # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to
        # return a view.
        raw_predictions = raw_predictions.reshape(-1)
        loss = 0.5 * np.power(y_true - raw_predictions, 2)
        return loss.mean() if average else loss

    def get_baseline_prediction(self, y_train, prediction_dim):
        return np.mean(y_train)

    @staticmethod
    def inverse_link_function(raw_predictions):
        return raw_predictions

    def update_gradients_and_hessians(self, gradients, hessians, y_true,
                                      raw_predictions):
        # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to
        # return a view.
        raw_predictions = raw_predictions.reshape(-1)
        gradients = gradients.reshape(-1)
        _update_gradients_least_squares(gradients, y_true, raw_predictions)


class BinaryCrossEntropy(BaseLoss):
    """Binary cross-entropy loss, for binary classification.

    For a given sample x_i, the binary cross-entropy loss is defined as the
    negative log-likelihood of the model, which can be expressed as::

        loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i

    See The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,
    section 4.4.1 (about logistic regression).
    """

    hessians_are_constant = False
    inverse_link_function = staticmethod(expit)

    def __call__(self, y_true, raw_predictions, average=True):
        # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to
        # return a view.
        raw_predictions = raw_predictions.reshape(-1)
        # logaddexp(0, x) = log(1 + exp(x))
        loss = np.logaddexp(0, raw_predictions) - y_true * raw_predictions
        return loss.mean() if average else loss

    def get_baseline_prediction(self, y_train, prediction_dim):
        if prediction_dim > 2:
            raise ValueError(
                "loss='binary_crossentropy' is not defined for multiclass"
                " classification with n_classes=%d, use"
                " loss='categorical_crossentropy' instead" % prediction_dim)
        proba_positive_class = np.mean(y_train)
        eps = np.finfo(y_train.dtype).eps
        proba_positive_class = np.clip(proba_positive_class, eps, 1 - eps)
        # log(x / (1 - x)) is the inverse of the sigmoid, i.e. the link
        # function of the Binomial model.
        return np.log(proba_positive_class / (1 - proba_positive_class))

    def update_gradients_and_hessians(self, gradients, hessians, y_true,
                                      raw_predictions):
        # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to
        # return a view.
        raw_predictions = raw_predictions.reshape(-1)
        gradients = gradients.reshape(-1)
        hessians = hessians.reshape(-1)
        _update_gradients_hessians_binary_crossentropy(
            gradients, hessians, y_true, raw_predictions)

    def predict_proba(self, raw_predictions):
        # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to
        # return a view.
        raw_predictions = raw_predictions.reshape(-1)
        proba = np.empty((raw_predictions.shape[0], 2), dtype=Y_DTYPE)
        proba[:, 1] = expit(raw_predictions)
        proba[:, 0] = 1 - proba[:, 1]
        return proba


class CategoricalCrossEntropy(BaseLoss):
    """Categorical cross-entropy loss, for multiclass classification.

    For a given sample x_i, the categorical cross-entropy loss is defined as
    the negative log-likelihood of the model and generalizes the binary
    cross-entropy to more than 2 classes.
    """

    hessians_are_constant = False

    def __call__(self, y_true, raw_predictions, average=True):
        one_hot_true = np.zeros_like(raw_predictions)
        prediction_dim = raw_predictions.shape[0]
        for k in range(prediction_dim):
            one_hot_true[k, :] = (y_true == k)

        loss = (logsumexp(raw_predictions, axis=0) -
                (one_hot_true * raw_predictions).sum(axis=0))
        return loss.mean() if average else loss

    def get_baseline_prediction(self, y_train, prediction_dim):
        init_value = np.zeros(shape=(prediction_dim, 1), dtype=Y_DTYPE)
        eps = np.finfo(y_train.dtype).eps
        for k in range(prediction_dim):
            proba_kth_class = np.mean(y_train == k)
            proba_kth_class = np.clip(proba_kth_class, eps, 1 - eps)
            init_value[k, :] += np.log(proba_kth_class)

        return init_value

    def update_gradients_and_hessians(self, gradients, hessians, y_true,
                                      raw_predictions):
        _update_gradients_hessians_categorical_crossentropy(
            gradients, hessians, y_true, raw_predictions)

    def predict_proba(self, raw_predictions):
        # TODO: This could be done in parallel
        # compute softmax (using exp(log(softmax)))
        proba = np.exp(raw_predictions -
                       logsumexp(raw_predictions, axis=0)[np.newaxis, :])
        return proba.T


_LOSSES = {
    'least_squares': LeastSquares,
    'binary_crossentropy': BinaryCrossEntropy,
    'categorical_crossentropy': CategoricalCrossEntropy
}
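To make the interface concrete, here is a rough sketch of driving one of these loss objects by hand. This is hypothetical standalone usage, not code from the PR: the gradient boosting estimator normally orchestrates these calls, the compiled `_loss` Cython extension must be available, and the float32 nature of `G_H_DTYPE` is an assumption inferred from the allocations above.

import numpy as np

# Hedged sketch: exercising BinaryCrossEntropy outside the estimator.
loss = BinaryCrossEntropy()
y_true = np.array([0., 1., 1., 0.])
# Raw predictions at iteration i - 1, shape (prediction_dim, n_samples).
raw_predictions = np.zeros((1, 4))
gradients, hessians = loss.init_gradients_and_hessians(
    n_samples=4, prediction_dim=1)
loss.update_gradients_and_hessians(gradients, hessians, y_true,
                                   raw_predictions)
print(loss(y_true, raw_predictions))        # mean loss; log(2) for all-zero raw
print(loss.predict_proba(raw_predictions))  # columns: P(y=0), P(y=1)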

sklearn/utils/estimator_checks.py
@@ -396,6 +396,12 @@
         # which is more features than we have in most cases.
         estimator.set_params(k=1)
+    if name in ('HistGradientBoostingClassifier',
+                'HistGradientBoostingRegressor'):
+        # The default min_samples_leaf (20) isn't appropriate for the small
+        # datasets the checks use (only very shallow trees would be built).
+        estimator.set_params(min_samples_leaf=5)
 
 
 class NotAnArray:
     """An object that is convertible to an array

@@ -2462,6 +2468,7 @@
               if hasattr(estimator, method)}
 
     # Fit again
+    set_random_state(estimator)
     estimator.fit(X_train, y_train)
 
     for method in check_methods:
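The `set_random_state(estimator)` call added before the second fit makes the re-fit deterministic, so the per-method outputs compared by the check cannot differ merely because the first fit consumed the random state. Roughly, the helper fixes every `random_state` parameter the estimator exposes; a sketch of its use (the `sklearn.utils.testing` import path is the one current in this codebase):

from sklearn.utils.testing import set_random_state
from sklearn.ensemble import GradientBoostingClassifier

# Hedged sketch: with a fixed random_state, two successive fits on the same
# data produce identical models, which is what the check relies on.
est = GradientBoostingClassifier(subsample=0.5)  # subsample < 1 randomizes fit
set_random_state(est, random_state=0)            # sets est.random_state = 0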

sklearn/ensemble/gradient_boosting.py
@@ -2012,6 +2012,7 @@
 
     See also
     --------
+    sklearn.ensemble.HistGradientBoostingClassifier,
     sklearn.tree.DecisionTreeClassifier, RandomForestClassifier
     AdaBoostClassifier
 
@@ -2472,7 +2473,8 @@
 
     See also
     --------
-    DecisionTreeRegressor, RandomForestRegressor
+    sklearn.ensemble.HistGradientBoostingRegressor,
+    sklearn.tree.DecisionTreeRegressor, RandomForestRegressor
 
     References
     ----------

New file sklearn/ensemble/_hist_gradient_boosting/grower.py (@@ -0,0 +1,465 @@):

"""
This module contains the TreeGrower class.

TreeGrower builds a regression tree fitting a Newton-Raphson step, based on
the gradients and hessians of the training data.
"""
# Author: Nicolas Hug

from heapq import heappush, heappop
import numpy as np
from timeit import default_timer as time
import numbers

from .splitting import Splitter
from .histogram import HistogramBuilder
from .predictor import TreePredictor, PREDICTOR_RECORD_DTYPE
from .utils import sum_parallel


class TreeNode:
    """Tree Node class used in TreeGrower.

    This isn't used for prediction purposes, only for training (see
    TreePredictor).

    Parameters
    ----------
    depth : int
        The depth of the node, i.e. its distance from the root.
    sample_indices : ndarray of unsigned int, shape (n_samples_at_node,)
        The indices of the samples at the node.
    sum_gradients : float
        The sum of the gradients of the samples at the node.
    sum_hessians : float
        The sum of the hessians of the samples at the node.
    parent : TreeNode or None, optional (default=None)
        The parent of the node. None for root.

    Attributes
    ----------
    depth : int
        The depth of the node, i.e. its distance from the root.
    sample_indices : ndarray of unsigned int, shape (n_samples_at_node,)
        The indices of the samples at the node.
    sum_gradients : float
        The sum of the gradients of the samples at the node.
    sum_hessians : float
        The sum of the hessians of the samples at the node.
    parent : TreeNode or None
        The parent of the node. None for root.
    split_info : SplitInfo or None
        The result of the split evaluation.
    left_child : TreeNode or None
        The left child of the node. None for leaves.
    right_child : TreeNode or None
        The right child of the node. None for leaves.
    value : float or None
        The value of the leaf, as computed in finalize_leaf(). None for
        non-leaf nodes.
    partition_start : int
        Start position of the node's sample_indices in splitter.partition.
    partition_stop : int
        Stop position of the node's sample_indices in splitter.partition.
    """

    split_info = None
    left_child = None
    right_child = None
    value = None
    histograms = None
    sibling = None
    parent = None

    # Start and stop indices of the node in the splitter.partition
    # array. Concretely,
    # self.sample_indices = view(self.splitter.partition[start:stop])
    # Please see the comments about splitter.partition and
    # splitter.split_indices for more info about this design.
    # These 2 attributes are only used in _update_raw_prediction, because we
    # need to iterate over the leaves and I don't know how to efficiently
    # store the sample_indices views because they're all of different sizes.
    partition_start = 0
    partition_stop = 0

    def __init__(self, depth, sample_indices, sum_gradients,
                 sum_hessians, parent=None):
        self.depth = depth
        self.sample_indices = sample_indices
        self.n_samples = sample_indices.shape[0]
        self.sum_gradients = sum_gradients
        self.sum_hessians = sum_hessians
        self.parent = parent

    def __lt__(self, other_node):
        """Comparison for priority queue.

        Nodes with high gain are higher priority than nodes with low gain.

        heapq.heappush only needs the '<' operator.
        heapq.heappop takes the smallest item first (smaller is higher
        priority).

        Parameters
        ----------
        other_node : TreeNode
            The node to compare with.
        """
        return self.split_info.gain > other_node.split_info.gain


class TreeGrower:
    """Tree grower class used to build a tree.

    The tree is fitted to predict the values of a Newton-Raphson step. The
    splits are considered in a best-first fashion, and the quality of a
    split is defined in splitting._split_gain.

    Parameters
    ----------
    X_binned : ndarray of int, shape (n_samples, n_features)
        The binned input samples. Must be Fortran-contiguous.
    gradients : ndarray, shape (n_samples,)
        The gradients of each training sample. Those are the gradients of the
        loss w.r.t the predictions, evaluated at iteration ``i - 1``.
    hessians : ndarray, shape (n_samples,)
        The hessians of each training sample. Those are the hessians of the
        loss w.r.t the predictions, evaluated at iteration ``i - 1``.
    max_leaf_nodes : int or None, optional (default=None)
        The maximum number of leaves for each tree. If None, there is no
        maximum limit.
    max_depth : int or None, optional (default=None)
        The maximum depth of each tree. The depth of a tree is the number of
        nodes to go from the root to the deepest leaf.
    min_samples_leaf : int, optional (default=20)
        The minimum number of samples per leaf.
    min_gain_to_split : float, optional (default=0.)
        The minimum gain needed to split a node. Splits with lower gain will
        be ignored.
    max_bins : int, optional (default=256)
        The maximum number of bins. Used to define the shape of the
        histograms.
    actual_n_bins : ndarray of int or int, optional (default=None)
        The actual number of bins needed for each feature, which is lower or
        equal to ``max_bins``. If it's an int, all features are considered to
        have the same number of bins. If None, all features are considered to
        have ``max_bins`` bins.
    l2_regularization : float, optional (default=0)
        The L2 regularization parameter.
    min_hessian_to_split : float, optional (default=1e-3)
        The minimum sum of hessians needed in each node. Splits that result in
        at least one child having a sum of hessians less than
        ``min_hessian_to_split`` are discarded.
    shrinkage : float, optional (default=1)
        The shrinkage parameter to apply to the leaves values, also known as
        learning rate.
    """
    def __init__(self, X_binned, gradients, hessians, max_leaf_nodes=None,
                 max_depth=None, min_samples_leaf=20, min_gain_to_split=0.,
                 max_bins=256, actual_n_bins=None, l2_regularization=0.,
                 min_hessian_to_split=1e-3, shrinkage=1.):

        self._validate_parameters(X_binned, max_leaf_nodes, max_depth,
                                  min_samples_leaf, min_gain_to_split,
                                  l2_regularization, min_hessian_to_split)

        if actual_n_bins is None:
            actual_n_bins = max_bins

        if isinstance(actual_n_bins, numbers.Integral):
            actual_n_bins = np.array(
                [actual_n_bins] * X_binned.shape[1],
                dtype=np.uint32)
        else:
            actual_n_bins = np.asarray(actual_n_bins, dtype=np.uint32)

        hessians_are_constant = hessians.shape[0] == 1
        self.histogram_builder = HistogramBuilder(
            X_binned, max_bins, gradients, hessians, hessians_are_constant)
        self.splitter = Splitter(
            X_binned, max_bins, actual_n_bins, l2_regularization,
            min_hessian_to_split, min_samples_leaf, min_gain_to_split,
            hessians_are_constant)
        self.max_leaf_nodes = max_leaf_nodes
        self.max_bins = max_bins
        self.n_features = X_binned.shape[1]
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.X_binned = X_binned
        self.min_gain_to_split = min_gain_to_split
        self.shrinkage = shrinkage
        self.splittable_nodes = []
        self.finalized_leaves = []
        self.total_find_split_time = 0.  # time spent finding the best splits
        self.total_compute_hist_time = 0.  # time spent computing histograms
        self.total_apply_split_time = 0.  # time spent splitting nodes
        self._initialize_root(gradients, hessians, hessians_are_constant)
        self.n_nodes = 1

    def _validate_parameters(self, X_binned, max_leaf_nodes, max_depth,
                             min_samples_leaf, min_gain_to_split,
                             l2_regularization, min_hessian_to_split):
        """Validate parameters passed to __init__.

        Also validate parameters passed to splitter.
        """
        if X_binned.dtype != np.uint8:
            raise NotImplementedError(
                "X_binned must be of type uint8.")
        if not X_binned.flags.f_contiguous:
            raise ValueError(
                "X_binned should be passed as Fortran contiguous "
                "array for maximum efficiency.")
        if max_leaf_nodes is not None and max_leaf_nodes <= 1:
            raise ValueError('max_leaf_nodes={} should not be'
                             ' smaller than 2'.format(max_leaf_nodes))
        if max_depth is not None and max_depth <= 1:
            raise ValueError('max_depth={} should not be'
                             ' smaller than 2'.format(max_depth))
        if min_samples_leaf < 1:
            raise ValueError('min_samples_leaf={} should '
                             'not be smaller than 1'.format(min_samples_leaf))
        if min_gain_to_split < 0:
            raise ValueError('min_gain_to_split={} '
                             'must be positive.'.format(min_gain_to_split))
        if l2_regularization < 0:
            raise ValueError('l2_regularization={} must be '
                             'positive.'.format(l2_regularization))
        if min_hessian_to_split < 0:
            raise ValueError('min_hessian_to_split={} '
                             'must be positive.'.format(min_hessian_to_split))

    def grow(self):
        """Grow the tree, from root to leaves."""
        while self.splittable_nodes:
            self.split_next()

    def _initialize_root(self, gradients, hessians, hessians_are_constant):
        """Initialize root node and finalize it if needed."""
        n_samples = self.X_binned.shape[0]
        depth = 0
        sum_gradients = sum_parallel(gradients)
        if self.histogram_builder.hessians_are_constant:
            sum_hessians = hessians[0] * n_samples
        else:
            sum_hessians = sum_parallel(hessians)
        self.root = TreeNode(
            depth=depth,
            sample_indices=self.splitter.partition,
            sum_gradients=sum_gradients,
            sum_hessians=sum_hessians
        )

        self.root.partition_start = 0
        self.root.partition_stop = n_samples

        if self.root.n_samples < 2 * self.min_samples_leaf:
            # Do not even bother computing any splitting statistics.
            self._finalize_leaf(self.root)
            return
        if sum_hessians < self.splitter.min_hessian_to_split:
            self._finalize_leaf(self.root)
            return

        self.root.histograms = self.histogram_builder.compute_histograms_brute(
            self.root.sample_indices)
        self._compute_best_split_and_push(self.root)

    def _compute_best_split_and_push(self, node):
        """Compute the best possible split (SplitInfo) of a given node.

        Also push it in the heap of splittable nodes if gain isn't zero.
        The gain of a node is 0 if either all the leaves are pure
        (best gain = 0), or if no split would satisfy the constraints
        (min_hessian_to_split, min_gain_to_split, min_samples_leaf).
        """

        node.split_info = self.splitter.find_node_split(
            node.sample_indices, node.histograms, node.sum_gradients,
            node.sum_hessians)

        if node.split_info.gain <= 0:  # no valid split
            self._finalize_leaf(node)
        else:
            heappush(self.splittable_nodes, node)

    def split_next(self):
        """Split the node with highest potential gain.

        Returns
        -------
        left : TreeNode
            The resulting left child.
        right : TreeNode
            The resulting right child.
        """
        # Consider the node with the highest loss reduction (a.k.a. gain)
        node = heappop(self.splittable_nodes)

        tic = time()
        (sample_indices_left,
         sample_indices_right,
         right_child_pos) = self.splitter.split_indices(node.split_info,
                                                        node.sample_indices)
        self.total_apply_split_time += time() - tic

        depth = node.depth + 1
        n_leaf_nodes = len(self.finalized_leaves) + len(self.splittable_nodes)
        n_leaf_nodes += 2

        left_child_node = TreeNode(depth,
                                   sample_indices_left,
                                   node.split_info.sum_gradient_left,
                                   node.split_info.sum_hessian_left,
                                   parent=node)
        right_child_node = TreeNode(depth,
                                    sample_indices_right,
                                    node.split_info.sum_gradient_right,
                                    node.split_info.sum_hessian_right,
                                    parent=node)
        left_child_node.sibling = right_child_node
        right_child_node.sibling = left_child_node
        node.right_child = right_child_node
        node.left_child = left_child_node

        # set start and stop indices
        left_child_node.partition_start = node.partition_start
        left_child_node.partition_stop = node.partition_start + right_child_pos
        right_child_node.partition_start = left_child_node.partition_stop
        right_child_node.partition_stop = node.partition_stop

        self.n_nodes += 2

        if self.max_depth is not None and depth == self.max_depth:
            self._finalize_leaf(left_child_node)
            self._finalize_leaf(right_child_node)
            return left_child_node, right_child_node

        if (self.max_leaf_nodes is not None
                and n_leaf_nodes == self.max_leaf_nodes):
            self._finalize_leaf(left_child_node)
            self._finalize_leaf(right_child_node)
            self._finalize_splittable_nodes()
            return left_child_node, right_child_node

        if left_child_node.n_samples < self.min_samples_leaf * 2:
            self._finalize_leaf(left_child_node)
        if right_child_node.n_samples < self.min_samples_leaf * 2:
            self._finalize_leaf(right_child_node)

        # Compute histograms of children, and compute their best possible
        # split (if needed)
        should_split_left = left_child_node.value is None  # node isn't a leaf
        should_split_right = right_child_node.value is None
        if should_split_left or should_split_right:

            # We will compute the histograms of both nodes even if one of them
            # is a leaf, since computing the second histogram is very cheap
            # (using histogram subtraction).
            n_samples_left = left_child_node.sample_indices.shape[0]
            n_samples_right = right_child_node.sample_indices.shape[0]
            if n_samples_left < n_samples_right:
                smallest_child = left_child_node
                largest_child = right_child_node
            else:
                smallest_child = right_child_node
                largest_child = left_child_node

            # We use the brute O(n_samples) method on the child that has the
            # smallest number of samples, and the subtraction trick O(n_bins)
            # on the other one.
            tic = time()
            smallest_child.histograms = \
                self.histogram_builder.compute_histograms_brute(
                    smallest_child.sample_indices)
            largest_child.histograms = \
                self.histogram_builder.compute_histograms_subtraction(
                    node.histograms, smallest_child.histograms)
            self.total_compute_hist_time += time() - tic

            tic = time()
            if should_split_left:
                self._compute_best_split_and_push(left_child_node)
            if should_split_right:
                self._compute_best_split_and_push(right_child_node)
            self.total_find_split_time += time() - tic

        return left_child_node, right_child_node

    def _finalize_leaf(self, node):
        """Compute the prediction value that minimizes the objective function.

        This sets the node.value attribute (node is a leaf iff node.value is
        not None).

        See Equation 5 of:
        XGBoost: A Scalable Tree Boosting System, T. Chen, C. Guestrin, 2016
        https://arxiv.org/abs/1603.02754
        """
        node.value = -self.shrinkage * node.sum_gradients / (
            node.sum_hessians + self.splitter.l2_regularization)
        self.finalized_leaves.append(node)

    def _finalize_splittable_nodes(self):
        """Transform all splittable nodes into leaves.

        Used when some constraint is met, e.g. maximum number of leaves or
        maximum depth."""
        while len(self.splittable_nodes) > 0:
            node = self.splittable_nodes.pop()
            self._finalize_leaf(node)

    def make_predictor(self, bin_thresholds=None):
        """Make a TreePredictor object out of the current tree.

        Parameters
        ----------
        bin_thresholds : array-like of floats, optional (default=None)
            The actual threshold values of each bin.

        Returns
        -------
        A TreePredictor object.
        """
        predictor_nodes = np.zeros(self.n_nodes, dtype=PREDICTOR_RECORD_DTYPE)
        _fill_predictor_node_array(predictor_nodes, self.root,
                                   bin_thresholds=bin_thresholds)
        return TreePredictor(predictor_nodes)


def _fill_predictor_node_array(predictor_nodes, grower_node,
                               bin_thresholds, next_free_idx=0):
    """Helper used in make_predictor to set the TreePredictor fields."""
    node = predictor_nodes[next_free_idx]
    node['count'] = grower_node.n_samples
    node['depth'] = grower_node.depth
    if grower_node.split_info is not None:
        node['gain'] = grower_node.split_info.gain
    else:
        node['gain'] = -1

    if grower_node.value is not None:
        # Leaf node
        node['is_leaf'] = True
        node['value'] = grower_node.value
        return next_free_idx + 1
    else:
        # Decision node
        split_info = grower_node.split_info
        feature_idx, bin_idx = split_info.feature_idx, split_info.bin_idx
        node['feature_idx'] = feature_idx
        node['bin_threshold'] = bin_idx
        if bin_thresholds is not None:
            threshold = bin_thresholds[feature_idx][bin_idx]
            node['threshold'] = threshold
        next_free_idx += 1

        node['left'] = next_free_idx
        next_free_idx = _fill_predictor_node_array(
            predictor_nodes, grower_node.left_child,
            bin_thresholds=bin_thresholds, next_free_idx=next_free_idx)

        node['right'] = next_free_idx
        return _fill_predictor_node_array(
            predictor_nodes, grower_node.right_child,
            bin_thresholds=bin_thresholds, next_free_idx=next_free_idx)
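As a rough illustration of the grower's contract (a sketch, not code from the PR: the input must be pre-binned into a Fortran-contiguous uint8 array, the float32 gradient/hessian dtype is assumed to match `G_H_DTYPE`, and the compiled `histogram`/`splitting` extensions must be built):

import numpy as np

# Hedged sketch: grow a single tree on toy pre-binned data, using the
# constant-hessian path (hessians of shape (1,), as with least squares).
rng = np.random.RandomState(0)
X_binned = np.asfortranarray(
    rng.randint(0, 256, size=(100, 3)), dtype=np.uint8)
gradients = rng.normal(size=100).astype(np.float32)
hessians = np.ones(1, dtype=np.float32)

grower = TreeGrower(X_binned, gradients, hessians,
                    max_leaf_nodes=8, min_samples_leaf=5)
grower.grow()                        # split best-first until nothing is splittable
predictor = grower.make_predictor()  # compact TreePredictor for inference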

sklearn/ensemble/setup.py
@@ -4,12 +4,49 @@
 
 def configuration(parent_package="", top_path=None):
     config = Configuration("ensemble", parent_package, top_path)
+
     config.add_extension("_gradient_boosting",
                          sources=["_gradient_boosting.pyx"],
                          include_dirs=[numpy.get_include()])
 
     config.add_subpackage("tests")
 
+    # Histogram-based gradient boosting files
+    config.add_extension(
+        "_hist_gradient_boosting._gradient_boosting",
+        sources=["_hist_gradient_boosting/_gradient_boosting.pyx"],
+        include_dirs=[numpy.get_include()])
+
+    config.add_extension("_hist_gradient_boosting.histogram",
+                         sources=["_hist_gradient_boosting/histogram.pyx"],
+                         include_dirs=[numpy.get_include()])
+
+    config.add_extension("_hist_gradient_boosting.splitting",
+                         sources=["_hist_gradient_boosting/splitting.pyx"],
+                         include_dirs=[numpy.get_include()])
+
+    config.add_extension("_hist_gradient_boosting._binning",
+                         sources=["_hist_gradient_boosting/_binning.pyx"],
+                         include_dirs=[numpy.get_include()])
+
+    config.add_extension("_hist_gradient_boosting._predictor",
+                         sources=["_hist_gradient_boosting/_predictor.pyx"],
+                         include_dirs=[numpy.get_include()])
+
+    config.add_extension("_hist_gradient_boosting._loss",
+                         sources=["_hist_gradient_boosting/_loss.pyx"],
+                         include_dirs=[numpy.get_include()])
+
+    config.add_extension("_hist_gradient_boosting.types",
+                         sources=["_hist_gradient_boosting/types.pyx"],
+                         include_dirs=[numpy.get_include()])
+
+    config.add_extension("_hist_gradient_boosting.utils",
+                         sources=["_hist_gradient_boosting/utils.pyx"],
+                         include_dirs=[numpy.get_include()])
+
+    config.add_subpackage("_hist_gradient_boosting.tests")
+
     return config
 
 
 if __name__ == "__main__":


Showing 61 files with coverage changes.

Changes in sklearn/neighbors/setup.py: -13 +13
Changes in sklearn/preprocessing/setup.py: -9 +9
Changes in sklearn/feature_extraction/setup.py: -10 +10
Changes in sklearn/utils/setup.py: -25 +25
Changes in sklearn/tree/setup.py: -14 +14
Changes in sklearn/ensemble/partial_dependence.py: -99 +99
Changes in sklearn/manifold/setup.py: -9 +9
Changes in sklearn/datasets/setup.py: -10 +10
Changes in sklearn/metrics/setup.py: -8 +8
Changes in sklearn/tests/test_docstring_parameters.py: -46 +46
Changes in sklearn/decomposition/setup.py: -8 +8
Changes in sklearn/linear_model/setup.py: -13 +13
Changes in sklearn/metrics/cluster/setup.py: -7 +7
Changes in sklearn/inspection/partial_dependence.py: -107 +107
Changes in sklearn/__check_build/setup.py: -4 +4
Changes in sklearn/inspection/tests/test_partial_dependence.py: -73 +73
Changes in sklearn/ensemble/tests/test_partial_dependence.py: -41 +41
Changes in sklearn/_build_utils/__init__.py: -15 +15
Changes in sklearn/utils/fixes.py: -31 +31
Changes in sklearn/compose/tests/test_column_transformer.py: -96 +96
Changes in sklearn/tree/export.py: -58 +58
Changes in sklearn/utils/testing.py: -47 +47
Changes in sklearn/manifold/spectral_embedding_.py: -15 +15
Changes in sklearn/utils/tests/test_testing.py: -19 +19
Changes in sklearn/_build_utils/openmp_helpers.py: -4 +4
Changes in sklearn/compose/_column_transformer.py: -16 +16
Changes in sklearn/preprocessing/tests/test_encoders.py: -29 +29
Changes in sklearn/utils/tests/test_utils.py: -14 +14
Changes in sklearn/tree/tests/test_export.py: -8 +8
Changes in sklearn/utils/tests/test_validation.py: -25 +25
Changes in sklearn/preprocessing/tests/test_function_transformer.py: -4 +4
Changes in sklearn/manifold/tests/test_spectral_embedding.py: -5 +5
Changes in sklearn/cluster/tests/test_spectral.py: -3 +3
Changes in sklearn/tree/_reingold_tilford.py: -3 +3
Changes in sklearn/tests/test_impute.py: -12 +12
Changes in sklearn/utils/tests/test_multiclass.py: -3 +3
Changes in sklearn/utils/deprecation.py: -1 +1
Changes in sklearn/svm/classes.py: -1 +1
Changes in sklearn/utils/__init__.py: -2 +2
Changes in sklearn/metrics/pairwise.py: -3 +3
Changes in sklearn/linear_model/ridge.py: -4 +4
Changes in sklearn/neighbors/tests/test_dist_metrics.py: -1 +1
Changes in sklearn/utils/validation.py: -2 +2
Changes in sklearn/utils/multiclass.py: -1 +1
Changes in sklearn/gaussian_process/gpr.py: -1 +1
Changes in sklearn/metrics/cluster/supervised.py: -1 +1
Changes in sklearn/preprocessing/_encoders.py: -2 +2
Changes in sklearn/model_selection/tests/test_validation.py: -5 +5
Changes in sklearn/impute.py: -1 +1
Changes in sklearn/utils/estimator_checks.py: -2 +1 +1
Changes in sklearn/model_selection/tests/test_split.py: -1 +1
Changes in sklearn/model_selection/tests/test_search.py: -1 +1
Changes in sklearn/metrics/tests/test_classification.py: -1 +1
setup.py
New file sklearn/ensemble/_hist_gradient_boosting/predictor.py
New file sklearn/ensemble/_hist_gradient_boosting/loss.py
New file sklearn/experimental/enable_hist_gradient_boosting.py
New file sklearn/ensemble/_hist_gradient_boosting/grower.py
New file sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
New file sklearn/experimental/tests/test_enable_hist_gradient_boosting.py
New file sklearn/ensemble/_hist_gradient_boosting/binning.py
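The sklearn/experimental/enable_hist_gradient_boosting.py module listed above gates the two new estimators behind an explicit opt-in import, so user code has to enable them before importing; a minimal usage sketch on synthetic data:

import numpy as np
# The opt-in import must happen before importing the estimators themselves.
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier

X = np.random.RandomState(0).uniform(size=(200, 2))
y = (X[:, 0] > 0.5).astype(int)
clf = HistGradientBoostingClassifier().fit(X, y)
print(clf.score(X, y))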

251 Commits