scikit-learn / scikit-learn

Compare 1fe00b5 ... +233 ... 8b1f603

Coverage Reach
sklearn/linear_model/tests/test_logistic.py sklearn/linear_model/tests/test_sgd.py sklearn/linear_model/tests/test_ridge.py sklearn/linear_model/tests/test_coordinate_descent.py sklearn/linear_model/tests/test_sag.py sklearn/linear_model/tests/test_least_angle.py sklearn/linear_model/tests/test_base.py sklearn/linear_model/tests/test_ransac.py sklearn/linear_model/tests/test_sparse_coordinate_descent.py sklearn/linear_model/tests/test_theil_sen.py sklearn/linear_model/tests/test_passive_aggressive.py sklearn/linear_model/tests/test_omp.py sklearn/linear_model/tests/test_huber.py sklearn/linear_model/tests/test_bayes.py sklearn/linear_model/tests/test_perceptron.py sklearn/linear_model/logistic.py sklearn/linear_model/ridge.py sklearn/linear_model/coordinate_descent.py sklearn/linear_model/stochastic_gradient.py sklearn/linear_model/least_angle.py sklearn/linear_model/omp.py sklearn/linear_model/base.py sklearn/linear_model/bayes.py sklearn/linear_model/ransac.py sklearn/linear_model/theil_sen.py sklearn/linear_model/huber.py sklearn/linear_model/sag.py sklearn/linear_model/passive_aggressive.py sklearn/linear_model/setup.py sklearn/linear_model/__init__.py sklearn/linear_model/perceptron.py sklearn/utils/tests/test_validation.py sklearn/utils/tests/test_extmath.py sklearn/utils/tests/test_sparsefuncs.py sklearn/utils/tests/test_estimator_checks.py sklearn/utils/tests/test_testing.py sklearn/utils/tests/test_pprint.py sklearn/utils/tests/test_utils.py sklearn/utils/tests/test_multiclass.py sklearn/utils/tests/test_class_weight.py sklearn/utils/tests/test_cython_blas.py sklearn/utils/tests/test_seq_dataset.py sklearn/utils/tests/test_random.py sklearn/utils/tests/test_shortest_path.py sklearn/utils/tests/test_murmurhash.py sklearn/utils/tests/test_fixes.py sklearn/utils/tests/test_metaestimators.py sklearn/utils/tests/test_deprecation.py sklearn/utils/tests/test_show_versions.py sklearn/utils/tests/test_fast_dict.py sklearn/utils/tests/test_linear_assignment.py 
sklearn/utils/tests/test_optimize.py sklearn/utils/estimator_checks.py sklearn/utils/testing.py sklearn/utils/validation.py sklearn/utils/_pprint.py sklearn/utils/sparsefuncs.py sklearn/utils/extmath.py sklearn/utils/__init__.py sklearn/utils/multiclass.py sklearn/utils/fixes.py sklearn/utils/linear_assignment_.py sklearn/utils/metaestimators.py sklearn/utils/_unittest_backport.py sklearn/utils/optimize.py sklearn/utils/class_weight.py sklearn/utils/mocking.py sklearn/utils/deprecation.py sklearn/utils/_show_versions.py sklearn/utils/random.py sklearn/utils/setup.py sklearn/utils/graph.py sklearn/utils/_joblib.py sklearn/utils/stats.py sklearn/ensemble/tests/test_gradient_boosting.py sklearn/ensemble/tests/test_forest.py sklearn/ensemble/tests/test_bagging.py sklearn/ensemble/tests/test_voting.py sklearn/ensemble/tests/test_weight_boosting.py sklearn/ensemble/tests/test_iforest.py sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py sklearn/ensemble/tests/test_partial_dependence.py sklearn/ensemble/tests/test_base.py sklearn/ensemble/gradient_boosting.py sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py sklearn/ensemble/_hist_gradient_boosting/grower.py sklearn/ensemble/_hist_gradient_boosting/loss.py sklearn/ensemble/_hist_gradient_boosting/binning.py sklearn/ensemble/_hist_gradient_boosting/predictor.py sklearn/ensemble/forest.py sklearn/ensemble/weight_boosting.py sklearn/ensemble/bagging.py sklearn/ensemble/_gb_losses.py sklearn/ensemble/partial_dependence.py sklearn/ensemble/iforest.py sklearn/ensemble/voting.py sklearn/ensemble/base.py sklearn/ensemble/__init__.py sklearn/ensemble/setup.py sklearn/metrics/tests/test_classification.py sklearn/metrics/tests/test_ranking.py sklearn/metrics/tests/test_pairwise.py sklearn/metrics/tests/test_common.py sklearn/metrics/tests/test_score_objects.py sklearn/metrics/tests/test_regression.py sklearn/metrics/cluster/tests/test_supervised.py sklearn/metrics/cluster/tests/test_unsupervised.py 
sklearn/metrics/cluster/tests/test_common.py sklearn/metrics/cluster/tests/test_bicluster.py sklearn/metrics/cluster/supervised.py sklearn/metrics/cluster/unsupervised.py sklearn/metrics/cluster/bicluster.py sklearn/metrics/cluster/__init__.py sklearn/metrics/cluster/setup.py sklearn/metrics/classification.py sklearn/metrics/pairwise.py sklearn/metrics/ranking.py sklearn/metrics/scorer.py sklearn/metrics/regression.py sklearn/metrics/__init__.py sklearn/metrics/base.py sklearn/metrics/setup.py sklearn/preprocessing/tests/test_data.py sklearn/preprocessing/tests/test_encoders.py sklearn/preprocessing/tests/test_label.py sklearn/preprocessing/tests/test_imputation.py sklearn/preprocessing/tests/test_discretization.py sklearn/preprocessing/tests/test_common.py sklearn/preprocessing/tests/test_function_transformer.py sklearn/preprocessing/tests/test_base.py sklearn/preprocessing/data.py sklearn/preprocessing/_encoders.py sklearn/preprocessing/label.py sklearn/preprocessing/imputation.py sklearn/preprocessing/_discretization.py sklearn/preprocessing/_function_transformer.py sklearn/preprocessing/base.py sklearn/preprocessing/__init__.py sklearn/preprocessing/setup.py sklearn/tests/test_pipeline.py sklearn/tests/test_impute.py sklearn/tests/test_multiclass.py sklearn/tests/test_dummy.py sklearn/tests/test_naive_bayes.py sklearn/tests/test_multioutput.py sklearn/tests/test_base.py sklearn/tests/test_isotonic.py sklearn/tests/test_discriminant_analysis.py sklearn/tests/test_calibration.py sklearn/tests/test_random_projection.py sklearn/tests/test_kernel_approximation.py sklearn/tests/test_common.py sklearn/tests/test_metaestimators.py sklearn/tests/test_docstring_parameters.py sklearn/tests/test_kernel_ridge.py sklearn/tests/test_config.py sklearn/tests/test_init.py sklearn/tests/test_site_joblib.py sklearn/tests/test_check_build.py sklearn/model_selection/tests/test_validation.py sklearn/model_selection/tests/test_search.py sklearn/model_selection/tests/test_split.py 
sklearn/model_selection/tests/common.py sklearn/model_selection/_split.py sklearn/model_selection/_validation.py sklearn/model_selection/_search.py sklearn/model_selection/__init__.py sklearn/decomposition/tests/test_pca.py sklearn/decomposition/tests/test_nmf.py sklearn/decomposition/tests/test_dict_learning.py sklearn/decomposition/tests/test_online_lda.py sklearn/decomposition/tests/test_incremental_pca.py sklearn/decomposition/tests/test_fastica.py sklearn/decomposition/tests/test_sparse_pca.py sklearn/decomposition/tests/test_kernel_pca.py sklearn/decomposition/tests/test_truncated_svd.py sklearn/decomposition/tests/test_factor_analysis.py sklearn/decomposition/nmf.py sklearn/decomposition/dict_learning.py sklearn/decomposition/online_lda.py sklearn/decomposition/fastica_.py sklearn/decomposition/pca.py sklearn/decomposition/factor_analysis.py sklearn/decomposition/kernel_pca.py sklearn/decomposition/sparse_pca.py sklearn/decomposition/incremental_pca.py sklearn/decomposition/truncated_svd.py sklearn/decomposition/base.py sklearn/decomposition/setup.py sklearn/decomposition/__init__.py sklearn/cluster/tests/test_k_means.py sklearn/cluster/tests/test_hierarchical.py sklearn/cluster/tests/test_dbscan.py sklearn/cluster/tests/test_bicluster.py sklearn/cluster/tests/test_optics.py sklearn/cluster/tests/test_spectral.py sklearn/cluster/tests/test_birch.py sklearn/cluster/tests/test_affinity_propagation.py sklearn/cluster/tests/test_mean_shift.py sklearn/cluster/tests/test_feature_agglomeration.py sklearn/cluster/tests/common.py sklearn/cluster/k_means_.py sklearn/cluster/hierarchical.py sklearn/cluster/birch.py sklearn/cluster/bicluster.py sklearn/cluster/affinity_propagation_.py sklearn/cluster/mean_shift_.py sklearn/cluster/spectral.py sklearn/cluster/optics_.py sklearn/cluster/dbscan_.py sklearn/cluster/_feature_agglomeration.py sklearn/cluster/setup.py sklearn/cluster/__init__.py sklearn/datasets/tests/test_openml.py 
sklearn/datasets/tests/test_svmlight_format.py sklearn/datasets/tests/test_samples_generator.py sklearn/datasets/tests/test_base.py sklearn/datasets/tests/test_lfw.py sklearn/datasets/tests/test_mldata.py sklearn/datasets/tests/test_20news.py sklearn/datasets/tests/test_rcv1.py sklearn/datasets/tests/test_kddcup99.py sklearn/datasets/tests/test_covtype.py sklearn/datasets/tests/test_california_housing.py sklearn/datasets/tests/test_common.py sklearn/datasets/samples_generator.py sklearn/datasets/openml.py sklearn/datasets/base.py sklearn/datasets/twenty_newsgroups.py sklearn/datasets/lfw.py sklearn/datasets/svmlight_format.py sklearn/datasets/rcv1.py sklearn/datasets/kddcup99.py sklearn/datasets/mldata.py sklearn/datasets/species_distributions.py sklearn/datasets/covtype.py sklearn/datasets/__init__.py sklearn/datasets/california_housing.py sklearn/datasets/olivetti_faces.py sklearn/datasets/setup.py sklearn/neighbors/tests/test_neighbors.py sklearn/neighbors/tests/test_nca.py sklearn/neighbors/tests/test_ball_tree.py sklearn/neighbors/tests/test_kd_tree.py sklearn/neighbors/tests/test_kde.py sklearn/neighbors/tests/test_lof.py sklearn/neighbors/tests/test_dist_metrics.py sklearn/neighbors/tests/test_nearest_centroid.py sklearn/neighbors/tests/test_quad_tree.py sklearn/neighbors/base.py sklearn/neighbors/nca.py sklearn/neighbors/classification.py sklearn/neighbors/kde.py sklearn/neighbors/lof.py sklearn/neighbors/nearest_centroid.py sklearn/neighbors/regression.py sklearn/neighbors/graph.py sklearn/neighbors/setup.py sklearn/neighbors/__init__.py sklearn/neighbors/unsupervised.py sklearn/tree/tests/test_tree.py sklearn/tree/tests/test_export.py sklearn/tree/tests/test_reingold_tilford.py sklearn/tree/export.py sklearn/tree/tree.py sklearn/tree/_reingold_tilford.py sklearn/tree/setup.py sklearn/tree/__init__.py sklearn/feature_extraction/tests/test_text.py sklearn/feature_extraction/tests/test_image.py sklearn/feature_extraction/tests/test_feature_hasher.py 
sklearn/feature_extraction/tests/test_dict_vectorizer.py sklearn/feature_extraction/text.py sklearn/feature_extraction/image.py sklearn/feature_extraction/dict_vectorizer.py sklearn/feature_extraction/hashing.py sklearn/feature_extraction/setup.py sklearn/feature_extraction/__init__.py sklearn/feature_extraction/stop_words.py sklearn/feature_selection/tests/test_feature_select.py sklearn/feature_selection/tests/test_rfe.py sklearn/feature_selection/tests/test_from_model.py sklearn/feature_selection/tests/test_mutual_info.py sklearn/feature_selection/tests/test_base.py sklearn/feature_selection/tests/test_chi2.py sklearn/feature_selection/tests/test_variance_threshold.py sklearn/feature_selection/univariate_selection.py sklearn/feature_selection/rfe.py sklearn/feature_selection/mutual_info_.py sklearn/feature_selection/from_model.py sklearn/feature_selection/base.py sklearn/feature_selection/variance_threshold.py sklearn/feature_selection/__init__.py sklearn/manifold/tests/test_t_sne.py sklearn/manifold/tests/test_spectral_embedding.py sklearn/manifold/tests/test_locally_linear.py sklearn/manifold/tests/test_isomap.py sklearn/manifold/tests/test_mds.py sklearn/manifold/t_sne.py sklearn/manifold/locally_linear.py sklearn/manifold/spectral_embedding_.py sklearn/manifold/mds.py sklearn/manifold/isomap.py sklearn/manifold/setup.py sklearn/manifold/__init__.py sklearn/gaussian_process/kernels.py sklearn/gaussian_process/tests/test_gpr.py sklearn/gaussian_process/tests/test_kernels.py sklearn/gaussian_process/tests/test_gpc.py sklearn/gaussian_process/gpc.py sklearn/gaussian_process/gpr.py sklearn/gaussian_process/correlation_models.py sklearn/gaussian_process/regression_models.py sklearn/gaussian_process/__init__.py sklearn/mixture/tests/test_gaussian_mixture.py sklearn/mixture/tests/test_bayesian_mixture.py sklearn/mixture/tests/test_mixture.py sklearn/mixture/gaussian_mixture.py sklearn/mixture/base.py sklearn/mixture/bayesian_mixture.py sklearn/mixture/__init__.py 
sklearn/svm/tests/test_svm.py sklearn/svm/tests/test_sparse.py sklearn/svm/tests/test_bounds.py sklearn/svm/base.py sklearn/svm/classes.py sklearn/svm/setup.py sklearn/svm/bounds.py sklearn/svm/__init__.py sklearn/neural_network/tests/test_mlp.py sklearn/neural_network/tests/test_rbm.py sklearn/neural_network/tests/test_stochastic_optimizers.py sklearn/neural_network/multilayer_perceptron.py sklearn/neural_network/rbm.py sklearn/neural_network/_stochastic_optimizers.py sklearn/neural_network/_base.py sklearn/neural_network/__init__.py sklearn/covariance/tests/test_covariance.py sklearn/covariance/tests/test_graphical_lasso.py sklearn/covariance/tests/test_graph_lasso.py sklearn/covariance/tests/test_robust_covariance.py sklearn/covariance/tests/test_elliptic_envelope.py sklearn/covariance/graph_lasso_.py sklearn/covariance/robust_covariance.py sklearn/covariance/shrunk_covariance_.py sklearn/covariance/empirical_covariance_.py sklearn/covariance/elliptic_envelope.py sklearn/covariance/__init__.py sklearn/compose/tests/test_column_transformer.py sklearn/compose/tests/test_target.py sklearn/compose/_column_transformer.py sklearn/compose/_target.py sklearn/compose/__init__.py sklearn/impute.py sklearn/cross_decomposition/pls_.py sklearn/cross_decomposition/tests/test_pls.py sklearn/cross_decomposition/cca_.py sklearn/cross_decomposition/__init__.py sklearn/naive_bayes.py sklearn/pipeline.py sklearn/multiclass.py sklearn/discriminant_analysis.py sklearn/semi_supervised/label_propagation.py sklearn/semi_supervised/tests/test_label_propagation.py sklearn/semi_supervised/__init__.py sklearn/base.py sklearn/multioutput.py sklearn/dummy.py sklearn/calibration.py sklearn/kernel_approximation.py sklearn/isotonic.py sklearn/_build_utils/openmp_helpers.py sklearn/_build_utils/__init__.py sklearn/random_projection.py sklearn/setup.py sklearn/experimental/tests/test_enable_hist_gradient_boosting.py sklearn/experimental/enable_hist_gradient_boosting.py sklearn/kernel_ridge.py 
sklearn/__init__.py sklearn/__check_build/__init__.py sklearn/__check_build/setup.py sklearn/_config.py sklearn/exceptions.py setup.py

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.

Showing 12 of 38 files from the diff.

@@ -0,0 +1,80 @@
Loading
1 +
"""
2 +
This module contains the TreePredictor class which is used for prediction.
3 +
"""
4 +
# Author: Nicolas Hug
5 +
6 +
import numpy as np
7 +
8 +
from .types import X_DTYPE
9 +
from .types import Y_DTYPE
10 +
from .types import X_BINNED_DTYPE
11 +
from ._predictor import _predict_from_numeric_data
12 +
from ._predictor import _predict_from_binned_data
13 +
14 +
15 +
# Structured dtype describing a single node of a fitted predictor tree.
# Field meanings are assumptions from usage elsewhere in this package —
# confirm against the grower: 'left'/'right' look like child node indices,
# 'threshold' the raw split value and 'bin_threshold' its binned equivalent.
_PREDICTOR_NODE_FIELDS = [
    ('value', Y_DTYPE),
    ('count', np.uint32),
    ('feature_idx', np.uint32),
    ('threshold', X_DTYPE),
    ('left', np.uint32),
    ('right', np.uint32),
    ('gain', Y_DTYPE),
    ('depth', np.uint32),
    ('is_leaf', np.uint8),
    ('bin_threshold', X_BINNED_DTYPE),
]

PREDICTOR_RECORD_DTYPE = np.dtype(_PREDICTOR_NODE_FIELDS)
27 +
28 +
29 +
class TreePredictor:
    """Tree class used for predictions.

    Parameters
    ----------
    nodes : list of PREDICTOR_RECORD_DTYPE
        The nodes of the tree.
    """

    def __init__(self, nodes):
        self.nodes = nodes

    def get_n_leaf_nodes(self):
        """Return the number of leaf nodes in the tree."""
        # 'is_leaf' is stored as 0/1, so the sum counts the leaves.
        n_leaves = self.nodes['is_leaf'].sum()
        return int(n_leaves)

    def get_max_depth(self):
        """Return the maximum depth among all leaves."""
        depths = self.nodes['depth']
        return int(depths.max())

    def predict(self, X):
        """Predict raw values for non-binned (numeric) data.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        y : ndarray, shape (n_samples,)
            The raw predicted values.
        """
        # The helper fills ``raw`` in place, one value per sample.
        raw = np.empty(X.shape[0], dtype=Y_DTYPE)
        _predict_from_numeric_data(self.nodes, X, raw)
        return raw

    def predict_binned(self, X):
        """Predict raw values for binned data.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        y : ndarray, shape (n_samples,)
            The raw predicted values.
        """
        raw = np.empty(X.shape[0], dtype=Y_DTYPE)
        _predict_from_binned_data(self.nodes, X, raw)
        return raw

@@ -0,0 +1,249 @@
Loading
1 +
"""
2 +
This module contains the loss classes.
3 +
4 +
Specific losses are used for regression, binary classification or multiclass
5 +
classification.
6 +
"""
7 +
# Author: Nicolas Hug
8 +
9 +
from abc import ABC, abstractmethod

import numpy as np
from scipy.special import expit
# logsumexp was moved from scipy.misc to scipy.special in scipy 0.19;
# fall back to the old location for older scipy versions.
try:
    from scipy.special import logsumexp
except ImportError:
    from scipy.misc import logsumexp

from .types import Y_DTYPE
from .types import G_H_DTYPE
from ._loss import _update_gradients_least_squares
from ._loss import _update_gradients_hessians_binary_crossentropy
from ._loss import _update_gradients_hessians_categorical_crossentropy
23 +
24 +
25 +
class BaseLoss(ABC):
    """Base class for a loss."""

    def init_gradients_and_hessians(self, n_samples, prediction_dim):
        """Return initial gradients and hessians.

        Unless hessians are constant, arrays are initialized with undefined
        values.

        Parameters
        ----------
        n_samples : int
            The number of samples passed to `fit()`.
        prediction_dim : int
            The dimension of a raw prediction, i.e. the number of trees
            built at each iteration. Equals 1 for regression and binary
            classification, or K where K is the number of classes for
            multiclass classification.

        Returns
        -------
        gradients : ndarray, shape (prediction_dim, n_samples)
            The initial gradients. The array is not initialized.
        hessians : ndarray, shape (prediction_dim, n_samples)
            If hessians are constant (e.g. for ``LeastSquares`` loss), the
            array is initialized to ``1``. Otherwise, the array is allocated
            without being initialized.
        """
        shape = (prediction_dim, n_samples)
        gradients = np.empty(shape=shape, dtype=G_H_DTYPE)
        # Constant hessians are all treated as 1; this is correct as long as
        # the gradients are adjusted accordingly (see e.g. the least squares
        # loss), so a single (1, 1) array suffices in that case.
        hessians = (np.ones(shape=(1, 1), dtype=G_H_DTYPE)
                    if self.hessians_are_constant
                    else np.empty(shape=shape, dtype=G_H_DTYPE))
        return gradients, hessians

    @abstractmethod
    def get_baseline_prediction(self, y_train, prediction_dim):
        """Return initial predictions (before the first iteration).

        Parameters
        ----------
        y_train : ndarray, shape (n_samples,)
            The target training values.
        prediction_dim : int
            The dimension of one prediction: 1 for binary classification and
            regression, n_classes for multiclass classification.

        Returns
        -------
        baseline_prediction : float or ndarray, shape (1, prediction_dim)
            The baseline prediction.
        """

    @abstractmethod
    def update_gradients_and_hessians(self, gradients, hessians, y_true,
                                      raw_predictions):
        """Update gradients and hessians arrays, inplace.

        The gradients (resp. hessians) are the first (resp. second) order
        derivatives of the loss for each sample with respect to the
        predictions of model, evaluated at iteration ``i - 1``.

        Parameters
        ----------
        gradients : ndarray, shape (prediction_dim, n_samples)
            The gradients (treated as OUT array).
        hessians : ndarray, shape (prediction_dim, n_samples) or (1,)
            The hessians (treated as OUT array).
        y_true : ndarray, shape (n_samples,)
            The true target values of each training sample.
        raw_predictions : ndarray, shape (prediction_dim, n_samples)
            The raw_predictions (i.e. values from the trees) of the tree
            ensemble at iteration ``i - 1``.
        """
107 +
108 +
109 +
class LeastSquares(BaseLoss):
    """Least squares loss, for regression.

    For a given sample x_i, least squares loss is defined as::

        loss(x_i) = 0.5 * (y_true_i - raw_pred_i)**2

    This actually computes the *half* least squares loss, which simplifies
    the computation of the gradients, yields a unit hessian, and is
    consistent with what is done in LightGBM.
    """

    # Hessian of 0.5 * (y - p)**2 w.r.t. p is 1 for every sample.
    hessians_are_constant = True

    def __call__(self, y_true, raw_predictions, average=True):
        """Return the (per-sample or averaged) loss value."""
        # Flatten (1, n_samples) to (n_samples,); reshape(-1) is more likely
        # to return a view than a copy.
        residuals = y_true - raw_predictions.reshape(-1)
        loss = 0.5 * residuals ** 2
        if average:
            return loss.mean()
        return loss

    def get_baseline_prediction(self, y_train, prediction_dim):
        # The mean of the targets minimizes the least squares loss.
        return np.mean(y_train)

    @staticmethod
    def inverse_link_function(raw_predictions):
        # Identity link: raw predictions already live in the target space.
        return raw_predictions

    def update_gradients_and_hessians(self, gradients, hessians, y_true,
                                      raw_predictions):
        # Pass flat views so the update happens in place on the 2D arrays.
        flat_gradients = gradients.reshape(-1)
        flat_predictions = raw_predictions.reshape(-1)
        _update_gradients_least_squares(flat_gradients, y_true,
                                        flat_predictions)
144 +
145 +
146 +
class BinaryCrossEntropy(BaseLoss):
    """Binary cross-entropy loss, for binary classification.

    For a given sample x_i, the binary cross-entropy loss is defined as the
    negative log-likelihood of the model which can be expressed as::

        loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i

    See The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,
    section 4.4.1 (about logistic regression).
    """

    hessians_are_constant = False
    # Probabilities are obtained from raw values through the sigmoid.
    inverse_link_function = staticmethod(expit)

    def __call__(self, y_true, raw_predictions, average=True):
        """Return the (per-sample or averaged) loss value."""
        # Flatten (1, n_samples) to (n_samples,); reshape(-1) is more likely
        # to return a view than a copy.
        flat_predictions = raw_predictions.reshape(-1)
        # logaddexp(0, x) computes log(1 + exp(x)) in a stable way.
        loss = (np.logaddexp(0, flat_predictions)
                - y_true * flat_predictions)
        if average:
            return loss.mean()
        return loss

    def get_baseline_prediction(self, y_train, prediction_dim):
        if prediction_dim > 2:
            raise ValueError(
                "loss='binary_crossentropy' is not defined for multiclass"
                " classification with n_classes=%d, use"
                " loss='categorical_crossentropy' instead" % prediction_dim)
        eps = np.finfo(y_train.dtype).eps
        # Clip so the logit below stays finite for all-0 or all-1 targets.
        proba_positive_class = np.clip(np.mean(y_train), eps, 1 - eps)
        # log(x / (1 - x)) is the logit, i.e. the inverse of the sigmoid and
        # the link function of the Binomial model.
        return np.log(proba_positive_class / (1 - proba_positive_class))

    def update_gradients_and_hessians(self, gradients, hessians, y_true,
                                      raw_predictions):
        # Pass flat views so the update happens in place on the 2D arrays.
        _update_gradients_hessians_binary_crossentropy(
            gradients.reshape(-1), hessians.reshape(-1), y_true,
            raw_predictions.reshape(-1))

    def predict_proba(self, raw_predictions):
        """Return class probabilities, shape (n_samples, 2)."""
        flat_predictions = raw_predictions.reshape(-1)
        proba = np.empty((flat_predictions.shape[0], 2), dtype=Y_DTYPE)
        proba[:, 1] = expit(flat_predictions)
        proba[:, 0] = 1 - proba[:, 1]
        return proba
200 +
201 +
202 +
class CategoricalCrossEntropy(BaseLoss):
    """Categorical cross-entropy loss, for multiclass classification.

    For a given sample x_i, the categorical cross-entropy loss is defined as
    the negative log-likelihood of the model and generalizes the binary
    cross-entropy to more than 2 classes.
    """

    hessians_are_constant = False

    def __call__(self, y_true, raw_predictions, average=True):
        """Return the (per-sample or averaged) loss value."""
        prediction_dim = raw_predictions.shape[0]
        # One-hot encode the targets, one row per class.
        one_hot_true = np.zeros_like(raw_predictions)
        for k in range(prediction_dim):
            one_hot_true[k, :] = (y_true == k)
        # logsumexp gives the (stable) log of the softmax normalizer.
        loss = (logsumexp(raw_predictions, axis=0) -
                (one_hot_true * raw_predictions).sum(axis=0))
        if average:
            return loss.mean()
        return loss

    def get_baseline_prediction(self, y_train, prediction_dim):
        eps = np.finfo(y_train.dtype).eps
        init_value = np.zeros(shape=(prediction_dim, 1), dtype=Y_DTYPE)
        for k in range(prediction_dim):
            # Clip so the log below stays finite for absent classes.
            proba_kth_class = np.clip(np.mean(y_train == k), eps, 1 - eps)
            init_value[k, :] += np.log(proba_kth_class)
        return init_value

    def update_gradients_and_hessians(self, gradients, hessians, y_true,
                                      raw_predictions):
        _update_gradients_hessians_categorical_crossentropy(
            gradients, hessians, y_true, raw_predictions)

    def predict_proba(self, raw_predictions):
        """Return class probabilities, shape (n_samples, prediction_dim)."""
        # TODO: This could be done in parallel
        # Softmax computed as exp(log(softmax)) = exp(x - logsumexp(x)).
        log_normalizer = logsumexp(raw_predictions, axis=0)[np.newaxis, :]
        proba = np.exp(raw_predictions - log_normalizer)
        return proba.T
243 +
244 +
245 +
# Map the public loss-name strings to their implementing classes.
_LOSSES = {
    'least_squares': LeastSquares,
    'binary_crossentropy': BinaryCrossEntropy,
    'categorical_crossentropy': CategoricalCrossEntropy,
}

@@ -43,6 +43,10 @@
Loading
43 43
    config.add_subpackage('preprocessing/tests')
44 44
    config.add_subpackage('semi_supervised')
45 45
    config.add_subpackage('semi_supervised/tests')
46 +
    config.add_subpackage('experimental')
47 +
    config.add_subpackage('experimental/tests')
48 +
    config.add_subpackage('ensemble/_hist_gradient_boosting')
49 +
    config.add_subpackage('ensemble/_hist_gradient_boosting/tests')
46 50
47 51
    # submodules which have their own setup.py
48 52
    config.add_subpackage('cluster')

@@ -87,6 +87,7 @@
Loading
87 87
               'naive_bayes', 'neighbors', 'neural_network', 'pipeline',
88 88
               'preprocessing', 'random_projection', 'semi_supervised',
89 89
               'svm', 'tree', 'discriminant_analysis', 'impute', 'compose',
90 +
               'experimental',
90 91
               # Non-modules:
91 92
               'clone', 'get_config', 'set_config', 'config_context',
92 93
               'show_versions']

@@ -4,12 +4,49 @@
Loading
4 4
5 5
def configuration(parent_package="", top_path=None):
6 6
    config = Configuration("ensemble", parent_package, top_path)
7 +
7 8
    config.add_extension("_gradient_boosting",
8 9
                         sources=["_gradient_boosting.pyx"],
9 10
                         include_dirs=[numpy.get_include()])
10 11
11 12
    config.add_subpackage("tests")
12 13
14 +
    # Histogram-based gradient boosting files
15 +
    config.add_extension(
16 +
        "_hist_gradient_boosting._gradient_boosting",
17 +
        sources=["_hist_gradient_boosting/_gradient_boosting.pyx"],
18 +
        include_dirs=[numpy.get_include()])
19 +
20 +
    config.add_extension("_hist_gradient_boosting.histogram",
21 +
                         sources=["_hist_gradient_boosting/histogram.pyx"],
22 +
                         include_dirs=[numpy.get_include()])
23 +
24 +
    config.add_extension("_hist_gradient_boosting.splitting",
25 +
                         sources=["_hist_gradient_boosting/splitting.pyx"],
26 +
                         include_dirs=[numpy.get_include()])
27 +
28 +
    config.add_extension("_hist_gradient_boosting._binning",
29 +
                         sources=["_hist_gradient_boosting/_binning.pyx"],
30 +
                         include_dirs=[numpy.get_include()])
31 +
32 +
    config.add_extension("_hist_gradient_boosting._predictor",
33 +
                         sources=["_hist_gradient_boosting/_predictor.pyx"],
34 +
                         include_dirs=[numpy.get_include()])
35 +
36 +
    config.add_extension("_hist_gradient_boosting._loss",
37 +
                         sources=["_hist_gradient_boosting/_loss.pyx"],
38 +
                         include_dirs=[numpy.get_include()])
39 +
40 +
    config.add_extension("_hist_gradient_boosting.types",
41 +
                         sources=["_hist_gradient_boosting/types.pyx"],
42 +
                         include_dirs=[numpy.get_include()])
43 +
44 +
    config.add_extension("_hist_gradient_boosting.utils",
45 +
                         sources=["_hist_gradient_boosting/utils.pyx"],
46 +
                         include_dirs=[numpy.get_include()])
47 +
48 +
    config.add_subpackage("_hist_gradient_boosting.tests")
49 +
13 50
    return config
14 51
15 52
if __name__ == "__main__":

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Learn more Showing 8 files with coverage changes found.

Changes in setup.py
+4
Loading file...
New file sklearn/ensemble/_hist_gradient_boosting/predictor.py
New
Loading file...
New file sklearn/ensemble/_hist_gradient_boosting/loss.py
New
Loading file...
New file sklearn/experimental/enable_hist_gradient_boosting.py
New
Loading file...
New file sklearn/ensemble/_hist_gradient_boosting/grower.py
New
Loading file...
New file sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
New
Loading file...
New file sklearn/experimental/tests/test_enable_hist_gradient_boosting.py
New
Loading file...
New file sklearn/ensemble/_hist_gradient_boosting/binning.py
New
Loading file...

235 Commits

Hiding 234 contextual commits
Files Coverage
sklearn 0.01% 96.69%
setup.py -0.53% 7.02%
Project Totals (384 files) 96.61%
Loading