shaypal5 / skift
Showing 1 of 3 files from the diff.
Other files ignored by Codecov
README.rst has changed.

@@ -95,7 +95,7 @@
Loading
95 95
    def _input_col(self, X):
96 96
        pass  # pragma: no cover
97 97
98 -
    def fit(self, X, y):
98 +
    def fit(self, X, y, X_validation=None, y_validation=None):
99 99
        """Fits the classifier
100 100
101 101
        Parameters
@@ -104,6 +104,10 @@
Loading
104 104
            The training input samples.
105 105
        y : array-like, shape = [n_samples]
106 106
            The target values. An array of int.
107 +
        X_validation : array-like, shape = [n_samples, n_features]
108 +
            The validation input samples.
109 +
        y_validation : array-like, shape = [n_samples]
110 +
            The validation target values. An array of int.
107 111
108 112
        Returns
109 113
        -------
@@ -114,10 +118,16 @@
Loading
114 118
        self._validate_x(X)
115 119
        y = self._validate_y(y)
116 120
        input_col = self._input_col(X)
121 +
        if X_validation is not None:
122 +
            self._validate_x(X_validation)
123 +
            y_validation = self._validate_y(y_validation)
124 +
            input_col_validation = self._input_col(X_validation)
125 +
        else:
126 +
            input_col_validation = None
117 127
118 -
        return self._fit_input_col(input_col, y)
128 +
        return self._fit_input_col(input_col, y, input_col_validation, y_validation)
119 129
120 -
    def _fit_input_col(self, input_col, y):
130 +
    def _fit_input_col(self, input_col, y, input_col_validation=None, y_validation=None):
121 131
        # Store the classes seen during fit
122 132
        self.classes_ = unique_labels(y)
123 133
        self.num_classes_ = len(self.classes_)
@@ -126,9 +136,23 @@
Loading
126 136
        # Dump training set to a fasttext-compatible file
127 137
        temp_trainset_fpath = temp_dataset_fpath()
128 138
        dump_xy_to_fasttext_format(input_col, y, temp_trainset_fpath)
129 -
        # train
130 -
        self.model = train_supervised(
131 -
            input=temp_trainset_fpath, **self.kwargs)
139 +
        if input_col_validation is not None:
140 +
            n_classes_validation = len(unique_labels(y_validation))
141 +
            assert n_classes_validation == self.num_classes_,\
142 +
                "Number of validation classes doesn't match number of training classes"
143 +
            temp_trainset_fpath_validation = temp_dataset_fpath()
144 +
            dump_xy_to_fasttext_format(input_col_validation, y_validation, temp_trainset_fpath_validation)
145 +
            # train
146 +
            self.model = train_supervised(
147 +
                input=temp_trainset_fpath, **{'autotuneValidationFile': temp_trainset_fpath_validation, **self.kwargs})
148 +
            try:
149 +
                os.remove(temp_trainset_fpath_validation)
150 +
            except FileNotFoundError:  # pragma: no cover
151 +
                pass
152 +
        else:
153 +
            self.model = train_supervised(
154 +
                input=temp_trainset_fpath, **self.kwargs)
155 +
132 156
        # Return the classifier
133 157
        try:
134 158
            os.remove(temp_trainset_fpath)
@@ -372,7 +396,7 @@
Loading
372 396
    def _input_col(self, X):
373 397
        pass
374 398
375 -
    def fit(self, X, y):
399 +
    def fit(self, X, y, X_validation=None, y_validation=None):
376 400
        """Fits the classifier
377 401
378 402
        Parameters
@@ -381,6 +405,10 @@
Loading
381 405
            The training input samples.
382 406
        y : array-like, shape = [n_samples]
383 407
            The target values. An array of int.
408 +
        X_validation : pd.Series
409 +
            The validation input samples.
410 +
        y_validation : array-like, shape = [n_samples]
411 +
            The validation target values. An array of int.
384 412
385 413
        Returns
386 414
        -------
@@ -393,7 +421,15 @@
Loading
393 421
        except AttributeError:
394 422
            input_col = X
395 423
        y = self._validate_y(y)
396 -
        return self._fit_input_col(input_col, y)
424 +
        if X_validation is not None:
425 +
            try:
426 +
                input_col_validation = X_validation.values
427 +
            except AttributeError:
428 +
                input_col_validation = X_validation
429 +
            y_validation = self._validate_y(y_validation)
430 +
        else:
431 +
            input_col_validation = None
432 +
        return self._fit_input_col(input_col, y, input_col_validation, y_validation)
397 433
398 434
    def _predict(self, X, k=1):
399 435
        # Ensure that fit had been called
Files Coverage
skift 98.46%
Project Totals (2 files) 98.46%
1
codecov:
2
  notify:
3
      require_ci_to_pass: yes
4

5
coverage:
6
  precision: 2
7
  round: down
8
  range: "70...100"
9
  ignore: Tests
10
  status:
11
    patch:
12
      default:
13
        target: '80'
14
    project: false
15

16
comment:
17
  layout: header, changes, diff
18
  behavior: default
19
  require_changes: false
20
  branches: null
21
  flags: null
22
  paths: null
23

24
ignore:
25
  - "versioneer.py"
26
  - "tests"
27
  - "**/_version.py"
28
  - "**/__init__.py"
Sunburst
The innermost circle is the entire project; moving away from the center are folders and, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading