Allow sparse matrix input to visuals._format_cluster_statistics
Showing 1 of 2 files from the diff.
kmapper/visuals.py
changed.
Other files ignored by Codecov
test/test_visuals.py
has changed.
@@ -1,5 +1,6 @@
Loading
1 | 1 | # A small helper class to house functions needed by KeplerMapper.visualize |
|
2 | 2 | import numpy as np |
|
3 | + | import scipy.sparse |
|
3 | 4 | from sklearn import preprocessing |
|
4 | 5 | import json |
|
5 | 6 | from collections import defaultdict |
@@ -58,7 +59,7 @@
Loading
58 | 59 | ||
59 | 60 | ||
60 | 61 | def _colors_to_rgb(colorscale): |
|
61 | - | """ Ensure that the color scale is formatted in rgb strings. |
|
62 | + | """ Ensure that the color scale is formatted in rgb strings. |
|
62 | 63 | If the colorscale is a hex string, then convert to rgb. |
|
63 | 64 | """ |
|
64 | 65 | if colorscale[0][1][0] == "#": |
@@ -266,9 +267,25 @@
Loading
266 | 267 | if X_names.shape[0] == 0: |
|
267 | 268 | X_names = np.array(["f_%s" % (i) for i in range(X.shape[1])]) |
|
268 | 269 | ||
269 | - | cluster_X_mean = np.mean(X[member_ids], axis=0) |
|
270 | - | X_mean = np.mean(X, axis=0) |
|
271 | - | X_std = np.std(X, axis=0) |
|
270 | + | # be explicit about the allowed sparse formats |
|
271 | + | if scipy.sparse.issparse(X): |
|
272 | + | if X.format not in ["csr", "csc"]: |
|
273 | + | raise ValueError( |
|
274 | + | "sparse matrix format must be csr or csc but found {}".format(X.format)) |
|
275 | + | ||
276 | + | # wrap cluster_X_mean, X_mean, and X_std in np.array(---).squeeze() |
|
277 | + | # to get the same treatment for dense and sparse arrays |
|
278 | + | cluster_X_mean = np.array( |
|
279 | + | np.mean(X[member_ids], axis=0) |
|
280 | + | ).squeeze() |
|
281 | + | X_mean = np.array( |
|
282 | + | np.mean(X, axis=0) |
|
283 | + | ).squeeze() |
|
284 | + | X_std = np.array( |
|
285 | + | # use StandardScaler as a way to get std for dense or sparse array |
|
286 | + | np.sqrt(preprocessing.StandardScaler(with_mean=False).fit(X).var_) |
|
287 | + | ).squeeze() |
|
288 | + | ||
272 | 289 | above_mean = cluster_X_mean > X_mean |
|
273 | 290 | std_m = np.sqrt((cluster_X_mean - X_mean) ** 2) / X_std |
|
274 | 291 |
@@ -276,10 +293,10 @@
Loading
276 | 293 | zip( |
|
277 | 294 | std_m, |
|
278 | 295 | X_names, |
|
279 | - | np.mean(X, axis=0), |
|
296 | + | X_mean, |
|
280 | 297 | cluster_X_mean, |
|
281 | 298 | above_mean, |
|
282 | - | np.std(X, axis=0), |
|
299 | + | X_std, |
|
283 | 300 | ) |
|
284 | 301 | ) |
|
285 | 302 | stats = sorted(stat_zip, reverse=True) |
Files | Coverage |
---|---|
kmapper | 79.20% |
Project Totals (10 files) | 79.20% |
440.4
3.6=.6 TRAVIS_OS_NAME=linux
440.1
TRAVIS_OS_NAME=linux 2.7=.7
440.3
3.5=.5 TRAVIS_OS_NAME=linux
440.5
3.7=.7 TRAVIS_OS_NAME=linux
440.2
3.4=.4 TRAVIS_OS_NAME=linux
Sunburst
The innermost circle represents the entire project; moving outward from the center are folders and then, finally, individual files.
The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and, finally, individual files.
The size and color of each slice represent the number of statements and the coverage, respectively.