Dana-Farber-AIOS / pathml
Showing 23 of 32 files from the diff.
Newly tracked file
pathml/_logging.py created.

@@ -7,6 +7,7 @@
Loading
7 7
import reprlib
8 8
from collections import OrderedDict
9 9
from pathlib import Path
10 +
from loguru import logger
10 11
11 12
import h5py
12 13
import pathml.core.h5managers

@@ -3,6 +3,7 @@
Loading
3 3
License: GNU GPL 2.0
4 4
"""
5 5
6 +
from loguru import logger
6 7
import h5py
7 8
import numpy as np
8 9
import torch

@@ -8,6 +8,7 @@
Loading
8 8
9 9
import numpy as np
10 10
import openslide
11 +
from loguru import logger
11 12
import pathml.core
12 13
import pathml.core.tile
13 14
from javabridge.jutil import JavaException
@@ -26,12 +27,9 @@
Loading
26 27
    import javabridge
27 28
    from bioformats.metadatatools import createOMEXMLMetadata
28 29
except ImportError:
30 +
    logger.exception("Unable to import bioformats, javabridge")
29 31
    raise Exception(
30 -
        """Installation of PathML not complete. Please install openjdk8, bioformats, and javabridge:
31 -
            conda install openjdk==8.0.152
32 -
            pip install javabridge==1.0.19 python-bioformats==4.0.0
33 -
34 -
            For detailed installation instructions, please see https://github.com/Dana-Farber-AIOS/pathml/"""
32 +
        f"Installation of PathML not complete. Please install openjdk8, bioformats, and javabridge:\nconda install openjdk==8.0.152\npip install javabridge==1.0.19 python-bioformats==4.0.0\nFor detailed installation instructions, please see https://github.com/Dana-Farber-AIOS/pathml/"
35 33
    )
36 34
37 35
@@ -62,6 +60,7 @@
Loading
62 60
    """
63 61
64 62
    def __init__(self, filename):
63 +
        logger.info(f"OpenSlideBackend loading file at: {filename}")
65 64
        self.filename = filename
66 65
        self.slide = openslide.open_slide(filename=filename)
67 66
        self.level_count = self.slide.level_count
@@ -198,16 +197,19 @@
Loading
198 197
199 198
        stride_i, stride_j = stride
200 199
201 -
        if pad:
202 -
            n_chunk_i = i // stride_i + 1
203 -
            n_chunk_j = j // stride_j + 1
204 -
200 +
        # calculate number of expected tiles
201 +
        # check for tile shape evenly dividing slide shape to fix https://github.com/Dana-Farber-AIOS/pathml/issues/305
202 +
        if pad and i % stride_i != 0:
203 +
            n_tiles_i = i // stride_i + 1
205 204
        else:
206 -
            n_chunk_i = (i - shape[0]) // stride_i + 1
207 -
            n_chunk_j = (j - shape[1]) // stride_j + 1
205 +
            n_tiles_i = (i - shape[0]) // stride_i + 1
206 +
        if pad and j % stride_j != 0:
207 +
            n_tiles_j = j // stride_j + 1
208 +
        else:
209 +
            n_tiles_j = (j - shape[1]) // stride_j + 1
208 210
209 -
        for ix_i in range(n_chunk_i):
210 -
            for ix_j in range(n_chunk_j):
211 +
        for ix_i in range(n_tiles_i):
212 +
            for ix_j in range(n_tiles_j):
211 213
                coords = (int(ix_i * stride_i), int(ix_j * stride_j))
212 214
                # get image for tile
213 215
                tile_im = self.extract_region(location=coords, size=shape, level=level)
@@ -237,6 +239,7 @@
Loading
237 239
    javabridge.call(
238 240
        rootLogger, "setLevel", "(Lch/qos/logback/classic/Level;)V", logLevel
239 241
    )
242 +
    logger.info("silenced javabridge logging")
240 243
241 244
242 245
class BioFormatsBackend(SlideBackend):
@@ -260,6 +263,7 @@
Loading
260 263
261 264
    def __init__(self, filename, dtype=None):
262 265
        self.filename = filename
266 +
        logger.info(f"BioFormatsBackend loading file at: {filename}")
263 267
        # init java virtual machine
264 268
        javabridge.start_vm(class_path=bioformats.JARS, max_heap_size="50G")
265 269
        # disable verbose JVM logging if possible
@@ -274,6 +278,7 @@
Loading
274 278
        reader.setMetadataStore(omeMeta)
275 279
        reader.setId(str(self.filename))
276 280
        seriesCount = reader.getSeriesCount()
281 +
        logger.info(f"Found n={seriesCount} series in image")
277 282
278 283
        sizeSeries = []
279 284
        for s in range(seriesCount):
@@ -294,11 +299,14 @@
Loading
294 299
        self.shape_list = sizeSeries  # shape on all levels
295 300
        self.metadata = bioformats.get_omexml_metadata(self.filename)
296 301
302 +
        logger.info(f"Bioformats OMEXML metadata: {self.metadata}")
303 +
297 304
        if dtype:
298 305
            assert isinstance(
299 306
                dtype, np.dtype
300 307
            ), f"dtype is of type {type(dtype)}. Must be a np.dtype"
301 308
            self.pixel_dtype = dtype
309 +
            logger.info(f"Using specified dtype: {dtype}")
302 310
        else:
303 311
            # infer pixel data type from metadata
304 312
            # map from ome pixel datatypes to numpy types. Based on:
@@ -318,9 +326,12 @@
Loading
318 326
            ome_pixeltype = (
319 327
                bioformats.OMEXML(self.metadata).image().Pixels.get_PixelType()
320 328
            )
329 +
            logger.info(f"Using pixel dtype found in OME metadata: {ome_pixeltype}")
321 330
            try:
322 331
                self.pixel_dtype = pixel_dtype_map[ome_pixeltype]
332 +
                logger.info(f"Found corresponding dtype: {self.pixel_dtype}")
323 333
            except:
334 +
                logger.exception("datatype from metadata not found in pixel_dtype_map")
324 335
                raise Exception(
325 336
                    f"pixel type '{ome_pixeltype}' detected from OME metadata not recognized."
326 337
                )
@@ -400,19 +411,26 @@
Loading
400 411
                f"input size {size} invalid. Must be a tuple of integer coordinates of len<2"
401 412
            )
402 413
        if series_as_channels:
403 -
            assert (
404 -
                level == 0
405 -
            ), f"Multi-level images not supported with series_as_channels=True. Input 'level={level}' invalid. Use 'level=0'."
414 +
            logger.info(f"using series_as_channels=True")
415 +
            if level != 0:
416 +
                logger.exception(
417 +
                    f"When series_as_channels=True, must use level=0. Input 'level={level}' invalid."
418 +
                )
419 +
                raise ValueError(
420 +
                    f"Multi-level images not supported with series_as_channels=True. Input 'level={level}' invalid. Use 'level=0'."
421 +
                )
406 422
407 423
        javabridge.start_vm(class_path=bioformats.JARS, max_heap_size="100G")
408 424
        with bioformats.ImageReader(str(self.filename), perform_init=True) as reader:
409 425
            # expand size
426 +
            logger.info(f"extracting region with input size = {size}")
410 427
            size = list(size)
411 428
            arrayshape = list(size)
412 429
            for i in range(len(self.shape_list[level])):
413 430
                if i > len(size) - 1:
414 431
                    arrayshape.append(self.shape_list[level][i])
415 432
            arrayshape = tuple(arrayshape)
433 +
            logger.info(f"input size converted to {arrayshape}")
416 434
            array = np.empty(arrayshape)
417 435
418 436
            # read a very small region to check whether the image has channels incorrectly stored as series
@@ -427,6 +445,7 @@
Loading
427 445
            # need this part because some facilities output images where the channels are incorrectly stored as series
428 446
            # in this case we pull the image for each series, then stack them together as channels
429 447
            if series_as_channels:
448 +
                logger.info("reading series as channels")
430 449
                for z in range(self.shape_list[level][2]):
431 450
                    for c in range(self.shape_list[level][3]):
432 451
                        for t in range(self.shape_list[level][4]):
@@ -443,6 +462,7 @@
Loading
443 462
444 463
            # in this case, channels are correctly stored as channels, and we can support multi-level images as series
445 464
            else:
465 +
                logger.info("reading image")
446 466
                for z in range(self.shape_list[level][2]):
447 467
                    for t in range(self.shape_list[level][4]):
448 468
                        slicearray = reader.read(
@@ -459,11 +479,16 @@
Loading
459 479
                            array[:, :, z, level, t] = slicearray
460 480
461 481
        if not normalize:
482 +
            logger.info("returning extracted region without normalizing dtype")
462 483
            return array
463 484
        else:
485 +
            logger.info("normalizing extracted region to uint8")
464 486
            # scale array before converting: https://github.com/Dana-Farber-AIOS/pathml/issues/271
465 487
            # first scale to [0-1]
466 488
            array_scaled = array / (2 ** (8 * self.pixel_dtype.itemsize))
489 +
            logger.info(
490 +
                f"Scaling image to [0, 1] by dividing by {(2 ** (8 * self.pixel_dtype.itemsize))}"
491 +
            )
467 492
            # then scale to [0-255] and convert to 8 bit
468 493
            array_scaled = array_scaled * 2 ** 8
469 494
            return array_scaled.astype(np.uint8)
@@ -541,6 +566,7 @@
Loading
541 566
        ), f"input stride {stride} invalid. Must be a tuple of (stride_H, stride_W), or a single int"
542 567
543 568
        if stride is None:
569 +
            logger.info(f"stride not specified, using stride=shape ({shape})")
544 570
            stride = shape
545 571
        elif isinstance(stride, int):
546 572
            stride = (stride, stride)
@@ -549,16 +575,21 @@
Loading
549 575
550 576
        stride_i, stride_j = stride
551 577
552 -
        if pad:
553 -
            n_chunk_i = i // stride_i + 1
554 -
            n_chunk_j = j // stride_j + 1
555 -
578 +
        # calculate number of expected tiles
579 +
        # check for tile shape evenly dividing slide shape to fix https://github.com/Dana-Farber-AIOS/pathml/issues/305
580 +
        if pad and i % stride_i != 0:
581 +
            n_tiles_i = i // stride_i + 1
556 582
        else:
557 -
            n_chunk_i = (i - shape[0]) // stride_i + 1
558 -
            n_chunk_j = (j - shape[1]) // stride_j + 1
583 +
            n_tiles_i = (i - shape[0]) // stride_i + 1
584 +
        if pad and j % stride_j != 0:
585 +
            n_tiles_j = j // stride_j + 1
586 +
        else:
587 +
            n_tiles_j = (j - shape[1]) // stride_j + 1
588 +
589 +
        logger.info(f"expected number of tiles: {n_tiles_i} x {n_tiles_j}")
559 590
560 -
        for ix_i in range(n_chunk_i):
561 -
            for ix_j in range(n_chunk_j):
591 +
        for ix_i in range(n_tiles_i):
592 +
            for ix_j in range(n_tiles_j):
562 593
                coords = (int(ix_i * stride_i), int(ix_j * stride_j))
563 594
                if coords[0] + shape[0] < i and coords[1] + shape[1] < j:
564 595
                    # get image for tile
@@ -597,6 +628,7 @@
Loading
597 628
598 629
    def __init__(self, filename):
599 630
        self.filename = str(filename)
631 +
        logger.info(f"DICOMBackend loading file at: {filename}")
600 632
        # read metadata fields of interest from DICOM, without reading the entire PixelArray
601 633
        tags = [
602 634
            "NumberOfFrames",
@@ -616,6 +648,9 @@
Loading
616 648
        self.n_rows = -(-self.shape[0] // self.frame_shape[0])
617 649
        self.n_cols = -(-self.shape[1] // self.frame_shape[1])
618 650
        self.transfer_syntax_uid = UID(metadata.file_meta.TransferSyntaxUID)
651 +
        logger.info(
652 +
            f"DICOM metadata: frame_shape={self.frame_shape}, nrows = {self.n_rows}, ncols = {self.n_cols}"
653 +
        )
619 654
620 655
        # actual file
621 656
        self.fp = DicomFile(self.filename, mode="rb")
@@ -708,9 +743,9 @@
Loading
708 743
        frame_i, frame_j = self.frame_shape
709 744
        # frame size must evenly divide coords, otherwise we aren't on a frame corner
710 745
        if i % frame_i or j % frame_j:
746 +
            logger.exception(f"i={i}, j={j}, frame shape = {self.frame_shape}")
711 747
            raise ValueError(
712 -
                f"coords {coords} are not evenly divided by frame size {(frame_i, frame_j)}. "
713 -
                f"Must provide coords at upper left corner of Frame."
748 +
                f"coords {coords} are not evenly divided by frame size {(frame_i, frame_j)}. Must provide coords at upper left corner of Frame."
714 749
            )
715 750
716 751
        row_ix = i / frame_i

@@ -8,6 +8,7 @@
Loading
8 8
from pathlib import Path
9 9
10 10
import anndata
11 +
from loguru import logger
11 12
import dask.distributed
12 13
import h5py
13 14
import matplotlib.pyplot as plt
@@ -278,8 +279,7 @@
Loading
278 279
            # in this case, tiles already exist
279 280
            if not overwrite_existing_tiles:
280 281
                raise Exception(
281 -
                    "Slide already has tiles. Running the pipeline will overwrite the existing tiles."
282 -
                    "use overwrite_existing_tiles=True to force overwriting existing tiles."
282 +
                    f"Slide already has tiles. Running the pipeline will overwrite the existing tiles. Use overwrite_existing_tiles=True to force overwriting existing tiles."
283 283
                )
284 284
            else:
285 285
                # delete all existing tiles
@@ -302,6 +302,9 @@
Loading
302 302
            if client is None:
303 303
                client = dask.distributed.Client()
304 304
                shutdown_after = True
305 +
                logger.info(
306 +
                    f"creating a default distributed.Client(): {client.scheduler_info()}"
307 +
                )
305 308
306 309
            # map pipeline application onto each tile
307 310
            processed_tile_futures = []
@@ -442,7 +445,7 @@
Loading
442 445
        except:
443 446
            if not self.slide:
444 447
                raise NotImplementedError(
445 -
                    "Plotting only supported via backend, but SlideData has no backend."
448 +
                    f"Plotting only supported via backend, but SlideData has no backend."
446 449
                )
447 450
            else:
448 451
                raise NotImplementedError(
@@ -468,7 +471,7 @@
Loading
468 471
            self.tiles.h5manager.counts = value
469 472
        else:
470 473
            raise AttributeError(
471 -
                "cannot assign counts slidedata contains no tiles, first generate tiles"
474 +
                f"cannot assign counts slidedata contains no tiles, first generate tiles"
472 475
            )
473 476
474 477
    def write(self, path):

@@ -4,6 +4,7 @@
Loading
4 4
"""
5 5
6 6
import numpy as np
7 +
from loguru import logger
7 8
8 9
9 10
def extract_tiles(arr, tile_size, stride=None):
@@ -27,8 +28,7 @@
Loading
27 28
    i, j, n_channels = arr.shape
28 29
    if (i - tile_size) % stride != 0 or (j - tile_size) % stride != 0:
29 30
        raise NotImplementedError(
30 -
            f"Array of shape {arr.shape} is not perfectly tiled by tiles of size "
31 -
            f"{tile_size} and stride {stride}."
31 +
            f"Array of shape {arr.shape} is not perfectly tiled by tiles of size {tile_size} and stride {stride}."
32 32
        )
33 33
    patch_strides = arr.strides
34 34
    patch_shape = (tile_size, tile_size, n_channels)

@@ -6,20 +6,24 @@
Loading
6 6
import os
7 7
from warnings import warn
8 8
9 +
from loguru import logger
9 10
import anndata
10 11
import cv2
11 12
import numpy as np
12 13
import pandas as pd
13 14
import pathml.core
14 15
import pathml.core.slide_data
15 -
from pathml.utils import (RGB_to_GREY, RGB_to_HSI, RGB_to_HSV, RGB_to_OD,
16 -
                          normalize_matrix_cols)
16 +
from pathml.utils import (
17 +
    RGB_to_GREY,
18 +
    RGB_to_HSI,
19 +
    RGB_to_HSV,
20 +
    RGB_to_OD,
21 +
    normalize_matrix_cols,
22 +
)
17 23
from skimage import restoration
18 -
from skimage.exposure import (equalize_adapthist, equalize_hist,
19 -
                              rescale_intensity)
24 +
from skimage.exposure import equalize_adapthist, equalize_hist, rescale_intensity
20 25
from skimage.measure import regionprops_table
21 26
22 -
23 27
# Base class
24 28
class Transform:
25 29
    """
@@ -646,7 +650,7 @@
Loading
646 650
                import spams
647 651
            except (ImportError, ModuleNotFoundError):
648 652
                raise Exception(
649 -
                    "Vahadane method requires `spams` package to be installed"
653 +
                    f"Vahadane method requires `spams` package to be installed"
650 654
                )
651 655
652 656
        self.target = target.lower()
@@ -708,8 +712,7 @@
Loading
708 712
            stain_matrix = self._estimate_stain_vectors_vahadane(image)
709 713
        else:
710 714
            raise Exception(
711 -
                f"Error: input stain estimation method {self.stain_estimation_method} must be one of "
712 -
                f"'macenko' or 'vahadane'"
715 +
                f"Error: input stain estimation method {self.stain_estimation_method} must be one of 'macenko' or 'vahadane'"
713 716
            )
714 717
        return stain_matrix
715 718
@@ -740,7 +743,7 @@
Loading
740 743
        try:
741 744
            import spams
742 745
        except (ImportError, ModuleNotFoundError):
743 -
            raise Exception("Vahadane method requires `spams` package to be installed")
746 +
            raise Exception(f"Vahadane method requires `spams` package to be installed")
744 747
        # convert to Optical Density (OD) space
745 748
        image_OD = RGB_to_OD(image)
746 749
        # reshape to (M*N)x3
@@ -787,7 +790,7 @@
Loading
787 790
        try:
788 791
            _, v = np.linalg.eigh(np.cov(OD.T))
789 792
        except np.linalg.LinAlgError as err:
790 -
            print(f"Error in computing eigenvectors: {err}")
793 +
            logger.exception(f"Error in computing eigenvectors: {err}")
791 794
            raise
792 795
        pcs = v[:, 1:3]
793 796
        # project OD pixels onto plane of first 2 PCs
@@ -844,7 +847,7 @@
Loading
844 847
        try:
845 848
            import spams
846 849
        except (ImportError, ModuleNotFoundError):
847 -
            raise Exception("Vahadane method requires `spams` package to be installed")
850 +
            raise Exception(f"Vahadane method requires `spams` package to be installed")
848 851
        image_OD = RGB_to_OD(image).reshape(-1, 3)
849 852
850 853
        # Get concentrations of each stain at each pixel
@@ -1290,7 +1293,9 @@
Loading
1290 1293
        nuclear_channel(int): channel that defines cell nucleus
1291 1294
        cytoplasm_channel(int): channel that defines cell membrane or cytoplasm
1292 1295
        image_resolution(float): pixel resolution of image in microns
1293 -
        gpu(bool): flag indicating whether gpu will be used for inference
1296 +
        preprocess_kwargs(dict): keyword arguments to pass to pre-processing function
1297 +
        postprocess_kwargs_nuclear(dict): keyword arguments to pass to post-processing function
1298 +
        postprocess_kwargs_whole_cell(dict): keyword arguments to pass to post-processing function
1294 1299
1295 1300
    References:
1296 1301
        Greenwald, N.F., Miller, G., Moen, E. et al. Whole-cell segmentation of tissue images with human-level
@@ -1307,9 +1312,9 @@
Loading
1307 1312
        nuclear_channel=None,
1308 1313
        cytoplasm_channel=None,
1309 1314
        image_resolution=0.5,
1310 -
        gpu=True,
1315 +
        preprocess_kwargs=None,
1316 +
        postprocess_kwargs_nuclear=None,
1311 1317
        postprocess_kwargs_whole_cell=None,
1312 -
        postprocess_kwrags_nuclear=None,
1313 1318
    ):
1314 1319
        assert isinstance(
1315 1320
            nuclear_channel, int
@@ -1320,42 +1325,42 @@
Loading
1320 1325
        self.nuclear_channel = nuclear_channel
1321 1326
        self.cytoplasm_channel = cytoplasm_channel
1322 1327
        self.image_resolution = image_resolution
1323 -
        self.gpu = gpu
1328 +
        self.preprocess_kwargs = preprocess_kwargs if preprocess_kwargs else {}
1329 +
        self.postprocess_kwargs_nuclear = (
1330 +
            postprocess_kwargs_nuclear if postprocess_kwargs_nuclear else {}
1331 +
        )
1332 +
        self.postprocess_kwargs_whole_cell = (
1333 +
            postprocess_kwargs_whole_cell if postprocess_kwargs_whole_cell else {}
1334 +
        )
1324 1335
1325 1336
        if model.lower() == "mesmer":
1326 1337
            try:
1327 1338
                from deepcell.applications import Mesmer
1328 1339
            except ImportError:
1329 -
                warn(
1330 -
                    """The Mesmer model in SegmentMIF requires extra libraries to be installed.
1331 -
                You can install these via pip using:
1332 -
1333 -
                pip install deepcell
1334 -
                """
1340 +
                logger.warning(
1341 +
                    "The Mesmer model in SegmentMIF requires extra libraries to be installed.\nYou can install these via pip using:\npip install deepcell"
1335 1342
                )
1336 1343
                raise ImportError(
1337 -
                    "The Mesmer model in SegmentMIF requires deepcell to be installed"
1344 +
                    f"The Mesmer model in SegmentMIF requires deepcell to be installed"
1338 1345
                ) from None
1339 1346
            self.model = model.lower()
1340 1347
        elif model.lower() == "cellpose":
1341 1348
            """from cellpose import models
1342 1349
            self.model = models.Cellpose(gpu=self.gpu, model_type='cyto')"""
1343 -
            raise NotImplementedError("Cellpose model not currently supported")
1350 +
            raise NotImplementedError(f"Cellpose model not currently supported")
1344 1351
        else:
1345 -
            raise ValueError(f"currently only support mesmer model")
1352 +
            raise ValueError(f"currently only supports mesmer model")
1346 1353
1347 1354
    def __repr__(self):
1348 1355
        return (
1349 -
            f"SegmentMIF(model={self.model}, image_resolution={self.image_resolution}, "
1350 -
            f"gpu={self.gpu})"
1356 +
            f"SegmentMIF(model={self.model}, image_resolution={self.image_resolution})"
1351 1357
        )
1352 1358
1353 1359
    def F(self, image):
1354 1360
        img = image.copy()
1355 1361
        if len(img.shape) not in [3, 4]:
1356 1362
            raise ValueError(
1357 -
                f"input image has shape {img.shape}. supported image shapes are x,y,c or batch,x,y,c."
1358 -
                "did you forget to apply 'CollapseRuns*()' transform?"
1363 +
                f"input image has shape {img.shape}. supported image shapes are x,y,c or batch,x,y,c. Did you forget to apply 'CollapseRuns*()' transform?"
1359 1364
            )
1360 1365
        if len(img.shape) == 3:
1361 1366
            img = np.expand_dims(img, axis=0)
@@ -1371,10 +1376,18 @@
Loading
1371 1376
1372 1377
            model = Mesmer()
1373 1378
            cell_segmentation_predictions = model.predict(
1374 -
                nuc_cytoplasm, image_mpp=self.image_resolution, compartment="whole-cell"
1379 +
                nuc_cytoplasm,
1380 +
                image_mpp=self.image_resolution,
1381 +
                compartment="whole-cell",
1382 +
                preprocess_kwargs=self.preprocess_kwargs,
1383 +
                postprocess_kwargs_whole_cell=self.postprocess_kwargs_whole_cell,
1375 1384
            )
1376 1385
            nuclear_segmentation_predictions = model.predict(
1377 -
                nuc_cytoplasm, image_mpp=self.image_resolution, compartment="nuclear"
1386 +
                nuc_cytoplasm,
1387 +
                image_mpp=self.image_resolution,
1388 +
                compartment="nuclear",
1389 +
                preprocess_kwargs=self.preprocess_kwargs,
1390 +
                postprocess_kwargs_nuclear=self.postprocess_kwargs_nuclear,
1378 1391
            )
1379 1392
            cell_segmentation_predictions = np.squeeze(
1380 1393
                cell_segmentation_predictions, axis=0
@@ -1403,7 +1416,7 @@
Loading
1403 1416
    """
1404 1417
    Convert segmented image into anndata.AnnData counts object `AnnData <https://anndata.readthedocs.io/en/latest/>`_.
1405 1418
    Counts objects are used to interface with the Python single cell analysis ecosystem `Scanpy <https://scanpy.readthedocs.io/en/stable/>`_.
1406 -
    The counts object contains a summary of protein expression statistics in each cell along with its coordinate.
1419 +
    The counts object contains a summary of channel statistics in each cell along with its coordinate.
1407 1420
1408 1421
    Args:
1409 1422
        segmentation_mask (str): key indicating which mask to use as label image
@@ -1415,14 +1428,30 @@
Loading
1415 1428
    def __repr__(self):
1416 1429
        return f"QuantifyMIF(segmentation_mask={self.segmentation_mask})"
1417 1430
1418 -
    def F(self, tile):
1419 -
        # pass (x, y, channel) image and (x, y) segmentation
1420 -
        img = tile.image.copy()
1421 -
        segmentation = tile.masks[self.segmentation_mask][:, :, 0]
1431 +
    def F(self, img, segmentation, coords_offset=(0, 0)):
1432 +
        """
1433 +
        Functional implementation
1434 +
1435 +
        Args:
1436 +
            img (np.ndarray): Input image of shape (i, j, n_channels)
1437 +
            segmentation (np.ndarray): Segmentation map of shape (i, j) or (i, j, 1). Zeros are background. Regions should be
1438 +
                labelled with unique integers.
1439 +
            coords_offset (tuple, optional): Coordinates (i, j) used to convert tile-level coordinates to slide-level.
1440 +
                Defaults to (0, 0) for no offset.
1441 +
1442 +
        Returns:
1443 +
            Counts matrix
1444 +
        """
1445 +
        if segmentation.ndim != 2:
1446 +
            assert (
1447 +
                segmentation.shape[2] == 1
1448 +
            ), f"input segmentation is of shape {segmentation.shape}. must be (x, y) or (x, y, 1)"
1449 +
            segmentation = segmentation.squeeze(2)
1422 1450
        countsdataframe = regionprops_table(
1423 1451
            label_image=segmentation,
1424 1452
            intensity_image=img,
1425 1453
            properties=[
1454 +
                "label",
1426 1455
                "coords",
1427 1456
                "max_intensity",
1428 1457
                "mean_intensity",
@@ -1438,15 +1467,17 @@
Loading
1438 1467
        for i in range(img.shape[-1]):
1439 1468
            X[i] = countsdataframe[f"mean_intensity-{i}"]
1440 1469
        # populate anndata object
1470 +
        # i,j are relative to the input image (0 to img.shape). Adding offset converts to slide-level coordinates
1441 1471
        counts = anndata.AnnData(
1442 1472
            X=X,
1443 1473
            obs=[
1444 -
                tuple([i + tile.coords[0], j + tile.coords[1]])
1474 +
                tuple([i + coords_offset[0], j + coords_offset[1]])
1445 1475
                for i, j in zip(
1446 1476
                    countsdataframe["centroid-0"], countsdataframe["centroid-1"]
1447 1477
                )
1448 1478
            ],
1449 1479
        )
1480 +
        counts.obs["label"] = countsdataframe["label"]
1450 1481
        counts.obs = counts.obs.rename(columns={0: "y", 1: "x"})
1451 1482
        counts.obs["filled_area"] = countsdataframe["filled_area"]
1452 1483
        counts.obs["euler_number"] = countsdataframe["euler_number"]
@@ -1461,7 +1492,7 @@
Loading
1461 1492
        try:
1462 1493
            counts.obsm["spatial"] = np.array(counts.obs[["x", "y"]])
1463 1494
        except:
1464 -
            print("warning: did not log coordinates in obsm")
1495 +
            logger.warning("did not log coordinates in obsm")
1465 1496
        return counts
1466 1497
1467 1498
    def apply(self, tile):
@@ -1474,7 +1505,11 @@
Loading
1474 1505
        assert (
1475 1506
            tile.slide_type.stain == "Fluor"
1476 1507
        ), f"Tile has slide_type.stain='{tile.slide_type.stain}', but must be 'Fluor'"
1477 -
        tile.counts = self.F(tile)
1508 +
        tile.counts = self.F(
1509 +
            img=tile.image,
1510 +
            segmentation=tile.masks[self.segmentation_mask],
1511 +
            coords_offset=tile.coords,
1512 +
        )
1478 1513
1479 1514
1480 1515
class CollapseRunsVectra(Transform):

@@ -7,4 +7,5 @@
Loading
7 7
from . import datasets as ds
8 8
from . import ml
9 9
from . import preprocessing as pp
10 +
from ._logging import PathMLLogger
10 11
from ._version import __version__

@@ -9,6 +9,7 @@
Loading
9 9
from collections import OrderedDict
10 10
11 11
import anndata
12 +
from loguru import logger
12 13
import h5py
13 14
import numpy as np
14 15
import pathml.core
@@ -95,7 +96,7 @@
Loading
95 96
            tile(pathml.core.tile.Tile): Tile object
96 97
        """
97 98
        if str(tile.coords) in self.h5["tiles"].keys():
98 -
            print(f"Tile is already in tiles. Overwriting {tile.coords} inplace.")
99 +
            logger.info(f"Tile is already in tiles. Overwriting {tile.coords} inplace.")
99 100
            # remove old cells from self.counts so they do not duplicate
100 101
            if tile.counts:
101 102
                if "tile" in self.counts.obs.keys():
@@ -114,7 +115,6 @@
Loading
114 115
            raise ValueError(
115 116
                f"cannot add tile of shape {tile.image.shape}. Must match shape of existing tiles: {existing_shape}"
116 117
            )
117 -
118 118
        if self.slide_type and tile.slide_type:
119 119
            # check that slide types match
120 120
            if tile.slide_type != self.slide_type:
@@ -127,7 +127,7 @@
Loading
127 127
128 128
        # create a group for tile and write tile
129 129
        if str(tile.coords) in self.h5["tiles"]:
130 -
            print(f"overwriting tile at {str(tile.coords)}")
130 +
            logger.info(f"overwriting tile at {str(tile.coords)}")
131 131
            del self.h5["tiles"][str(tile.coords)]
132 132
        self.h5["tiles"].create_group(str(tile.coords))
133 133
        self.h5["tiles"][str(tile.coords)].create_dataset(
@@ -155,7 +155,9 @@
Loading
155 155
            # add tile-level masks
156 156
            for key, mask in tile.masks.items():
157 157
                self.h5["tiles"][str(tile.coords)]["masks"].create_dataset(
158 -
                    str(key), data=mask, dtype="float16",
158 +
                    str(key),
159 +
                    data=mask,
160 +
                    dtype="float16",
159 161
                )
160 162
161 163
        # add coords
@@ -209,8 +211,7 @@
Loading
209 211
            item = list(self.h5["tiles"].keys())[item]
210 212
        else:
211 213
            raise KeyError(
212 -
                f"invalid item type: {type(item)}. must getitem by coord (type tuple[int]),"
213 -
                f"index (type int), or name (type str)"
214 +
                f"invalid item type: {type(item)}. must getitem by coord (type tuple[int]), index (type int), or name (type str)"
214 215
            )
215 216
        tile = self.h5["tiles"][item]["array"][:]
216 217
@@ -339,7 +340,7 @@
Loading
339 340
                f"masks keys must be of type(str) but key was passed of type {type(key)}"
340 341
            )
341 342
        if key not in self.h5["masks"].keys():
342 -
            raise KeyError("key is not in Masks")
343 +
            raise KeyError(f"key is not in Masks")
343 344
        del self.h5["masks"][key]
344 345
345 346
    def get_slidetype(self):

@@ -10,6 +10,7 @@
Loading
10 10
import cv2
11 11
import matplotlib.pyplot as plt
12 12
import numpy as np
13 +
from loguru import logger
13 14
from matplotlib.colors import TABLEAU_COLORS
14 15
15 16
@@ -338,3 +339,9 @@
Loading
338 339
            nuclei_mask = masks[i, ...] == label
339 340
            x, y = segmentation_lines(nuclei_mask.astype(np.uint8))
340 341
            ax.scatter(x, y, color=palette[i], marker=".", s=markersize)
342 +
343 +
344 +
def _test_log(msg):
    """
    Pass ``msg`` through to the pathml logger at INFO level.

    Used for testing logging.
    """
    logger.info(msg)

@@ -24,5 +24,5 @@
Loading
24 24
    CollapseRunsCODEX,
25 25
    RescaleIntensity,
26 26
    HistogramEqualization,
27 -
    AdaptiveHistogramEqualization
27 +
    AdaptiveHistogramEqualization,
28 28
)

@@ -8,6 +8,7 @@
Loading
8 8
from torch.nn import functional as F
9 9
import numpy as np
10 10
import cv2
11 +
from loguru import logger
11 12
from skimage.segmentation import watershed
12 13
from scipy.ndimage.morphology import binary_fill_holes
13 14
import matplotlib.pyplot as plt
@@ -431,9 +432,8 @@
Loading
431 432
    try:
432 433
        inst_list.remove(0)  # 0 is background
433 434
    except:
434 -
        warn(
435 -
            "No pixels with 0 label. This means that there are no background pixels."
436 -
            "This may indicate a problem. Ignore this warning if this is expected/intended."
435 +
        logger.warning(
436 +
            "No pixels with 0 label. This means that there are no background pixels. This may indicate a problem. Ignore this warning if this is expected/intended."
437 437
        )
438 438
439 439
    for inst_id in inst_list:

@@ -9,6 +9,7 @@
Loading
9 9
from collections import OrderedDict
10 10
import h5py
11 11
import reprlib
12 +
from loguru import logger
12 13
13 14
import pathml.core.h5managers
14 15
@@ -31,7 +32,7 @@
Loading
31 32
        if masks:
32 33
            if not isinstance(masks, dict):
33 34
                raise ValueError(
34 -
                    f"masks must be passed as dicts of the form key1:mask1,key2:mask2,..."
35 +
                    "masks must be passed as dicts of the form {key1:mask1, key2:mask2, ...}"
35 36
                )
36 37
            for val in masks.values():
37 38
                if not isinstance(val, np.ndarray):

@@ -5,6 +5,7 @@
Loading
5 5
6 6
import reprlib
7 7
from pathlib import Path
8 +
from loguru import logger
8 9
9 10
import dask.distributed
10 11
from torch.utils.data import ConcatDataset
@@ -53,6 +54,9 @@
Loading
53 54
        if client is None and distributed:
54 55
            client = dask.distributed.Client()
55 56
            shutdown_after = True
57 +
            logger.info(
58 +
                f"creating a default distributed.Client(): {client.scheduler_info()}"
59 +
            )
56 60
        for slide in self.slides:
57 61
            slide.run(
58 62
                pipeline=pipeline, client=client, distributed=distributed, **kwargs
@@ -60,10 +64,6 @@
Loading
60 64
        if shutdown_after:
61 65
            client.shutdown()
62 66
63 -
    def reshape(self, shape, centercrop=False):
64 -
        for slide in self.slides:
65 -
            slide.tiles.reshape(shape=shape, centercrop=centercrop)
66 -
67 67
    def write(self, dir, filenames=None):
68 68
        """
69 69
        Write all SlideData objects to the specified directory.
@@ -78,8 +78,7 @@
Loading
78 78
        if filenames:
79 79
            if len(filenames) != self.__len__():
80 80
                raise ValueError(
81 -
                    f"input list of filenames has {len(filenames)} elements "
82 -
                    f"but must be same length as number of slides in dataset ({self.__len__()})"
81 +
                    f"input list of filenames has {len(filenames)} elements but must be same length as number of slides in dataset ({self.__len__()})"
83 82
                )
84 83
85 84
        for i, slide in enumerate(self.slides):
@@ -89,6 +88,6 @@
Loading
89 88
                slide_path = d / (slide.name + ".h5path")
90 89
            else:
91 90
                raise ValueError(
92 -
                    "slide does not have a .name attribute. Must supply a 'filenames' argument."
91 +
                    f"slide does not have a .name attribute. Must supply a 'filenames' argument."
93 92
                )
94 93
            slide.write(slide_path)

@@ -7,6 +7,7 @@
Loading
7 7
import torch
8 8
from torch.nn import functional as F
9 9
import numpy as np
10 +
from loguru import logger
10 11
11 12
12 13
def center_crop_im_batch(batch, dims, batch_order="BCHW"):
@@ -42,7 +43,7 @@
Loading
42 43
        elif batch_order == "BCHW":
43 44
            batch_cropped = batch[:, :, crop_t:-crop_b, crop_l:-crop_r]
44 45
        else:
45 -
            raise Exception("Input batch order not valid")
46 +
            raise Exception(f"Input batch order not valid")
46 47
47 48
    return batch_cropped
48 49

@@ -0,0 +1,96 @@
Loading
1 +
"""
2 +
Copyright 2021, Dana-Farber Cancer Institute and Weill Cornell Medicine
3 +
License: GNU GPL 2.0
4 +
"""
5 +
6 +
from loguru import logger
7 +
import functools
8 +
import sys
9 +
10 +
11 +
class PathMLLogger:
    """
    Convenience methods for turning on or off and configuring logging for PathML.
    Note that this can also be achieved by interfacing with loguru directly

    Logging for ``pathml`` and for this module is disabled when the class is
    defined, so nothing is emitted until :meth:`enable` is called.

    Example::

        from pathml import PathMLLogger as pml

        # turn on logging for PathML
        pml.enable()

        # turn off logging for PathML
        pml.disable()

        # turn on logging and output logs to a file named 'logs.txt', with colorization enabled
        pml.enable(sink="logs.txt", colorize=True)
    """

    # Executed once, when the class body runs: PathML logging is off by default.
    logger.disable("pathml")
    logger.disable(__name__)

    @staticmethod
    def disable():
        """
        Turn off logging for PathML
        """
        logger.disable("pathml")
        logger.disable(__name__)
        # Sanity check: emitted after disabling, so it is expected to be dropped.
        logger.info(
            "Disabled Logging For PathML! If you are seeing this, there is a problem"
        )

    @staticmethod
    def enable(
        sink=sys.stderr,
        level="DEBUG",
        fmt="PathML:{level}:{time:HH:mm:ss} | {module}:{function}:{line} | {message}",
        **kwargs
    ):
        """
        Turn on and configure logging for PathML

        May be called more than once; each call adds a new handler.

        Args:
            sink (str or io._io.TextIOWrapper, optional):
                Destination sink for log messages. Defaults to ``sys.stderr``.
            level (str):
                level of logs to capture. Defaults to 'DEBUG'.
            fmt (str):
                Formatting for the log message. Defaults to: 'PathML:{level}:{time:HH:mm:ss} | {module}:{function}:{line} | {message}'
            **kwargs (dict, optional):
                additional options passed to configure logger. See:
                `loguru documentation <https://loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add>`_

        Returns:
            The handler id returned by ``logger.add`` for the newly added sink.
        """
        logger.enable("pathml")
        logger.enable(__name__)
        # remove pre-configured logger (https://github.com/Delgan/loguru/issues/208#issuecomment-581002215)
        try:
            logger.remove(0)
        except ValueError:
            # Handler 0 is already gone (e.g. enable() was called before, or the
            # caller removed loguru's default handler); nothing left to remove.
            pass
        handler_id = logger.add(sink=sink, level=level, format=fmt, **kwargs)
        logger.info("Enabled Logging For PathML!")
        return handler_id
72 +
73 +
74 +
# courtesy of the people at loguru
75 +
# https://loguru.readthedocs.io/en/stable/resources/recipes.html#:~:text=or%20fallback%20policy.-,Logging%20entry%20and%20exit%20of%20functions%20with%20a%20decorator,-%EF%83%81
76 +
def logger_wraps(*, entry=True, exit=True, level="DEBUG"):
    """
    Build a decorator that logs when the decorated function is entered and exited.

    Args:
        entry (bool): if True, log the function name, args and kwargs before the call.
        exit (bool): if True, log the function name and its result after the call returns.
        level (str): level used for both log messages. Defaults to 'DEBUG'.

    Returns:
        A decorator; the decorated function returns whatever the original returns.
    """

    def decorate(func):
        func_name = func.__name__

        @functools.wraps(func)
        def call_with_logging(*args, **kwargs):
            # depth=1: take the record's context from one frame up the stack,
            # i.e. from the code calling the decorated function.
            caller_logger = logger.opt(depth=1)
            if entry:
                bound = caller_logger.bind(enter_exit=True)
                bound.log(
                    level, "Entering '{}' (args={}, kwargs={})", func_name, args, kwargs
                )
            outcome = func(*args, **kwargs)
            if exit:
                bound = caller_logger.bind(enter_exit=True)
                bound.log(level, "Exiting '{}' (result={})", func_name, outcome)
            return outcome

        return call_with_logging

    return decorate

@@ -5,6 +5,7 @@
Loading
5 5
6 6
import hashlib
7 7
import os
8 +
from loguru import logger
8 9
from pathlib import Path
9 10
10 11
import h5py
@@ -85,7 +86,7 @@
Loading
85 86
86 87
    def _download_deepfocus(self, root):
87 88
        if self._check_integrity():
88 -
            print("File already downloaded with correct hash.")
89 +
            logger.info("File already downloaded with correct hash.")
89 90
            return
90 91
        self.data_dir.mkdir(parents=True, exist_ok=True)
91 92
        download_from_url(

@@ -3,4 +3,4 @@
Loading
3 3
License: GNU GPL 2.0
4 4
"""
5 5
6 -
__version__ = "2.0.4"
6 +
__version__ = "2.1.0"

@@ -7,7 +7,7 @@
Loading
7 7
import tempfile
8 8
from collections import OrderedDict
9 9
from dataclasses import asdict
10 -
10 +
from loguru import logger
11 11
import anndata
12 12
import h5py
13 13
import numpy as np

@@ -4,6 +4,7 @@
Loading
4 4
"""
5 5
6 6
import pickle
7 +
from loguru import logger
7 8
8 9
import pathml.core.tile
9 10
from pathml.preprocessing.transforms import Transform

@@ -2,6 +2,7 @@
Loading
2 2
Copyright 2021, Dana-Farber Cancer Institute and Weill Cornell Medicine
3 3
License: GNU GPL 2.0
4 4
"""
5 +
from loguru import logger
5 6
6 7
7 8
class SlideType:

@@ -12,6 +12,7 @@
Loading
12 12
13 13
import cv2
14 14
import numpy as np
15 +
from loguru import logger
15 16
import torch
16 17
import torch.utils.data as data
17 18
from pathml.datasets.base_data_module import BaseDataModule
@@ -233,11 +234,15 @@
Loading
233 234
        self.batch_size = batch_size
234 235
        self.hovernet_preprocess = hovernet_preprocess
235 236
236 -
    def _get_dataset(self, fold_ix):
237 +
    def _get_dataset(self, fold_ix, augment=True):
238 +
        if augment:
239 +
            transforms = self.transforms
240 +
        else:
241 +
            transforms = None
237 242
        return PanNukeDataset(
238 243
            data_dir=self.data_dir,
239 244
            fold_ix=fold_ix,
240 -
            transforms=self.transforms,
245 +
            transforms=transforms,
241 246
            nucleus_type_labels=self.nucleus_type_labels,
242 247
            hovernet_preprocess=self.hovernet_preprocess,
243 248
        )
@@ -248,7 +253,7 @@
Loading
248 253
            p = os.path.join(download_dir, "Fold " + str(fold_ix))
249 254
            # don't download if the directory already exists
250 255
            if not os.path.isdir(p):
251 -
                print(f"Downloading fold {fold_ix}")
256 +
                logger.info(f"Downloading fold {fold_ix}")
252 257
                url = f"https://warwick.ac.uk/fac/cross_fac/tia/data/pannuke/fold_{fold_ix}.zip"
253 258
                name = os.path.basename(url)
254 259
                download_from_url(url=url, download_dir=download_dir, name=name)
@@ -257,7 +262,7 @@
Loading
257 262
                with zipfile.ZipFile(path, "r") as zip_ref:
258 263
                    zip_ref.extractall(download_dir)
259 264
            else:
260 -
                warn(
265 +
                logger.warning(
261 266
                    f"Skipping download of fold {fold_ix}, using local data found at {p}"
262 267
                )
263 268
@@ -362,7 +367,7 @@
Loading
362 367
        Yields (image, mask, tissue_type), or (image, mask, hv, tissue_type) for HoVer-Net
363 368
        """
364 369
        return data.DataLoader(
365 -
            dataset=self._get_dataset(fold_ix=self.split),
370 +
            dataset=self._get_dataset(fold_ix=self.split, augment=True),
366 371
            batch_size=self.batch_size,
367 372
            shuffle=self.shuffle,
368 373
            pin_memory=True,
@@ -379,7 +384,7 @@
Loading
379 384
        else:
380 385
            fold_ix = 1
381 386
        return data.DataLoader(
382 -
            self._get_dataset(fold_ix=fold_ix),
387 +
            self._get_dataset(fold_ix=fold_ix, augment=False),
383 388
            batch_size=self.batch_size,
384 389
            shuffle=self.shuffle,
385 390
            pin_memory=True,
@@ -396,7 +401,7 @@
Loading
396 401
        else:
397 402
            fold_ix = 1
398 403
        return data.DataLoader(
399 -
            self._get_dataset(fold_ix=fold_ix),
404 +
            self._get_dataset(fold_ix=fold_ix, augment=False),
400 405
            batch_size=self.batch_size,
401 406
            shuffle=self.shuffle,
402 407
            pin_memory=True,

@@ -4,6 +4,7 @@
Loading
4 4
"""
5 5
6 6
import numpy as np
7 +
from loguru import logger
7 8
8 9
def pannuke_multiclass_mask_to_nucleus_mask(multiclass_mask):
9 10
    """

@@ -9,6 +9,7 @@
Loading
9 9
import matplotlib.pyplot as plt
10 10
import h5py
11 11
import reprlib
12 +
from loguru import logger
12 13
13 14
import pathml.core.masks
14 15
Files Coverage
pathml 86.52%
Project Totals (27 files) 86.52%
codecov-umbrella
Build #2209966002 -
PYTHON=undefined
OS=undefined
codecov-umbrella
Build #2209966002 -
PYTHON=undefined
OS=undefined

No yaml found.

Create your codecov.yml to customize your Codecov experience

Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and finally individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading