"""
Some functionality related to datasets

Copyright (C) 2016-2019 Jiri Borovec <jiri.borovec@fel.cvut.cz>
"""

import glob
import logging
import os
import re

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from cv2 import (
    IMWRITE_JPEG_QUALITY,
    IMWRITE_PNG_COMPRESSION,
    COLOR_RGBA2RGB,
    COLOR_RGB2BGR,
    INTER_LINEAR,
    GaussianBlur,
    cvtColor,
    imwrite,
    resize,
)
from matplotlib.path import Path
from scipy import spatial, optimize
from skimage.color import rgb2hsv, hsv2rgb, rgb2lab, lab2rgb, lch2lab, lab2lch, rgb2hed, hed2rgb, rgb2luv, luv2rgb
from skimage.exposure import rescale_intensity
from skimage.filters import threshold_otsu

#: threshold of tissue/background presence on potential cutting line
TISSUE_CONTENT = 0.01
#: supported image extensions
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
# https://github.com/opencv/opencv/issues/6729
# https://www.life2coding.com/save-opencv-images-jpeg-quality-png-compression
IMAGE_COMPRESSION_OPTIONS = (IMWRITE_JPEG_QUALITY, 98) + \
                            (IMWRITE_PNG_COMPRESSION, 9)
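# NOTE: a hedged sketch of how OpenCV consumes these options: `imwrite` accepts
# a flat sequence of (flag, value) pairs, so the JPEG and PNG settings can share
# one tuple and flags irrelevant to the chosen encoder are simply ignored, e.g.:
# >>> imwrite('./sample.jpg', np.zeros((8, 8, 3), dtype=np.uint8),
# ...         IMAGE_COMPRESSION_OPTIONS)
# True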
#: template for detecting/parsing scale from folder name
REEXP_FOLDER_SCALE = r'\S*scale-(\d+)pc'
# ERROR:root:error: Image size (... pixels) exceeds limit of ... pixels,
# could be decompression bomb DOS attack.
# SEE: https://gitlab.mister-muffin.de/josch/img2pdf/issues/42
Image.MAX_IMAGE_PIXELS = None
#: maximal image size for visualisations, larger images will be downscaled
MAX_IMAGE_SIZE = 5000
#: define pair of forward and backward color space conversion
CONVERT_RGB = {
    'rgb': (lambda img: img, lambda img: img),
    'hsv': (rgb2hsv, hsv2rgb),
    'lab': (rgb2lab, lab2rgb),
    'luv': (rgb2luv, luv2rgb),
    'hed': (rgb2hed, hed2rgb),
    'lch': (lambda img: lab2lch(rgb2lab(img)), lambda img: lab2rgb(lch2lab(img))),
}
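# A hedged usage sketch: each entry pairs a forward and a backward conversion,
# so a round-trip through any listed color space approximately recovers the
# original RGB image (float values in [0, 1] are assumed):
# >>> fwd, bwd = CONVERT_RGB['hsv']
# >>> img = np.random.random((4, 4, 3))
# >>> np.allclose(bwd(fwd(img)), img)
# True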


def detect_binary_blocks(vec_bin):
    """ detect binary objects by their beginning, end and length in a 1D signal

    :param list(bool) vec_bin: binary vector with 1 for an object
    :return tuple(list(int),list(int),list(int)):

    >>> vec = np.array([1] * 15 + [0] * 5 + [1] * 20)
    >>> detect_binary_blocks(vec)
    ([0, 20], [15, 39], [14, 19])
    """
    begins, ends, lengths = [], [], []

    # in case that it starts with an object
    if vec_bin[0]:
        begins.append(0)

    length = 0
    # iterate over the whole array, skipping the first element
    for i in range(1, len(vec_bin)):
        if vec_bin[i] > vec_bin[i - 1]:
            begins.append(i)
        elif vec_bin[i] < vec_bin[i - 1]:
            ends.append(i)
            lengths.append(length)
            length = 0
        elif vec_bin[i] == 1:
            length += 1

    # in case that it ends with an object
    if vec_bin[-1]:
        ends.append(len(vec_bin) - 1)
        lengths.append(length)

    return begins, ends, lengths


def find_split_objects(hist, nb_objects=2, threshold=TISSUE_CONTENT):
    """ find the N largest objects and set the split as the middle distance between them

    :param list(float) hist: input vector
    :param int nb_objects: number of desired objects
    :param float threshold: threshold for input vector
    :return list(int):

    >>> vec = np.array([1] * 15 + [0] * 5 + [1] * 20)
    >>> find_split_objects(vec)
    [17]
    """
    hist_bin = hist > threshold
    begins, ends, lengths = detect_binary_blocks(hist_bin)

    if len(lengths) < nb_objects:
        logging.error('not enough objects')
        return []

    # select only the requested number of largest objects
    obj_sorted = sorted(zip(lengths, range(len(lengths))), reverse=True)
    obj_select = sorted([o[1] for o in obj_sorted][:nb_objects])

    # compute the mean in the gap
    splits = [np.mean((ends[obj_select[i]], begins[obj_select[i + 1]])) for i in range(len(obj_select) - 1)]
    splits = list(map(int, splits))

    return splits


def find_largest_object(hist, threshold=TISSUE_CONTENT):
    """ find the largest object and give its beginning and end

    :param list(float) hist: input vector
    :param float threshold: threshold for input vector
    :return list(int):

    >>> vec = np.array([1] * 15 + [0] * 5 + [1] * 20)
    >>> find_largest_object(vec)
    (20, 39)
    """
    hist_bin = hist > threshold
    begins, ends, lengths = detect_binary_blocks(hist_bin)

    assert lengths, 'no object found'

    # select only the largest object
    obj_sorted = sorted(zip(lengths, range(len(lengths))), reverse=True)
    obj_select = obj_sorted[0][1]

    return begins[obj_select], ends[obj_select]


def project_object_edge(img, dimension):
    """ scale the image, binarise with Otsu and project to one dimension

    :param ndarray img:
    :param int dimension: select dimension for projection
    :return list(float):

    >>> img = np.zeros((20, 10, 3))
    >>> img[2:6, 1:7, :] = 1
    >>> img[10:17, 4:6, :] = 1
    >>> project_object_edge(img, 0).tolist()  # doctest: +NORMALIZE_WHITESPACE
    [0.0, 0.0, 0.7, 0.7, 0.7, 0.7, 0.0, 0.0, 0.0, 0.0,
     0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0, 0.0, 0.0]
    """
    assert dimension in (0, 1), 'not supported dimension %i' % dimension
    assert img.ndim == 3, 'unsupported image shape %r' % img.shape
    img_gray = np.mean(img, axis=-1)
    img_gray = GaussianBlur(img_gray, (5, 5), 0)
    p_low, p_high = np.percentile(img_gray, (1, 95))
    img_gray = rescale_intensity(img_gray, in_range=(p_low, p_high))
    img_bin = img_gray > threshold_otsu(img_gray)
    img_edge = np.mean(img_bin, axis=1 - dimension)
    return img_edge


def load_large_image(img_path):
    """ loading very large images

    .. note:: For the loading we have to use matplotlib, since neither ImageMagick
     nor the other libs (OpenCV, scikit-image, Pillow) are able to load images
     larger than 64k or 32k pixels per dimension.

    :param str img_path: path to the image
    :return ndarray: image
    """
    assert os.path.isfile(img_path), 'missing image: %s' % img_path
    img = plt.imread(img_path)
    if img.ndim == 3 and img.shape[2] == 4:
        img = cvtColor(img, COLOR_RGBA2RGB)
    if np.max(img) <= 1.5:
        np.clip(img, a_min=0, a_max=1, out=img)
        # splitting this command should reduce the amount of required memory
        np.multiply(img, 255, out=img)
        img = img.astype(np.uint8, copy=False)
    return img


def save_large_image(img_path, img):
    """ saving large images, more than 50k x 50k

    .. note:: For the saving we have to use OpenCV, since the other libs
     (matplotlib, Pillow, ITK) are not able to save images larger than 32k.

    :param str img_path: path to the new image
    :param ndarray img: image

    >>> img = np.zeros((2500, 3200, 4), dtype=np.uint8)
    >>> img[:, :, 0] = 255
    >>> img[:, :, 1] = 127
    >>> img_path = './sample-image.jpg'
    >>> save_large_image(img_path, img)
    >>> img2 = load_large_image(img_path)
    >>> img2[0, 0].tolist()
    [255, 127, 0]
    >>> img.shape[:2] == img2.shape[:2]
    True
    >>> os.remove(img_path)
    >>> img_path = './sample-image.png'
    >>> save_large_image(img_path, img.astype(np.uint16) * 255)
    >>> img3 = load_large_image(img_path)
    >>> img.shape[:2] == img3.shape[:2]
    True
    >>> img3[0, 0].tolist()
    [255, 127, 0]
    >>> save_large_image(img_path, img2 / 255. * 1.15)  # test overwrite message
    >>> os.remove(img_path)
    """
    # drop transparency
    if img.ndim == 3 and img.shape[2] == 4:
        img = img[:, :, :3]
    # for some reason, with linear interpolation the values sometimes overflow the range (0, 1)
    if np.max(img) <= 1.5:
        np.clip(img, a_min=0, a_max=1, out=img)
        # splitting this command should reduce the amount of required memory
        np.multiply(img, 255, out=img)
        img = img.astype(np.uint8, copy=False)
    # some TIFF images have a higher range, e.g. int16
    elif np.max(img) > 255:
        img = img / 255
    # for integer images clip the value range to (0, 255)
    if img.dtype != np.uint8:
        np.clip(img, a_min=0, a_max=255, out=img)
        img = img.astype(np.uint8, copy=False)
    if os.path.isfile(img_path):
        logging.debug('WARNING: this image will be overwritten: %s', img_path)
    # why cv2 imwrite changes the color of pics
    # https://stackoverflow.com/questions/42406338
    img = cvtColor(img, COLOR_RGB2BGR)
    imwrite(img_path, img, IMAGE_COMPRESSION_OPTIONS)


def generate_pairing(count, step_hide=None):
    """ generate registration pairs with an option of hidden landmarks

    :param int count: total number of samples
    :param int|None step_hide: hide every N-th sample
    :return list((int, int)), list(bool): registration pairs

    >>> generate_pairing(4, None)  # doctest: +NORMALIZE_WHITESPACE
    ([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)],
     [True, True, True, True, True, True])
    >>> generate_pairing(4, step_hide=3)  # doctest: +NORMALIZE_WHITESPACE
    ([(0, 1), (0, 2), (1, 2), (3, 1), (3, 2)],
     [False, False, True, False, False])
    """
    idxs_all = list(range(count))
    idxs_hide = idxs_all[::step_hide] if step_hide is not None else []
    # prune pairs on the diagonal and those whose target landmarks are hidden
    idxs_pairs = [(i, j) for i in idxs_all for j in idxs_all if i != j and j not in idxs_hide]
    # prune symmetric image pairs
    idxs_pairs = [(i, j) for k, (i, j) in enumerate(idxs_pairs) if (j, i) not in idxs_pairs[:k]]
    public = [not (i in idxs_hide or j in idxs_hide) for i, j in idxs_pairs]
    return idxs_pairs, public


def parse_path_scale(path_folder):
    """ from given path with annotation parse scale

    :param str path_folder: path to the scale folder
    :return int: scale

    >>> parse_path_scale('scale-.1pc')
    nan
    >>> parse_path_scale('user-JB_scale-50pc')
    50
    >>> parse_path_scale('scale-10pc')
    10
    """
    folder = os.path.basename(path_folder)
    obj = re.match(REEXP_FOLDER_SCALE, folder)
    if obj is None:
        return np.nan
    scale = int(obj.groups()[0])
    return scale


def line_angle_2d(point_begin, point_end, deg=True):
    """ Compute direction of a line given by two points

    the zero angle is horizontal in direction [1, 0]

    :param list(float) point_begin: starting line point
    :param list(float) point_end: ending line point
    :param bool deg: return angle in degrees
    :return float: orientation

    >>> [line_angle_2d([0, 0], p) for p in ((1, 0), (0, 1), (-1, 0), (0, -1))]
    [0.0, 90.0, 180.0, -90.0]
    >>> line_angle_2d([1, 1], [2, 3])  # doctest: +ELLIPSIS
    63.43...
    >>> line_angle_2d([1, 2], [-2, -3])  # doctest: +ELLIPSIS
    -120.96...
    """
    diff = np.asarray(point_end) - np.asarray(point_begin)
    angle = -np.arctan2(diff[0], diff[1]) + np.pi / 2.
    if deg:
        angle = angle / np.pi * 180.
    angle = norm_angle(angle, deg)
    return angle


def norm_angle(angle, deg=True):
    """ Normalise angle to be in range (-180, 180) degrees

    :param float angle: input angle
    :param bool deg: use degrees
    :return float: normalised angle
    """
    alpha = 180. if deg else np.pi
    while angle > alpha:
        angle = -2 * alpha + angle
    while angle < -alpha:
        angle = 2 * alpha + angle
    return angle
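

# A hedged usage sketch for `norm_angle`, which has no doctest above;
# the values follow directly from the wrap-around loops:
# >>> norm_angle(270.)
# -90.0
# >>> norm_angle(-200.)
# 160.0
# >>> norm_angle(3 * np.pi / 2, deg=False) == -np.pi / 2
# True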


def is_point_inside_perpendicular(point_begin, point_end, point_test):
    """ If point is left from line and perpendicularly in between the line segment

    .. note:: a negative response does not mean that the point is on the right side

    :param list(float) point_begin: starting line point
    :param list(float) point_end: ending line point
    :param list(float) point_test: testing point
    :return int: gives +1 if it is above, -1 if below and 0 elsewhere

    >>> is_point_inside_perpendicular([1, 1], [3, 1], [2, 2])
    1
    >>> is_point_inside_perpendicular([1, 1], [3, 1], [2, 0])
    -1
    >>> is_point_inside_perpendicular([1, 1], [3, 1], [4, 2])
    0
    """
    angle_line = line_angle_2d(point_begin, point_end, deg=True)
    # compute angle of end - test compared to begin - end
    angle_a = norm_angle(line_angle_2d(point_end, point_test, deg=True) - angle_line, deg=True)
    # compute angle of begin - test compared to begin - end
    angle_b = norm_angle(line_angle_2d(point_begin, point_test, deg=True) - angle_line, deg=True)
    if (angle_a >= 90) and (angle_b <= 90):
        state = 1
    elif (angle_a <= -90) and (angle_b >= -90):
        state = -1
    else:
        state = 0
    return state


def is_point_in_quadrant_left(point_begin, point_end, point_test):
    """ If point is in the left quadrant from the line end point

    .. note:: a negative response does not mean that the point is on the right side

    :param list(float) point_begin: starting line point
    :param list(float) point_end: ending line point
    :param list(float) point_test: testing point
    :return int: gives +1 if it is above, -1 if below and 0 elsewhere

    >>> is_point_in_quadrant_left([1, 1], [3, 1], [2, 2])
    1
    >>> is_point_in_quadrant_left([3, 1], [1, 1], [2, 0])
    1
    >>> is_point_in_quadrant_left([1, 1], [3, 1], [2, 0])
    -1
    >>> is_point_in_quadrant_left([1, 1], [3, 1], [4, 2])
    0
    """
    angle_line = line_angle_2d(point_begin, point_end, deg=True)
    # compute angle of end - test compared to begin - end
    angle_pt = norm_angle(line_angle_2d(point_end, point_test, deg=True) - angle_line, deg=True)
    if (180 >= angle_pt >= 90) or angle_pt == -180:
        state = 1
    elif (-180 <= angle_pt <= -90) or angle_pt == 180:
        state = -1
    else:
        state = 0
    return state


def is_point_above_line(point_begin, point_end, point_test):
    """ If point is left from the line

    :param list(float) point_begin: starting line point
    :param list(float) point_end: ending line point
    :param list(float) point_test: testing point
    :return bool: left from the line

    >>> is_point_above_line([1, 1], [2, 2], [3, 4])
    True
    """
    # compute angle of end - test compared to begin - end
    angle_line = line_angle_2d(point_begin, point_end, deg=True)
    angle_test = line_angle_2d(point_end, point_test, deg=True)
    state = 0 <= norm_angle(angle_test - angle_line, deg=True) <= 180
    return state


def compute_half_polygon(landmarks, idx_start=0, idx_end=-1):
    """ compute half polygon path

    :param int idx_start: index of starting point
    :param int idx_end: index of ending point
    :param ndarray landmarks: set of points
    :return ndarray: set of points

    >>> pts = [(-1, 1), (0, 0), (0, 2), (1, 1), (1, -0.5), (2, 0)]
    >>> compute_half_polygon(pts, idx_start=0, idx_end=-1)
    [[-1.0, 1.0], [0.0, 2.0], [1.0, 1.0], [2.0, 0.0]]
    >>> compute_half_polygon(pts[:2], idx_start=-1, idx_end=0)
    [[-1, 1], [0, 0]]
    >>> pts = [[0, 2], [1, 5], [2, 4], [2, 5], [4, 4], [4, 6], [4, 8], [5, 8], [5, 8]]
    >>> compute_half_polygon(pts)
    [[0, 2], [1, 5], [2, 5], [4, 6], [4, 8], [5, 8]]
    """
    # three or fewer points always form a minimal polygon
    if len(landmarks) < 3:
        return np.array(landmarks).tolist()
    # normalise indexes to be non-negative
    while idx_start < 0:
        idx_start = len(landmarks) + idx_start
    while idx_end < 0:
        idx_end = len(landmarks) + idx_end
    # select points
    pt_begin, pt_end = landmarks[idx_start], landmarks[idx_end]
    del idx_start, idx_end
    # only unique points
    points = np.vstack(list({tuple(lnd) for lnd in landmarks}))
    dists = spatial.distance.cdist(points, points, metric='euclidean')
    poly = [pt_begin]

    def _in(pt0, pts):
        return any(np.array_equal(pt, pt0) for pt in pts)

    def _disturbed(poly, pt_new, pt_test):
        last = is_point_in_quadrant_left(poly[-1], pt_new, pt_test) == 1
        path = sum(is_point_inside_perpendicular(pt0, pt1, pt_test) for pt0, pt1 in zip(poly, poly[1:] + [pt_new])) < 0
        return last and not path

    # iterate until the last point is added to the chain
    while not np.array_equal(poly[-1], pt_end):
        idx_last = np.argmax([np.array_equal(pt, poly[-1]) for pt in points])
        # walk over the list ordered by distance, starting with the closest
        pt_order = np.argsort(dists[idx_last])
        # iterate over all possible candidates not already in the chain
        for pt0 in (pt for pt in points[pt_order] if not _in(pt, poly)):
            # find a point which does not have any point on the left perpendicular
            if any(_disturbed(poly, pt0, pt) for pt in points if not _in(pt, poly + [pt0])):
                continue
            else:
                poly.append(pt0)
                break
    poly = np.array(poly).tolist()
    return poly


def get_close_diag_corners(points):
    """ find the points closest to the top-left and bottom-right corner

    :param ndarray points: set of points
    :return tuple(ndarray,ndarray,tuple(int,int)): begin and end of the imaginary
     diagonal and their indexes

    >>> np.random.seed(0)
    >>> points = np.random.randint(1, 9, (20, 2))
    >>> get_close_diag_corners(points)
    (array([1, 2]), array([7, 8]), (12, 10))
    """
    pt_min = np.min(points, axis=0)
    pt_max = np.max(points, axis=0)
    dists = spatial.distance.cdist([pt_min, pt_max], points)
    idx_begin = np.argmin(dists[0])
    idx_end = np.argmin(dists[1])
    pt_begin = points[idx_begin]
    pt_end = points[idx_end]
    return pt_begin, pt_end, (idx_begin, idx_end)


def simplify_polygon(points, tol_degree=5):
    """ simplify path, drop points lying on the same line

    :param ndarray points: points in polygon
    :param float tol_degree: tolerance on change in orientation
    :return list(list(float)): points of polygon

    >>> pts = [[1, 2], [2, 4], [1, 5], [2, 8], [3, 8], [5, 8], [7, 8], [8, 7],
    ...     [8, 5], [8, 3], [8, 1], [7, 1], [6, 1], [4, 1], [3, 1], [3, 2], [2, 2]]
    >>> simplify_polygon(pts)
    [[1, 2], [2, 4], [1, 5], [2, 8], [7, 8], [8, 7], [8, 1], [3, 1], [3, 2]]
    """
    if len(points) < 3:
        return points
    path = [points[0]]
    for i in range(1, len(points)):
        angle0 = line_angle_2d(path[-1], points[i], deg=True)
        angle1 = line_angle_2d(points[i], points[(i + 1) % len(points)], deg=True)
        if abs(norm_angle(angle0 - angle1, deg=True)) > tol_degree:
            path.append(points[i])
    return np.array(path).tolist()


def compute_bounding_polygon(landmarks):
    """ get the polygon within which all points lie

    :param ndarray landmarks: set of points
    :return ndarray: points of polygon

    >>> np.random.seed(0)
    >>> points = np.random.randint(1, 9, (45, 2))
    >>> compute_bounding_polygon(points)  # doctest: +NORMALIZE_WHITESPACE
    [[1, 2], [2, 4], [1, 5], [2, 8], [7, 8], [8, 7], [8, 1], [3, 1], [3, 2]]
    """
    # three or fewer points always form a minimal polygon
    if len(landmarks) <= 3:
        return np.array(landmarks)
    points = np.array(landmarks)
    # split to two halves by the diagonal from [min, min] to [max, max]
    points = points[points[:, 0].argsort()]
    pt_begin, pt_end, _ = get_close_diag_corners(points)
    is_above = np.array([is_point_above_line(pt_begin, pt_end, pt) for pt in points])

    poly = []
    # compute one curve starting with [min, min] until [max, max] is added
    pts_above = [pt_begin] + [pt for i, pt in enumerate(points) if is_above[i]] + [pt_end]
    poly += compute_half_polygon(pts_above, idx_start=0, idx_end=-1)[:-1]
    # analogously the second curve from [max, max] back to [min, min]
    pts_below = [pt_begin] + [pt for i, pt in enumerate(points) if not is_above[i]] + [pt_end]
    poly += compute_half_polygon(pts_below, idx_start=-1, idx_end=0)[:-1]
    return simplify_polygon(poly)


def compute_convex_hull(landmarks):
    """ compute convex hull around landmarks

    * http://lagrange.univ-lyon1.fr/docs/scipy/0.17.1/generated/scipy.spatial.ConvexHull.html
    * https://stackoverflow.com/questions/21727199

    :param ndarray landmarks: set of points
    :return ndarray: points of polygon

    >>> np.random.seed(0)
    >>> pts = np.random.randint(15, 30, (10, 2))
    >>> compute_convex_hull(pts)
    array([[27, 20],
           [27, 25],
           [22, 24],
           [16, 21],
           [15, 18],
           [26, 18]])
    """
    chull = spatial.ConvexHull(landmarks)
    chull_points = landmarks[chull.vertices]
    return chull_points


def inside_polygon(polygon, point):
    """ check if a point is strictly inside the polygon

    :param ndarray|list polygon: polygon contour
    :param tuple|list point: sample point
    :return bool: inside

    >>> poly = [[1, 1], [1, 3], [3, 3], [3, 1]]
    >>> inside_polygon(poly, [0, 0])
    False
    >>> inside_polygon(poly, [1, 1])
    False
    >>> inside_polygon(poly, [2, 2])
    True
    """
    path = Path(polygon)
    return path.contains_points([point])[0]


def list_sub_folders(path_folder, name='*'):
    """ list all sub folders with particular name pattern

    :param str path_folder: path to a particular folder
    :param str name: name pattern
    :return list(str): folders

    >>> from birl.utilities.data_io import update_path
    >>> paths = list_sub_folders(update_path('data-images'))
    >>> list(map(os.path.basename, paths))  # doctest: +ELLIPSIS
    ['images', 'landmarks', 'lesions_', 'rat-kidney_'...]
    """
    sub_dirs = sorted([p for p in glob.glob(os.path.join(path_folder, name)) if os.path.isdir(p)])
    return sub_dirs


def common_landmarks(points1, points2, threshold=1.5):
    """ find common landmarks in two sets

    :param ndarray|list(list(float)) points1: first point set
    :param ndarray|list(list(float)) points2: second point set
    :param float threshold: threshold for assignment (for landmarks in pixels)
    :return ndarray: pairs of matching indexes

    >>> np.random.seed(0)
    >>> common = np.random.random((5, 2))
    >>> pts1 = np.vstack([common, np.random.random((10, 2))])
    >>> pts2 = np.vstack([common, np.random.random((15, 2))])
    >>> common_landmarks(pts1, pts2, threshold=1e-3)
    array([[0, 0],
           [1, 1],
           [2, 2],
           [3, 3],
           [4, 4]])
    >>> np.random.shuffle(pts2)
    >>> common_landmarks(pts1, pts2, threshold=1e-3)
    array([[ 0, 13],
           [ 1, 10],
           [ 2,  9],
           [ 3, 14],
           [ 4,  8]])
    """
    points1 = np.asarray(points1)
    points2 = np.asarray(points2)
    dist = spatial.distance.cdist(points1, points2, 'euclidean')
    ind_row, ind_col = optimize.linear_sum_assignment(dist)
    dist_sel = dist[ind_row, ind_col]
    pairs = [(i, j) for (i, j, d) in zip(ind_row, ind_col, dist_sel) if d < threshold]
    assert len(pairs) <= min([len(points1), len(points2)])
    return np.array(pairs, dtype=int)


def args_expand_images(parser, nb_workers=1, overwrite=True):
    """ expand the parser by standard parameters related to images:
        * image paths
        * allow overwrite (optional)
        * number of jobs

    :param obj parser: existing parser
    :param int nb_workers: default number of threads
    :param bool overwrite: allow overwriting images
    :return obj:

    >>> import argparse
    >>> args_expand_images(argparse.ArgumentParser())  # doctest: +ELLIPSIS
    ArgumentParser(...)
    """
    parser.add_argument('-i', '--path_images', type=str, required=True, help='path (pattern) to the input image')
    parser.add_argument(
        '--nb_workers', type=int, required=False, default=nb_workers, help='number of processes running in parallel'
    )
    if overwrite:
        parser.add_argument(
            '--overwrite', action='store_true', required=False, default=False, help='allow overwrite existing images'
        )
    return parser


def args_expand_parse_images(parser, nb_workers=1, overwrite=True):
    """ expand the parser by standard parameters related to images and parse it:
        * image paths
        * allow overwrite (optional)
        * number of jobs

    :param obj parser: existing parser
    :param int nb_workers: default number of threads
    :param bool overwrite: allow overwriting images
    :return dict:
    """
    parser = args_expand_images(parser, nb_workers, overwrite)
    args = vars(parser.parse_args())
    args['path_images'] = os.path.expanduser(args['path_images'])
    return args
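

# A hedged usage sketch (the argv values are illustrative only; mutating
# `sys.argv` here stands in for a real command-line invocation):
# >>> import argparse, sys
# >>> sys.argv = ['demo', '-i', './images/*.png', '--nb_workers', '2']
# >>> args = args_expand_parse_images(argparse.ArgumentParser())
# >>> sorted(args.keys())
# ['nb_workers', 'overwrite', 'path_images']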


def estimate_scaling(images, max_size=MAX_IMAGE_SIZE):
    """ find scaling for given set of images and maximal image size

    :param list(ndarray) images: input images
    :param float max_size: max image size in any dimension
    :return float: scaling in range (0, 1)

    >>> estimate_scaling([np.zeros((12000, 300, 3))])  # doctest: +ELLIPSIS
    0.4...
    >>> estimate_scaling([np.zeros((1200, 800, 3))])
    1.0
    """
    sizes = [img.shape[:2] for img in images if img is not None]
    if not sizes:
        return 1.
    max_dim = np.max(sizes)
    scale = np.round(float(max_size) / max_dim, 1) if max_dim > max_size else 1.
    return scale
701

702

703 30
def scale_large_images_landmarks(images, landmarks):
704
    """ scale images and landmarks up to maximal image size
705

706
    :param list(ndarray) images: list of images
707
    :param list(ndarray) landmarks: list of landmarks
708
    :return tuple(list(ndarray),list(ndarray)): lists of images and landmarks
709

710
    >>> scale_large_images_landmarks([np.zeros((8000, 500, 3), dtype=np.uint8)],
711
    ...                              [None, None])  # doctest: +ELLIPSIS
712
    ([array(...)], [None, None])
713
    """
714 30
    if not images:
715 0
        return images, landmarks
716 30
    scale = estimate_scaling(images)
717 30
    if scale < 1.:
718 30
        logging.debug(
719
            'One or more images are larger then recommended size for visualisation,'
720
            ' an resize with factor %f will be applied', scale
721
        )
722
    # using float16 as image raise TypeError: src data type = 23 is not supported
723 30
    images = [
724
        resize(img, None, fx=scale, fy=scale, interpolation=INTER_LINEAR) if img is not None else None for img in images
725
    ]
726 30
    landmarks = [lnds * scale if lnds is not None else None for lnds in landmarks]
727 30
    return images, landmarks


def convert_landmarks_to_itk(lnds, image_size):
    """convert used landmarks to the ITK format

    .. seealso:: https://github.com/SuperElastix/elastix/issues/156#issuecomment-511712784

    :param ndarray lnds: landmarks
    :param (int,int) image_size: image size - height, width
    :return ndarray: landmarks

    >>> convert_landmarks_to_itk([[5, 20], [100, 150], [0, 100]], (150, 200))
    array([[ 20, 145],
           [150,  50],
           [100, 150]])
    """
    height, width = image_size
    # swap rows-columns to X-Y
    lnds = np.array(lnds)[:, [1, 0]]
    # revert the Y by height
    lnds[:, 1] = height - lnds[:, 1]
    return lnds


def convert_landmarks_from_itk(lnds, image_size):
    """convert landmarks from the ITK format back to the one used in ImageJ

    .. seealso:: https://github.com/SuperElastix/elastix/issues/156#issuecomment-511712784

    :param ndarray lnds: landmarks
    :param (int,int) image_size: image height, width
    :return ndarray: landmarks

    >>> convert_landmarks_from_itk([[ 20, 145], [150,  50], [100, 150]], (150, 200))
    array([[  5,  20],
           [100, 150],
           [  0, 100]])
    >>> lnds = [[ 20, 145], [150,  50], [100, 150], [0, 0], [150, 200]]
    >>> img_size = (150, 200)
    >>> lnds2 = convert_landmarks_from_itk(convert_landmarks_to_itk(lnds, img_size), img_size)
    >>> np.array_equal(lnds, lnds2)
    True
    """
    height, width = image_size
    # swap rows-columns to X-Y
    lnds = np.array(lnds)[:, [1, 0]]
    # revert the Y by height
    lnds[:, 0] = height - lnds[:, 0]
    return lnds


def image_histogram_matching(source, reference, use_color='hsv', norm_img_size=4096):
    """ adjust image histogram between two images

    Optionally transform the image to a more continuous color space.
    The source and reference images do not need to be the same size,
    but both have to be RGB or gray-scale.

    See the related information:

    * https://www.researchgate.net/post/Histogram_matching_for_color_images
    * https://github.com/scikit-image/scikit-image/blob/master/skimage/transform/histogram_matching.py
    * https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x
    * https://github.com/mapbox/rio-hist/issues/3

    :param ndarray source: 2D image to be transformed
    :param ndarray reference: reference 2D image
    :param str use_color: color space used for histogram matching
    :param int norm_img_size: subsample image to this max size
    :return ndarray: transformed image

    >>> from birl.utilities.data_io import update_path, load_image
    >>> path_imgs = os.path.join(update_path('data-images'), 'rat-kidney_', 'scale-5pc')
    >>> img1 = load_image(os.path.join(path_imgs, 'Rat-Kidney_HE.jpg'))
    >>> img2 = load_image(os.path.join(path_imgs, 'Rat-Kidney_PanCytokeratin.jpg'))
    >>> image_histogram_matching(img1, img2).shape == img1.shape
    True
    >>> img = image_histogram_matching(img1[..., 0], np.expand_dims(img2[..., 0], 2))
    >>> img.shape == img1.shape[:2]
    True
    >>> # this should return unchanged source image
    >>> image_histogram_matching(np.random.random((10, 20, 30, 5)),
    ...                          np.random.random((30, 10, 20, 5))).ndim
    4
    """

    # in case of gray images normalise dimensionality
    def _normalise_image(img):
        # normalise gray-scale images
        if img.ndim == 3 and img.shape[-1] == 1:
            img = img[..., 0]
        return img

    source = _normalise_image(source)
    reference = _normalise_image(reference)
    assert source.ndim == reference.ndim, 'the image dimensionality has to be equal'

    if source.ndim == 2:
        matched = histogram_match_cumulative_cdf(source, reference, norm_img_size=norm_img_size)
    elif source.ndim == 3:
        conv_from_rgb, conv_to_rgb = CONVERT_RGB.get(use_color.lower(), (None, None))
        if conv_from_rgb:
            source = conv_from_rgb(source[:, :, :3])
            reference = conv_from_rgb(reference[:, :, :3])
        matched = np.empty(source.shape, dtype=source.dtype)
        for ch in range(source.shape[-1]):
            matched[..., ch] = histogram_match_cumulative_cdf(
                source[..., ch],
                reference[..., ch],
                norm_img_size=norm_img_size,
            )
        if conv_to_rgb:
            matched = conv_to_rgb(matched)
    else:
        logging.warning('unsupported image dimensions: %r', source.shape)
        matched = source

    return matched
845

846

847 30
def histogram_match_cumulative_cdf(source, reference, norm_img_size=1024):
848
    """ Adjust the pixel values of a gray-scale image such that its histogram
849
    matches that of a target image
850

851
    :param ndarray source: 2D image to be transformed, np.array<height1, width1>
852
    :param ndarray reference: reference 2D image, np.array<height2, width2>
853
    :return ndarray: transformed image, np.array<height1, width1>
854

855
    >>> np.random.seed(0)
856
    >>> img = histogram_match_cumulative_cdf(np.random.randint(128, 145, (150, 200)),
857
    ...                                      np.random.randint(0, 18, (200, 180)))
858
    >>> img.astype(int)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
859
    array([[13, 16,  0, ..., 12,  2,  5],
860
           [17,  9,  1, ..., 16,  9,  0],
861
           [11, 12, 14, ...,  8,  5,  4],
862
           ...,
863
           [12,  6,  3, ..., 15,  0,  3],
864
           [11, 17,  2, ..., 12, 12,  5],
865
           [ 6, 12,  3, ...,  8,  0,  1]])
866
    >>> np.bincount(img.ravel()).astype(int)  # doctest: +NORMALIZE_WHITESPACE
867
    array([1705, 1706, 1728, 1842, 1794, 1866, 1771,    0, 1717, 1752, 1757,
868
           1723, 1823, 1833, 1749, 1718, 1769, 1747])
869
    >>> img_source = np.random.randint(50, 245, (2500, 3000)).astype(float)
870
    >>> img_source[-1, -1] = 255
871
    >>> img = histogram_match_cumulative_cdf(img_source / 255., img)
872
    >>> np.array(img.shape, dtype=int)
873
    array([2500, 3000])
874
    """
875
    # use smaller image
876 30
    step_src = max(1, int(np.max(source.shape) / norm_img_size))
877 30
    step_ref = max(1, int(np.max(reference.shape) / norm_img_size))
878

879
    # determine if we need remember that output should be float valued
880 30
    out_float = source.max() < 1.5
881
    # if the image is flout in range (0, 1) extend it
882 30
    source = np.round(source * 255) if source.max() < 1.5 else source
883
    # here we need convert to int values
884 30
    source = source.astype(np.int16)
885
    # work with just a small image
886 30
    src_small = source[::step_src, ::step_src]
887

888
    # here we need work with just a small image
889 30
    ref_small = reference[::step_ref, ::step_ref]
890
    # if the image is flout in range (0, 1) extend it
891 30
    ref_small = np.round(ref_small * 255) if reference.max() < 1.5 else ref_small
892
    # here we need convert to int values
893 30
    ref_small = ref_small.astype(np.int16)
894

895
    # some color spaces have also negative values, then shisfting to zero is needed
896 30
    offset = min(0, src_small.min(), ref_small.min())
897
    # get value histograms
898 30
    src_counts = np.bincount(src_small.ravel() - offset)
899
    # src_values = np.arange(0, len(src_counts))
900 30
    ref_counts = np.bincount(ref_small.ravel() - offset)
901 30
    ref_values = np.arange(0, len(ref_counts))
902
    # calculate normalized quantiles for each array
903 30
    src_quantiles = np.cumsum(src_counts) / float(source.size)
904 30
    ref_quantiles = np.cumsum(ref_counts) / float(reference.size)
905

906 30
    interp_values = np.round(np.interp(src_quantiles, ref_quantiles, ref_values))
907
    # in case that it overflows, due to sampling step may skip some high values
908 30
    if source.max() >= len(interp_values):
909 30
        logging.warning('source image max value %i overflow generated LUT of size %i', source.max(), len(interp_values))
910
        # then clip the source image values to fit ot the range
911 30
        source[source >= len(interp_values)] = len(interp_values) - 1
912 30
    matched = np.round(interp_values)[source - offset].astype(np.int16) + offset
913

914 30
    if out_float:
915 30
        matched = matched.astype(float) / 255.
916 30
    return matched
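

if __name__ == '__main__':
    # a minimal self-check sketch (an assumption, not part of the original
    # module): running this file directly executes the doctests above; note
    # that some doctests expect the BIRL repository data layout on disk
    import doctest

    doctest.testmod()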
