1
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
2
# vi: set ft=python sts=4 ts=4 sw=4 et:
3
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
4
#
5
#   See COPYING file distributed along with the NiBabel package for the
6
#   copyright and license terms.
7
#
8
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
9 33
""" Utility functions for analyze-like formats """
10

11 33
import sys
12 33
import warnings
13 33
import gzip
14 33
from collections import OrderedDict
15 33
from os.path import exists, splitext
16 33
from operator import mul
17 33
from functools import reduce
18

19 33
import numpy as np
20

21 33
from .casting import (shared_range, type_info, OK_FLOATS)
22 33
from .openers import Opener, BZ2File, IndexedGzipFile
23 33
from .deprecated import deprecate_with_version
24 33
from .externals.oset import OrderedSet
25

26 33
sys_is_le = sys.byteorder == 'little'
27 33
native_code = sys_is_le and '<' or '>'
28 33
swapped_code = sys_is_le and '>' or '<'
29

30 33
endian_codes = (  # numpy code, aliases
31
    ('<', 'little', 'l', 'le', 'L', 'LE'),
32
    ('>', 'big', 'BIG', 'b', 'be', 'B', 'BE'),
33
    (native_code, 'native', 'n', 'N', '=', '|', 'i', 'I'),
34
    (swapped_code, 'swapped', 's', 'S', '!'))
35
# We'll put these into the Recoder class after we define it
36

37
#: default compression level when writing gz and bz2 files
38 33
default_compresslevel = 1
39

40
#: file-like classes known to hold compressed data
41 33
COMPRESSED_FILE_LIKES = (gzip.GzipFile, BZ2File, IndexedGzipFile)
42

43

44 33
class Recoder(object):
45
    """ class to return canonical code(s) from code or aliases
46

47
    The concept is a lot easier to read in the implementation and
48
    tests than it is to explain, so...
49

50
    >>> # If you have some codes, and several aliases, like this:
51
    >>> code1 = 1; aliases1=['one', 'first']
52
    >>> code2 = 2; aliases2=['two', 'second']
53
    >>> # You might want to do this:
54
    >>> codes = [[code1]+aliases1,[code2]+aliases2]
55
    >>> recodes = Recoder(codes)
56
    >>> recodes.code['one']
57
    1
58
    >>> recodes.code['second']
59
    2
60
    >>> recodes.code[2]
61
    2
62
    >>> # Or maybe you have a code, a label and some aliases
63
    >>> codes=((1,'label1','one', 'first'),(2,'label2','two'))
64
    >>> # you might want to get back the code or the label
65
    >>> recodes = Recoder(codes, fields=('code','label'))
66
    >>> recodes.code['first']
67
    1
68
    >>> recodes.code['label1']
69
    1
70
    >>> recodes.label[2]
71
    'label2'
72
    >>> # For convenience, you can get the first entered name by
73
    >>> # indexing the object directly
74
    >>> recodes[2]
75
    2
76
    """
77

78 33
    def __init__(self, codes, fields=('code',), map_maker=OrderedDict):
79
        """ Create recoder object
80

81
        ``codes`` give a sequence of code, alias sequences
82
        ``fields`` are names by which the entries in these sequences can be
83
        accessed.
84

85
        By default ``fields`` gives the first column the name
86
        "code".  The first column is the vector of first entries
87
        in each of the sequences found in ``codes``.  Thence you can
88
        get the equivalent first column value with ob.code[value],
89
        where value can be a first column value, or a value in any of
90
        the other columns in that sequence.
91

92
        You can give other columns names too, and access them in the
93
        same way - see the examples in the class docstring.
94

95
        Parameters
96
        ----------
97
        codes : sequence of sequences
98
            Each sequence defines values (codes) that are equivalent
99
        fields : {('code',) string sequence}, optional
100
            names by which elements in sequences can be accessed
101
        map_maker: callable, optional
102
            constructor for dict-like objects used to store key value pairs.
103
            Default is ``dict``.  ``map_maker()`` generates an empty mapping.
104
            The mapping need only implement ``__getitem__, __setitem__, keys,
105
            values``.
106
        """
107 33
        self.fields = tuple(fields)
108 33
        self.field1 = {}  # a placeholder for the check below
109 33
        for name in fields:
110 33
            if name in self.__dict__:
111 33
                raise KeyError(f'Input name {name} already in object dict')
112 33
            self.__dict__[name] = map_maker()
113 33
        self.field1 = self.__dict__[fields[0]]
114 33
        self.add_codes(codes)
115

116 33
    def add_codes(self, code_syn_seqs):
117
        """ Add codes to object
118

119
        Parameters
120
        ----------
121
        code_syn_seqs : sequence
122
            sequence of sequences, where each sequence ``S = code_syn_seqs[n]``
123
            for n in 0..len(code_syn_seqs), is a sequence giving values in the
124
            same order as ``self.fields``.  Each S should be at least of the
125
            same length as ``self.fields``.  After this call, if ``self.fields
126
            == ['field1', 'field2'], then ``self.field1[S[n]] == S[0]`` for all
127
            n in 0..len(S) and ``self.field2[S[n]] == S[1]`` for all n in
128
            0..len(S).
129

130
        Examples
131
        --------
132
        >>> code_syn_seqs = ((2, 'two'), (1, 'one'))
133
        >>> rc = Recoder(code_syn_seqs)
134
        >>> rc.value_set() == set((1,2))
135
        True
136
        >>> rc.add_codes(((3, 'three'), (1, 'first')))
137
        >>> rc.value_set() == set((1,2,3))
138
        True
139
        >>> print(rc.value_set())  # set is actually ordered
140
        OrderedSet([2, 1, 3])
141
        """
142 33
        for code_syns in code_syn_seqs:
143
            # Add all the aliases
144 33
            for alias in code_syns:
145
                # For all defined fields, make every value in the sequence be
146
                # an entry to return matching index value.
147 33
                for field_ind, field_name in enumerate(self.fields):
148 33
                    self.__dict__[field_name][alias] = code_syns[field_ind]
149

150 33
    def __getitem__(self, key):
151
        """ Return value from field1 dictionary (first column of values)
152

153
        Returns same value as ``obj.field1[key]`` and, with the
154
        default initializing ``fields`` argument of fields=('code',),
155
        this will return the same as ``obj.code[key]``
156

157
        >>> codes = ((1, 'one'), (2, 'two'))
158
        >>> Recoder(codes)['two']
159
        2
160
        """
161 33
        return self.field1[key]
162

163 33
    def __contains__(self, key):
164
        """ True if field1 in recoder contains `key`
165
        """
166 33
        try:
167 33
            self.field1[key]
168 33
        except KeyError:
169 33
            return False
170 33
        return True
171

172 33
    def keys(self):
173
        """ Return all available code and alias values
174

175
        Returns same value as ``obj.field1.keys()`` and, with the
176
        default initializing ``fields`` argument of fields=('code',),
177
        this will return the same as ``obj.code.keys()``
178

179
        >>> codes = ((1, 'one'), (2, 'two'), (1, 'repeat value'))
180
        >>> k = Recoder(codes).keys()
181
        >>> set(k) == set([1, 2, 'one', 'repeat value', 'two'])
182
        True
183
        """
184 33
        return self.field1.keys()
185

186 33
    def value_set(self, name=None):
187
        """ Return OrderedSet of possible returned values for column
188

189
        By default, the column is the first column.
190

191
        Returns same values as ``set(obj.field1.values())`` and,
192
        with the default initializing``fields`` argument of
193
        fields=('code',), this will return the same as
194
        ``set(obj.code.values())``
195

196
        Parameters
197
        ----------
198
        name : {None, string}
199
            Where default of none gives result for first column
200

201
        >>> codes = ((1, 'one'), (2, 'two'), (1, 'repeat value'))
202
        >>> vs = Recoder(codes).value_set()
203
        >>> vs == set([1, 2]) # Sets are not ordered, hence this test
204
        True
205
        >>> rc = Recoder(codes, fields=('code', 'label'))
206
        >>> rc.value_set('label') == set(('one', 'two', 'repeat value'))
207
        True
208
        """
209 33
        if name is None:
210 33
            d = self.field1
211
        else:
212 33
            d = self.__dict__[name]
213 33
        return OrderedSet(d.values())
214

215

216
# Endian code aliases
217 33
endian_codes = Recoder(endian_codes)
218

219

220 33
class DtypeMapper(object):
221
    """ Specialized mapper for numpy dtypes
222

223
    We pass this mapper into the Recoder class to deal with numpy dtype
224
    hashing.
225

226
    The hashing problem is that dtypes that compare equal may not have the same
227
    hash.  This is true for numpys up to the current at time of writing
228
    (1.6.0).  For numpy 1.2.1 at least, even dtypes that look exactly the same
229
    in terms of fields don't always have the same hash.  This makes dtypes
230
    difficult to use as keys in a dictionary.
231

232
    This class wraps a dictionary in order to implement a __getitem__ to deal
233
    with dtype hashing. If the key doesn't appear to be in the mapping, and it
234
    is a dtype, we compare (using ==) all known dtype keys to the input key,
235
    and return any matching values for the matching key.
236
    """
237

238 33
    def __init__(self):
239 33
        self._dict = {}
240 33
        self._dtype_keys = []
241

242 33
    def keys(self):
243 33
        return self._dict.keys()
244

245 33
    def values(self):
246 33
        return self._dict.values()
247

248 33
    def __setitem__(self, key, value):
249
        """ Set item into mapping, checking for dtype keys
250

251
        Cache dtype keys for comparison test in __getitem__
252
        """
253 33
        self._dict[key] = value
254 33
        if hasattr(key, 'subdtype'):
255 33
            self._dtype_keys.append(key)
256

257 33
    def __getitem__(self, key):
258
        """ Get item from mapping, checking for dtype keys
259

260
        First do simple hash lookup, then check for a dtype key that has failed
261
        the hash lookup.  Look then for any known dtype keys that compare equal
262
        to `key`.
263
        """
264 33
        try:
265 33
            return self._dict[key]
266 33
        except KeyError:
267 33
            pass
268 33
        if hasattr(key, 'subdtype'):
269 33
            for dt in self._dtype_keys:
270 33
                if key == dt:
271 0
                    return self._dict[dt]
272 33
        raise KeyError(key)
273

274

275 33
def pretty_mapping(mapping, getterfunc=None):
276
    """ Make pretty string from mapping
277

278
    Adjusts text column to print values on basis of longest key.
279
    Probably only sensible if keys are mainly strings.
280

281
    You can pass in a callable that does clever things to get the values
282
    out of the mapping, given the names.  By default, we just use
283
    ``__getitem__``
284

285
    Parameters
286
    ----------
287
    mapping : mapping
288
       implementing iterator returning keys and .items()
289
    getterfunc : None or callable
290
       callable taking two arguments, ``obj`` and ``key`` where ``obj``
291
       is the passed mapping.  If None, just use ``lambda obj, key:
292
       obj[key]``
293

294
    Returns
295
    -------
296
    str : string
297

298
    Examples
299
    --------
300
    >>> d = {'a key': 'a value'}
301
    >>> print(pretty_mapping(d))
302
    a key  : a value
303
    >>> class C(object): # to control ordering, show get_ method
304
    ...     def __iter__(self):
305
    ...         return iter(('short_field','longer_field'))
306
    ...     def __getitem__(self, key):
307
    ...         if key == 'short_field':
308
    ...             return 0
309
    ...         if key == 'longer_field':
310
    ...             return 'str'
311
    ...     def get_longer_field(self):
312
    ...         return 'method string'
313
    >>> def getter(obj, key):
314
    ...     # Look for any 'get_<name>' methods
315
    ...     try:
316
    ...         return obj.__getattribute__('get_' + key)()
317
    ...     except AttributeError:
318
    ...         return obj[key]
319
    >>> print(pretty_mapping(C(), getter))
320
    short_field   : 0
321
    longer_field  : method string
322
    """
323 33
    if getterfunc is None:
324 33
        getterfunc = lambda obj, key: obj[key]
325 33
    lens = [len(str(name)) for name in mapping]
326 33
    mxlen = np.max(lens)
327 33
    fmt = '%%-%ds  : %%s' % mxlen
328 33
    out = []
329 33
    for name in mapping:
330 33
        value = getterfunc(mapping, name)
331 33
        out.append(fmt % (name, value))
332 33
    return '\n'.join(out)
333

334

335 33
def make_dt_codes(codes_seqs):
336
    """ Create full dt codes Recoder instance from datatype codes
337

338
    Include created numpy dtype (from numpy type) and opposite endian
339
    numpy dtype
340

341
    Parameters
342
    ----------
343
    codes_seqs : sequence of sequences
344
       contained sequences make be length 3 or 4, but must all be the same
345
       length. Elements are data type code, data type name, and numpy
346
       type (such as ``np.float32``).  The fourth element is the nifti string
347
       representation of the code (e.g. "NIFTI_TYPE_FLOAT32")
348

349
    Returns
350
    -------
351
    rec : ``Recoder`` instance
352
       Recoder that, by default, returns ``code`` when indexed with any
353
       of the corresponding code, name, type, dtype, or swapped dtype.
354
       You can also index with ``niistring`` values if codes_seqs had sequences
355
       of length 4 instead of 3.
356
    """
357 33
    fields = ['code', 'label', 'type']
358 33
    len0 = len(codes_seqs[0])
359 33
    if len0 not in (3, 4):
360 33
        raise ValueError('Sequences must be length 3 or 4')
361 33
    if len0 == 4:
362 33
        fields.append('niistring')
363 33
    dt_codes = []
364 33
    for seq in codes_seqs:
365 33
        if len(seq) != len0:
366 33
            raise ValueError('Sequences must all have the same length')
367 33
        np_type = seq[2]
368 33
        this_dt = np.dtype(np_type)
369
        # Add swapped dtype to synonyms
370 33
        code_syns = list(seq) + [this_dt, this_dt.newbyteorder(swapped_code)]
371 33
        dt_codes.append(code_syns)
372 33
    return Recoder(dt_codes, fields + ['dtype', 'sw_dtype'], DtypeMapper)
373

374

375 33
@deprecate_with_version('can_cast deprecated. '
376
                        'Please use arraywriter classes instead',
377
                        '1.2',
378
                        '3.0')
379 33
def can_cast(in_type, out_type, has_intercept=False, has_slope=False):
380
    """ Return True if we can safely cast ``in_type`` to ``out_type``
381

382
    Parameters
383
    ----------
384
    in_type : numpy type
385
       type of data we will case from
386
    out_dtype : numpy type
387
       type that we want to cast to
388
    has_intercept : bool, optional
389
       Whether we can subtract a constant from the data (before scaling)
390
       before casting to ``out_dtype``.  Default is False
391
    has_slope : bool, optional
392
       Whether we can use a scaling factor to adjust slope of
393
       relationship of data to data in cast array.  Default is False
394

395
    Returns
396
    -------
397
    tf : bool
398
       True if we can safely cast, False otherwise
399

400
    Examples
401
    --------
402
    >>> can_cast(np.float64, np.float32)  # doctest: +SKIP
403
    True
404
    >>> can_cast(np.complex128, np.float32)  # doctest: +SKIP
405
    False
406
    >>> can_cast(np.int64, np.float32)  # doctest: +SKIP
407
    True
408
    >>> can_cast(np.float32, np.int16)  # doctest: +SKIP
409
    False
410
    >>> can_cast(np.float32, np.int16, False, True)  # doctest: +SKIP
411
    True
412
    >>> can_cast(np.int16, np.uint8)  # doctest: +SKIP
413
    False
414

415
    Whether we can actually cast int to uint when we don't have an intercept
416
    depends on the data.  That's why this function isn't very useful. But we
417
    assume that an integer is using its full range, and check whether scaling
418
    works in that situation.
419

420
    Here we need an intercept to scale the full range of an int to a uint
421

422
    >>> can_cast(np.int16, np.uint8, False, True)  # doctest: +SKIP
423
    False
424
    >>> can_cast(np.int16, np.uint8, True, True)  # doctest: +SKIP
425
    True
426
    """
427 0
    in_dtype = np.dtype(in_type)
428
    # Whether we can cast depends on the data, and we've only got the type.
429
    # Let's assume integers use all of their range but floats etc not
430 33
    if in_dtype.kind in 'iu':
431 0
        info = np.iinfo(in_dtype)
432 0
        data = np.array([info.min, info.max], dtype=in_dtype)
433
    else:  # Float or complex or something. Any old thing will do
434 0
        data = np.ones((1,), in_type)
435 0
    from .arraywriters import make_array_writer, WriterError
436 0
    try:
437 0
        make_array_writer(data, out_type, has_slope, has_intercept)
438 0
    except WriterError:
439 0
        return False
440 0
    return True
441

442

443 33
def _is_compressed_fobj(fobj):
444
    """ Return True if fobj represents a compressed data file-like object
445
    """
446 33
    return isinstance(fobj, COMPRESSED_FILE_LIKES)
447

448

449 33
def array_from_file(shape, in_dtype, infile, offset=0, order='F', mmap=True):
450
    """ Get array from file with specified shape, dtype and file offset
451

452
    Parameters
453
    ----------
454
    shape : sequence
455
        sequence specifying output array shape
456
    in_dtype : numpy dtype
457
        fully specified numpy dtype, including correct endianness
458
    infile : file-like
459
        open file-like object implementing at least read() and seek()
460
    offset : int, optional
461
        offset in bytes into `infile` to start reading array data. Default is 0
462
    order : {'F', 'C'} string
463
        order in which to write data.  Default is 'F' (fortran order).
464
    mmap : {True, False, 'c', 'r', 'r+'}
465
        `mmap` controls the use of numpy memory mapping for reading data.  If
466
        False, do not try numpy ``memmap`` for data array.  If one of {'c',
467
        'r', 'r+'}, try numpy memmap with ``mode=mmap``.  A `mmap` value of
468
        True gives the same behavior as ``mmap='c'``.  If `infile` cannot be
469
        memory-mapped, ignore `mmap` value and read array from file.
470

471
    Returns
472
    -------
473
    arr : array-like
474
        array like object that can be sliced, containing data
475

476
    Examples
477
    --------
478
    >>> from io import BytesIO
479
    >>> bio = BytesIO()
480
    >>> arr = np.arange(6).reshape(1,2,3)
481
    >>> _ = bio.write(arr.tobytes('F'))  # outputs int
482
    >>> arr2 = array_from_file((1,2,3), arr.dtype, bio)
483
    >>> np.all(arr == arr2)
484
    True
485
    >>> bio = BytesIO()
486
    >>> _ = bio.write(b' ' * 10)
487
    >>> _ = bio.write(arr.tobytes('F'))
488
    >>> arr2 = array_from_file((1,2,3), arr.dtype, bio, 10)
489
    >>> np.all(arr == arr2)
490
    True
491
    """
492 33
    if mmap not in (True, False, 'c', 'r', 'r+'):
493 33
        raise ValueError("mmap value should be one of True, False, 'c', "
494
                         "'r', 'r+'")
495 33
    if mmap is True:
496 33
        mmap = 'c'
497 33
    in_dtype = np.dtype(in_dtype)
498
    # Get file-like object from Opener instance
499 33
    infile = getattr(infile, 'fobj', infile)
500 33
    if mmap and not _is_compressed_fobj(infile):
501 33
        try:  # Try memmapping file on disk
502 33
            return np.memmap(infile,
503
                             in_dtype,
504
                             mode=mmap,
505
                             shape=shape,
506
                             order=order,
507
                             offset=offset)
508
            # The error raised by memmap, for different file types, has
509
            # changed in different incarnations of the numpy routine
510 33
        except (AttributeError, TypeError, ValueError):
511 33
            pass
512 33
    if len(shape) == 0:
513 33
        return np.array([])
514
    # Use reduce and mul to work around numpy integer overflow
515 33
    n_bytes = reduce(mul, shape) * in_dtype.itemsize
516 33
    if n_bytes == 0:
517 33
        return np.array([])
518
    # Read data from file
519 33
    infile.seek(offset)
520 33
    if hasattr(infile, 'readinto'):
521 33
        data_bytes = bytearray(n_bytes)
522 33
        n_read = infile.readinto(data_bytes)
523 33
        needs_copy = False
524
    else:
525 33
        data_bytes = infile.read(n_bytes)
526 33
        n_read = len(data_bytes)
527 33
        needs_copy = True
528 33
    if n_bytes != n_read:
529 33
        raise IOError(f"Expected {n_bytes} bytes, got {n_read} bytes from "
530
                      f"{getattr(infile, 'name', 'object')}\n - could the file be damaged?")
531 33
    arr = np.ndarray(shape, in_dtype, buffer=data_bytes, order=order)
532 33
    if needs_copy:
533 0
        return arr.copy()
534 33
    arr.flags.writeable = True
535 33
    return arr
536

537

538 33
def array_to_file(data, fileobj, out_dtype=None, offset=0,
539
                  intercept=0.0, divslope=1.0,
540
                  mn=None, mx=None, order='F', nan2zero=True):
541
    """ Helper function for writing arrays to file objects
542

543
    Writes arrays as scaled by `intercept` and `divslope`, and clipped
544
    at (prescaling) `mn` minimum, and `mx` maximum.
545

546
    * Clip `data` array at min `mn`, max `max` where there are not None ->
547
      ``clipped`` (this is *pre scale clipping*)
548
    * Scale ``clipped`` with ``clipped_scaled = (clipped - intercept) /
549
      divslope``
550
    * Clip ``clipped_scaled`` to fit into range of `out_dtype` (*post scale
551
      clipping*) -> ``clipped_scaled_clipped``
552
    * If converting to integer `out_dtype` and `nan2zero` is True, set NaN
553
      values in ``clipped_scaled_clipped`` to 0
554
    * Write ``clipped_scaled_clipped_n2z`` to fileobj `fileobj` starting at
555
      offset `offset` in memory layout `order`
556

557
    Parameters
558
    ----------
559
    data : array-like
560
        array or array-like to write.
561
    fileobj : file-like
562
        file-like object implementing ``write`` method.
563
    out_dtype : None or dtype, optional
564
        dtype to write array as.  Data array will be coerced to this dtype
565
        before writing. If None (default) then use input data type.
566
    offset : None or int, optional
567
        offset into fileobj at which to start writing data. Default is 0. None
568
        means start at current file position
569
    intercept : scalar, optional
570
        scalar to subtract from data, before dividing by ``divslope``.  Default
571
        is 0.0
572
    divslope : None or scalar, optional
573
        scalefactor to *divide* data by before writing.  Default is 1.0. If
574
        None, there is no valid data, we write zeros.
575
    mn : scalar, optional
576
        minimum threshold in (unscaled) data, such that all data below this
577
        value are set to this value. Default is None (no threshold). The
578
        typical use is to set -np.inf in the data to have this value (which
579
        might be the minimum non-finite value in the data).
580
    mx : scalar, optional
581
        maximum threshold in (unscaled) data, such that all data above this
582
        value are set to this value. Default is None (no threshold). The
583
        typical use is to set np.inf in the data to have this value (which
584
        might be the maximum non-finite value in the data).
585
    order : {'F', 'C'}, optional
586
        memory order to write array.  Default is 'F'
587
    nan2zero : {True, False}, optional
588
        Whether to set NaN values to 0 when writing integer output.  Defaults
589
        to True.  If False, NaNs will be represented as numpy does when
590
        casting; this depends on the underlying C library and is undefined. In
591
        practice `nan2zero` == False might be a good choice when you completely
592
        sure there will be no NaNs in the data. This value ignored for float
593
        outut types.  NaNs are treated as zero *before* applying `intercept`
594
        and `divslope` - so an array ``[np.nan]`` with an `intercept` of 10
595
        becomes ``[-10]`` after conversion to integer `out_dtype` with
596
        `nan2zero` set.  That is because you will likely apply `divslope` and
597
        `intercept` in reverse order when reading the data back, returning the
598
        zero you probably expected from the input NaN.
599

600
    Examples
601
    --------
602
    >>> from io import BytesIO
603
    >>> sio = BytesIO()
604
    >>> data = np.arange(10, dtype=np.float)
605
    >>> array_to_file(data, sio, np.float)
606
    >>> sio.getvalue() == data.tobytes('F')
607
    True
608
    >>> _ = sio.truncate(0); _ = sio.seek(0)  # outputs 0
609
    >>> array_to_file(data, sio, np.int16)
610
    >>> sio.getvalue() == data.astype(np.int16).tobytes()
611
    True
612
    >>> _ = sio.truncate(0); _ = sio.seek(0)
613
    >>> array_to_file(data.byteswap(), sio, np.float)
614
    >>> sio.getvalue() == data.byteswap().tobytes('F')
615
    True
616
    >>> _ = sio.truncate(0); _ = sio.seek(0)
617
    >>> array_to_file(data, sio, np.float, order='C')
618
    >>> sio.getvalue() == data.tobytes('C')
619
    True
620
    """
621
    # Shield special case
622 33
    div_none = divslope is None
623 33
    if not np.all(
624
            np.isfinite((intercept, 1.0 if div_none else divslope))):
625 33
        raise ValueError('divslope and intercept must be finite')
626 33
    if divslope == 0:
627 33
        raise ValueError('divslope cannot be zero')
628 33
    data = np.asanyarray(data)
629 33
    in_dtype = data.dtype
630 33
    if out_dtype is None:
631 33
        out_dtype = in_dtype
632
    else:
633 33
        out_dtype = np.dtype(out_dtype)
634 33
    if offset is not None:
635 33
        seek_tell(fileobj, offset)
636 33
    if (div_none or (mn, mx) == (0, 0) or
637
            ((mn is not None and mx is not None) and mx < mn)):
638 33
        write_zeros(fileobj, data.size * out_dtype.itemsize)
639 33
        return
640 33
    if order not in 'FC':
641 0
        raise ValueError('Order should be one of F or C')
642
    # Simple cases
643 33
    pre_clips = None if (mn is None and mx is None) else (mn, mx)
644 33
    null_scaling = (intercept == 0 and divslope == 1)
645 33
    if in_dtype.type == np.void:
646 33
        if not null_scaling:
647 0
            raise ValueError('Cannot scale non-numeric types')
648 33
        if pre_clips is not None:
649 33
            raise ValueError('Cannot clip non-numeric types')
650 33
        return _write_data(data, fileobj, out_dtype, order)
651 33
    if pre_clips is not None:
652 33
        pre_clips = _dt_min_max(in_dtype, *pre_clips)
653 33
    if null_scaling and np.can_cast(in_dtype, out_dtype):
654 33
        return _write_data(data, fileobj, out_dtype, order,
655
                           pre_clips=pre_clips)
656
    # Force upcasting for floats by making atleast_1d.
657 33
    slope, inter = [np.atleast_1d(v) for v in (divslope, intercept)]
658
    # Default working point type for applying slope / inter
659 33
    if slope.dtype.kind in 'iu':
660 33
        slope = slope.astype(float)
661 33
    if inter.dtype.kind in 'iu':
662 33
        inter = inter.astype(float)
663 33
    in_kind = in_dtype.kind
664 33
    out_kind = out_dtype.kind
665 33
    if out_kind in 'fc':
666 33
        return _write_data(data, fileobj, out_dtype, order,
667
                           slope=slope,
668
                           inter=inter,
669
                           pre_clips=pre_clips)
670 33
    assert out_kind in 'iu'
671 33
    if in_kind in 'iu':
672 33
        if null_scaling:
673
            # Must be large int to small int conversion; add clipping to
674
            # pre scale thresholds
675 33
            mn, mx = _dt_min_max(in_dtype, mn, mx)
676 33
            mn_out, mx_out = _dt_min_max(out_dtype)
677 33
            pre_clips = max(mn, mn_out), min(mx, mx_out)
678 33
            return _write_data(data, fileobj, out_dtype, order,
679
                               pre_clips=pre_clips)
680
        # In any case, we do not want to check for nans beause we've already
681
        # disallowed scaling that generates nans
682 33
        nan2zero = False
683
    # We are either scaling into c/floats or starting with c/floats, then we're
684
    # going to integers
685
    # Because we're going to integers, complex inter and slope will only slow
686
    # us down, cast to float
687 33
    slope, inter = [v.astype(_matching_float(v.dtype)) for v in (slope, inter)]
688
    # We'll do the thresholding on the scaled data, so turn off the
689
    # thresholding on the unscaled data
690 33
    pre_clips = None
691
    # We may need to cast the original array to another type
692 33
    cast_in_dtype = in_dtype
693 33
    if in_kind == 'c':
694
        # Cast to floats before anything else
695 33
        cast_in_dtype = np.dtype(_matching_float(in_dtype))
696 33
    elif in_kind == 'f' and in_dtype.itemsize == 2:
697
        # Make sure we don't use float16 as a working type
698 33
        cast_in_dtype = np.dtype(np.float32)
699 33
    w_type = working_type(cast_in_dtype, slope, inter)
700 33
    dt_mnmx = _dt_min_max(cast_in_dtype, mn, mx)
701
    # We explore for a good precision to avoid infs and clipping
702
    # Find smallest float type equal or larger than the current working
703
    # type, that can contain range of extremes after scaling, without going
704
    # to +-inf
705 33
    extremes = np.array(dt_mnmx, dtype=cast_in_dtype)
706 33
    w_type = best_write_scale_ftype(extremes, slope, inter, w_type)
707
    # Push up precision by casting the slope, inter
708 33
    slope, inter = [v.astype(w_type) for v in (slope, inter)]
709
    # We need to know the result of applying slope and inter to the min and
710
    # max of the array, in order to clip the output array, after applying
711
    # the slope and inter.  Otherwise we'd need to clip twice, once before
712
    # applying (slope, inter), and again after, to ensure we have not hit
713
    # over- or under-flow. For the same reason we need to know the result of
714
    # applying slope, inter to 0, in order to fill in the nan output value
715
    # after scaling etc. We could fill with 0 before scaling, but then we'd
716
    # have to do an extra copy before filling nans with 0, to avoid
717
    # overwriting the input array
718
    # Run min, max, 0 through scaling / rint
719 33
    specials = np.array(dt_mnmx + (0,), dtype=w_type)
720 33
    if inter != 0.0:
721 33
        specials = specials - inter
722 33
    if slope != 1.0:
723 33
        specials = specials / slope
724 33
    assert specials.dtype.type == w_type
725 33
    post_mn, post_mx, nan_fill = np.rint(specials)
726 33
    if post_mn > post_mx:  # slope could be negative
727 33
        post_mn, post_mx = post_mx, post_mn
728
    # Make sure that the thresholds exclude any value that will get badly cast
729
    # to the integer type.  This is not the same as using the maximumum of the
730
    # output dtype as thresholds, because these may not be exactly represented
731
    # in the float type.
732
    #
733
    # The thresholds assume that the data are in `wtype` dtype after applying
734
    # the slope and intercept.
735 33
    both_mn, both_mx = shared_range(w_type, out_dtype)
736
    # Check that nan2zero output value is in range
737 33
    if nan2zero and not both_mn <= nan_fill <= both_mx:
738
        # Estimated error for (0 - inter) / slope is 2 * eps * abs(inter /
739
        # slope).  Assume errors are for working float type. Round for integer
740
        # rounding
741 33
        est_err = np.round(2 * np.finfo(w_type).eps * abs(inter / slope))
742 33
        if ((nan_fill < both_mn and abs(nan_fill - both_mn) < est_err) or
743
                (nan_fill > both_mx and abs(nan_fill - both_mx) < est_err)):
744
            # nan_fill can be (just) outside clip range
745 2
            nan_fill = np.clip(nan_fill, both_mn, both_mx)
746
        else:
747 33
            raise ValueError(f"nan_fill == {nan_fill}, outside safe int range "
748
                             f"({int(both_mn)}-{int(both_mx)}); "
749
                             "change scaling or set nan2zero=False?")
750
    # Make sure non-nan output clipped to shared range
751 33
    post_mn = np.max([post_mn, both_mn])
752 33
    post_mx = np.min([post_mx, both_mx])
753 33
    in_cast = None if cast_in_dtype == in_dtype else cast_in_dtype
754 33
    return _write_data(data, fileobj, out_dtype, order,
755
                       in_cast=in_cast,
756
                       pre_clips=pre_clips,
757
                       inter=inter,
758
                       slope=slope,
759
                       post_clips=(post_mn, post_mx),
760
                       nan_fill=nan_fill if nan2zero else None)
761

762

763 33
def _write_data(data,
764
                fileobj,
765
                out_dtype,
766
                order,
767
                in_cast=None,
768
                pre_clips=None,
769
                inter=0.,
770
                slope=1.,
771
                post_clips=None,
772
                nan_fill=None):
773
    """ Write array `data` to `fileobj` as `out_dtype` type, layout `order`
774

775
    Does not modify `data` in-place.
776

777
    Parameters
778
    ----------
779
    data : ndarray
780
    fileobj : object
781
        implementing ``obj.write``
782
    out_dtype : numpy type
783
        Type to which to cast output data just before writing
784
    order : {'F', 'C'}
785
        memory layout of array in fileobj after writing
786
    in_cast : None or numpy type, optional
787
        If not None, inital cast to do on `data` slices before further
788
        processing
789
    pre_clips : None or 2-sequence, optional
790
        If not None, minimum and maximum of input values at which to clip.
791
    inter : scalar or array, optional
792
        Intercept to subtract before writing ``out = data - inter``
793
    slope : scalar or array, optional
794
        Slope by which to divide before writing ``out2 = out / slope``
795
    post_clips : None or 2-sequence, optional
796
        If not None, minimum and maximum of scaled values at which to clip.
797
    nan_fill : None or scalar, optional
798
        If not None, values that were NaN in `data` will receive `nan_fill`
799
        in array as output to disk (after scaling).
800
    """
801 33
    data = np.squeeze(data)
802 33
    if data.ndim < 2:  # Trick to allow loop over rows for 1D arrays
803 33
        data = np.atleast_2d(data)
804 33
    elif order == 'F':
805 33
        data = data.T
806 33
    nan_need_copy = ((pre_clips, in_cast, inter, slope, post_clips) ==
807
                     (None, None, 0, 1, None))
808 33
    for dslice in data:  # cycle over first dimension to save memory
809 33
        if pre_clips is not None:
810 33
            dslice = np.clip(dslice, *pre_clips)
811 33
        if in_cast is not None:
812 33
            dslice = dslice.astype(in_cast)
813 33
        if inter != 0.0:
814 33
            dslice = dslice - inter
815 33
        if slope != 1.0:
816 33
            dslice = dslice / slope
817 33
        if post_clips is not None:
818 33
            dslice = np.clip(np.rint(dslice), *post_clips)
819 33
        if nan_fill is not None:
820 33
            nans = np.isnan(dslice)
821 33
            if np.any(nans):
822 33
                if nan_need_copy:
823 33
                    dslice = dslice.copy()
824 33
                dslice[nans] = nan_fill
825 33
        if dslice.dtype != out_dtype:
826 33
            dslice = dslice.astype(out_dtype)
827 33
        fileobj.write(dslice.tobytes())
828

829

830 33
def _dt_min_max(dtype_like, mn=None, mx=None):
831 33
    dt = np.dtype(dtype_like)
832 33
    if dt.kind in 'fc':
833 33
        dt_mn, dt_mx = (-np.inf, np.inf)
834 33
    elif dt.kind in 'iu':
835 33
        info = np.iinfo(dt)
836 33
        dt_mn, dt_mx = (info.min, info.max)
837
    else:
838 0
        raise ValueError("unknown dtype")
839 33
    return dt_mn if mn is None else mn, dt_mx if mx is None else mx
840

841

842 33
_CSIZE2FLOAT = {
843
    8: np.float32,
844
    16: np.float64,
845
    24: np.longdouble,
846
    32: np.longdouble}
847

848

849 33
def _matching_float(np_type):
850
    """ Return floating point type matching `np_type`
851
    """
852 33
    dtype = np.dtype(np_type)
853 33
    if dtype.kind not in 'cf':
854 0
        raise ValueError('Expecting float or complex type as input')
855 33
    if dtype.kind in 'f':
856 33
        return dtype.type
857 33
    return _CSIZE2FLOAT[dtype.itemsize]
858

859

860 33
def write_zeros(fileobj, count, block_size=8194):
861
    """ Write `count` zero bytes to `fileobj`
862

863
    Parameters
864
    ----------
865
    fileobj : file-like object
866
        with ``write`` method
867
    count : int
868
        number of bytes to write
869
    block_size : int, optional
870
        largest continuous block to write.
871
    """
872 33
    nblocks = int(count // block_size)
873 33
    rem = count % block_size
874 33
    blk = b'\x00' * block_size
875 33
    for bno in range(nblocks):
876 33
        fileobj.write(blk)
877 33
    fileobj.write(b'\x00' * rem)
878

879

880 33
def seek_tell(fileobj, offset, write0=False):
881
    """ Seek in `fileobj` or check we're in the right place already
882

883
    Parameters
884
    ----------
885
    fileobj : file-like
886
        object implementing ``seek`` and (if seek raises an IOError) ``tell``
887
    offset : int
888
        position in file to which to seek
889
    write0 : {False, True}, optional
890
        If True, and standard seek fails, try to write zeros to the file to
891
        reach `offset`.  This can be useful when writing bz2 files, that cannot
892
        do write seeks.
893
    """
894 33
    try:
895 33
        fileobj.seek(offset)
896 33
    except IOError as e:
897
        # This can be a negative seek in write mode for gz file object or any
898
        # seek in write mode for a bz2 file object
899 33
        pos = fileobj.tell()
900 33
        if pos == offset:
901 33
            return
902 33
        if not write0:
903 33
            raise IOError(str(e))
904 33
        if pos > offset:
905 0
            raise IOError("Can't write to seek backwards")
906 33
        fileobj.write(b'\x00' * (offset - pos))
907 33
        assert fileobj.tell() == offset
908

909

910 33
def apply_read_scaling(arr, slope=None, inter=None):
911
    """ Apply scaling in `slope` and `inter` to array `arr`
912

913
    This is for loading the array from a file (as opposed to the reverse
914
    scaling when saving an array to file)
915

916
    Return data will be ``arr * slope + inter``. The trick is that we have to
917
    find a good precision to use for applying the scaling.  The heuristic is
918
    that the data is always upcast to the higher of the types from `arr,
919
    `slope`, `inter` if `slope` and / or `inter` are not default values. If the
920
    dtype of `arr` is an integer, then we assume the data more or less fills
921
    the integer range, and upcast to a type such that the min, max of
922
    ``arr.dtype`` * scale + inter, will be finite.
923

924
    Parameters
925
    ----------
926
    arr : array-like
927
    slope : None or float, optional
928
        slope value to apply to `arr` (``arr * slope + inter``).  None
929
        corresponds to a value of 1.0
930
    inter : None or float, optional
931
        intercept value to apply to `arr` (``arr * slope + inter``).  None
932
        corresponds to a value of 0.0
933

934
    Returns
935
    -------
936
    ret : array
937
        array with scaling applied.  Maybe upcast in order to give room for the
938
        scaling. If scaling is default (1, 0), then `ret` may be `arr` ``ret is
939
        arr``.
940
    """
941 33
    if slope is None:
942 33
        slope = 1.0
943 33
    if inter is None:
944 33
        inter = 0.0
945 33
    if (slope, inter) == (1, 0):
946 33
        return arr
947 33
    shape = arr.shape
948
    # Force float / float upcasting by promoting to arrays
949 33
    arr, slope, inter = [np.atleast_1d(v) for v in (arr, slope, inter)]
950 33
    if arr.dtype.kind in 'iu':
951
        # int to float; get enough precision to avoid infs
952
        # Find floating point type for which scaling does not overflow,
953
        # starting at given type
954 33
        default = (slope.dtype.type if slope.dtype.kind == 'f' else np.float64)
955 33
        ftype = int_scinter_ftype(arr.dtype, slope, inter, default)
956 33
        slope = slope.astype(ftype)
957 33
        inter = inter.astype(ftype)
958 33
    if slope != 1.0:
959 33
        arr = arr * slope
960 33
    if inter != 0.0:
961 33
        arr = arr + inter
962 33
    return arr.reshape(shape)
963

964

965 33
def working_type(in_type, slope=1.0, inter=0.0):
966
    """ Return array type from applying `slope`, `inter` to array of `in_type`
967

968
    Numpy type that results from an array of type `in_type` being combined with
969
    `slope` and `inter`. It returns something like the dtype type of
970
    ``((np.zeros((2,), dtype=in_type) - inter) / slope)``, but ignoring the
971
    actual values of `slope` and `inter`.
972

973
    Note that you would not necessarily get the same type by applying slope and
974
    inter the other way round.  Also, you'll see that the order in which slope
975
    and inter are applied is the opposite of the order in which they are
976
    passed.
977

978
    Parameters
979
    ----------
980
    in_type : numpy type specifier
981
        Numpy type of input array.  Any valid input for ``np.dtype()``
982
    slope : scalar, optional
983
        slope to apply to array.  If 1.0 (default), ignore this value and its
984
        type.
985
    inter : scalar, optional
986
        intercept to apply to array.  If 0.0 (default), ignore this value and
987
        its type.
988

989
    Returns
990
    -------
991
    wtype: numpy type
992
        Numpy type resulting from applying `inter` and `slope` to array of type
993
        `in_type`.
994
    """
995 33
    val = np.array([1], dtype=in_type)
996 33
    slope = np.array(slope)
997 33
    inter = np.array(inter)
998
    # Don't use real values to avoid overflows.  Promote to 1D to avoid scalar
999
    # casting rules.  Don't use ones_like, zeros_like because of a bug in numpy
1000
    # <= 1.5.1 in converting complex192 / complex256 scalars.
1001 33
    if inter != 0:
1002 33
        val = val + np.array([0], dtype=inter.dtype)
1003 33
    if slope != 1:
1004 33
        val = val / np.array([1], dtype=slope.dtype)
1005 33
    return val.dtype.type
1006

1007

1008 33
@deprecate_with_version('calculate_scale deprecated. '
1009
                        'Please use arraywriter classes instead',
1010
                        '1.2',
1011
                        '3.0')
1012 5
def calculate_scale(data, out_dtype, allow_intercept):
1013
    """ Calculate scaling and optional intercept for data
1014

1015
    Parameters
1016
    ----------
1017
    data : array
1018
    out_dtype : dtype
1019
       output data type in some form understood by ``np.dtype``
1020
    allow_intercept : bool
1021
       If True allow non-zero intercept
1022

1023
    Returns
1024
    -------
1025
    scaling : None or float
1026
       scalefactor to divide into data.  None if no valid data
1027
    intercept : None or float
1028
       intercept to subtract from data.  None if no valid data
1029
    mn : None or float
1030
       minimum of finite value in data or None if this will not
1031
       be used to threshold data
1032
    mx : None or float
1033
       minimum of finite value in data, or None if this will not
1034
       be used to threshold data
1035
    """
1036
    # Code here is a compatibility shell around arraywriters refactor
1037 0
    in_dtype = data.dtype
1038 0
    out_dtype = np.dtype(out_dtype)
1039 33
    if np.can_cast(in_dtype, out_dtype):
1040 0
        return 1.0, 0.0, None, None
1041 0
    from .arraywriters import make_array_writer, WriterError, get_slope_inter
1042 0
    try:
1043 0
        writer = make_array_writer(data, out_dtype, True, allow_intercept)
1044 0
    except WriterError as e:
1045 0
        raise ValueError(str(e))
1046 33
    if out_dtype.kind in 'fc':
1047 0
        return (1.0, 0.0, None, None)
1048 0
    mn, mx = writer.finite_range()
1049 33
    if (mn, mx) == (np.inf, -np.inf):  # No valid data
1050 0
        return (None, None, None, None)
1051 33
    if in_dtype.kind not in 'fc':
1052 0
        mn, mx = (None, None)
1053 0
    return get_slope_inter(writer) + (mn, mx)
1054

1055

1056 33
@deprecate_with_version('scale_min_max deprecated. Please use arraywriter '
1057
                        'classes instead.',
1058
                        '1.2',
1059
                        '3.0')
1060 5
def scale_min_max(mn, mx, out_type, allow_intercept):
1061
    """ Return scaling and intercept min, max of data, given output type
1062

1063
    Returns ``scalefactor`` and ``intercept`` to best fit data with
1064
    given ``mn`` and ``mx`` min and max values into range of data type
1065
    with ``type_min`` and ``type_max`` min and max values for type.
1066

1067
    The calculated scaling is therefore::
1068

1069
        scaled_data = (data-intercept) / scalefactor
1070

1071
    Parameters
1072
    ----------
1073
    mn : scalar
1074
       data minimum value
1075
    mx : scalar
1076
       data maximum value
1077
    out_type : numpy type
1078
       numpy type of output
1079
    allow_intercept : bool
1080
       If true, allow calculation of non-zero intercept.  Otherwise,
1081
       returned intercept is always 0.0
1082

1083
    Returns
1084
    -------
1085
    scalefactor : numpy scalar, dtype=np.maximum_sctype(np.float)
1086
       scalefactor by which to divide data after subtracting intercept
1087
    intercept : numpy scalar, dtype=np.maximum_sctype(np.float)
1088
       value to subtract from data before dividing by scalefactor
1089

1090
    Examples
1091
    --------
1092
    >>> scale_min_max(0, 255, np.uint8, False)  # doctest: +SKIP
1093
    (1.0, 0.0)
1094
    >>> scale_min_max(-128, 127, np.int8, False)  # doctest: +SKIP
1095
    (1.0, 0.0)
1096
    >>> scale_min_max(0, 127, np.int8, False)  # doctest: +SKIP
1097
    (1.0, 0.0)
1098
    >>> scaling, intercept = scale_min_max(0, 127, np.int8,  True)  # doctest: +SKIP
1099
    >>> np.allclose((0 - intercept) / scaling, -128)  # doctest: +SKIP
1100
    True
1101
    >>> np.allclose((127 - intercept) / scaling, 127)  # doctest: +SKIP
1102
    True
1103
    >>> scaling, intercept = scale_min_max(-10, -1, np.int8, True)  # doctest: +SKIP
1104
    >>> np.allclose((-10 - intercept) / scaling, -128)  # doctest: +SKIP
1105
    True
1106
    >>> np.allclose((-1 - intercept) / scaling, 127)  # doctest: +SKIP
1107
    True
1108
    >>> scaling, intercept = scale_min_max(1, 10, np.int8, True)  # doctest: +SKIP
1109
    >>> np.allclose((1 - intercept) / scaling, -128)  # doctest: +SKIP
1110
    True
1111
    >>> np.allclose((10 - intercept) / scaling, 127)  # doctest: +SKIP
1112
    True
1113

1114
    Notes
1115
    -----
1116
    We don't use this function anywhere in nibabel now, it's here for API
1117
    compatibility only.
1118

1119
    The large integers lead to python long types as max / min for type.
1120
    To contain the rounding error, we need to use the maximum numpy
1121
    float types when casting to float.
1122
    """
1123 33
    if mn > mx:
1124 0
        raise ValueError('min value > max value')
1125 0
    info = type_info(out_type)
1126 0
    mn, mx, type_min, type_max = np.array(
1127
        [mn, mx, info['min'], info['max']], np.maximum_sctype(np.float))
1128
    # with intercept
1129 33
    if allow_intercept:
1130 0
        data_range = mx - mn
1131 33
        if data_range == 0:
1132 0
            return 1.0, mn
1133 0
        type_range = type_max - type_min
1134 0
        scaling = data_range / type_range
1135 0
        intercept = mn - type_min * scaling
1136 0
        return scaling, intercept
1137
    # without intercept
1138 33
    if mx == 0 and mn == 0:
1139 0
        return 1.0, 0.0
1140 33
    if type_min == 0:  # uint
1141 33
        if mn < 0 and mx > 0:
1142 0
            raise ValueError('Cannot scale negative and positive '
1143
                             'numbers to uint without intercept')
1144 33
        if mx < 0:
1145 0
            scaling = mn / type_max
1146
        else:
1147 0
            scaling = mx / type_max
1148
    else:  # int
1149 33
        if abs(mx) >= abs(mn):
1150 0
            scaling = mx / type_max
1151
        else:
1152 0
            scaling = mn / type_min
1153 0
    return scaling, 0.0
1154

1155

1156 33
def int_scinter_ftype(ifmt, slope=1.0, inter=0.0, default=np.float32):
1157
    """ float type containing int type `ifmt` * `slope` + `inter`
1158

1159
    Return float type that can represent the max and the min of the `ifmt` type
1160
    after multiplication with `slope` and addition of `inter` with something
1161
    like ``np.array([imin, imax], dtype=ifmt) * slope + inter``.
1162

1163
    Note that ``slope`` and ``inter`` get promoted to 1D arrays for this
1164
    purpose to avoid the numpy scalar casting rules, which prevent scalars
1165
    upcasting the array.
1166

1167
    Parameters
1168
    ----------
1169
    ifmt : object
1170
        numpy integer type (e.g. np.int32)
1171
    slope : float, optional
1172
        slope, default 1.0
1173
    inter : float, optional
1174
        intercept, default 0.0
1175
    default_out : object, optional
1176
        numpy floating point type, default is ``np.float32``
1177

1178
    Returns
1179
    -------
1180
    ftype : object
1181
        numpy floating point type
1182

1183
    Examples
1184
    --------
1185
    >>> int_scinter_ftype(np.int8, 1.0, 0.0) == np.float32
1186
    True
1187
    >>> int_scinter_ftype(np.int8, 1e38, 0.0) == np.float64
1188
    True
1189

1190
    Notes
1191
    -----
1192
    It is difficult to make floats overflow with just addition because the
1193
    deltas are so large at the extremes of floating point.  For example::
1194

1195
        >>> arr = np.array([np.finfo(np.float32).max], dtype=np.float32)
1196
        >>> res = arr + np.iinfo(np.int16).max
1197
        >>> arr == res
1198
        array([ True])
1199
    """
1200 33
    ii = np.iinfo(ifmt)
1201 33
    tst_arr = np.array([ii.min, ii.max], dtype=ifmt)
1202 33
    try:
1203 33
        return _ftype4scaled_finite(tst_arr, slope, inter, 'read', default)
1204 0
    except ValueError:
1205 0
        raise ValueError('Overflow using highest floating point type')
1206

1207

1208 33
def best_write_scale_ftype(arr, slope=1.0, inter=0.0, default=np.float32):
1209
    """ Smallest float type to contain range of ``arr`` after scaling
1210

1211
    Scaling that will be applied to ``arr`` is ``(arr - inter) / slope``.
1212

1213
    Note that ``slope`` and ``inter`` get promoted to 1D arrays for this
1214
    purpose to avoid the numpy scalar casting rules, which prevent scalars
1215
    upcasting the array.
1216

1217
    Parameters
1218
    ----------
1219
    arr : array-like
1220
        array that will be scaled
1221
    slope : array-like, optional
1222
        scalar such that output array will be ``(arr - inter) / slope``.
1223
    inter : array-like, optional
1224
        scalar such that output array will be ``(arr - inter) / slope``
1225
    default : numpy type, optional
1226
        minimum float type to return
1227

1228
    Returns
1229
    -------
1230
    ftype : numpy type
1231
        Best floating point type for scaling.  If no floating point type
1232
        prevents overflow, return the top floating point type.  If the input
1233
        array ``arr`` already contains inf values, return the greater of the
1234
        input type and the default type.
1235

1236
    Examples
1237
    --------
1238
    >>> arr = np.array([0, 1, 2], dtype=np.int16)
1239
    >>> best_write_scale_ftype(arr, 1, 0) is np.float32
1240
    True
1241

1242
    Specify higher default return value
1243

1244
    >>> best_write_scale_ftype(arr, 1, 0, default=np.float64) is np.float64
1245
    True
1246

1247
    Even large values that don't overflow don't change output
1248

1249
    >>> arr = np.array([0, np.finfo(np.float32).max], dtype=np.float32)
1250
    >>> best_write_scale_ftype(arr, 1, 0) is np.float32
1251
    True
1252

1253
    Scaling > 1 reduces output values, so no upcast needed
1254

1255
    >>> best_write_scale_ftype(arr, np.float32(2), 0) is np.float32
1256
    True
1257

1258
    Scaling < 1 increases values, so upcast may be needed (and is here)
1259

1260
    >>> best_write_scale_ftype(arr, np.float32(0.5), 0) is np.float64
1261
    True
1262
    """
1263 33
    default = better_float_of(arr.dtype.type, default)
1264 33
    if not np.all(np.isfinite(arr)):
1265 33
        return default
1266 33
    try:
1267 33
        return _ftype4scaled_finite(arr, slope, inter, 'write', default)
1268 0
    except ValueError:
1269 0
        return OK_FLOATS[-1]
1270

1271

1272 33
def better_float_of(first, second, default=np.float32):
1273
    """ Return more capable float type of `first` and `second`
1274

1275
    Return `default` if neither of `first` or `second` is a float
1276

1277
    Parameters
1278
    ----------
1279
    first : numpy type specifier
1280
        Any valid input to `np.dtype()``
1281
    second : numpy type specifier
1282
        Any valid input to `np.dtype()``
1283
    default : numpy type specifier, optional
1284
        Any valid input to `np.dtype()``
1285

1286
    Returns
1287
    -------
1288
    better_type : numpy type
1289
        More capable of `first` or `second` if both are floats; if only one is
1290
        a float return that, otherwise return `default`.
1291

1292
    Examples
1293
    --------
1294
    >>> better_float_of(np.float32, np.float64) is np.float64
1295
    True
1296
    >>> better_float_of(np.float32, 'i4') is np.float32
1297
    True
1298
    >>> better_float_of('i2', 'u4') is np.float32
1299
    True
1300
    >>> better_float_of('i2', 'u4', np.float64) is np.float64
1301
    True
1302
    """
1303 33
    first = np.dtype(first)
1304 33
    second = np.dtype(second)
1305 33
    default = np.dtype(default).type
1306 33
    kinds = (first.kind, second.kind)
1307 33
    if 'f' not in kinds:
1308 33
        return default
1309 33
    if kinds == ('f', 'f'):
1310 33
        if first.itemsize >= second.itemsize:
1311 33
            return first.type
1312 33
        return second.type
1313 33
    if first.kind == 'f':
1314 33
        return first.type
1315 33
    return second.type
1316

1317

def _ftype4scaled_finite(tst_arr, slope, inter, direction='read',
                         default=np.float32):
    """ Smallest float type for scaling of `tst_arr` that does not overflow
    """
    assert direction in ('read', 'write')
    if default not in OK_FLOATS and default is np.longdouble:
        # Omitted longdouble
        return default
    def_ind = OK_FLOATS.index(default)
    # promote to arrays to avoid numpy scalar casting rules
    tst_arr = np.atleast_1d(tst_arr)
    slope = np.atleast_1d(slope)
    inter = np.atleast_1d(inter)
    overflow_filter = ('error', '.*overflow.*', RuntimeWarning)
    for ftype in OK_FLOATS[def_ind:]:
        tst_trans = tst_arr.copy()
        slope = slope.astype(ftype)
        inter = inter.astype(ftype)
        try:
            with warnings.catch_warnings():
                # Error on overflows to short circuit the logic
                warnings.filterwarnings(*overflow_filter)
                if direction == 'read':  # as in reading of image from disk
                    if slope != 1.0:
                        tst_trans = tst_trans * slope
                    if inter != 0.0:
                        tst_trans = tst_trans + inter
                elif direction == 'write':
                    if inter != 0.0:
                        tst_trans = tst_trans - inter
                    if slope != 1.0:
                        tst_trans = tst_trans / slope
            # Double-check that result is finite
            if np.all(np.isfinite(tst_trans)):
                return ftype
        except RuntimeWarning:
            pass
    raise ValueError('Overflow using highest floating point type')
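
# A minimal usage sketch for the helper above (illustrative values, not part
# of the original module): dividing a float32 array by a very small float32
# slope overflows float32, so the search is expected to step up to the next
# entry in OK_FLOATS (typically float64).
#
#     arr = np.array([2.0], dtype=np.float32)
#     ftype = _ftype4scaled_finite(arr, np.float32(1e-40), 0, direction='write')
#     # ftype is expected to be a wider type than float32, e.g. np.float64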


def finite_range(arr, check_nan=False):
    """ Get range (min, max) or range and flag (min, max, has_nan) from `arr`

    Parameters
    ----------
    arr : array-like
    check_nan : {False, True}, optional
        Whether to return a third output, a bool signaling whether there are
        NaN values in `arr`

    Returns
    -------
    mn : scalar
       minimum of values in (flattened) array
    mx : scalar
       maximum of values in (flattened) array
    has_nan : bool
       Returned if `check_nan` is True. `has_nan` is True if there are one or
       more NaN values in `arr`

    Examples
    --------
    >>> a = np.array([[-1, 0, 1],[np.inf, np.nan, -np.inf]])
    >>> finite_range(a)
    (-1.0, 1.0)
    >>> a = np.array([[-1, 0, 1],[np.inf, np.nan, -np.inf]])
    >>> finite_range(a, check_nan=True)
    (-1.0, 1.0, True)
    >>> a = np.array([[np.nan],[np.nan]])
    >>> finite_range(a) == (np.inf, -np.inf)
    True
    >>> a = np.array([[-3, 0, 1],[2,-1,4]], dtype=int)
    >>> finite_range(a)
    (-3, 4)
    >>> a = np.array([[1, 0, 1],[2,3,4]], dtype=np.uint)
    >>> finite_range(a)
    (0, 4)
    >>> a = a + 1j
    >>> finite_range(a)
    (1j, (4+1j))
    >>> a = np.zeros((2,), dtype=[('f1', 'i2')])
    >>> finite_range(a)
    Traceback (most recent call last):
       ...
    TypeError: Can only handle numeric types
    """
    arr = np.asarray(arr)
    if arr.size == 0:
        return (np.inf, -np.inf) + (False,) * check_nan
    # Resort array to slowest->fastest memory change indices
    stride_order = np.argsort(arr.strides)[::-1]
    sarr = arr.transpose(stride_order)
    kind = sarr.dtype.kind
    if kind in 'iu':
        if check_nan:
            return np.min(sarr), np.max(sarr), False
        return np.min(sarr), np.max(sarr)
    if kind not in 'cf':
        raise TypeError('Can only handle numeric types')
    # Deal with 1D arrays in loop below
    sarr = np.atleast_2d(sarr)
    # Loop to avoid big temporary arrays
    has_nan = False
    n_slices = sarr.shape[0]
    maxes = np.zeros(n_slices, dtype=sarr.dtype) - np.inf
    mins = np.zeros(n_slices, dtype=sarr.dtype) + np.inf
    for s in range(n_slices):
        this_slice = sarr[s]  # view
        if not has_nan:
            maxes[s] = np.max(this_slice)
            # May have a non-nan non-inf max before we trip on min. If so,
            # record so we don't recalculate
            max_good = False
            if np.isnan(maxes[s]):
                has_nan = True
            elif maxes[s] != np.inf:
                max_good = True
                mins[s] = np.min(this_slice)
                if mins[s] != -np.inf:
                    # Only case where we escape the default np.isfinite
                    # algorithm
                    continue
        tmp = this_slice[np.isfinite(this_slice)]
        if tmp.size == 0:  # No finite values
            # Reset max, min in case set in tests above
            maxes[s] = -np.inf
            mins[s] = np.inf
            continue
        if not max_good:
            maxes[s] = np.max(tmp)
        mins[s] = np.min(tmp)
    if check_nan:
        return np.nanmin(mins), np.nanmax(maxes), has_nan
    return np.nanmin(mins), np.nanmax(maxes)


def shape_zoom_affine(shape, zooms, x_flip=True):
    """ Get affine implied by given shape and zooms

    We get the translations from the center of the image (implied by
    `shape`).

    Parameters
    ----------
    shape : (N,) array-like
       shape of image data. ``N`` is the number of dimensions
    zooms : (N,) array-like
       zooms (voxel sizes) of the image
    x_flip : {True, False}
       whether to flip the X row of the affine.  Corresponds to
       radiological storage on disk.

    Returns
    -------
    aff : (4,4) array
       affine giving correspondence of voxel coordinates to mm
       coordinates, taking the center of the image as origin

    Examples
    --------
    >>> shape = (3, 5, 7)
    >>> zooms = (3, 2, 1)
    >>> shape_zoom_affine(shape, zooms)
    array([[-3.,  0.,  0.,  3.],
           [ 0.,  2.,  0., -4.],
           [ 0.,  0.,  1., -3.],
           [ 0.,  0.,  0.,  1.]])
    >>> shape_zoom_affine(shape, zooms, False)
    array([[ 3.,  0.,  0., -3.],
           [ 0.,  2.,  0., -4.],
           [ 0.,  0.,  1., -3.],
           [ 0.,  0.,  0.,  1.]])
    """
    shape = np.asarray(shape)
    zooms = np.array(zooms)  # copy because of flip below
    ndims = len(shape)
    if ndims != len(zooms):
        raise ValueError('Should be same length of zooms and shape')
    if ndims >= 3:
        shape = shape[:3]
        zooms = zooms[:3]
    else:
        full_shape = np.ones((3,))
        full_zooms = np.ones((3,))
        full_shape[:ndims] = shape[:]
        full_zooms[:ndims] = zooms[:]
        shape = full_shape
        zooms = full_zooms
    if x_flip:
        zooms[0] *= -1
    # Get translations from center of image
    origin = (shape - 1) / 2.0
    aff = np.eye(4)
    aff[:3, :3] = np.diag(zooms)
    aff[:3, -1] = -origin * zooms
    return aff
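
# Worked example for the affine above (illustrative, restating the doctest):
# for shape (3, 5, 7), zooms (3, 2, 1) and x_flip=True the zooms become
# (-3, 2, 1), the center voxel sits at ((3-1)/2, (5-1)/2, (7-1)/2) = (1, 2, 3),
# and the translation column is -origin * zooms = (3, -4, -3), matching the
# doctest output.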


def rec2dict(rec):
    """ Convert recarray to dictionary

    Also converts scalar values to scalars

    Parameters
    ----------
    rec : ndarray
       structured ndarray

    Returns
    -------
    dct : dict
       dict with key, value pairs as for `rec`

    Examples
    --------
    >>> r = np.zeros((), dtype = [('x', 'i4'), ('s', 'S10')])
    >>> d = rec2dict(r)
    >>> d == {'x': 0, 's': b''}
    True
    """
    dct = {}
    for key in rec.dtype.fields:
        val = rec[key]
        try:
            val = val.item()
        except ValueError:
            pass
        dct[key] = val
    return dct


class BinOpener(Opener):
    """ Deprecated class that used to handle .mgz through specialized logic."""
    __doc__ = Opener.__doc__

    @deprecate_with_version('BinOpener class deprecated. '
                            'Please use Opener class instead.',
                            '2.1', '4.0')
    def __init__(self, *args, **kwargs):
        return super(BinOpener, self).__init__(*args, **kwargs)


def fname_ext_ul_case(fname):
    """ `fname` with ext changed to upper / lower case if file exists

    Check for existence of `fname`.  If it does exist, return unmodified.  If
    it doesn't, check for existence of `fname` with case changed from lower to
    upper, or upper to lower.  Return this modified `fname` if it exists.
    Otherwise return `fname` unmodified.

    Parameters
    ----------
    fname : str
        filename.

    Returns
    -------
    mod_fname : str
        filename, maybe with extension of opposite case
    """
    if exists(fname):
        return fname
    froot, ext = splitext(fname)
    if ext == ext.lower():
        mod_fname = froot + ext.upper()
        if exists(mod_fname):
            return mod_fname
    elif ext == ext.upper():
        mod_fname = froot + ext.lower()
        if exists(mod_fname):
            return mod_fname
    return fname
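
# A minimal sketch of the case-flipping lookup (hypothetical paths, not part
# of the original module): a file saved with an upper-case extension should be
# found when queried with the lower-case name.
#
#     import os, tempfile
#     tmpdir = tempfile.mkdtemp()
#     open(os.path.join(tmpdir, 'image.HDR'), 'w').close()
#     fname_ext_ul_case(os.path.join(tmpdir, 'image.hdr'))
#     # expected to return the path ending in 'image.HDR', which exists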


@deprecate_with_version('allopen is deprecated. '
                        'Please use "Opener" class instead.',
                        '2.0', '4.0')
def allopen(fileish, *args, **kwargs):
    """ Compatibility wrapper for old ``allopen`` function

    Wraps creation of ``Opener`` instance, while picking up module global
    ``default_compresslevel``.

    Please see docstring of ``Opener`` for details.
    """

    class MyOpener(Opener):
        default_compresslevel = default_compresslevel

    return MyOpener(fileish, *args, **kwargs)
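
# Migration sketch (hypothetical filename, not part of the original module):
# ``allopen`` survives only for backwards compatibility and warns on use; new
# code is expected to construct an Opener directly, e.g.
#
#     with Opener('data.nii.gz', 'rb') as fobj:
#         hdr_bytes = fobj.read(348)
#
# rather than the deprecated ``fobj = allopen('data.nii.gz')``.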
