# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the NiBabel package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
""" Utility functions for analyze-like formats """

import sys
import warnings
import gzip
from collections import OrderedDict
from os.path import exists, splitext
from operator import mul
from functools import reduce

import numpy as np

from .casting import (shared_range, type_info, OK_FLOATS)
from .openers import Opener, BZ2File, IndexedGzipFile
from .deprecated import deprecate_with_version
from .externals.oset import OrderedSet

sys_is_le = sys.byteorder == 'little'
native_code = sys_is_le and '<' or '>'
swapped_code = sys_is_le and '>' or '<'

endian_codes = (  # numpy code, aliases
    ('<', 'little', 'l', 'le', 'L', 'LE'),
    ('>', 'big', 'BIG', 'b', 'be', 'B', 'BE'),
    (native_code, 'native', 'n', 'N', '=', '|', 'i', 'I'),
    (swapped_code, 'swapped', 's', 'S', '!'))
# We'll put these into the Recoder class after we define it

#: default compression level when writing gz and bz2 files
default_compresslevel = 1

#: file-like classes known to hold compressed data
COMPRESSED_FILE_LIKES = (gzip.GzipFile, BZ2File, IndexedGzipFile)


class Recoder(object):
    """ class to return canonical code(s) from code or aliases

    The concept is a lot easier to read in the implementation and
    tests than it is to explain, so...

    >>> # If you have some codes, and several aliases, like this:
    >>> code1 = 1; aliases1=['one', 'first']
    >>> code2 = 2; aliases2=['two', 'second']
    >>> # You might want to do this:
    >>> codes = [[code1]+aliases1,[code2]+aliases2]
    >>> recodes = Recoder(codes)
    >>> recodes.code['one']
    1
    >>> recodes.code['second']
    2
    >>> recodes.code[2]
    2
    >>> # Or maybe you have a code, a label and some aliases
    >>> codes=((1,'label1','one', 'first'),(2,'label2','two'))
    >>> # you might want to get back the code or the label
    >>> recodes = Recoder(codes, fields=('code','label'))
    >>> recodes.code['first']
    1
    >>> recodes.code['label1']
    1
    >>> recodes.label[2]
    'label2'
    >>> # For convenience, you can get the first entered name by
    >>> # indexing the object directly
    >>> recodes[2]
    2
    """

    def __init__(self, codes, fields=('code',), map_maker=OrderedDict):
        """ Create recoder object

        ``codes`` give a sequence of code, alias sequences
        ``fields`` are names by which the entries in these sequences can be
        accessed.

        By default ``fields`` gives the first column the name
        "code".  The first column is the vector of first entries
        in each of the sequences found in ``codes``.  Thence you can
        get the equivalent first column value with ob.code[value],
        where value can be a first column value, or a value in any of
        the other columns in that sequence.

        You can give other columns names too, and access them in the
        same way - see the examples in the class docstring.

        Parameters
        ----------
        codes : sequence of sequences
            Each sequence defines values (codes) that are equivalent
        fields : {('code',) string sequence}, optional
            names by which elements in sequences can be accessed
        map_maker : callable, optional
            constructor for dict-like objects used to store key value pairs.
            Default is ``OrderedDict``.  ``map_maker()`` generates an empty
            mapping.  The mapping need only implement ``__getitem__,
            __setitem__, keys, values``.
        """
        self.fields = tuple(fields)
        self.field1 = {}  # a placeholder for the check below
        for name in fields:
            if name in self.__dict__:
                raise KeyError(f'Input name {name} already in object dict')
            self.__dict__[name] = map_maker()
        self.field1 = self.__dict__[fields[0]]
        self.add_codes(codes)

    def add_codes(self, code_syn_seqs):
        """ Add codes to object

        Parameters
        ----------
        code_syn_seqs : sequence
            sequence of sequences, where each sequence ``S = code_syn_seqs[n]``
            for n in 0..len(code_syn_seqs), is a sequence giving values in the
            same order as ``self.fields``.  Each S should be at least of the
            same length as ``self.fields``.  After this call, if
            ``self.fields == ['field1', 'field2']``, then
            ``self.field1[S[n]] == S[0]`` for all n in 0..len(S) and
            ``self.field2[S[n]] == S[1]`` for all n in 0..len(S).

        Examples
        --------
        >>> code_syn_seqs = ((2, 'two'), (1, 'one'))
        >>> rc = Recoder(code_syn_seqs)
        >>> rc.value_set() == set((1,2))
        True
        >>> rc.add_codes(((3, 'three'), (1, 'first')))
        >>> rc.value_set() == set((1,2,3))
        True
        >>> print(rc.value_set())  # set is actually ordered
        OrderedSet([2, 1, 3])
        """
        for code_syns in code_syn_seqs:
            # Add all the aliases
            for alias in code_syns:
                # For all defined fields, make every value in the sequence be
                # an entry to return matching index value.
                for field_ind, field_name in enumerate(self.fields):
                    self.__dict__[field_name][alias] = code_syns[field_ind]

    def __getitem__(self, key):
        """ Return value from field1 dictionary (first column of values)

        Returns same value as ``obj.field1[key]`` and, with the
        default initializing ``fields`` argument of fields=('code',),
        this will return the same as ``obj.code[key]``

        >>> codes = ((1, 'one'), (2, 'two'))
        >>> Recoder(codes)['two']
        2
        """
        return self.field1[key]

    def __contains__(self, key):
        """ True if field1 in recoder contains `key`
        """
        try:
            self.field1[key]
        except KeyError:
            return False
        return True

    def keys(self):
        """ Return all available code and alias values

        Returns same value as ``obj.field1.keys()`` and, with the
        default initializing ``fields`` argument of fields=('code',),
        this will return the same as ``obj.code.keys()``

        >>> codes = ((1, 'one'), (2, 'two'), (1, 'repeat value'))
        >>> k = Recoder(codes).keys()
        >>> set(k) == set([1, 2, 'one', 'repeat value', 'two'])
        True
        """
        return self.field1.keys()

    def value_set(self, name=None):
        """ Return OrderedSet of possible returned values for column

        By default, the column is the first column.

        Returns same values as ``set(obj.field1.values())`` and,
        with the default initializing ``fields`` argument of
        fields=('code',), this will return the same as
        ``set(obj.code.values())``

        Parameters
        ----------
        name : {None, string}
            Where default of none gives result for first column

        >>> codes = ((1, 'one'), (2, 'two'), (1, 'repeat value'))
        >>> vs = Recoder(codes).value_set()
        >>> vs == set([1, 2])  # Sets are not ordered, hence this test
        True
        >>> rc = Recoder(codes, fields=('code', 'label'))
        >>> rc.value_set('label') == set(('one', 'two', 'repeat value'))
        True
        """
        if name is None:
            d = self.field1
        else:
            d = self.__dict__[name]
        return OrderedSet(d.values())


# Endian code aliases
endian_codes = Recoder(endian_codes)
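
# Illustrative lookups (assuming a little-endian host, where native_code is
# '<'); any alias in a row of the table above recodes to that row's numpy code:
#
#     endian_codes['little']   # -> '<'
#     endian_codes['BE']       # -> '>'
#     endian_codes['=']        # -> '<' on this host (the native code)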


class DtypeMapper(object):
    """ Specialized mapper for numpy dtypes

    We pass this mapper into the Recoder class to deal with numpy dtype
    hashing.

    The hashing problem is that dtypes that compare equal may not have the same
    hash.  This is true for numpy versions up to the current version at the
    time of writing (1.6.0).  For numpy 1.2.1 at least, even dtypes that look
    exactly the same in terms of fields don't always have the same hash.  This
    makes dtypes difficult to use as keys in a dictionary.

    This class wraps a dictionary in order to implement a __getitem__ to deal
    with dtype hashing.  If the key doesn't appear to be in the mapping, and it
    is a dtype, we compare (using ==) all known dtype keys to the input key,
    and return any matching values for the matching key.
    """

    def __init__(self):
        self._dict = {}
        self._dtype_keys = []

    def keys(self):
        return self._dict.keys()

    def values(self):
        return self._dict.values()

    def __setitem__(self, key, value):
        """ Set item into mapping, checking for dtype keys

        Cache dtype keys for comparison test in __getitem__
        """
        self._dict[key] = value
        if hasattr(key, 'subdtype'):
            self._dtype_keys.append(key)

    def __getitem__(self, key):
        """ Get item from mapping, checking for dtype keys

        First do simple hash lookup, then check for a dtype key that has failed
        the hash lookup.  Look then for any known dtype keys that compare equal
        to `key`.
        """
        try:
            return self._dict[key]
        except KeyError:
            pass
        if hasattr(key, 'subdtype'):
            for dt in self._dtype_keys:
                if key == dt:
                    return self._dict[dt]
        raise KeyError(key)
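
# Minimal sketch of DtypeMapper behavior: structured dtypes that compare equal
# retrieve the same stored value, even if their hashes happen to differ:
#
#     dm = DtypeMapper()
#     dm[np.dtype([('f1', 'u1'), ('f2', 'i2')])] = 'some value'
#     dm[np.dtype([('f1', 'u1'), ('f2', 'i2')])]   # -> 'some value'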


def pretty_mapping(mapping, getterfunc=None):
    """ Make pretty string from mapping

    Adjusts text column to print values on basis of longest key.
    Probably only sensible if keys are mainly strings.

    You can pass in a callable that does clever things to get the values
    out of the mapping, given the names.  By default, we just use
    ``__getitem__``

    Parameters
    ----------
    mapping : mapping
        implementing iterator returning keys and .items()
    getterfunc : None or callable
        callable taking two arguments, ``obj`` and ``key`` where ``obj``
        is the passed mapping.  If None, just use ``lambda obj, key:
        obj[key]``

    Returns
    -------
    str : string

    Examples
    --------
    >>> d = {'a key': 'a value'}
    >>> print(pretty_mapping(d))
    a key : a value
    >>> class C(object):  # to control ordering, show get_ method
    ...     def __iter__(self):
    ...         return iter(('short_field','longer_field'))
    ...     def __getitem__(self, key):
    ...         if key == 'short_field':
    ...             return 0
    ...         if key == 'longer_field':
    ...             return 'str'
    ...     def get_longer_field(self):
    ...         return 'method string'
    >>> def getter(obj, key):
    ...     # Look for any 'get_<name>' methods
    ...     try:
    ...         return obj.__getattribute__('get_' + key)()
    ...     except AttributeError:
    ...         return obj[key]
    >>> print(pretty_mapping(C(), getter))
    short_field  : 0
    longer_field : method string
    """
    if getterfunc is None:
        getterfunc = lambda obj, key: obj[key]
    lens = [len(str(name)) for name in mapping]
    mxlen = np.max(lens)
    fmt = '%%-%ds : %%s' % mxlen
    out = []
    for name in mapping:
        value = getterfunc(mapping, name)
        out.append(fmt % (name, value))
    return '\n'.join(out)


def make_dt_codes(codes_seqs):
    """ Create full dt codes Recoder instance from datatype codes

    Include created numpy dtype (from numpy type) and opposite endian
    numpy dtype

    Parameters
    ----------
    codes_seqs : sequence of sequences
        contained sequences may be length 3 or 4, but must all be the same
        length.  Elements are data type code, data type name, and numpy
        type (such as ``np.float32``).  The fourth element is the nifti string
        representation of the code (e.g. "NIFTI_TYPE_FLOAT32")

    Returns
    -------
    rec : ``Recoder`` instance
        Recoder that, by default, returns ``code`` when indexed with any
        of the corresponding code, name, type, dtype, or swapped dtype.
        You can also index with ``niistring`` values if codes_seqs had sequences
        of length 4 instead of 3.
    """
    fields = ['code', 'label', 'type']
    len0 = len(codes_seqs[0])
    if len0 not in (3, 4):
        raise ValueError('Sequences must be length 3 or 4')
    if len0 == 4:
        fields.append('niistring')
    dt_codes = []
    for seq in codes_seqs:
        if len(seq) != len0:
            raise ValueError('Sequences must all have the same length')
        np_type = seq[2]
        this_dt = np.dtype(np_type)
        # Add swapped dtype to synonyms
        code_syns = list(seq) + [this_dt, this_dt.newbyteorder(swapped_code)]
        dt_codes.append(code_syns)
    return Recoder(dt_codes, fields + ['dtype', 'sw_dtype'], DtypeMapper)
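
# Sketch of make_dt_codes with a made-up two-row table (the real tables live
# in the format-specific modules):
#
#     _codes = ((2, 'uint8', np.uint8),
#               (4, 'int16', np.int16))
#     dt_codes = make_dt_codes(_codes)
#     dt_codes['int16']            # -> 4 (the code column)
#     dt_codes.dtype[4]            # -> dtype('int16'), native byte order
#     dt_codes.sw_dtype['int16']   # -> dtype('>i2') on a little-endian host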


@deprecate_with_version('can_cast deprecated. '
                        'Please use arraywriter classes instead',
                        '1.2',
                        '3.0')
def can_cast(in_type, out_type, has_intercept=False, has_slope=False):
    """ Return True if we can safely cast ``in_type`` to ``out_type``

    Parameters
    ----------
    in_type : numpy type
        type of data we will cast from
    out_type : numpy type
        type that we want to cast to
    has_intercept : bool, optional
        Whether we can subtract a constant from the data (before scaling)
        before casting to ``out_type``.  Default is False
    has_slope : bool, optional
        Whether we can use a scaling factor to adjust slope of
        relationship of data to data in cast array.  Default is False

    Returns
    -------
    tf : bool
        True if we can safely cast, False otherwise

    Examples
    --------
    >>> can_cast(np.float64, np.float32)  # doctest: +SKIP
    True
    >>> can_cast(np.complex128, np.float32)  # doctest: +SKIP
    False
    >>> can_cast(np.int64, np.float32)  # doctest: +SKIP
    True
    >>> can_cast(np.float32, np.int16)  # doctest: +SKIP
    False
    >>> can_cast(np.float32, np.int16, False, True)  # doctest: +SKIP
    True
    >>> can_cast(np.int16, np.uint8)  # doctest: +SKIP
    False

    Whether we can actually cast int to uint when we don't have an intercept
    depends on the data.  That's why this function isn't very useful.  But we
    assume that an integer is using its full range, and check whether scaling
    works in that situation.

    Here we need an intercept to scale the full range of an int to a uint

    >>> can_cast(np.int16, np.uint8, False, True)  # doctest: +SKIP
    False
    >>> can_cast(np.int16, np.uint8, True, True)  # doctest: +SKIP
    True
    """
    in_dtype = np.dtype(in_type)
    # Whether we can cast depends on the data, and we've only got the type.
    # Let's assume integers use all of their range but floats etc not
    if in_dtype.kind in 'iu':
        info = np.iinfo(in_dtype)
        data = np.array([info.min, info.max], dtype=in_dtype)
    else:  # Float or complex or something.  Any old thing will do
        data = np.ones((1,), in_type)
    from .arraywriters import make_array_writer, WriterError
    try:
        make_array_writer(data, out_type, has_slope, has_intercept)
    except WriterError:
        return False
    return True


def _is_compressed_fobj(fobj):
    """ Return True if fobj represents a compressed data file-like object
    """
    return isinstance(fobj, COMPRESSED_FILE_LIKES)


def array_from_file(shape, in_dtype, infile, offset=0, order='F', mmap=True):
    """ Get array from file with specified shape, dtype and file offset

    Parameters
    ----------
    shape : sequence
        sequence specifying output array shape
    in_dtype : numpy dtype
        fully specified numpy dtype, including correct endianness
    infile : file-like
        open file-like object implementing at least read() and seek()
    offset : int, optional
        offset in bytes into `infile` to start reading array data. Default is 0
    order : {'F', 'C'} string
        order in which the array data is laid out in the file.  Default is 'F'
        (fortran order).
    mmap : {True, False, 'c', 'r', 'r+'}
        `mmap` controls the use of numpy memory mapping for reading data.  If
        False, do not try numpy ``memmap`` for data array.  If one of {'c',
        'r', 'r+'}, try numpy memmap with ``mode=mmap``.  A `mmap` value of
        True gives the same behavior as ``mmap='c'``.  If `infile` cannot be
        memory-mapped, ignore `mmap` value and read array from file.

    Returns
    -------
    arr : array-like
        array like object that can be sliced, containing data

    Examples
    --------
    >>> from io import BytesIO
    >>> bio = BytesIO()
    >>> arr = np.arange(6).reshape(1,2,3)
    >>> _ = bio.write(arr.tobytes('F'))  # outputs int
    >>> arr2 = array_from_file((1,2,3), arr.dtype, bio)
    >>> np.all(arr == arr2)
    True
    >>> bio = BytesIO()
    >>> _ = bio.write(b' ' * 10)
    >>> _ = bio.write(arr.tobytes('F'))
    >>> arr2 = array_from_file((1,2,3), arr.dtype, bio, 10)
    >>> np.all(arr == arr2)
    True
    """
    if mmap not in (True, False, 'c', 'r', 'r+'):
        raise ValueError("mmap value should be one of True, False, 'c', "
                         "'r', 'r+'")
    if mmap is True:
        mmap = 'c'
    in_dtype = np.dtype(in_dtype)
    # Get file-like object from Opener instance
    infile = getattr(infile, 'fobj', infile)
    if mmap and not _is_compressed_fobj(infile):
        try:  # Try memmapping file on disk
            return np.memmap(infile,
                             in_dtype,
                             mode=mmap,
                             shape=shape,
                             order=order,
                             offset=offset)
        # The error raised by memmap, for different file types, has
        # changed in different incarnations of the numpy routine
        except (AttributeError, TypeError, ValueError):
            pass
    if len(shape) == 0:
        return np.array([])
    # Use reduce and mul to work around numpy integer overflow
    n_bytes = reduce(mul, shape) * in_dtype.itemsize
    if n_bytes == 0:
        return np.array([])
    # Read data from file
    infile.seek(offset)
    if hasattr(infile, 'readinto'):
        data_bytes = bytearray(n_bytes)
        n_read = infile.readinto(data_bytes)
        needs_copy = False
    else:
        data_bytes = infile.read(n_bytes)
        n_read = len(data_bytes)
        needs_copy = True
    if n_bytes != n_read:
        raise IOError(f"Expected {n_bytes} bytes, got {n_read} bytes from "
                      f"{getattr(infile, 'name', 'object')}\n"
                      " - could the file be damaged?")
    arr = np.ndarray(shape, in_dtype, buffer=data_bytes, order=order)
    if needs_copy:
        return arr.copy()
    arr.flags.writeable = True
    return arr
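
# Illustrative memory-mapped read (assumes a hypothetical uncompressed file
# 'vol.raw' on disk holding 2 * 3 * 4 float32 values in Fortran order):
#
#     with open('vol.raw', 'rb') as fobj:
#         arr = array_from_file((2, 3, 4), np.float32, fobj, mmap='r')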


def array_to_file(data, fileobj, out_dtype=None, offset=0,
                  intercept=0.0, divslope=1.0,
                  mn=None, mx=None, order='F', nan2zero=True):
    """ Helper function for writing arrays to file objects

    Writes arrays as scaled by `intercept` and `divslope`, and clipped
    at (prescaling) `mn` minimum, and `mx` maximum.

    * Clip `data` array at min `mn`, max `mx` where these are not None ->
      ``clipped`` (this is *pre scale clipping*)
    * Scale ``clipped`` with ``clipped_scaled = (clipped - intercept) /
      divslope``
    * Clip ``clipped_scaled`` to fit into range of `out_dtype` (*post scale
      clipping*) -> ``clipped_scaled_clipped``
    * If converting to integer `out_dtype` and `nan2zero` is True, set NaN
      values in ``clipped_scaled_clipped`` to 0
    * Write ``clipped_scaled_clipped_n2z`` to fileobj `fileobj` starting at
      offset `offset` in memory layout `order`

    Parameters
    ----------
    data : array-like
        array or array-like to write.
    fileobj : file-like
        file-like object implementing ``write`` method.
    out_dtype : None or dtype, optional
        dtype to write array as.  Data array will be coerced to this dtype
        before writing.  If None (default) then use input data type.
    offset : None or int, optional
        offset into fileobj at which to start writing data.  Default is 0.
        None means start at current file position
    intercept : scalar, optional
        scalar to subtract from data, before dividing by ``divslope``.
        Default is 0.0
    divslope : None or scalar, optional
        scalefactor to *divide* data by before writing.  Default is 1.0.  If
        None, there is no valid data, we write zeros.
    mn : scalar, optional
        minimum threshold in (unscaled) data, such that all data below this
        value are set to this value.  Default is None (no threshold).  The
        typical use is to set -np.inf in the data to have this value (which
        might be the minimum non-finite value in the data).
    mx : scalar, optional
        maximum threshold in (unscaled) data, such that all data above this
        value are set to this value.  Default is None (no threshold).  The
        typical use is to set np.inf in the data to have this value (which
        might be the maximum non-finite value in the data).
    order : {'F', 'C'}, optional
        memory order to write array.  Default is 'F'
    nan2zero : {True, False}, optional
        Whether to set NaN values to 0 when writing integer output.  Defaults
        to True.  If False, NaNs will be represented as numpy does when
        casting; this depends on the underlying C library and is undefined.
        In practice `nan2zero` == False might be a good choice when you are
        completely sure there will be no NaNs in the data.  This value is
        ignored for float output types.  NaNs are treated as zero *before*
        applying `intercept` and `divslope` - so an array ``[np.nan]`` with
        an `intercept` of 10 becomes ``[-10]`` after conversion to integer
        `out_dtype` with `nan2zero` set.  That is because you will likely
        apply `divslope` and `intercept` in reverse order when reading the
        data back, returning the zero you probably expected from the input
        NaN.

    Examples
    --------
    >>> from io import BytesIO
    >>> sio = BytesIO()
    >>> data = np.arange(10, dtype=np.float64)
    >>> array_to_file(data, sio, np.float64)
    >>> sio.getvalue() == data.tobytes('F')
    True
    >>> _ = sio.truncate(0); _ = sio.seek(0)  # outputs 0
    >>> array_to_file(data, sio, np.int16)
    >>> sio.getvalue() == data.astype(np.int16).tobytes()
    True
    >>> _ = sio.truncate(0); _ = sio.seek(0)
    >>> array_to_file(data.byteswap(), sio, np.float64)
    >>> sio.getvalue() == data.byteswap().tobytes('F')
    True
    >>> _ = sio.truncate(0); _ = sio.seek(0)
    >>> array_to_file(data, sio, np.float64, order='C')
    >>> sio.getvalue() == data.tobytes('C')
    True
    """
    # Shield special case
    div_none = divslope is None
    if not np.all(
            np.isfinite((intercept, 1.0 if div_none else divslope))):
        raise ValueError('divslope and intercept must be finite')
    if divslope == 0:
        raise ValueError('divslope cannot be zero')
    data = np.asanyarray(data)
    in_dtype = data.dtype
    if out_dtype is None:
        out_dtype = in_dtype
    else:
        out_dtype = np.dtype(out_dtype)
    if offset is not None:
        seek_tell(fileobj, offset)
    if (div_none or (mn, mx) == (0, 0) or
            ((mn is not None and mx is not None) and mx < mn)):
        write_zeros(fileobj, data.size * out_dtype.itemsize)
        return
    if order not in 'FC':
        raise ValueError('Order should be one of F or C')
    # Simple cases
    pre_clips = None if (mn is None and mx is None) else (mn, mx)
    null_scaling = (intercept == 0 and divslope == 1)
    if in_dtype.type == np.void:
        if not null_scaling:
            raise ValueError('Cannot scale non-numeric types')
        if pre_clips is not None:
            raise ValueError('Cannot clip non-numeric types')
        return _write_data(data, fileobj, out_dtype, order)
    if pre_clips is not None:
        pre_clips = _dt_min_max(in_dtype, *pre_clips)
    if null_scaling and np.can_cast(in_dtype, out_dtype):
        return _write_data(data, fileobj, out_dtype, order,
                           pre_clips=pre_clips)
    # Force upcasting for floats by making atleast_1d.
    slope, inter = [np.atleast_1d(v) for v in (divslope, intercept)]
    # Default working point type for applying slope / inter
    if slope.dtype.kind in 'iu':
        slope = slope.astype(float)
    if inter.dtype.kind in 'iu':
        inter = inter.astype(float)
    in_kind = in_dtype.kind
    out_kind = out_dtype.kind
    if out_kind in 'fc':
        return _write_data(data, fileobj, out_dtype, order,
                           slope=slope,
                           inter=inter,
                           pre_clips=pre_clips)
    assert out_kind in 'iu'
    if in_kind in 'iu':
        if null_scaling:
            # Must be large int to small int conversion; add clipping to
            # pre scale thresholds
            mn, mx = _dt_min_max(in_dtype, mn, mx)
            mn_out, mx_out = _dt_min_max(out_dtype)
            pre_clips = max(mn, mn_out), min(mx, mx_out)
            return _write_data(data, fileobj, out_dtype, order,
                               pre_clips=pre_clips)
        # In any case, we do not want to check for nans because we've already
        # disallowed scaling that generates nans
        nan2zero = False
    # We are either scaling into c/floats or starting with c/floats, then we're
    # going to integers
    # Because we're going to integers, complex inter and slope will only slow
    # us down, cast to float
    slope, inter = [v.astype(_matching_float(v.dtype)) for v in (slope, inter)]
    # We'll do the thresholding on the scaled data, so turn off the
    # thresholding on the unscaled data
    pre_clips = None
    # We may need to cast the original array to another type
    cast_in_dtype = in_dtype
    if in_kind == 'c':
        # Cast to floats before anything else
        cast_in_dtype = np.dtype(_matching_float(in_dtype))
    elif in_kind == 'f' and in_dtype.itemsize == 2:
        # Make sure we don't use float16 as a working type
        cast_in_dtype = np.dtype(np.float32)
    w_type = working_type(cast_in_dtype, slope, inter)
    dt_mnmx = _dt_min_max(cast_in_dtype, mn, mx)
    # We explore for a good precision to avoid infs and clipping
    # Find smallest float type equal or larger than the current working
    # type, that can contain range of extremes after scaling, without going
    # to +-inf
    extremes = np.array(dt_mnmx, dtype=cast_in_dtype)
    w_type = best_write_scale_ftype(extremes, slope, inter, w_type)
    # Push up precision by casting the slope, inter
    slope, inter = [v.astype(w_type) for v in (slope, inter)]
    # We need to know the result of applying slope and inter to the min and
    # max of the array, in order to clip the output array, after applying
    # the slope and inter.  Otherwise we'd need to clip twice, once before
    # applying (slope, inter), and again after, to ensure we have not hit
    # over- or under-flow.  For the same reason we need to know the result of
    # applying slope, inter to 0, in order to fill in the nan output value
    # after scaling etc.  We could fill with 0 before scaling, but then we'd
    # have to do an extra copy before filling nans with 0, to avoid
    # overwriting the input array
    # Run min, max, 0 through scaling / rint
    specials = np.array(dt_mnmx + (0,), dtype=w_type)
    if inter != 0.0:
        specials = specials - inter
    if slope != 1.0:
        specials = specials / slope
    assert specials.dtype.type == w_type
    post_mn, post_mx, nan_fill = np.rint(specials)
    if post_mn > post_mx:  # slope could be negative
        post_mn, post_mx = post_mx, post_mn
    # Make sure that the thresholds exclude any value that will get badly cast
    # to the integer type.  This is not the same as using the maximum of the
    # output dtype as thresholds, because these may not be exactly represented
    # in the float type.
    #
    # The thresholds assume that the data are in `wtype` dtype after applying
    # the slope and intercept.
    both_mn, both_mx = shared_range(w_type, out_dtype)
    # Check that nan2zero output value is in range
    if nan2zero and not both_mn <= nan_fill <= both_mx:
        # Estimated error for (0 - inter) / slope is 2 * eps * abs(inter /
        # slope).  Assume errors are for working float type.  Round for
        # integer rounding
        est_err = np.round(2 * np.finfo(w_type).eps * abs(inter / slope))
        if ((nan_fill < both_mn and abs(nan_fill - both_mn) < est_err) or
                (nan_fill > both_mx and abs(nan_fill - both_mx) < est_err)):
            # nan_fill can be (just) outside clip range
            nan_fill = np.clip(nan_fill, both_mn, both_mx)
        else:
            raise ValueError(f"nan_fill == {nan_fill}, outside safe int range "
                             f"({int(both_mn)}-{int(both_mx)}); "
                             "change scaling or set nan2zero=False?")
    # Make sure non-nan output clipped to shared range
    post_mn = np.max([post_mn, both_mn])
    post_mx = np.min([post_mx, both_mx])
    in_cast = None if cast_in_dtype == in_dtype else cast_in_dtype
    return _write_data(data, fileobj, out_dtype, order,
                       in_cast=in_cast,
                       pre_clips=pre_clips,
                       inter=inter,
                       slope=slope,
                       post_clips=(post_mn, post_mx),
                       nan_fill=nan_fill if nan2zero else None)
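
# Sketch of a scaled integer write with array_to_file: values are divided by
# `divslope` after subtracting `intercept`, rounded, then cast to int16:
#
#     from io import BytesIO
#     sio = BytesIO()
#     array_to_file(np.array([0., 1., 2.]), sio, np.int16,
#                   intercept=0.0, divslope=0.5)
#     np.frombuffer(sio.getvalue(), dtype=np.int16)   # -> [0, 2, 4]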


def _write_data(data,
                fileobj,
                out_dtype,
                order,
                in_cast=None,
                pre_clips=None,
                inter=0.,
                slope=1.,
                post_clips=None,
                nan_fill=None):
    """ Write array `data` to `fileobj` as `out_dtype` type, layout `order`

    Does not modify `data` in-place.

    Parameters
    ----------
    data : ndarray
    fileobj : object
        implementing ``obj.write``
    out_dtype : numpy type
        Type to which to cast output data just before writing
    order : {'F', 'C'}
        memory layout of array in fileobj after writing
    in_cast : None or numpy type, optional
        If not None, initial cast to do on `data` slices before further
        processing
    pre_clips : None or 2-sequence, optional
        If not None, minimum and maximum of input values at which to clip.
    inter : scalar or array, optional
        Intercept to subtract before writing ``out = data - inter``
    slope : scalar or array, optional
        Slope by which to divide before writing ``out2 = out / slope``
    post_clips : None or 2-sequence, optional
        If not None, minimum and maximum of scaled values at which to clip.
    nan_fill : None or scalar, optional
        If not None, values that were NaN in `data` will receive `nan_fill`
        in array as output to disk (after scaling).
    """
    data = np.squeeze(data)
    if data.ndim < 2:  # Trick to allow loop over rows for 1D arrays
        data = np.atleast_2d(data)
    elif order == 'F':
        data = data.T
    nan_need_copy = ((pre_clips, in_cast, inter, slope, post_clips) ==
                     (None, None, 0, 1, None))
    for dslice in data:  # cycle over first dimension to save memory
        if pre_clips is not None:
            dslice = np.clip(dslice, *pre_clips)
        if in_cast is not None:
            dslice = dslice.astype(in_cast)
        if inter != 0.0:
            dslice = dslice - inter
        if slope != 1.0:
            dslice = dslice / slope
        if post_clips is not None:
            dslice = np.clip(np.rint(dslice), *post_clips)
        if nan_fill is not None:
            nans = np.isnan(dslice)
            if np.any(nans):
                if nan_need_copy:
                    dslice = dslice.copy()
                dslice[nans] = nan_fill
        if dslice.dtype != out_dtype:
            dslice = dslice.astype(out_dtype)
        fileobj.write(dslice.tobytes())


def _dt_min_max(dtype_like, mn=None, mx=None):
    dt = np.dtype(dtype_like)
    if dt.kind in 'fc':
        dt_mn, dt_mx = (-np.inf, np.inf)
    elif dt.kind in 'iu':
        info = np.iinfo(dt)
        dt_mn, dt_mx = (info.min, info.max)
    else:
        raise ValueError("unknown dtype")
    return dt_mn if mn is None else mn, dt_mx if mx is None else mx


_CSIZE2FLOAT = {
    8: np.float32,
    16: np.float64,
    24: np.longdouble,
    32: np.longdouble}


def _matching_float(np_type):
    """ Return floating point type matching `np_type`
    """
    dtype = np.dtype(np_type)
    if dtype.kind not in 'cf':
        raise ValueError('Expecting float or complex type as input')
    if dtype.kind in 'f':
        return dtype.type
    return _CSIZE2FLOAT[dtype.itemsize]
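
# For instance (illustrative): _matching_float(np.complex64) is np.float32 and
# _matching_float(np.float64) is np.float64; integer input raises ValueError.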


def write_zeros(fileobj, count, block_size=8194):
    """ Write `count` zero bytes to `fileobj`

    Parameters
    ----------
    fileobj : file-like object
        with ``write`` method
    count : int
        number of bytes to write
    block_size : int, optional
        largest continuous block to write.
    """
    nblocks = int(count // block_size)
    rem = count % block_size
    blk = b'\x00' * block_size
    for bno in range(nblocks):
        fileobj.write(blk)
    fileobj.write(b'\x00' * rem)
|
|
879 |
|
|
880 | 33 |
def seek_tell(fileobj, offset, write0=False): |
881 |
""" Seek in `fileobj` or check we're in the right place already
|
|
882 |
|
|
883 |
Parameters
|
|
884 |
----------
|
|
885 |
fileobj : file-like
|
|
886 |
object implementing ``seek`` and (if seek raises an IOError) ``tell``
|
|
887 |
offset : int
|
|
888 |
position in file to which to seek
|
|
889 |
write0 : {False, True}, optional
|
|
890 |
If True, and standard seek fails, try to write zeros to the file to
|
|
891 |
reach `offset`. This can be useful when writing bz2 files, that cannot
|
|
892 |
do write seeks.
|
|
893 |
"""
|
|
894 | 33 |
try: |
895 | 33 |
fileobj.seek(offset) |
896 | 33 |
except IOError as e: |
897 |
# This can be a negative seek in write mode for gz file object or any
|
|
898 |
# seek in write mode for a bz2 file object
|
|
899 | 33 |
pos = fileobj.tell() |
900 |
if pos == offset: |
|
901 | 33 |
return
|
902 |
if not write0: |
|
903 | 33 |
raise IOError(str(e)) |
904 |
if pos > offset: |
|
905 |
raise IOError("Can't write to seek backwards") |
|
906 | 33 |
fileobj.write(b'\x00' * (offset - pos)) |
907 | 33 |
assert fileobj.tell() == offset |
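
# Sketch of the write0 fallback with a non-seekable compressed writer (the
# file name is hypothetical); bz2 objects raise on seek in write mode, so
# seek_tell pads with zero bytes instead:
#
#     fobj = BZ2File('out.bz2', 'wb')
#     seek_tell(fobj, 16, write0=True)   # writes 16 zero bytes to reach offset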


def apply_read_scaling(arr, slope=None, inter=None):
    """ Apply scaling in `slope` and `inter` to array `arr`

    This is for loading the array from a file (as opposed to the reverse
    scaling when saving an array to file)

    Return data will be ``arr * slope + inter``.  The trick is that we have to
    find a good precision to use for applying the scaling.  The heuristic is
    that the data is always upcast to the higher of the types from `arr`,
    `slope`, `inter` if `slope` and / or `inter` are not default values.  If
    the dtype of `arr` is an integer, then we assume the data more or less
    fills the integer range, and upcast to a type such that the min, max of
    ``arr.dtype`` * scale + inter, will be finite.

    Parameters
    ----------
    arr : array-like
    slope : None or float, optional
        slope value to apply to `arr` (``arr * slope + inter``).  None
        corresponds to a value of 1.0
    inter : None or float, optional
        intercept value to apply to `arr` (``arr * slope + inter``).  None
        corresponds to a value of 0.0

    Returns
    -------
    ret : array
        array with scaling applied.  May be upcast in order to give room for
        the scaling.  If scaling is default (1, 0), then `ret` may be `arr`
        ``ret is arr``.
    """
    if slope is None:
        slope = 1.0
    if inter is None:
        inter = 0.0
    if (slope, inter) == (1, 0):
        return arr
    shape = arr.shape
    # Force float / float upcasting by promoting to arrays
    arr, slope, inter = [np.atleast_1d(v) for v in (arr, slope, inter)]
    if arr.dtype.kind in 'iu':
        # int to float; get enough precision to avoid infs
        # Find floating point type for which scaling does not overflow,
        # starting at given type
        default = (slope.dtype.type if slope.dtype.kind == 'f' else np.float64)
        ftype = int_scinter_ftype(arr.dtype, slope, inter, default)
        slope = slope.astype(ftype)
        inter = inter.astype(ftype)
    if slope != 1.0:
        arr = arr * slope
    if inter != 0.0:
        arr = arr + inter
    return arr.reshape(shape)
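
# Illustrative read-scaling: an int8 array scaled by slope 2.0 and intercept
# 10.0 upcasts to a float type wide enough to hold the scaled extremes:
#
#     arr = np.array([-128, 127], dtype=np.int8)
#     apply_read_scaling(arr, slope=2.0, inter=10.0)  # -> [-246., 264.], float64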


def working_type(in_type, slope=1.0, inter=0.0):
    """ Return array type from applying `slope`, `inter` to array of `in_type`

    Numpy type that results from an array of type `in_type` being combined with
    `slope` and `inter`.  It returns something like the dtype type of
    ``((np.zeros((2,), dtype=in_type) - inter) / slope)``, but ignoring the
    actual values of `slope` and `inter`.

    Note that you would not necessarily get the same type by applying slope and
    inter the other way round.  Also, you'll see that the order in which slope
    and inter are applied is the opposite of the order in which they are
    passed.

    Parameters
    ----------
    in_type : numpy type specifier
        Numpy type of input array.  Any valid input for ``np.dtype()``
    slope : scalar, optional
        slope to apply to array.  If 1.0 (default), ignore this value and its
        type.
    inter : scalar, optional
        intercept to apply to array.  If 0.0 (default), ignore this value and
        its type.

    Returns
    -------
    wtype : numpy type
        Numpy type resulting from applying `inter` and `slope` to array of type
        `in_type`.
    """
    val = np.array([1], dtype=in_type)
    slope = np.array(slope)
    inter = np.array(inter)
    # Don't use real values to avoid overflows.  Promote to 1D to avoid scalar
    # casting rules.  Don't use ones_like, zeros_like because of a bug in numpy
    # <= 1.5.1 in converting complex192 / complex256 scalars.
    if inter != 0:
        val = val + np.array([0], dtype=inter.dtype)
    if slope != 1:
        val = val / np.array([1], dtype=slope.dtype)
    return val.dtype.type
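
# A few illustrative results (value-independent; only the dtypes matter):
#
#     working_type(np.int16)                          # -> np.int16 (no scaling)
#     working_type(np.int16, slope=np.float32(2.0))   # -> np.float32
#     working_type(np.int16, 1.0, np.float64(3.0))    # -> np.float64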


@deprecate_with_version('calculate_scale deprecated. '
                        'Please use arraywriter classes instead',
                        '1.2',
                        '3.0')
def calculate_scale(data, out_dtype, allow_intercept):
    """ Calculate scaling and optional intercept for data

    Parameters
    ----------
    data : array
    out_dtype : dtype
        output data type in some form understood by ``np.dtype``
    allow_intercept : bool
        If True allow non-zero intercept

    Returns
    -------
    scaling : None or float
        scalefactor to divide into data.  None if no valid data
    intercept : None or float
        intercept to subtract from data.  None if no valid data
    mn : None or float
        minimum of finite value in data or None if this will not
        be used to threshold data
    mx : None or float
        maximum of finite value in data, or None if this will not
        be used to threshold data
    """
    # Code here is a compatibility shell around arraywriters refactor
    in_dtype = data.dtype
    out_dtype = np.dtype(out_dtype)
    if np.can_cast(in_dtype, out_dtype):
        return 1.0, 0.0, None, None
    from .arraywriters import make_array_writer, WriterError, get_slope_inter
    try:
        writer = make_array_writer(data, out_dtype, True, allow_intercept)
    except WriterError as e:
        raise ValueError(str(e))
    if out_dtype.kind in 'fc':
        return (1.0, 0.0, None, None)
    mn, mx = writer.finite_range()
    if (mn, mx) == (np.inf, -np.inf):  # No valid data
        return (None, None, None, None)
    if in_dtype.kind not in 'fc':
        mn, mx = (None, None)
    return get_slope_inter(writer) + (mn, mx)


@deprecate_with_version('scale_min_max deprecated. Please use arraywriter '
                        'classes instead.',
                        '1.2',
                        '3.0')
def scale_min_max(mn, mx, out_type, allow_intercept):
    """ Return scaling and intercept min, max of data, given output type

    Returns ``scalefactor`` and ``intercept`` to best fit data with
    given ``mn`` and ``mx`` min and max values into range of data type
    with ``type_min`` and ``type_max`` min and max values for type.

    The calculated scaling is therefore::

        scaled_data = (data-intercept) / scalefactor

    Parameters
    ----------
    mn : scalar
        data minimum value
    mx : scalar
        data maximum value
    out_type : numpy type
        numpy type of output
    allow_intercept : bool
        If true, allow calculation of non-zero intercept.  Otherwise,
        returned intercept is always 0.0

    Returns
    -------
    scalefactor : numpy scalar, dtype=np.maximum_sctype(np.float64)
        scalefactor by which to divide data after subtracting intercept
    intercept : numpy scalar, dtype=np.maximum_sctype(np.float64)
        value to subtract from data before dividing by scalefactor

    Examples
    --------
    >>> scale_min_max(0, 255, np.uint8, False)  # doctest: +SKIP
    (1.0, 0.0)
    >>> scale_min_max(-128, 127, np.int8, False)  # doctest: +SKIP
    (1.0, 0.0)
    >>> scale_min_max(0, 127, np.int8, False)  # doctest: +SKIP
    (1.0, 0.0)
    >>> scaling, intercept = scale_min_max(0, 127, np.int8, True)  # doctest: +SKIP
    >>> np.allclose((0 - intercept) / scaling, -128)  # doctest: +SKIP
    True
    >>> np.allclose((127 - intercept) / scaling, 127)  # doctest: +SKIP
    True
    >>> scaling, intercept = scale_min_max(-10, -1, np.int8, True)  # doctest: +SKIP
    >>> np.allclose((-10 - intercept) / scaling, -128)  # doctest: +SKIP
    True
    >>> np.allclose((-1 - intercept) / scaling, 127)  # doctest: +SKIP
    True
    >>> scaling, intercept = scale_min_max(1, 10, np.int8, True)  # doctest: +SKIP
    >>> np.allclose((1 - intercept) / scaling, -128)  # doctest: +SKIP
    True
    >>> np.allclose((10 - intercept) / scaling, 127)  # doctest: +SKIP
    True

    Notes
    -----
    We don't use this function anywhere in nibabel now, it's here for API
    compatibility only.

    The large integers lead to python long types as max / min for type.
    To contain the rounding error, we need to use the maximum numpy
    float types when casting to float.
    """
    if mn > mx:
        raise ValueError('min value > max value')
    info = type_info(out_type)
    mn, mx, type_min, type_max = np.array(
        [mn, mx, info['min'], info['max']], np.maximum_sctype(np.float64))
    # with intercept
    if allow_intercept:
        data_range = mx - mn
        if data_range == 0:
            return 1.0, mn
        type_range = type_max - type_min
        scaling = data_range / type_range
        intercept = mn - type_min * scaling
        return scaling, intercept
    # without intercept
    if mx == 0 and mn == 0:
        return 1.0, 0.0
    if type_min == 0:  # uint
        if mn < 0 and mx > 0:
            raise ValueError('Cannot scale negative and positive '
                             'numbers to uint without intercept')
        if mx < 0:
            scaling = mn / type_max
        else:
            scaling = mx / type_max
    else:  # int
        if abs(mx) >= abs(mn):
            scaling = mx / type_max
        else:
            scaling = mn / type_min
    return scaling, 0.0


def int_scinter_ftype(ifmt, slope=1.0, inter=0.0, default=np.float32):
    """ float type containing int type `ifmt` * `slope` + `inter`

    Return float type that can represent the max and the min of the `ifmt` type
    after multiplication with `slope` and addition of `inter` with something
    like ``np.array([imin, imax], dtype=ifmt) * slope + inter``.

    Note that ``slope`` and ``inter`` get promoted to 1D arrays for this
    purpose to avoid the numpy scalar casting rules, which prevent scalars
    upcasting the array.

    Parameters
    ----------
    ifmt : object
        numpy integer type (e.g. np.int32)
    slope : float, optional
        slope, default 1.0
    inter : float, optional
        intercept, default 0.0
    default : object, optional
        numpy floating point type, default is ``np.float32``

    Returns
    -------
    ftype : object
        numpy floating point type

    Examples
    --------
    >>> int_scinter_ftype(np.int8, 1.0, 0.0) == np.float32
    True
    >>> int_scinter_ftype(np.int8, 1e38, 0.0) == np.float64
    True

    Notes
    -----
    It is difficult to make floats overflow with just addition because the
    deltas are so large at the extremes of floating point.  For example::

    >>> arr = np.array([np.finfo(np.float32).max], dtype=np.float32)
    >>> res = arr + np.iinfo(np.int16).max
    >>> arr == res
    array([ True])
    """
    ii = np.iinfo(ifmt)
    tst_arr = np.array([ii.min, ii.max], dtype=ifmt)
    try:
        return _ftype4scaled_finite(tst_arr, slope, inter, 'read', default)
    except ValueError:
        raise ValueError('Overflow using highest floating point type')


def best_write_scale_ftype(arr, slope=1.0, inter=0.0, default=np.float32):
    """ Smallest float type to contain range of ``arr`` after scaling

    Scaling that will be applied to ``arr`` is ``(arr - inter) / slope``.

    Note that ``slope`` and ``inter`` get promoted to 1D arrays for this
    purpose to avoid the numpy scalar casting rules, which prevent scalars
    upcasting the array.

    Parameters
    ----------
    arr : array-like
        array that will be scaled
    slope : array-like, optional
        scalar such that output array will be ``(arr - inter) / slope``.
    inter : array-like, optional
        scalar such that output array will be ``(arr - inter) / slope``
    default : numpy type, optional
        minimum float type to return

    Returns
    -------
    ftype : numpy type
        Best floating point type for scaling.  If no floating point type
        prevents overflow, return the top floating point type.  If the input
        array ``arr`` already contains inf values, return the greater of the
        input type and the default type.

    Examples
    --------
    >>> arr = np.array([0, 1, 2], dtype=np.int16)
    >>> best_write_scale_ftype(arr, 1, 0) is np.float32
    True

    Specify higher default return value

    >>> best_write_scale_ftype(arr, 1, 0, default=np.float64) is np.float64
    True

    Even large values that don't overflow don't change output

    >>> arr = np.array([0, np.finfo(np.float32).max], dtype=np.float32)
    >>> best_write_scale_ftype(arr, 1, 0) is np.float32
    True

    Scaling > 1 reduces output values, so no upcast needed

    >>> best_write_scale_ftype(arr, np.float32(2), 0) is np.float32
    True

    Scaling < 1 increases values, so upcast may be needed (and is here)

    >>> best_write_scale_ftype(arr, np.float32(0.5), 0) is np.float64
    True
    """
    default = better_float_of(arr.dtype.type, default)
    if not np.all(np.isfinite(arr)):
        return default
    try:
        return _ftype4scaled_finite(arr, slope, inter, 'write', default)
    except ValueError:
        return OK_FLOATS[-1]


def better_float_of(first, second, default=np.float32):
    """ Return more capable float type of `first` and `second`

    Return `default` if neither of `first` or `second` is a float

    Parameters
    ----------
    first : numpy type specifier
        Any valid input to ``np.dtype()``
    second : numpy type specifier
        Any valid input to ``np.dtype()``
    default : numpy type specifier, optional
        Any valid input to ``np.dtype()``

    Returns
    -------
    better_type : numpy type
        More capable of `first` or `second` if both are floats; if only one is
        a float return that, otherwise return `default`.

    Examples
    --------
    >>> better_float_of(np.float32, np.float64) is np.float64
    True
    >>> better_float_of(np.float32, 'i4') is np.float32
    True
    >>> better_float_of('i2', 'u4') is np.float32
    True
    >>> better_float_of('i2', 'u4', np.float64) is np.float64
    True
    """
    first = np.dtype(first)
    second = np.dtype(second)
    default = np.dtype(default).type
    kinds = (first.kind, second.kind)
    if 'f' not in kinds:
        return default
    if kinds == ('f', 'f'):
        if first.itemsize >= second.itemsize:
            return first.type
        return second.type
    if first.kind == 'f':
        return first.type
    return second.type


def _ftype4scaled_finite(tst_arr, slope, inter, direction='read',
                         default=np.float32):
    """ Smallest float type for scaling of `tst_arr` that does not overflow
    """
    assert direction in ('read', 'write')
    if default not in OK_FLOATS and default is np.longdouble:
        # Omitted longdouble
        return default
    def_ind = OK_FLOATS.index(default)
    # promote to arrays to avoid numpy scalar casting rules
    tst_arr = np.atleast_1d(tst_arr)
    slope = np.atleast_1d(slope)
    inter = np.atleast_1d(inter)
    overflow_filter = ('error', '.*overflow.*', RuntimeWarning)
    for ftype in OK_FLOATS[def_ind:]:
        tst_trans = tst_arr.copy()
        slope = slope.astype(ftype)
        inter = inter.astype(ftype)
        try:
            with warnings.catch_warnings():
                # Error on overflows to short circuit the logic
                warnings.filterwarnings(*overflow_filter)
                if direction == 'read':  # as in reading of image from disk
                    if slope != 1.0:
                        tst_trans = tst_trans * slope
                    if inter != 0.0:
                        tst_trans = tst_trans + inter
                elif direction == 'write':
                    if inter != 0.0:
                        tst_trans = tst_trans - inter
                    if slope != 1.0:
                        tst_trans = tst_trans / slope
                # Double-check that result is finite
                if np.all(np.isfinite(tst_trans)):
                    return ftype
        except RuntimeWarning:
            pass
    raise ValueError('Overflow using highest floating point type')
|
1356 |
|
|
1357 |
|
|
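# Illustrative sketch, not part of the original module: the helper above walks
# OK_FLOATS from the requested default upward, retrying the scaling with each
# wider float type until no overflow warning fires.  The example values and
# the demo function name are assumptions for demonstration only.
def _demo_ftype4scaled_finite():
    big = np.array([np.finfo(np.float32).max], dtype=np.float32)
    # Reading with slope 1 stays within float32
    assert _ftype4scaled_finite(big, 1.0, 0.0, 'read') is np.float32
    # Multiplying by 2 on read overflows float32, so float64 is chosen
    assert _ftype4scaled_finite(big, 2.0, 0.0, 'read') is np.float64
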
def finite_range(arr, check_nan=False):
    """ Get range (min, max) or range and flag (min, max, has_nan) from `arr`

    Parameters
    ----------
    arr : array-like
    check_nan : {False, True}, optional
        Whether to return third output, a bool signaling whether there are NaN
        values in `arr`

    Returns
    -------
    mn : scalar
        minimum of values in (flattened) array
    mx : scalar
        maximum of values in (flattened) array
    has_nan : bool
        Returned if `check_nan` is True. `has_nan` is True if there are one or
        more NaN values in `arr`

    Examples
    --------
    >>> a = np.array([[-1, 0, 1],[np.inf, np.nan, -np.inf]])
    >>> finite_range(a)
    (-1.0, 1.0)
    >>> a = np.array([[-1, 0, 1],[np.inf, np.nan, -np.inf]])
    >>> finite_range(a, check_nan=True)
    (-1.0, 1.0, True)
    >>> a = np.array([[np.nan],[np.nan]])
    >>> finite_range(a) == (np.inf, -np.inf)
    True
    >>> a = np.array([[-3, 0, 1],[2,-1,4]], dtype=np.int)
    >>> finite_range(a)
    (-3, 4)
    >>> a = np.array([[1, 0, 1],[2,3,4]], dtype=np.uint)
    >>> finite_range(a)
    (0, 4)
    >>> a = a + 1j
    >>> finite_range(a)
    (1j, (4+1j))
    >>> a = np.zeros((2,), dtype=[('f1', 'i2')])
    >>> finite_range(a)
    Traceback (most recent call last):
        ...
    TypeError: Can only handle numeric types
    """
    arr = np.asarray(arr)
    if arr.size == 0:
        return (np.inf, -np.inf) + (False,) * check_nan
    # Resort array to slowest->fastest memory change indices
    stride_order = np.argsort(arr.strides)[::-1]
    sarr = arr.transpose(stride_order)
    kind = sarr.dtype.kind
    if kind in 'iu':
        if check_nan:
            return np.min(sarr), np.max(sarr), False
        return np.min(sarr), np.max(sarr)
    if kind not in 'cf':
        raise TypeError('Can only handle numeric types')
    # Deal with 1D arrays in loop below
    sarr = np.atleast_2d(sarr)
    # Loop to avoid big temporary arrays
    has_nan = False
    n_slices = sarr.shape[0]
    maxes = np.zeros(n_slices, dtype=sarr.dtype) - np.inf
    mins = np.zeros(n_slices, dtype=sarr.dtype) + np.inf
    for s in range(n_slices):
        this_slice = sarr[s]  # view
        if not has_nan:
            maxes[s] = np.max(this_slice)
            # May have a non-nan non-inf max before we trip on min. If so,
            # record so we don't recalculate
            max_good = False
            if np.isnan(maxes[s]):
                has_nan = True
            elif maxes[s] != np.inf:
                max_good = True
                mins[s] = np.min(this_slice)
                if mins[s] != -np.inf:
                    # Only case where we escape the default np.isfinite
                    # algorithm
                    continue
        tmp = this_slice[np.isfinite(this_slice)]
        if tmp.size == 0:  # No finite values
            # Reset max, min in case set in tests above
            maxes[s] = -np.inf
            mins[s] = np.inf
            continue
        if not max_good:
            maxes[s] = np.max(tmp)
        mins[s] = np.min(tmp)
    if check_nan:
        return np.nanmin(mins), np.nanmax(maxes), has_nan
    return np.nanmin(mins), np.nanmax(maxes)

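# Illustrative sketch, not part of the original module: for float input the
# function above scans one outermost slice at a time, so it matches a naive
# mask-and-reduce over the finite values without building a full-size filtered
# copy of the whole array at once.  The random test data are an assumption.
def _demo_finite_range_equivalence():
    rng = np.random.RandomState(42)
    data = rng.normal(size=(4, 5, 6))
    data[0, 0, 0] = np.nan
    data[1, 2, 3] = np.inf
    finite = data[np.isfinite(data)]
    mn, mx, has_nan = finite_range(data, check_nan=True)
    assert (mn, mx, has_nan) == (finite.min(), finite.max(), True)
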
def shape_zoom_affine(shape, zooms, x_flip=True):
    """ Get affine implied by given shape and zooms

    We get the translations from the center of the image (implied by
    `shape`).

    Parameters
    ----------
    shape : (N,) array-like
        shape of image data. ``N`` is the number of dimensions
    zooms : (N,) array-like
        zooms (voxel sizes) of the image
    x_flip : {True, False}
        whether to flip the X row of the affine. Corresponds to
        radiological storage on disk.

    Returns
    -------
    aff : (4,4) array
        affine giving correspondance of voxel coordinates to mm
        coordinates, taking the center of the image as origin

    Examples
    --------
    >>> shape = (3, 5, 7)
    >>> zooms = (3, 2, 1)
    >>> shape_zoom_affine((3, 5, 7), (3, 2, 1))
    array([[-3.,  0.,  0.,  3.],
           [ 0.,  2.,  0., -4.],
           [ 0.,  0.,  1., -3.],
           [ 0.,  0.,  0.,  1.]])
    >>> shape_zoom_affine((3, 5, 7), (3, 2, 1), False)
    array([[ 3.,  0.,  0., -3.],
           [ 0.,  2.,  0., -4.],
           [ 0.,  0.,  1., -3.],
           [ 0.,  0.,  0.,  1.]])
    """
    shape = np.asarray(shape)
    zooms = np.array(zooms)  # copy because of flip below
    ndims = len(shape)
    if ndims != len(zooms):
        raise ValueError('Should be same length of zooms and shape')
    if ndims >= 3:
        shape = shape[:3]
        zooms = zooms[:3]
    else:
        full_shape = np.ones((3,))
        full_zooms = np.ones((3,))
        full_shape[:ndims] = shape[:]
        full_zooms[:ndims] = zooms[:]
        shape = full_shape
        zooms = full_zooms
    if x_flip:
        zooms[0] *= -1
    # Get translations from center of image
    origin = (shape - 1) / 2.0
    aff = np.eye(4)
    aff[:3, :3] = np.diag(zooms)
    aff[:3, -1] = -origin * zooms
    return aff

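# Worked check, not part of the original module: the affine is diag(zooms)
# with translation -zooms * (shape - 1) / 2, so the center voxel maps to the
# origin.  Shape and zooms repeat the docstring example; the demo function
# name is hypothetical.
def _demo_shape_zoom_affine_center():
    shape, zooms = (3, 5, 7), (3, 2, 1)
    aff = shape_zoom_affine(shape, zooms)
    center_voxel = (np.array(shape) - 1) / 2.0  # [1., 2., 3.]
    mm = aff.dot(np.append(center_voxel, 1))[:3]
    assert np.allclose(mm, 0)
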
def rec2dict(rec):
    """ Convert recarray to dictionary

    Also converts scalar values to scalars

    Parameters
    ----------
    rec : ndarray
        structured ndarray

    Returns
    -------
    dct : dict
        dict with key, value pairs as for `rec`

    Examples
    --------
    >>> r = np.zeros((), dtype = [('x', 'i4'), ('s', 'S10')])
    >>> d = rec2dict(r)
    >>> d == {'x': 0, 's': b''}
    True
    """
    dct = {}
    for key in rec.dtype.fields:
        val = rec[key]
        try:
            val = val.item()
        except ValueError:
            pass
        dct[key] = val
    return dct

class BinOpener(Opener):
    """ Deprecated class that used to handle .mgz through specialized logic."""
    __doc__ = Opener.__doc__

    @deprecate_with_version('BinOpener class deprecated. '
                            "Please use Opener class instead.",
                            '2.1', '4.0')
    def __init__(self, *args, **kwargs):
        return super(BinOpener, self).__init__(*args, **kwargs)

def fname_ext_ul_case(fname):
    """ `fname` with ext changed to upper / lower case if file exists

    Check for existence of `fname`.  If it does exist, return unmodified.  If
    it doesn't, check for existence of `fname` with case changed from lower to
    upper, or upper to lower.  Return this modified `fname` if it exists.
    Otherwise return `fname` unmodified

    Parameters
    ----------
    fname : str
        filename.

    Returns
    -------
    mod_fname : str
        filename, maybe with extension of opposite case
    """
    if exists(fname):
        return fname
    froot, ext = splitext(fname)
    if ext == ext.lower():
        mod_fname = froot + ext.upper()
        if exists(mod_fname):
            return mod_fname
    elif ext == ext.upper():
        mod_fname = froot + ext.lower()
        if exists(mod_fname):
            return mod_fname
    return fname

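# Illustrative sketch, not part of the original module: with a file saved as
# "image.HDR", asking for the lower-case name falls through to the upper-case
# variant.  The temporary directory, filename, and demo function name are
# assumptions for demonstration only.
def _demo_fname_ext_ul_case():
    import tempfile
    from os.path import join
    with tempfile.TemporaryDirectory() as tmpdir:
        upper = join(tmpdir, 'image.HDR')
        open(upper, 'w').close()
        lower = join(tmpdir, 'image.hdr')
        # On a case-sensitive filesystem the upper-case variant is returned;
        # on a case-insensitive one `lower` already exists and is kept as-is
        assert fname_ext_ul_case(lower) in (upper, lower)
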
@deprecate_with_version('allopen is deprecated. '
                        'Please use "Opener" class instead.',
                        '2.0', '4.0')
def allopen(fileish, *args, **kwargs):
    """ Compatibility wrapper for old ``allopen`` function

    Wraps creation of ``Opener`` instance, while picking up module global
    ``default_compresslevel``.

    Please see docstring of ``Opener`` for details.
    """

    class MyOpener(Opener):
        default_compresslevel = default_compresslevel

    return MyOpener(fileish, *args, **kwargs)