BUG: Use manager to set title
1 |
""" Utilties for casting numpy values in various ways
|
|
2 |
|
|
3 |
Most routines work round some numpy oddities in floating point precision and
|
|
4 |
casting. Others work round numpy casting to and from python ints
|
|
5 |
"""
|
|
6 |
|
|
7 | 33 |
import warnings |
8 | 33 |
from numbers import Integral |
9 | 33 |
from platform import processor, machine |
10 |
|
|
11 | 33 |
import numpy as np |
12 |
|
|
13 |
|
|
14 | 33 |
class CastingError(Exception): |
15 | 33 |
pass
|
16 |
|
|
17 |
|
|
18 |
# Test for VC truncation when casting floats to uint64
|
|
19 |
# Christoph Gohlke says this is so for MSVC <= 2010 because VC is using x87
|
|
20 |
# instructions; see:
|
|
21 |
# https://github.com/scipy/scipy/blob/99bb8411f6391d921cb3f4e56619291e91ddf43b/scipy/ndimage/tests/test_datatypes.py#L51
|
|
22 | 33 |
_test_val = 2**63 + 2**11 # Should be exactly representable in float64 |
23 | 33 |
TRUNC_UINT64 = np.float64(_test_val).astype(np.uint64) != _test_val |
24 |
|
|
25 |
|
|
26 | 33 |
def float_to_int(arr, int_type, nan2zero=True, infmax=False): |
27 |
""" Convert floating point array `arr` to type `int_type`
|
|
28 |
|
|
29 |
* Rounds numbers to nearest integer
|
|
30 |
* Clips values to prevent overflows when casting
|
|
31 |
* Converts NaN to 0 (for `nan2zero` == True)
|
|
32 |
|
|
33 |
Casting floats to integers is delicate because the result is undefined
|
|
34 |
and platform specific for float values outside the range of `int_type`.
|
|
35 |
Define ``shared_min`` to be the minimum value that can be exactly
|
|
36 |
represented in both the float type of `arr` and `int_type`. Define
|
|
37 |
`shared_max` to be the equivalent maximum value. To avoid undefined
|
|
38 |
results we threshold `arr` at ``shared_min`` and ``shared_max``.
|
|
39 |
|
|
40 |
Parameters
|
|
41 |
----------
|
|
42 |
arr : array-like
|
|
43 |
Array of floating point type
|
|
44 |
int_type : object
|
|
45 |
Numpy integer type
|
|
46 |
nan2zero : {True, False, None}
|
|
47 |
Whether to convert NaN value to zero. Default is True. If False, and
|
|
48 |
NaNs are present, raise CastingError. If None, do not check for NaN
|
|
49 |
values and pass through directly to the ``astype`` casting mechanism.
|
|
50 |
In this last case, the resulting value is undefined.
|
|
51 |
infmax : {False, True}
|
|
52 |
If True, set np.inf values in `arr` to be `int_type` integer maximum
|
|
53 |
value, -np.inf as `int_type` integer minimum. If False, set +/- infs
|
|
54 |
to be ``shared_min``, ``shared_max`` as defined above. Therefore False
|
|
55 |
gives faster conversion at the expense of infs that are further from
|
|
56 |
infinity.
|
|
57 |
|
|
58 |
Returns
|
|
59 |
-------
|
|
60 |
iarr : ndarray
|
|
61 |
of type `int_type`
|
|
62 |
|
|
63 |
Examples
|
|
64 |
--------
|
|
65 |
>>> float_to_int([np.nan, np.inf, -np.inf, 1.1, 6.6], np.int16)
|
|
66 |
array([ 0, 32767, -32768, 1, 7], dtype=int16)
|
|
67 |
|
|
68 |
Notes
|
|
69 |
-----
|
|
70 |
Numpy relies on the C library to cast from float to int using the standard
|
|
71 |
``astype`` method of the array.
|
|
72 |
|
|
73 |
Quoting from section F4 of the C99 standard:
|
|
74 |
|
|
75 |
If the floating value is infinite or NaN or if the integral part of the
|
|
76 |
floating value exceeds the range of the integer type, then the
|
|
77 |
"invalid" floating-point exception is raised and the resulting value
|
|
78 |
is unspecified.
|
|
79 |
|
|
80 |
Hence we threshold at ``shared_min`` and ``shared_max`` to avoid casting to
|
|
81 |
values that are undefined.
|
|
82 |
|
|
83 |
See: https://en.wikipedia.org/wiki/C99 . There are links to the C99
|
|
84 |
standard from that page.
|
|
85 |
"""
|
|
86 | 33 |
arr = np.asarray(arr) |
87 | 33 |
flt_type = arr.dtype.type |
88 | 33 |
int_type = np.dtype(int_type).type |
89 |
# Deal with scalar as input; fancy indexing needs 1D
|
|
90 | 33 |
shape = arr.shape |
91 | 33 |
arr = np.atleast_1d(arr) |
92 | 33 |
mn, mx = shared_range(flt_type, int_type) |
93 |
if nan2zero is None: |
|
94 | 33 |
seen_nans = False |
95 |
else: |
|
96 | 33 |
nans = np.isnan(arr) |
97 | 33 |
seen_nans = np.any(nans) |
98 |
if not nan2zero and seen_nans: |
|
99 | 33 |
raise CastingError('NaNs in array, nan2zero is False') |
100 | 33 |
iarr = np.clip(np.rint(arr), mn, mx).astype(int_type) |
101 |
if seen_nans: |
|
102 | 33 |
iarr[nans] = 0 |
103 |
if not infmax: |
|
104 | 33 |
return iarr.reshape(shape) |
105 | 33 |
ii = np.iinfo(int_type) |
106 | 33 |
iarr[arr == np.inf] = ii.max |
107 |
if ii.min != int(mn): |
|
108 | 33 |
iarr[arr == -np.inf] = ii.min |
109 | 33 |
return iarr.reshape(shape) |
110 |
|
|
111 |
|
|
112 |
# Cache range values
|
|
113 | 33 |
_SHARED_RANGES = {} |
114 |
|
|
115 |
|
|
116 | 33 |
def shared_range(flt_type, int_type): |
117 |
""" Min and max in float type that are >=min, <=max in integer type
|
|
118 |
|
|
119 |
This is not as easy as it sounds, because the float type may not be able to
|
|
120 |
exactly represent the max or min integer values, so we have to find the
|
|
121 |
next exactly representable floating point value to do the thresholding.
|
|
122 |
|
|
123 |
Parameters
|
|
124 |
----------
|
|
125 |
flt_type : dtype specifier
|
|
126 |
A dtype specifier referring to a numpy floating point type. For
|
|
127 |
example, ``f4``, ``np.dtype('f4')``, ``np.float32`` are equivalent.
|
|
128 |
int_type : dtype specifier
|
|
129 |
A dtype specifier referring to a numpy integer type. For example,
|
|
130 |
``i4``, ``np.dtype('i4')``, ``np.int32`` are equivalent
|
|
131 |
|
|
132 |
Returns
|
|
133 |
-------
|
|
134 |
mn : object
|
|
135 |
Number of type `flt_type` that is the minumum value in the range of
|
|
136 |
`int_type`, such that ``mn.astype(int_type)`` >= min of `int_type`
|
|
137 |
mx : object
|
|
138 |
Number of type `flt_type` that is the maximum value in the range of
|
|
139 |
`int_type`, such that ``mx.astype(int_type)`` <= max of `int_type`
|
|
140 |
|
|
141 |
Examples
|
|
142 |
--------
|
|
143 |
>>> shared_range(np.float32, np.int32) == (-2147483648.0, 2147483520.0)
|
|
144 |
True
|
|
145 |
>>> shared_range('f4', 'i4') == (-2147483648.0, 2147483520.0)
|
|
146 |
True
|
|
147 |
"""
|
|
148 | 33 |
flt_type = np.dtype(flt_type).type |
149 | 33 |
int_type = np.dtype(int_type).type |
150 | 33 |
key = (flt_type, int_type) |
151 |
# Used cached value if present
|
|
152 | 33 |
try: |
153 | 33 |
return _SHARED_RANGES[key] |
154 | 33 |
except KeyError: |
155 | 33 |
pass
|
156 | 33 |
ii = np.iinfo(int_type) |
157 | 33 |
fi = np.finfo(flt_type) |
158 | 33 |
mn = ceil_exact(ii.min, flt_type) |
159 |
if mn == -np.inf: |
|
160 | 33 |
mn = fi.min |
161 | 33 |
mx = floor_exact(ii.max, flt_type) |
162 |
if mx == np.inf: |
|
163 | 33 |
mx = fi.max |
164 |
elif TRUNC_UINT64 and int_type == np.uint64: |
|
165 |
mx = min(mx, flt_type(2**63)) |
|
166 | 33 |
_SHARED_RANGES[key] = (mn, mx) |
167 | 33 |
return mn, mx |
168 |
|
|
169 |
|
|
170 |
# ----------------------------------------------------------------------------
|
|
171 |
# Routines to work out the next lowest representable integer in floating point
|
|
172 |
# types.
|
|
173 |
# ----------------------------------------------------------------------------
|
|
174 |
|
|
175 | 33 |
class FloatingError(Exception): |
176 | 33 |
pass
|
177 |
|
|
178 |
|
|
179 | 33 |
def on_powerpc(): |
180 |
""" True if we are running on a Power PC platform
|
|
181 |
|
|
182 |
Has to deal with older Macs and IBM POWER7 series among others
|
|
183 |
"""
|
|
184 | 33 |
return processor() == 'powerpc' or machine().startswith('ppc') |
185 |
|
|
186 |
|
|
187 | 33 |
def type_info(np_type): |
188 |
""" Return dict with min, max, nexp, nmant, width for numpy type `np_type`
|
|
189 |
|
|
190 |
Type can be integer in which case nexp and nmant are None.
|
|
191 |
|
|
192 |
Parameters
|
|
193 |
----------
|
|
194 |
np_type : numpy type specifier
|
|
195 |
Any specifier for a numpy dtype
|
|
196 |
|
|
197 |
Returns
|
|
198 |
-------
|
|
199 |
info : dict
|
|
200 |
with fields ``min`` (minimum value), ``max`` (maximum value), ``nexp``
|
|
201 |
(exponent width), ``nmant`` (significand precision not including
|
|
202 |
implicit first digit), ``minexp`` (minimum exponent), ``maxexp``
|
|
203 |
(maximum exponent), ``width`` (width in bytes). (``nexp``, ``nmant``,
|
|
204 |
``minexp``, ``maxexp``) are None for integer types. Both ``min`` and
|
|
205 |
``max`` are of type `np_type`.
|
|
206 |
|
|
207 |
Raises
|
|
208 |
------
|
|
209 |
FloatingError
|
|
210 |
for floating point types we don't recognize
|
|
211 |
|
|
212 |
Notes
|
|
213 |
-----
|
|
214 |
You might be thinking that ``np.finfo`` does this job, and it does, except
|
|
215 |
for PPC long doubles (https://github.com/numpy/numpy/issues/2669) and
|
|
216 |
float96 on Windows compiled with Mingw. This routine protects against such
|
|
217 |
errors in ``np.finfo`` by only accepting values that we know are likely to
|
|
218 |
be correct.
|
|
219 |
"""
|
|
220 | 33 |
dt = np.dtype(np_type) |
221 | 33 |
np_type = dt.type |
222 | 33 |
width = dt.itemsize |
223 | 33 |
try: # integer type |
224 | 33 |
info = np.iinfo(dt) |
225 | 33 |
except ValueError: |
226 | 33 |
pass
|
227 |
else: |
|
228 | 33 |
return dict(min=np_type(info.min), max=np_type(info.max), minexp=None, |
229 |
maxexp=None, nmant=None, nexp=None, width=width) |
|
230 | 33 |
info = np.finfo(dt) |
231 |
# Trust the standard IEEE types
|
|
232 | 33 |
nmant, nexp = info.nmant, info.nexp |
233 | 33 |
ret = dict(min=np_type(info.min), |
234 |
max=np_type(info.max), |
|
235 |
nmant=nmant, |
|
236 |
nexp=nexp, |
|
237 |
minexp=info.minexp, |
|
238 |
maxexp=info.maxexp, |
|
239 |
width=width) |
|
240 |
if np_type in (np.float16, np.float32, np.float64, |
|
241 |
np.complex64, np.complex128): |
|
242 | 33 |
return ret |
243 | 33 |
info_64 = np.finfo(np.float64) |
244 |
if dt.kind == 'c': |
|
245 | 27 |
assert np_type is np.longcomplex |
246 | 27 |
vals = (nmant, nexp, width / 2) |
247 |
else: |
|
248 | 33 |
assert np_type is np.longdouble |
249 | 33 |
vals = (nmant, nexp, width) |
250 |
if vals in ((112, 15, 16), # binary128 |
|
251 |
(info_64.nmant, info_64.nexp, 8), # float64 |
|
252 |
(63, 15, 12), (63, 15, 16)): # Intel extended 80 |
|
253 | 33 |
return ret # these are OK without modification |
254 |
# The remaining types are longdoubles with bad finfo values. Some we
|
|
255 |
# correct, others we wait to hear of errors.
|
|
256 |
# We start with float64 as basis
|
|
257 |
ret = type_info(np.float64) |
|
258 |
if vals in ((52, 15, 12), # windows float96 |
|
259 |
(52, 15, 16)): # windows float128? |
|
260 |
# On windows 32 bit at least, float96 is Intel 80 storage but operating
|
|
261 |
# at float64 precision. The finfo values give nexp == 15 (as for intel
|
|
262 |
# 80) but in calculations nexp in fact appears to be 11 as for float64
|
|
263 |
ret.update(dict(width=width)) |
|
264 |
return ret |
|
265 |
if vals == (105, 11, 16): # correctly detected double double |
|
266 |
ret.update(dict(nmant=nmant, nexp=nexp, width=width)) |
|
267 |
return ret |
|
268 |
# Oh dear, we don't recognize the type information. Try some known types
|
|
269 |
# and then give up. At this stage we're expecting exotic longdouble or
|
|
270 |
# their complex equivalent.
|
|
271 |
if np_type not in (np.longdouble, np.longcomplex) or width not in (16, 32): |
|
272 |
raise FloatingError(f'We had not expected type {np_type}') |
|
273 |
if (vals == (1, 1, 16) and on_powerpc() and |
|
274 |
_check_maxexp(np.longdouble, 1024)): |
|
275 |
# double pair on PPC. The _check_nmant routine does not work for this
|
|
276 |
# type, hence the powerpc platform check instead
|
|
277 |
ret.update(dict(nmant=106, width=width)) |
|
278 |
elif (_check_nmant(np.longdouble, 52) and |
|
279 |
_check_maxexp(np.longdouble, 11)): |
|
280 |
# Got float64 despite everything
|
|
281 |
pass
|
|
282 |
elif (_check_nmant(np.longdouble, 112) and |
|
283 |
_check_maxexp(np.longdouble, 16384)): |
|
284 |
# binary 128, but with some busted type information. np.longcomplex
|
|
285 |
# seems to break here too, so we need to use np.longdouble and
|
|
286 |
# complexify
|
|
287 |
two = np.longdouble(2) |
|
288 |
# See: https://matthew-brett.github.io/pydagogue/floating_point.html
|
|
289 |
max_val = (two ** 113 - 1) / (two ** 112) * two ** 16383 |
|
290 |
if np_type is np.longcomplex: |
|
291 |
max_val += 0j |
|
292 |
ret = dict(min=-max_val, |
|
293 |
max=max_val, |
|
294 |
nmant=112, |
|
295 |
nexp=15, |
|
296 |
minexp=-16382, |
|
297 |
maxexp=16384, |
|
298 |
width=width) |
|
299 |
else: # don't recognize the type |
|
300 |
raise FloatingError(f'We had not expected long double type {np_type} with info {info}') |
|
301 |
return ret |
|
302 |
|
|
303 |
|
|
304 | 33 |
def _check_nmant(np_type, nmant): |
305 |
""" True if fp type `np_type` seems to have `nmant` significand digits
|
|
306 |
|
|
307 |
Note 'digits' does not include implicit digits. And in fact if there are
|
|
308 |
no implicit digits, the `nmant` number is one less than the actual digits.
|
|
309 |
Assumes base 2 representation.
|
|
310 |
|
|
311 |
Parameters
|
|
312 |
----------
|
|
313 |
np_type : numpy type specifier
|
|
314 |
Any specifier for a numpy dtype
|
|
315 |
nmant : int
|
|
316 |
Number of digits to test against
|
|
317 |
|
|
318 |
Returns
|
|
319 |
-------
|
|
320 |
tf : bool
|
|
321 |
True if `nmant` is the correct number of significand digits, false
|
|
322 |
otherwise
|
|
323 |
"""
|
|
324 | 33 |
np_type = np.dtype(np_type).type |
325 | 33 |
max_contig = np_type(2 ** (nmant + 1)) # maximum of contiguous integers |
326 | 33 |
tests = max_contig + np.array([-2, -1, 0, 1, 2], dtype=np_type) |
327 | 33 |
return np.all(tests - max_contig == [-2, -1, 0, 0, 2]) |
328 |
|
|
329 |
|
|
330 | 33 |
def _check_maxexp(np_type, maxexp): |
331 |
""" True if fp type `np_type` seems to have `maxexp` maximum exponent
|
|
332 |
|
|
333 |
We're testing "maxexp" as returned by numpy. This value is set to one
|
|
334 |
greater than the maximum power of 2 that `np_type` can represent.
|
|
335 |
|
|
336 |
Assumes base 2 representation. Very crude check
|
|
337 |
|
|
338 |
Parameters
|
|
339 |
----------
|
|
340 |
np_type : numpy type specifier
|
|
341 |
Any specifier for a numpy dtype
|
|
342 |
maxexp : int
|
|
343 |
Maximum exponent to test against
|
|
344 |
|
|
345 |
Returns
|
|
346 |
-------
|
|
347 |
tf : bool
|
|
348 |
True if `maxexp` is the correct maximum exponent, False otherwise.
|
|
349 |
"""
|
|
350 | 33 |
dt = np.dtype(np_type) |
351 | 33 |
np_type = dt.type |
352 | 33 |
two = np_type(2).reshape((1,)) # to avoid upcasting |
353 | 33 |
with warnings.catch_warnings(): |
354 | 33 |
warnings.simplefilter("ignore", RuntimeWarning) # Expected overflow warning |
355 | 33 |
return np.isfinite(two ** (maxexp - 1)) and not np.isfinite(two ** maxexp) |
356 |
|
|
357 |
|
|
358 | 33 |
def as_int(x, check=True): |
359 |
""" Return python integer representation of number
|
|
360 |
|
|
361 |
This is useful because the numpy int(val) mechanism is broken for large
|
|
362 |
values in np.longdouble.
|
|
363 |
|
|
364 |
It is also useful to work around a numpy 1.4.1 bug in conversion of uints
|
|
365 |
to python ints.
|
|
366 |
|
|
367 |
This routine will still raise an OverflowError for values that are outside
|
|
368 |
the range of float64.
|
|
369 |
|
|
370 |
Parameters
|
|
371 |
----------
|
|
372 |
x : object
|
|
373 |
integer, unsigned integer or floating point value
|
|
374 |
check : {True, False}
|
|
375 |
If True, raise error for values that are not integers
|
|
376 |
|
|
377 |
Returns
|
|
378 |
-------
|
|
379 |
i : int
|
|
380 |
Python integer
|
|
381 |
|
|
382 |
Examples
|
|
383 |
--------
|
|
384 |
>>> as_int(2.0)
|
|
385 |
2
|
|
386 |
>>> as_int(-2.0)
|
|
387 |
-2
|
|
388 |
>>> as_int(2.1) #doctest: +IGNORE_EXCEPTION_DETAIL
|
|
389 |
Traceback (most recent call last):
|
|
390 |
...
|
|
391 |
FloatingError: Not an integer: 2.1
|
|
392 |
>>> as_int(2.1, check=False)
|
|
393 |
2
|
|
394 |
"""
|
|
395 | 33 |
x = np.array(x) |
396 |
if x.dtype.kind in 'iu': |
|
397 |
# This works around a nasty numpy 1.4.1 bug such that:
|
|
398 |
# >>> int(np.uint32(2**32-1)
|
|
399 |
# -1
|
|
400 | 33 |
return int(str(x)) |
401 | 33 |
ix = int(x) |
402 |
if ix == x: |
|
403 | 33 |
return ix |
404 | 33 |
fx = np.floor(x) |
405 |
if check and fx != x: |
|
406 | 33 |
raise FloatingError(f'Not an integer: {x}') |
407 |
if not fx.dtype.type == np.longdouble: |
|
408 | 33 |
return int(x) |
409 |
# Subtract float64 chunks until we have all of the number. If the int is
|
|
410 |
# too large, it will overflow
|
|
411 | 27 |
ret = 0 |
412 |
while fx != 0: |
|
413 | 27 |
f64 = np.float64(fx) |
414 | 27 |
fx -= f64 |
415 | 27 |
ret += int(f64) |
416 | 27 |
return ret |
417 |
|
|
418 |
|
|
419 | 33 |
def int_to_float(val, flt_type): |
420 |
""" Convert integer `val` to floating point type `flt_type`
|
|
421 |
|
|
422 |
Why is this so complicated?
|
|
423 |
|
|
424 |
At least in numpy <= 1.6.1, numpy longdoubles do not correctly convert to
|
|
425 |
ints, and ints do not correctly convert to longdoubles. Specifically, in
|
|
426 |
both cases, the values seem to go through float64 conversion on the way, so
|
|
427 |
to convert better, we need to split into float64s and sum up the result.
|
|
428 |
|
|
429 |
Parameters
|
|
430 |
----------
|
|
431 |
val : int
|
|
432 |
Integer value
|
|
433 |
flt_type : object
|
|
434 |
numpy floating point type
|
|
435 |
|
|
436 |
Returns
|
|
437 |
-------
|
|
438 |
f : numpy scalar
|
|
439 |
of type `flt_type`
|
|
440 |
"""
|
|
441 |
if flt_type is not np.longdouble: |
|
442 | 33 |
return flt_type(val) |
443 |
# The following works around a nasty numpy 1.4.1 bug such that:
|
|
444 |
# >>> int(np.uint32(2**32-1)
|
|
445 |
# -1
|
|
446 |
if not isinstance(val, Integral): |
|
447 |
val = int(str(val)) |
|
448 | 33 |
faval = np.longdouble(0) |
449 |
while val != 0: |
|
450 | 33 |
f64 = np.float64(val) |
451 | 33 |
faval += f64 |
452 | 33 |
val -= int(f64) |
453 | 33 |
return faval |
454 |
|
|
455 |
|
|
456 | 33 |
def floor_exact(val, flt_type): |
457 |
""" Return nearest exact integer <= `val` in float type `flt_type`
|
|
458 |
|
|
459 |
Parameters
|
|
460 |
----------
|
|
461 |
val : int
|
|
462 |
We have to pass val as an int rather than the floating point type
|
|
463 |
because large integers cast as floating point may be rounded by the
|
|
464 |
casting process.
|
|
465 |
flt_type : numpy type
|
|
466 |
numpy float type.
|
|
467 |
|
|
468 |
Returns
|
|
469 |
-------
|
|
470 |
floor_val : object
|
|
471 |
value of same floating point type as `val`, that is the nearest exact
|
|
472 |
integer in this type such that `floor_val` <= `val`. Thus if `val` is
|
|
473 |
exact in `flt_type`, `floor_val` == `val`.
|
|
474 |
|
|
475 |
Examples
|
|
476 |
--------
|
|
477 |
Obviously 2 is within the range of representable integers for float32
|
|
478 |
|
|
479 |
>>> floor_exact(2, np.float32)
|
|
480 |
2.0
|
|
481 |
|
|
482 |
As is 2**24-1 (the number of significand digits is 23 + 1 implicit)
|
|
483 |
|
|
484 |
>>> floor_exact(2**24-1, np.float32) == 2**24-1
|
|
485 |
True
|
|
486 |
|
|
487 |
But 2**24+1 gives a number that float32 can't represent exactly
|
|
488 |
|
|
489 |
>>> floor_exact(2**24+1, np.float32) == 2**24
|
|
490 |
True
|
|
491 |
|
|
492 |
As for the numpy floor function, negatives floor towards -inf
|
|
493 |
|
|
494 |
>>> floor_exact(-2**24-1, np.float32) == -2**24-2
|
|
495 |
True
|
|
496 |
"""
|
|
497 | 33 |
val = int(val) |
498 | 33 |
flt_type = np.dtype(flt_type).type |
499 | 33 |
sign = 1 if val > 0 else -1 |
500 | 33 |
try: # int_to_float deals with longdouble safely |
501 | 33 |
fval = int_to_float(val, flt_type) |
502 | 33 |
except OverflowError: |
503 | 33 |
return sign * np.inf |
504 |
if not np.isfinite(fval): |
|
505 | 33 |
return fval |
506 | 33 |
info = type_info(flt_type) |
507 | 33 |
diff = val - as_int(fval) |
508 |
if diff >= 0: # floating point value <= val |
|
509 | 33 |
return fval |
510 |
# Float casting made the value go up
|
|
511 | 33 |
biggest_gap = 2**(floor_log2(val) - info['nmant']) |
512 | 33 |
assert biggest_gap > 1 |
513 | 33 |
fval -= flt_type(biggest_gap) |
514 | 33 |
return fval |
515 |
|
|
516 |
|
|
517 | 33 |
def ceil_exact(val, flt_type): |
518 |
""" Return nearest exact integer >= `val` in float type `flt_type`
|
|
519 |
|
|
520 |
Parameters
|
|
521 |
----------
|
|
522 |
val : int
|
|
523 |
We have to pass val as an int rather than the floating point type
|
|
524 |
because large integers cast as floating point may be rounded by the
|
|
525 |
casting process.
|
|
526 |
flt_type : numpy type
|
|
527 |
numpy float type.
|
|
528 |
|
|
529 |
Returns
|
|
530 |
-------
|
|
531 |
ceil_val : object
|
|
532 |
value of same floating point type as `val`, that is the nearest exact
|
|
533 |
integer in this type such that `floor_val` >= `val`. Thus if `val` is
|
|
534 |
exact in `flt_type`, `ceil_val` == `val`.
|
|
535 |
|
|
536 |
Examples
|
|
537 |
--------
|
|
538 |
Obviously 2 is within the range of representable integers for float32
|
|
539 |
|
|
540 |
>>> ceil_exact(2, np.float32)
|
|
541 |
2.0
|
|
542 |
|
|
543 |
As is 2**24-1 (the number of significand digits is 23 + 1 implicit)
|
|
544 |
|
|
545 |
>>> ceil_exact(2**24-1, np.float32) == 2**24-1
|
|
546 |
True
|
|
547 |
|
|
548 |
But 2**24+1 gives a number that float32 can't represent exactly
|
|
549 |
|
|
550 |
>>> ceil_exact(2**24+1, np.float32) == 2**24+2
|
|
551 |
True
|
|
552 |
|
|
553 |
As for the numpy ceil function, negatives ceil towards inf
|
|
554 |
|
|
555 |
>>> ceil_exact(-2**24-1, np.float32) == -2**24
|
|
556 |
True
|
|
557 |
"""
|
|
558 | 33 |
return -floor_exact(-val, flt_type) |
559 |
|
|
560 |
|
|
561 | 33 |
def int_abs(arr): |
562 |
""" Absolute values of array taking care of max negative int values
|
|
563 |
|
|
564 |
Parameters
|
|
565 |
----------
|
|
566 |
arr : array-like
|
|
567 |
|
|
568 |
Returns
|
|
569 |
-------
|
|
570 |
abs_arr : array
|
|
571 |
array the same shape as `arr` in which all negative numbers have been
|
|
572 |
changed to positive numbers with the magnitude.
|
|
573 |
|
|
574 |
Examples
|
|
575 |
--------
|
|
576 |
This kind of thing is confusing in base numpy:
|
|
577 |
|
|
578 |
>>> import numpy as np
|
|
579 |
>>> np.abs(np.int8(-128))
|
|
580 |
-128
|
|
581 |
|
|
582 |
``int_abs`` fixes that:
|
|
583 |
|
|
584 |
>>> int_abs(np.int8(-128))
|
|
585 |
128
|
|
586 |
>>> int_abs(np.array([-128, 127], dtype=np.int8))
|
|
587 |
array([128, 127], dtype=uint8)
|
|
588 |
>>> int_abs(np.array([-128, 127], dtype=np.float32))
|
|
589 |
array([128., 127.], dtype=float32)
|
|
590 |
"""
|
|
591 | 33 |
arr = np.array(arr, copy=False) |
592 | 33 |
dt = arr.dtype |
593 |
if dt.kind == 'u': |
|
594 | 33 |
return arr |
595 |
if dt.kind != 'i': |
|
596 | 33 |
return np.absolute(arr) |
597 | 33 |
out = arr.astype(np.dtype(dt.str.replace('i', 'u'))) |
598 | 33 |
return np.choose(arr < 0, (arr, arr * -1), out=out) |
599 |
|
|
600 |
|
|
601 | 33 |
def floor_log2(x): |
602 |
""" floor of log2 of abs(`x`)
|
|
603 |
|
|
604 |
Embarrassingly, from https://en.wikipedia.org/wiki/Binary_logarithm
|
|
605 |
|
|
606 |
Parameters
|
|
607 |
----------
|
|
608 |
x : int
|
|
609 |
|
|
610 |
Returns
|
|
611 |
-------
|
|
612 |
L : None or int
|
|
613 |
floor of base 2 log of `x`. None if `x` == 0.
|
|
614 |
|
|
615 |
Examples
|
|
616 |
--------
|
|
617 |
>>> floor_log2(2**9+1)
|
|
618 |
9
|
|
619 |
>>> floor_log2(-2**9+1)
|
|
620 |
8
|
|
621 |
>>> floor_log2(0.5)
|
|
622 |
-1
|
|
623 |
>>> floor_log2(0) is None
|
|
624 |
True
|
|
625 |
"""
|
|
626 | 33 |
ip = 0 |
627 | 33 |
rem = abs(x) |
628 |
if rem > 1: |
|
629 |
while rem >= 2: |
|
630 | 33 |
ip += 1 |
631 | 33 |
rem //= 2 |
632 | 33 |
return ip |
633 |
elif rem == 0: |
|
634 | 33 |
return None |
635 |
while rem < 1: |
|
636 | 33 |
ip -= 1 |
637 | 33 |
rem *= 2 |
638 | 33 |
return ip |
639 |
|
|
640 |
|
|
641 | 33 |
def best_float(): |
642 |
""" Floating point type with best precision
|
|
643 |
|
|
644 |
This is nearly always np.longdouble, except on Windows, where np.longdouble
|
|
645 |
is Intel80 storage, but with float64 precision for calculations. In that
|
|
646 |
case we return float64 on the basis it's the fastest and smallest at the
|
|
647 |
highest precision.
|
|
648 |
|
|
649 |
SPARC float128 also proved so slow that we prefer float64.
|
|
650 |
|
|
651 |
Returns
|
|
652 |
-------
|
|
653 |
best_type : numpy type
|
|
654 |
floating point type with highest precision
|
|
655 |
|
|
656 |
Notes
|
|
657 |
-----
|
|
658 |
Needs to run without error for module import, because it is called in
|
|
659 |
``ok_floats`` below, and therefore in setting module global ``OK_FLOATS``.
|
|
660 |
"""
|
|
661 | 33 |
try: |
662 | 33 |
long_info = type_info(np.longdouble) |
663 |
except FloatingError: |
|
664 |
return np.float64 |
|
665 |
if (long_info['nmant'] > type_info(np.float64)['nmant'] and |
|
666 |
machine() != 'sparc64'): # sparc has crazy-slow float128 |
|
667 | 27 |
return np.longdouble |
668 | 6 |
return np.float64 |
669 |
|
|
670 |
|
|
671 | 33 |
def longdouble_lte_float64(): |
672 |
""" Return True if longdouble appears to have the same precision as float64
|
|
673 |
"""
|
|
674 | 33 |
return np.longdouble(2**53) == np.longdouble(2**53) + 1 |
675 |
|
|
676 |
|
|
677 |
# Record longdouble precision at import because it can change on Windows
|
|
678 | 33 |
_LD_LTE_FLOAT64 = longdouble_lte_float64() |
679 |
|
|
680 |
|
|
681 | 33 |
def longdouble_precision_improved(): |
682 |
""" True if longdouble precision increased since initial import
|
|
683 |
|
|
684 |
This can happen on Windows compiled with MSVC. It may be because libraries
|
|
685 |
compiled with mingw (longdouble is Intel80) get linked to numpy compiled
|
|
686 |
with MSVC (longdouble is Float64)
|
|
687 |
"""
|
|
688 | 33 |
return not longdouble_lte_float64() and _LD_LTE_FLOAT64 |
689 |
|
|
690 |
|
|
691 | 33 |
def have_binary128(): |
692 |
""" True if we have a binary128 IEEE longdouble
|
|
693 |
"""
|
|
694 | 33 |
try: |
695 | 33 |
ti = type_info(np.longdouble) |
696 |
except FloatingError: |
|
697 |
return False |
|
698 | 33 |
return (ti['nmant'], ti['maxexp']) == (112, 16384) |
699 |
|
|
700 |
|
|
701 | 33 |
def ok_floats(): |
702 |
""" Return floating point types sorted by precision
|
|
703 |
|
|
704 |
Remove longdouble if it has no higher precision than float64
|
|
705 |
"""
|
|
706 |
# copy float list so we don't change the numpy global
|
|
707 | 33 |
floats = np.sctypes['float'][:] |
708 |
if best_float() != np.longdouble and np.longdouble in floats: |
|
709 |
floats.remove(np.longdouble) |
|
710 |
return sorted(floats, key=lambda f: type_info(f)['nmant']) |
|
711 |
|
|
712 |
|
|
713 | 33 |
OK_FLOATS = ok_floats() |
714 |
|
|
715 |
|
|
716 | 33 |
def able_int_type(values): |
717 |
""" Find the smallest integer numpy type to contain sequence `values`
|
|
718 |
|
|
719 |
Prefers uint to int if minimum is >= 0
|
|
720 |
|
|
721 |
Parameters
|
|
722 |
----------
|
|
723 |
values : sequence
|
|
724 |
sequence of integer values
|
|
725 |
|
|
726 |
Returns
|
|
727 |
-------
|
|
728 |
itype : None or numpy type
|
|
729 |
numpy integer type or None if no integer type holds all `values`
|
|
730 |
|
|
731 |
Examples
|
|
732 |
--------
|
|
733 |
>>> able_int_type([0, 1]) == np.uint8
|
|
734 |
True
|
|
735 |
>>> able_int_type([-1, 1]) == np.int8
|
|
736 |
True
|
|
737 |
"""
|
|
738 |
if any([v % 1 for v in values]): |
|
739 | 33 |
return None |
740 | 33 |
mn = min(values) |
741 | 33 |
mx = max(values) |
742 |
if mn >= 0: |
|
743 |
for ityp in np.sctypes['uint']: |
|
744 |
if mx <= np.iinfo(ityp).max: |
|
745 | 33 |
return ityp |
746 |
for ityp in np.sctypes['int']: |
|
747 | 33 |
info = np.iinfo(ityp) |
748 |
if mn >= info.min and mx <= info.max: |
|
749 | 33 |
return ityp |
750 | 33 |
return None |
751 |
|
|
752 |
|
|
753 | 33 |
def ulp(val=np.float64(1.0)): |
754 |
""" Return gap between `val` and nearest representable number of same type
|
|
755 |
|
|
756 |
This is the value of a unit in the last place (ULP), and is similar in
|
|
757 |
meaning to the MATLAB eps function.
|
|
758 |
|
|
759 |
Parameters
|
|
760 |
----------
|
|
761 |
val : scalar, optional
|
|
762 |
scalar value of any numpy type. Default is 1.0 (float64)
|
|
763 |
|
|
764 |
Returns
|
|
765 |
-------
|
|
766 |
ulp_val : scalar
|
|
767 |
gap between `val` and nearest representable number of same type
|
|
768 |
|
|
769 |
Notes
|
|
770 |
-----
|
|
771 |
The wikipedia article on machine epsilon points out that the term *epsilon*
|
|
772 |
can be used in the sense of a unit in the last place (ULP), or as the
|
|
773 |
maximum relative rounding error. The MATLAB ``eps`` function uses the ULP
|
|
774 |
meaning, but this function is ``ulp`` rather than ``eps`` to avoid
|
|
775 |
confusion between different meanings of *eps*.
|
|
776 |
"""
|
|
777 | 33 |
val = np.array(val) |
778 |
if not np.isfinite(val): |
|
779 | 33 |
return np.nan |
780 |
if val.dtype.kind in 'iu': |
|
781 | 33 |
return 1 |
782 | 33 |
aval = np.abs(val) |
783 | 33 |
info = type_info(val.dtype) |
784 | 33 |
fl2 = floor_log2(aval) |
785 |
if fl2 is None or fl2 < info['minexp']: # subnormal |
|
786 | 33 |
fl2 = info['minexp'] |
787 |
# 'nmant' value does not include implicit first bit
|
|
788 | 33 |
return 2**(fl2 - info['nmant']) |
Read our documentation on viewing source code .