#37539 CI Move unwanted typing checks to pre-commit

Merged · Marco Gorelli (MarcoGorelli) · Pseudo commit used to compare (e38e987...a648fb2)
Missing base report.

Unable to compare commits because the base of the pull request did not upload a coverage report.

Changes found in between e38e987...a648fb2 (pseudo...base) which prevent comparing this pull request.

Showing 15 of 38 files from the diff.
Newly tracked file
pandas/io/common.py changed.
Newly tracked file
pandas/core/frame.py changed.
Newly tracked file
pandas/io/orc.py changed.

@@ -81,9 +81,7 @@
Loading
81 81
82 82
    feather.write_feather(df, ioargs.filepath_or_buffer, **kwargs)
83 83
84 -
    if ioargs.should_close:
85 -
        assert not isinstance(ioargs.filepath_or_buffer, str)
86 -
        ioargs.filepath_or_buffer.close()
84 +
    ioargs.close()
87 85
88 86
89 87
def read_feather(
@@ -137,9 +135,6 @@
Loading
137 135
        ioargs.filepath_or_buffer, columns=columns, use_threads=bool(use_threads)
138 136
    )
139 137
140 -
    # s3fs only validates the credentials when the file is closed.
141 -
    if ioargs.should_close:
142 -
        assert not isinstance(ioargs.filepath_or_buffer, str)
143 -
        ioargs.filepath_or_buffer.close()
138 +
    ioargs.close()
144 139
145 140
    return df

@@ -2,8 +2,9 @@
Loading
2 2
3 3
import bz2
4 4
from collections import abc
5 +
import dataclasses
5 6
import gzip
6 -
from io import BufferedIOBase, BytesIO, RawIOBase
7 +
from io import BufferedIOBase, BytesIO, RawIOBase, TextIOWrapper
7 8
import mmap
8 9
import os
9 10
import pathlib
@@ -13,12 +14,14 @@
Loading
13 14
    Any,
14 15
    AnyStr,
15 16
    Dict,
17 +
    Generic,
16 18
    List,
17 19
    Mapping,
18 20
    Optional,
19 21
    Tuple,
20 22
    Type,
21 23
    Union,
24 +
    cast,
22 25
)
23 26
from urllib.parse import (
24 27
    urljoin,
@@ -31,12 +34,12 @@
Loading
31 34
import zipfile
32 35
33 36
from pandas._typing import (
37 +
    Buffer,
34 38
    CompressionDict,
35 39
    CompressionOptions,
36 40
    EncodingVar,
37 41
    FileOrBuffer,
38 42
    FilePathOrBuffer,
39 -
    IOargs,
40 43
    ModeVar,
41 44
    StorageOptions,
42 45
)
@@ -56,6 +59,76 @@
Loading
56 59
    from io import IOBase
57 60
58 61
62 +
@dataclasses.dataclass
63 +
class IOArgs(Generic[ModeVar, EncodingVar]):
64 +
    """
65 +
    Return value of io/common.py:get_filepath_or_buffer.
66 +
67 +
    This is used to easily close created fsspec objects.
68 +
69 +
    Note (copy&paste from io/parsers):
70 +
    filepath_or_buffer can be Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile]
71 +
    though mypy handling of conditional imports is difficult.
72 +
    See https://github.com/python/mypy/issues/1297
73 +
    """
74 +
75 +
    filepath_or_buffer: FileOrBuffer
76 +
    encoding: EncodingVar
77 +
    mode: Union[ModeVar, str]
78 +
    compression: CompressionDict
79 +
    should_close: bool = False
80 +
81 +
    def close(self) -> None:
82 +
        """
83 +
        Close the buffer if it was created by get_filepath_or_buffer.
84 +
        """
85 +
        if self.should_close:
86 +
            assert not isinstance(self.filepath_or_buffer, str)
87 +
            try:
88 +
                self.filepath_or_buffer.close()
89 +
            except (OSError, ValueError):
90 +
                pass
91 +
        self.should_close = False
92 +
93 +
94 +
@dataclasses.dataclass
95 +
class IOHandles:
96 +
    """
97 +
    Return value of io/common.py:get_handle
98 +
99 +
    This is used to easily close created buffers and to handle corner cases when
100 +
    TextIOWrapper is inserted.
101 +
102 +
    handle: The file handle to be used.
103 +
    created_handles: All file handles that are created by get_handle
104 +
    is_wrapped: Whether a TextIOWrapper needs to be detached.
105 +
    """
106 +
107 +
    handle: Buffer
108 +
    created_handles: List[Buffer] = dataclasses.field(default_factory=list)
109 +
    is_wrapped: bool = False
110 +
111 +
    def close(self) -> None:
112 +
        """
113 +
        Close all created buffers.
114 +
115 +
        Note: If a TextIOWrapper was inserted, it is flushed and detached to
116 +
        avoid closing the potentially user-created buffer.
117 +
        """
118 +
        if self.is_wrapped:
119 +
            assert isinstance(self.handle, TextIOWrapper)
120 +
            self.handle.flush()
121 +
            self.handle.detach()
122 +
            self.created_handles.remove(self.handle)
123 +
        try:
124 +
            for handle in self.created_handles:
125 +
                handle.close()
126 +
        except (OSError, ValueError):
127 +
            pass
128 +
        self.created_handles = []
129 +
        self.is_wrapped = False
130 +
131 +
59 132
def is_url(url) -> bool:
60 133
    """
61 134
    Check to see if a URL has a valid protocol.
@@ -176,7 +249,7 @@
Loading
176 249
    compression: CompressionOptions = None,
177 250
    mode: ModeVar = None,  # type: ignore[assignment]
178 251
    storage_options: StorageOptions = None,
179 -
) -> IOargs[ModeVar, EncodingVar]:
252 +
) -> IOArgs[ModeVar, EncodingVar]:
180 253
    """
181 254
    If the filepath_or_buffer is a url, translate and return the buffer.
182 255
    Otherwise passthrough.
@@ -201,7 +274,7 @@
Loading
201 274
202 275
    .. versionchanged:: 1.2.0
203 276
204 -
      Returns the dataclass IOargs.
277 +
      Returns the dataclass IOArgs.
205 278
    """
206 279
    filepath_or_buffer = stringify_path(filepath_or_buffer)
207 280
@@ -225,6 +298,10 @@
Loading
225 298
226 299
    compression = dict(compression, method=compression_method)
227 300
301 +
    # uniform encoding names
302 +
    if encoding is not None:
303 +
        encoding = encoding.replace("_", "-").lower()
304 +
228 305
    # bz2 and xz do not write the byte order mark for utf-16 and utf-32
229 306
    # print a warning when writing such files
230 307
    if (
@@ -258,7 +335,7 @@
Loading
258 335
            compression = {"method": "gzip"}
259 336
        reader = BytesIO(req.read())
260 337
        req.close()
261 -
        return IOargs(
338 +
        return IOArgs(
262 339
            filepath_or_buffer=reader,
263 340
            encoding=encoding,
264 341
            compression=compression,
@@ -310,7 +387,7 @@
Loading
310 387
                filepath_or_buffer, mode=fsspec_mode, **(storage_options or {})
311 388
            ).open()
312 389
313 -
        return IOargs(
390 +
        return IOArgs(
314 391
            filepath_or_buffer=file_obj,
315 392
            encoding=encoding,
316 393
            compression=compression,
@@ -323,7 +400,7 @@
Loading
323 400
        )
324 401
325 402
    if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)):
326 -
        return IOargs(
403 +
        return IOArgs(
327 404
            filepath_or_buffer=_expand_user(filepath_or_buffer),
328 405
            encoding=encoding,
329 406
            compression=compression,
@@ -335,7 +412,7 @@
Loading
335 412
        msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}"
336 413
        raise ValueError(msg)
337 414
338 -
    return IOargs(
415 +
    return IOArgs(
339 416
        filepath_or_buffer=filepath_or_buffer,
340 417
        encoding=encoding,
341 418
        compression=compression,
@@ -455,14 +532,14 @@
Loading
455 532
456 533
457 534
def get_handle(
458 -
    path_or_buf,
535 +
    path_or_buf: FilePathOrBuffer,
459 536
    mode: str,
460 -
    encoding=None,
537 +
    encoding: Optional[str] = None,
461 538
    compression: CompressionOptions = None,
462 539
    memory_map: bool = False,
463 540
    is_text: bool = True,
464 -
    errors=None,
465 -
):
541 +
    errors: Optional[str] = None,
542 +
) -> IOHandles:
466 543
    """
467 544
    Get file handle for given path/buffer and mode.
468 545
@@ -506,14 +583,9 @@
Loading
506 583
        See the errors argument for :func:`open` for a full list
507 584
        of options.
508 585
509 -
        .. versionadded:: 1.1.0
586 +
    .. versionchanged:: 1.2.0
510 587
511 -
    Returns
512 -
    -------
513 -
    f : file-like
514 -
        A file-like object.
515 -
    handles : list of file-like objects
516 -
        A list of file-like object that were opened in this function.
588 +
    Returns the dataclass IOHandles
517 589
    """
518 590
    need_text_wrapping: Tuple[Type["IOBase"], ...]
519 591
    try:
@@ -532,12 +604,16 @@
Loading
532 604
    except ImportError:
533 605
        pass
534 606
535 -
    handles: List[Union[IO, _MMapWrapper]] = list()
536 -
    f = path_or_buf
607 +
    handles: List[Buffer] = list()
608 +
609 +
    # Windows does not default to utf-8. Set to utf-8 for a consistent behavior
610 +
    if encoding is None:
611 +
        encoding = "utf-8"
537 612
538 613
    # Convert pathlib.Path/py.path.local or string
539 614
    path_or_buf = stringify_path(path_or_buf)
540 615
    is_path = isinstance(path_or_buf, str)
616 +
    f = path_or_buf
541 617
542 618
    compression, compression_args = get_compression_method(compression)
543 619
    if is_path:
@@ -548,25 +624,29 @@
Loading
548 624
        # GZ Compression
549 625
        if compression == "gzip":
550 626
            if is_path:
627 +
                assert isinstance(path_or_buf, str)
551 628
                f = gzip.GzipFile(filename=path_or_buf, mode=mode, **compression_args)
552 629
            else:
553 -
                f = gzip.GzipFile(fileobj=path_or_buf, mode=mode, **compression_args)
630 +
                f = gzip.GzipFile(
631 +
                    fileobj=path_or_buf,  # type: ignore[arg-type]
632 +
                    mode=mode,
633 +
                    **compression_args,
634 +
                )
554 635
555 636
        # BZ Compression
556 637
        elif compression == "bz2":
557 -
            f = bz2.BZ2File(path_or_buf, mode=mode, **compression_args)
638 +
            f = bz2.BZ2File(
639 +
                path_or_buf, mode=mode, **compression_args  # type: ignore[arg-type]
640 +
            )
558 641
559 642
        # ZIP Compression
560 643
        elif compression == "zip":
561 -
            zf = _BytesZipFile(path_or_buf, mode, **compression_args)
562 -
            # Ensure the container is closed as well.
563 -
            handles.append(zf)
564 -
            if zf.mode == "w":
565 -
                f = zf
566 -
            elif zf.mode == "r":
567 -
                zip_names = zf.namelist()
644 +
            f = _BytesZipFile(path_or_buf, mode, **compression_args)
645 +
            if f.mode == "r":
646 +
                handles.append(f)
647 +
                zip_names = f.namelist()
568 648
                if len(zip_names) == 1:
569 -
                    f = zf.open(zip_names.pop())
649 +
                    f = f.open(zip_names.pop())
570 650
                elif len(zip_names) == 0:
571 651
                    raise ValueError(f"Zero files found in ZIP file {path_or_buf}")
572 652
                else:
@@ -584,36 +664,40 @@
Loading
584 664
            msg = f"Unrecognized compression type: {compression}"
585 665
            raise ValueError(msg)
586 666
667 +
        assert not isinstance(f, str)
587 668
        handles.append(f)
588 669
589 670
    elif is_path:
590 671
        # Check whether the filename is to be opened in binary mode.
591 672
        # Binary mode does not support 'encoding' and 'newline'.
592 673
        is_binary_mode = "b" in mode
593 -
674 +
        assert isinstance(path_or_buf, str)
594 675
        if encoding and not is_binary_mode:
595 676
            # Encoding
596 677
            f = open(path_or_buf, mode, encoding=encoding, errors=errors, newline="")
597 -
        elif is_text and not is_binary_mode:
598 -
            # No explicit encoding
599 -
            f = open(path_or_buf, mode, errors="replace", newline="")
600 678
        else:
601 679
            # Binary mode
602 680
            f = open(path_or_buf, mode)
603 681
        handles.append(f)
604 682
605 683
    # Convert BytesIO or file objects passed with an encoding
606 -
    if is_text and (compression or isinstance(f, need_text_wrapping)):
607 -
        from io import TextIOWrapper
608 -
609 -
        g = TextIOWrapper(f, encoding=encoding, errors=errors, newline="")
610 -
        if not isinstance(f, (BufferedIOBase, RawIOBase)):
611 -
            handles.append(g)
612 -
        f = g
684 +
    is_wrapped = False
685 +
    if is_text and (
686 +
        compression
687 +
        or isinstance(f, need_text_wrapping)
688 +
        or "b" in getattr(f, "mode", "")
689 +
    ):
690 +
        f = TextIOWrapper(
691 +
            f, encoding=encoding, errors=errors, newline=""  # type: ignore[arg-type]
692 +
        )
693 +
        handles.append(f)
694 +
        # do not mark as wrapped when the user provided a string
695 +
        is_wrapped = not is_path
613 696
614 697
    if memory_map and hasattr(f, "fileno"):
698 +
        assert not isinstance(f, str)
615 699
        try:
616 -
            wrapped = _MMapWrapper(f)
700 +
            wrapped = cast(mmap.mmap, _MMapWrapper(f))  # type: ignore[arg-type]
617 701
            f.close()
618 702
            handles.remove(f)
619 703
            handles.append(wrapped)
@@ -625,7 +709,13 @@
Loading
625 709
            # leave the file handler as is then
626 710
            pass
627 711
628 -
    return f, handles
712 +
    handles.reverse()  # close the most recently added buffer first
713 +
    assert not isinstance(f, str)
714 +
    return IOHandles(
715 +
        handle=f,
716 +
        created_handles=handles,
717 +
        is_wrapped=is_wrapped,
718 +
    )
629 719
630 720
631 721
# error: Definition of "__exit__" in base class "ZipFile" is incompatible with

@@ -229,12 +229,9 @@
Loading
229 229
    @cache_readonly
230 230
    def indices(self):
231 231
        """ dict {group name -> group indices} """
232 -
        if len(self.groupings) == 1:
233 -
            return self.groupings[0].indices
234 -
        else:
235 -
            codes_list = [ping.codes for ping in self.groupings]
236 -
            keys = [ping.group_index for ping in self.groupings]
237 -
            return get_indexer_dict(codes_list, keys)
232 +
        codes_list = [ping.codes for ping in self.groupings]
233 +
        keys = [ping.group_index for ping in self.groupings]
234 +
        return get_indexer_dict(codes_list, keys)
238 235
239 236
    @property
240 237
    def codes(self) -> List[np.ndarray]:

@@ -1968,7 +1968,13 @@
Loading
1968 1968
            data, inferred_tz = objects_to_datetime64ns(
1969 1969
                data, dayfirst=dayfirst, yearfirst=yearfirst
1970 1970
            )
1971 -
            tz = _maybe_infer_tz(tz, inferred_tz)
1971 +
            if tz and inferred_tz:
1972 +
                # two timezones: convert from base UTC repr to the intended tz
1973 +
                data = tzconversion.tz_convert_from_utc(data.view("i8"), tz)
1974 +
                data = data.view(DT64NS_DTYPE)
1975 +
            elif inferred_tz:
1976 +
                tz = inferred_tz
1977 +
1972 1978
        data_dtype = data.dtype
1973 1979
1974 1980
    # `data` may have originally been a Categorical[datetime64[ns, tz]],

@@ -16,7 +16,7 @@
Loading
16 16
from collections import abc
17 17
from datetime import datetime, timedelta
18 18
import struct
19 -
from typing import IO, Any, Union
19 +
from typing import IO, Any, Union, cast
20 20
21 21
import numpy as np
22 22
@@ -131,8 +131,6 @@
Loading
131 131
        bytes.
132 132
    """
133 133
134 -
    _path_or_buf: IO[Any]
135 -
136 134
    def __init__(
137 135
        self,
138 136
        path_or_buf,
@@ -170,14 +168,12 @@
Loading
170 168
        self._current_row_on_page_index = 0
171 169
        self._current_row_in_file_index = 0
172 170
173 -
        path_or_buf = get_filepath_or_buffer(path_or_buf).filepath_or_buffer
174 -
        if isinstance(path_or_buf, str):
175 -
            buf = open(path_or_buf, "rb")
176 -
            self.handle = buf
177 -
        else:
178 -
            buf = path_or_buf
171 +
        self.ioargs = get_filepath_or_buffer(path_or_buf)
172 +
        if isinstance(self.ioargs.filepath_or_buffer, str):
173 +
            self.ioargs.filepath_or_buffer = open(path_or_buf, "rb")
174 +
            self.ioargs.should_close = True
179 175
180 -
        self._path_or_buf: IO[Any] = buf
176 +
        self._path_or_buf = cast(IO[Any], self.ioargs.filepath_or_buffer)
181 177
182 178
        try:
183 179
            self._get_properties()
@@ -202,10 +198,7 @@
Loading
202 198
        return np.asarray(self._column_types, dtype=np.dtype("S1"))
203 199
204 200
    def close(self):
205 -
        try:
206 -
            self.handle.close()
207 -
        except AttributeError:
208 -
            pass
201 +
        self.ioargs.close()
209 202
210 203
    def _get_properties(self):
211 204

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Unable to process changes.

No base report to compare against.

Files Coverage
pandas 94.21%
Project Totals (214 files) 94.21%
Loading