sdpython / cpyquickhelper
1
"""
2
@file
3
@brief Fast data manipulations.
4
"""
5 5
import pandas
6

7

8 5
def df2array(df, check=True):
    """
    Converts a dataframe into a :epkg:`numpy:array`
    without copying. :epkg:`pandas` is merging
    consecutive columns sharing the same type
    into one memory block. The function can be used
    only if the data is stored in one block and one type
    as a consequence.

    @param      df      dataframe
    @param      check   verifies the operation can be done (True)
                        or skip verification (False)
    @return             :epkg:`numpy:array`, the values of the unique
                        block, returned as stored (no copy, no reshaping)

    When *check* is True, the function raises ``TypeError`` if *df*
    is not a dataframe and ``ValueError`` if the data is not stored
    in exactly one block.

    See `data member <https://pandas.pydata.org/pandas-docs/stable/search.html?q=pointer&check_keywords=yes&area=default>`_,
    `_data <https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L322>`_.

    .. seealso:: @see fn df2arrays
    """
    if check and not isinstance(df, pandas.DataFrame):
        raise TypeError("df is not a pandas.DataFrame")  # pragma: no cover

    # The block manager is exposed as ``_mgr`` by recent pandas versions;
    # ``_data`` is the legacy alias (deprecated), only used as a fallback.
    mgr = getattr(df, "_mgr", None)
    if mgr is None:
        mgr = df._data

    blocks = mgr.blocks
    if check and len(blocks) != 1:
        raise ValueError(
            "The dataframe has many block of data. There should be only one column type.")
    return blocks[0].values
34

35

36 5
def df2arrays(df, sep=",", check=True):
    """
    Converts a dataframe into a list of tuple
    *(column name, :epkg:`numpy:array`)*
    without copying. :epkg:`pandas` is merging
    columns sharing the same type
    into one memory block. That's what the function extracts.

    @param      df      dataframe
    @param      check   verifies the operation can be done (True)
                        or skip verification (False)
    @param      sep     columns separator
    @return             a list of tuple ``(column, array)``,
                        one tuple per block, *column* is the
                        concatenation (with *sep*) of the names of
                        the columns stored in the block

    When *check* is True, the function raises ``TypeError`` if *df*
    is not a dataframe.

    Example:

    .. runpython::
        :showcode:

        from pandas import DataFrame
        from cpyquickhelper.fastdata import df2arrays

        df = DataFrame([dict(a=3.4, b=5.6, c="e"),
                        dict(a=3.5, b=5.7, c="r")])
        arr = df2arrays(df)
        print(arr)


    .. seealso:: @see fn df2array
    """
    if check and not isinstance(df, pandas.DataFrame):
        raise TypeError("df is not a pandas.DataFrame")  # pragma: no cover

    # The block manager is exposed as ``_mgr`` by recent pandas versions;
    # ``_data`` is the legacy alias (deprecated), only used as a fallback.
    mgr = getattr(df, "_mgr", None)
    if mgr is None:
        mgr = df._data

    cols = df.columns
    res = []
    for blk in mgr.blocks:
        # ``mgr_locs`` holds the positions of the block's columns in the
        # dataframe. The shape of the block cannot be used for that:
        # its second dimension is the number of rows, and the columns of a
        # block are not necessarily consecutive.
        names = cols[blk.mgr_locs.indexer]
        # str(...) keeps string names unchanged and supports other labels.
        res.append((sep.join(str(c) for c in names), blk.values))
    return res

Read our documentation on viewing source code.

Loading