sdpython / cpyquickhelper
1
"""
2
@file
3
@brief Addition for :epkg:`pandas`.
4
"""
5 1
from itertools import chain
6 1
import numpy
7 1
from pandas import Series
8 1
from pandas.api.extensions import (
9
    register_series_accessor, ExtensionDtype, register_extension_dtype)
10 1
from pandas.arrays import PandasArray
11 1
from pandas.core.arrays.numpy_ import PandasDtype
12 1
from .weighted_number import WeightedDouble  # pylint: disable=E0611
13

14

15 1
class WeightedSeriesDtype(ExtensionDtype):
    """
    Defines a custom type for a @see cl WeightedSeries.
    """

    # numpy dtype built from WeightedDouble; property ``kind`` reads it.
    dtype = numpy.dtype(WeightedDouble)

    def __str__(self):
        """
        Returns the dtype name, see property ``name``.
        """
        return self.name

    @property
    def type(self):
        # type: () -> type
        """The scalar type for the array, e.g. ``int``
        It's expected ``ExtensionArray[item]`` returns an instance
        of ``ExtensionDtype.type`` for scalar ``item``.
        """
        # NOTE(review): this returns the dtype class itself, not the scalar
        # class WeightedDouble the docstring suggests. Left unchanged because
        # callers may depend on it -- confirm before modifying.
        return WeightedSeriesDtype

    def __repr__(self):
        "Returns the constant string ``WeightedSeriesDtype()``."
        return "WeightedSeriesDtype()"

    @property
    def kind(self):
        # type () -> str
        """
        A character code (one of 'biufcmMOSUV'), read from the
        class attribute ``dtype`` (``numpy.dtype(WeightedDouble)``).

        See Also
        --------
        numpy.dtype.kind
        """
        return WeightedSeriesDtype.dtype.kind

    @property
    def name(self):
        """
        A string identifying the data type.
        Will be used for display in, e.g. ``Series.dtype``
        """
        return "WeightedDouble"

    @classmethod
    def construct_from_string(cls, string):
        """
        Attempt to construct a @see cl WeightedDouble from a string.

        Accepted forms are ``WD`` followed by an optional list of one
        or two comma separated numbers, possibly wrapped in parentheses:
        ``"WD"`` or ``"WD()"`` (value is nan), ``"WD(1.5)"``,
        ``"WD(1.5, 2)"``.

        Parameters
        ----------
        string : str

        Returns
        -------
        self : instance of 'WeightedDouble'

        Raises
        ------
        TypeError
            If a class cannot be constructed from this 'string':
            not a string, wrong prefix, more than two numbers or
            a number which cannot be converted into a float.
        """
        # A non-string used to fail with AttributeError on ``startswith``;
        # the documented contract is TypeError.
        if not isinstance(string, str) or not string.startswith("WD"):
            raise TypeError("Unable to parse '{0}'".format(string))
        parts = string[2:].strip('() ').split(",")
        if len(parts) > 2:
            raise TypeError("Unable to parse '{0}'".format(string))
        if parts == ['']:
            # No number at all: the value is missing.
            return WeightedDouble(numpy.nan)
        try:
            numbers = [float(p) for p in parts]
        except ValueError as exc:
            # float() raises ValueError; convert it so that the only
            # exception leaving this method is the documented TypeError.
            raise TypeError(
                "Unable to parse '{0}'".format(string)) from exc
        # One number -> WeightedDouble(value),
        # two numbers -> WeightedDouble(value, weight).
        return WeightedDouble(*numbers)

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
            @see cl WeightedArray
        """
        return WeightedArray
108

109

110 1
# Hands the dtype class to pandas' extension registry
# (``pandas.api.extensions.register_extension_dtype``), executed at import.
register_extension_dtype(WeightedSeriesDtype)
111

112

113 1
@register_series_accessor("wdouble")
class WeightedDoubleAccessor:
    """
    Extends :epkg:`pandas` with new accessor for
    series based on @see cl WeightedDouble.
    The accessor is registered under the name ``wdouble``.
    """

    def __init__(self, obj):
        # *obj* is the wrapped container, kept as is.
        self.obj = obj

    def __len__(self):
        "Returns the length of the wrapped object."
        return len(self.obj)

    @property
    def value(self):
        "Returns the values (attribute ``value`` of every element)."
        return self._new_series(lambda s: s.value)

    @property
    def weight(self):
        "Returns the weights (attribute ``weight`` of every element)."
        return self._new_series(lambda s: s.weight)

    def isnan(self):
        "Tells if values are missing (``numpy.isnan`` on every value)."
        return self._new_series(lambda s: numpy.isnan(s.value))

    def _new_series(self, fct):
        """
        Applies *fct* to every element of the wrapped object and
        returns the results in a @see cl WeightedArray built with
        the same index and ``dtype=float``.

        :param fct: function taking one element
        :return: @see cl WeightedArray
        :raises ValueError: if the wrapped object is empty
        :raises TypeError: if the wrapped object is not a
            @see cl WeightedArray and its first element is not
            a @see cl WeightedDouble
        """
        if len(self) == 0:  # pragma no cover
            raise ValueError("Series cannot be empty.")
        weighted = isinstance(self.obj, WeightedArray)
        first = None
        if not weighted:
            # ``self.obj[0]`` is a label lookup on a Series and fails when
            # the index has no label 0 (e.g. after filtering rows), so the
            # first element is fetched by position whenever possible.
            locator = getattr(self.obj, 'iloc', None)
            first = self.obj[0] if locator is None else locator[0]
            weighted = isinstance(first, WeightedDouble)
        if weighted:
            return WeightedArray([fct(s) for s in self.obj],
                                 index=self.obj.index, dtype=float)
        raise TypeError(  # pragma no cover
            "Unexpected type, array is '{0}', first element is '{1}'".format(
                type(self.obj), type(first)))
149

150

151 1
class WeightedSeries(Series):
    """
    Implements a series holding @see WeightedDouble numbers.
    Does not add anything to *Series* except a default *dtype*
    and a fallback to @see cl WeightedDoubleAccessor for
    unknown attributes.
    """

    def __init__(self, *args, **kwargs):
        """
        Overwrites the constructor to force
        dtype to be @see cl WeightedSeriesDtype
        unless the caller gives parameter *dtype*.
        """
        dt = kwargs.pop('dtype', WeightedSeriesDtype())
        Series.__init__(self, *args, dtype=dt, **kwargs)

    def __getattr__(self, attr):
        """
        Tries first to see if class *Series* has this attribute
        and then tries @see cl WeightedDoubleAccessor.
        Attribute ``_ndarray`` returns ``numpy.array(self)``.

        :param attr: attribute name
        :return: the attribute value
        :raises AttributeError: if the attribute cannot be found
        """
        if hasattr(Series, attr):
            # This method only runs after the regular lookup failed. If the
            # class owns the attribute, its descriptor raised AttributeError
            # (e.g. an accessor rejecting the data). ``getattr(self, attr)``
            # would come back here and recurse until RecursionError;
            # object.__getattribute__ repeats the regular lookup once and
            # lets the original AttributeError propagate.
            return object.__getattribute__(self, attr)
        if hasattr(WeightedDoubleAccessor, attr):
            accessor = WeightedDoubleAccessor(self)
            return getattr(accessor, attr)
        if attr == '_ndarray':
            return numpy.array(self)
        raise AttributeError("Unkown attribute '{0}'".format(attr))
178

179

180 1
class WeightedArray(PandasArray):
    """
    Implements an array holding @see WeightedDouble numbers.
    This leverages a new concept introduced in :epkg:`pandas` 0.24
    implemented in class :epkg:`PandasArray`. It can be used
    to define a new column type in a dataframe.
    """

    def __init__(self, *args, **kwargs):
        """
        Overwrites the constructor to accept three kinds of inputs:

        * keyword *data* holding a @see cl WeightedSeries,
        * a single positional :epkg:`numpy` array, used as is,
        * anything else, forwarded to the constructor of
          @see cl WeightedSeries.

        In every case the parent constructor receives
        a :epkg:`numpy` array.
        """
        data = kwargs.get("data")
        if isinstance(data, WeightedSeries):
            # This branch used to return without calling the parent
            # constructor, which left the instance uninitialised.
            values = data._ndarray
        elif len(args) == 1 and isinstance(args[0], numpy.ndarray):
            values = args[0]
        else:
            values = WeightedSeries(*args, **kwargs)._ndarray
        PandasArray.__init__(self, values)

    @property
    def dtype(self):
        """
        Returns ``self._dtype``.
        """
        # NOTE(review): ``_dtype`` is not assigned in this class, presumably
        # set by the PandasArray constructor -- confirm.
        return self._dtype

    @property
    def name(self):
        """
        Returns the constant string ``"WeightedArray"``.
        """
        return "WeightedArray"

    def __add__(self, other):
        "Element-wise addition, stops at the shortest operand (``zip``)."
        return WeightedArray([a + b for a, b in zip(self, other)])

    def __sub__(self, other):
        "Element-wise subtraction, stops at the shortest operand (``zip``)."
        return WeightedArray([a - b for a, b in zip(self, other)])

    def __mul__(self, other):
        "Element-wise multiplication, stops at the shortest operand (``zip``)."
        return WeightedArray([a * b for a, b in zip(self, other)])

    def __truediv__(self, other):
        "Element-wise division, stops at the shortest operand (``zip``)."
        return WeightedArray([a / b for a, b in zip(self, other)])

    def isna(self):
        "Returns a :epkg:`numpy` array telling which values are nan."
        return numpy.array([numpy.isnan(s.value) for s in self])

    @classmethod
    def _concat_same_type(cls, to_concat):
        # type: (Sequence[ExtensionArray]) -> ExtensionArray
        """Concatenate multiple array

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        @see cl WeightedArray
        """
        for s in to_concat:
            # NOTE(review): ``object`` in the tuple makes this test true for
            # any dtype, so the error below cannot be raised. Kept unchanged:
            # property ``dtype`` returns ``self._dtype`` and a stricter test
            # could reject arrays which are accepted today.
            if not isinstance(s.dtype, (WeightedSeriesDtype, object)):
                raise TypeError(  # pragma no cover
                    "All arrays must be of type WeightedSeriesDtype not {}-{}".format(
                        type(s), type(s.dtype)))
        return WeightedArray(list(chain(*to_concat)))

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "PandasArray":
        """
        Builds an array from a sequence of scalars.

        :param scalars: sequence converted with ``numpy.asarray``
        :param dtype: dtype given to ``numpy.asarray``, a *PandasDtype*
            is replaced by its attribute ``_dtype``
        :param copy: copies the data if the conversion returned
            *scalars* itself
        :return: instance of *cls*
        """
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        result = numpy.asarray(scalars, dtype=dtype)
        if copy and result is scalars:
            # asarray returned the input unchanged, copy requested.
            result = result.copy()
        return cls(result)

Read our documentation on viewing source code .

Loading