1
# -*- coding: utf-8 -*-
2 20
"""
3
This module offers a parser for ISO-8601 strings
4

5
It is intended to support all valid date, time and datetime formats per the
6
ISO-8601 specification.
7

8
.. versionadded:: 2.7.0
9
"""
10 20
from datetime import datetime, timedelta, time, date
11 20
import calendar
12 20
from dateutil import tz
13

14 20
from functools import wraps
15

16 20
import re
17 20
import six
18

19 20
__all__ = ["isoparse", "isoparser"]
20

21

22 20
def _takes_ascii(f):
23 20
    @wraps(f)
24 2
    def func(self, str_in, *args, **kwargs):
25
        # If it's a stream, read the whole thing
26 20
        str_in = getattr(str_in, 'read', lambda: str_in)()
27

28
        # If it's unicode, turn it into bytes, since ISO-8601 only covers ASCII
29 20
        if isinstance(str_in, six.text_type):
30
            # ASCII is the same in UTF-8
31 20
            try:
32 20
                str_in = str_in.encode('ascii')
33 20
            except UnicodeEncodeError as e:
34 20
                msg = 'ISO-8601 strings should contain only ASCII characters'
35 20
                six.raise_from(ValueError(msg), e)
36

37 20
        return f(self, str_in, *args, **kwargs)
38

39 20
    return func
40

41

42 20
class isoparser(object):
    """
    Parser for ISO-8601 date, time and datetime strings.

    The public ``parse_*`` / ``isoparse`` methods are wrapped with
    ``_takes_ascii``, so the private ``_parse_*`` helpers always work on byte
    strings. The helpers compare one-byte *slices* (``s[i:i + 1]``) rather
    than indexing, because indexing a byte string gives different types on
    Python 2 and Python 3.
    """

    def __init__(self, sep=None):
        """
        :param sep:
            A single character that separates date and time portions. If
            ``None``, the parser will accept any single character.
            For strict ISO-8601 adherence, pass ``'T'``.

        :raises ValueError:
            If ``sep`` is not a single, non-numeric ASCII character
        """
        if sep is not None:
            if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'):
                raise ValueError('Separator must be a single, non-numeric ' +
                                 'ASCII character')

            # Stored as bytes so it can be compared against the
            # ASCII-encoded input directly.
            sep = sep.encode('ascii')

        self._sep = sep

    @_takes_ascii
    def isoparse(self, dt_str):
        """
        Parse an ISO-8601 datetime string into a :class:`datetime.datetime`.

        An ISO-8601 datetime string consists of a date portion, followed
        optionally by a time portion - the date and time portions are separated
        by a single character separator, which is ``T`` in the official
        standard. Incomplete date formats (such as ``YYYY-MM``) may *not* be
        combined with a time portion.

        Supported date formats are:

        Common:

        - ``YYYY``
        - ``YYYY-MM`` or ``YYYYMM``
        - ``YYYY-MM-DD`` or ``YYYYMMDD``

        Uncommon:

        - ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to 1, Monday)
        - ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day
        - ``YYYY-DDD`` or ``YYYYDDD`` - ordinal day of the year

        The ISO week and day numbering follows the same logic as
        :func:`datetime.date.isocalendar`.

        Supported time formats are:

        - ``hh``
        - ``hh:mm`` or ``hhmm``
        - ``hh:mm:ss`` or ``hhmmss``
        - ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)

        Midnight is a special case for ``hh``, as the standard supports both
        00:00 and 24:00 as a representation. The decimal separator can be
        either a dot or a comma.


        .. caution::

            Support for fractional components other than seconds is part of the
            ISO-8601 standard, but is not currently implemented in this parser.

        Supported time zone offset formats are:

        - ``Z`` (UTC)
        - ``±HH:MM``
        - ``±HHMM``
        - ``±HH``

        Offsets will be represented as :class:`dateutil.tz.tzoffset` objects,
        with the exception of UTC, which will be represented as
        :class:`dateutil.tz.tzutc`. Time zone offsets equivalent to UTC (such
        as ``+00:00``) will also be represented as :class:`dateutil.tz.tzutc`.

        :param dt_str:
            A string or stream containing only an ISO-8601 datetime string

        :return:
            Returns a :class:`datetime.datetime` representing the string.
            Unspecified components default to their lowest value.

        :raises ValueError:
            If the string cannot be parsed as an ISO-8601 datetime

        .. warning::

            As of version 2.7.0, the strictness of the parser should not be
            considered a stable part of the contract. Any valid ISO-8601 string
            that parses correctly with the default settings will continue to
            parse correctly in future versions, but invalid strings that
            currently fail (e.g. ``2017-01-01T00:00+00:00:00``) are not
            guaranteed to continue failing in future versions if they encode
            a valid date.

        .. versionadded:: 2.7.0
        """
        components, pos = self._parse_isodate(dt_str)

        if len(dt_str) > pos:
            # Something follows the date: it must be a separator (any single
            # character when no explicit separator was configured) and then
            # the time portion.
            if self._sep is None or dt_str[pos:pos + 1] == self._sep:
                components += self._parse_isotime(dt_str[pos + 1:])
            else:
                raise ValueError('String contains unknown ISO components')

        if len(components) > 3 and components[3] == 24:
            # 24:00 means midnight at the *end* of the given day, i.e.
            # 00:00 on the following day.
            components[3] = 0
            return datetime(*components) + timedelta(days=1)

        return datetime(*components)

    @_takes_ascii
    def parse_isodate(self, datestr):
        """
        Parse the date portion of an ISO string.

        :param datestr:
            The date portion of an ISO string, without a separator

        :return:
            Returns a :class:`datetime.date` object

        :raises ValueError:
            If the string is not a supported ISO date, or if anything is left
            over after the date
        """
        components, pos = self._parse_isodate(datestr)
        if pos < len(datestr):
            raise ValueError('String contains unknown ISO ' +
                             'components: {}'.format(datestr))
        return date(*components)

    @_takes_ascii
    def parse_isotime(self, timestr):
        """
        Parse the time portion of an ISO string.

        :param timestr:
            The time portion of an ISO string, without a separator

        :return:
            Returns a :class:`datetime.time` object
        """
        components = self._parse_isotime(timestr)
        if components[0] == 24:
            # A bare time has no date to roll over, so 24:00 maps to 00:00
            components[0] = 0
        return time(*components)

    @_takes_ascii
    def parse_tzstr(self, tzstr, zero_as_utc=True):
        """
        Parse a valid ISO time zone string.

        See :func:`isoparser.isoparse` for details on supported formats.

        :param tzstr:
            A string representing an ISO time zone offset

        :param zero_as_utc:
            Whether to return :class:`dateutil.tz.tzutc` for zero-offset zones

        :return:
            Returns :class:`dateutil.tz.tzoffset` for offsets and
            :class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is
            specified) offsets equivalent to UTC.

        :raises ValueError:
            If the string is not a supported time zone designator
        """
        return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)

    # Constants
    _DATE_SEP = b'-'
    _TIME_SEP = b':'
    _FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')

    def _parse_isodate(self, dt_str):
        """
        Parse the leading date of ``dt_str``.

        The common calendar formats are tried first; if they raise
        ``ValueError`` the week/ordinal formats are tried instead.

        :return:
            A tuple ``([year, month, day], pos)`` where ``pos`` is the index
            just past the parsed date
        """
        try:
            return self._parse_isodate_common(dt_str)
        except ValueError:
            return self._parse_isodate_uncommon(dt_str)

    def _parse_isodate_common(self, dt_str):
        """
        Parse ``YYYY``, ``YYYY-MM``, ``YYYYMMDD`` or ``YYYY-MM-DD``.

        :return:
            A tuple ``([year, month, day], pos)``; missing month/day stay 1

        :raises ValueError:
            If the string does not match one of these formats
        """
        len_str = len(dt_str)
        components = [1, 1, 1]

        if len_str < 4:
            raise ValueError('ISO string too short')

        # Year
        components[0] = int(dt_str[0:4])
        pos = 4
        if pos >= len_str:
            return components, pos

        has_sep = dt_str[pos:pos + 1] == self._DATE_SEP
        if has_sep:
            pos += 1

        # Month
        if len_str - pos < 2:
            raise ValueError('Invalid common month')

        components[1] = int(dt_str[pos:pos + 2])
        pos += 2

        if pos >= len_str:
            # The string ends after the month: only the dashed form is
            # accepted here; an undashed six-digit string is rejected.
            if has_sep:
                return components, pos
            else:
                raise ValueError('Invalid ISO format')

        if has_sep:
            if dt_str[pos:pos + 1] != self._DATE_SEP:
                raise ValueError('Invalid separator in ISO string')
            pos += 1

        # Day
        if len_str - pos < 2:
            raise ValueError('Invalid common day')
        components[2] = int(dt_str[pos:pos + 2])
        return components, pos + 2

    def _parse_isodate_uncommon(self, dt_str):
        """
        Parse ISO week dates and ordinal dates.

        Handles ``YYYY-Www``, ``YYYYWww``, ``YYYY-Www-D``, ``YYYYWwwD``,
        ``YYYY-DDD`` and ``YYYYDDD``.

        :return:
            A tuple ``([year, month, day], pos)`` for the equivalent calendar
            date

        :raises ValueError:
            If the string does not match one of these formats or encodes an
            out-of-range week, weekday or ordinal day
        """
        if len(dt_str) < 4:
            raise ValueError('ISO string too short')

        # All ISO formats start with the year
        year = int(dt_str[0:4])

        has_sep = dt_str[4:5] == self._DATE_SEP

        pos = 4 + has_sep       # Skip '-' if it's there
        if dt_str[pos:pos + 1] == b'W':
            # YYYY-?Www-?D?
            pos += 1
            weekno = int(dt_str[pos:pos + 2])
            pos += 2

            # Without an explicit weekday the date is the Monday of the week
            dayno = 1
            if len(dt_str) > pos:
                # A dash before the weekday must appear if and only if a
                # dash followed the year.
                if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep:
                    raise ValueError('Inconsistent use of dash separator')

                pos += has_sep

                dayno = int(dt_str[pos:pos + 1])
                pos += 1

            base_date = self._calculate_weekdate(year, weekno, dayno)
        else:
            # YYYYDDD or YYYY-DDD
            if len(dt_str) - pos < 3:
                raise ValueError('Invalid ordinal day')

            ordinal_day = int(dt_str[pos:pos + 3])
            pos += 3

            if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)):
                raise ValueError('Invalid ordinal day' +
                                 ' {} for year {}'.format(ordinal_day, year))

            base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1)

        components = [base_date.year, base_date.month, base_date.day]
        return components, pos

    def _calculate_weekdate(self, year, week, day):
        """
        Calculate the day corresponding to the ISO year-week-day calendar.

        This function is effectively the inverse of
        :func:`datetime.date.isocalendar`.

        :param year:
            The year in the ISO calendar

        :param week:
            The week in the ISO calendar - range is [1, 53]

        :param day:
            The day in the ISO calendar - range is [1 (MON), 7 (SUN)]

        :return:
            Returns a :class:`datetime.date`

        :raises ValueError:
            If ``week`` or ``day`` is outside its range
        """
        if not 0 < week < 54:
            raise ValueError('Invalid week: {}'.format(week))

        if not 0 < day < 8:     # Range is 1-7
            raise ValueError('Invalid weekday: {}'.format(day))

        # Get week 1 for the specific year:
        jan_4 = date(year, 1, 4)   # Week 1 always has January 4th in it
        week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1)

        # Now add the specific number of weeks and days to get what we want
        week_offset = (week - 1) * 7 + (day - 1)
        return week_1 + timedelta(days=week_offset)

    def _parse_isotime(self, timestr):
        """
        Parse an ISO time, optionally followed by a time zone designator.

        :return:
            A list ``[hour, minute, second, microsecond, tzinfo]``; ``tzinfo``
            is ``None`` when no time zone is present. An hour of 24 is
            returned as-is (after checking the remaining fields are zero) so
            the caller can decide how to roll it over.

        :raises ValueError:
            If the string is too short, has trailing unparsed content, or
            uses hour 24 with non-zero minutes/seconds/microseconds
        """
        len_str = len(timestr)
        components = [0, 0, 0, 0, None]
        pos = 0
        comp = -1

        if len_str < 2:
            raise ValueError('ISO time too short')

        # A colon right after the hour selects the "extended" format
        has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP

        while pos < len_str and comp < 5:
            comp += 1

            if timestr[pos:pos + 1] in b'-+Zz':
                # Detect time zone boundary
                components[-1] = self._parse_tzstr(timestr[pos:])
                pos = len_str
                break

            if comp < 3:
                # Hour, minute, second
                components[comp] = int(timestr[pos:pos + 2])
                pos += 2
                if (has_sep and pos < len_str and
                        timestr[pos:pos + 1] == self._TIME_SEP):
                    pos += 1

            if comp == 3:
                # Fraction of a second
                frac = self._FRACTION_REGEX.match(timestr[pos:])
                if not frac:
                    continue

                us_str = frac.group(1)[:6]  # Truncate to microseconds
                # Right-pad to 6 digits, e.g. b'5' -> 500000 microseconds
                components[comp] = int(us_str) * 10**(6 - len(us_str))
                pos += len(frac.group())

        if pos < len_str:
            raise ValueError('Unused components in ISO string')

        if components[0] == 24:
            # Standard supports 00:00 and 24:00 as representations of midnight
            if any(component != 0 for component in components[1:4]):
                raise ValueError('Hour may only be 24 at 24:00:00.000')

        return components

    def _parse_tzstr(self, tzstr, zero_as_utc=True):
        """
        Parse a time zone designator: ``Z``, ``±HH``, ``±HHMM`` or ``±HH:MM``.

        :param tzstr:
            The designator as a byte string

        :param zero_as_utc:
            If true, a zero offset is returned as ``tz.UTC`` rather than as
            a ``tz.tzoffset``

        :return:
            ``tz.UTC`` or a ``tz.tzoffset`` with the offset in seconds

        :raises ValueError:
            If the designator is malformed or the offset is out of range
        """
        if tzstr == b'Z' or tzstr == b'z':
            return tz.UTC

        if len(tzstr) not in {3, 5, 6}:
            raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')

        if tzstr[0:1] == b'-':
            mult = -1
        elif tzstr[0:1] == b'+':
            mult = 1
        else:
            raise ValueError('Time zone offset requires sign')

        hours = int(tzstr[1:3])
        if len(tzstr) == 3:
            minutes = 0
        else:
            has_colon = tzstr[3:4] == self._TIME_SEP
            # The colon and the length must agree: ``±HHMM`` is exactly five
            # characters and ``±HH:MM`` exactly six. Without this check,
            # truncated or padded forms such as ``+01:0`` or ``+01000``
            # would silently parse as +01:00.
            if has_colon != (len(tzstr) == 6):
                raise ValueError('Invalid time zone offset format')

            minutes = int(tzstr[(4 if has_colon else 3):])

        if zero_as_utc and hours == 0 and minutes == 0:
            return tz.UTC
        else:
            if minutes > 59:
                raise ValueError('Invalid minutes in time zone offset')

            if hours > 23:
                raise ValueError('Invalid hours in time zone offset')

            return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60)
408

409

410 20
# Module-level parser built with the default settings (no fixed separator,
# so any single character is accepted between the date and time portions),
# plus a convenience alias so callers can invoke ``isoparse(...)`` directly.
DEFAULT_ISOPARSER = isoparser()
isoparse = DEFAULT_ISOPARSER.isoparse

Read our documentation on viewing source code .

Loading