1
# -*- coding: utf-8 -*-
2 84
"""
3
This module offers a parser for ISO-8601 strings
4

5
It is intended to support all valid date, time and datetime formats per the
6
ISO-8601 specification.
7

8
.. versionadded:: 2.7.0
9
"""
10 84
from datetime import datetime, timedelta, time, date
11 84
import calendar
12 84
from dateutil import tz
13

14 84
from functools import wraps
15

16 84
import re
17 84
import six
18

19 84
# Public names exported by ``from <module> import *``.
__all__ = ['isoparse', 'isoparser']
20

21

22 84
def _takes_ascii(f):
    """
    Decorate a parser method so its string argument is normalised first.

    Before the wrapped method ``f`` is called, its ``str_in`` argument is
    processed as follows:

    - if it has a ``read`` attribute, it is treated as a stream and replaced
      by the result of calling ``read()``;
    - if the resulting value is a text (unicode) string, it is encoded to
      ASCII bytes.

    Any other value is handed to ``f`` unchanged.

    :raises ValueError:
        If a text string contains characters that cannot be encoded as ASCII.
    """
    @wraps(f)
    def ascii_wrapper(self, str_in, *args, **kwargs):
        # Streams are consumed in full; anything without ``read`` is used
        # as-is.
        try:
            read = str_in.read
        except AttributeError:
            pass
        else:
            str_in = read()

        # ISO-8601 only covers ASCII, so text is converted to bytes here and
        # the parsing code works on byte strings.
        if isinstance(str_in, six.text_type):
            try:
                str_in = str_in.encode('ascii')
            except UnicodeEncodeError as e:
                msg = 'ISO-8601 strings should contain only ASCII characters'
                six.raise_from(ValueError(msg), e)

        return f(self, str_in, *args, **kwargs)

    return ascii_wrapper
40

41

42 84
class isoparser(object):
43 84
    def __init__(self, sep=None):
44
        """
45
        :param sep:
46
            A single character that separates date and time portions. If
47
            ``None``, the parser will accept any single character.
48
            For strict ISO-8601 adherence, pass ``'T'``.
49
        """
50 84
        if sep is not None:
51 84
            if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'):
52 84
                raise ValueError('Separator must be a single, non-numeric ' +
53
                                 'ASCII character')
54

55 84
            sep = sep.encode('ascii')
56

57 84
        self._sep = sep
58

59 84
    @_takes_ascii
    def isoparse(self, dt_str):
        """
        Parse an ISO-8601 datetime string into a :class:`datetime.datetime`.

        An ISO-8601 datetime string consists of a date portion, followed
        optionally by a time portion - the date and time portions are separated
        by a single character separator, which is ``T`` in the official
        standard. Incomplete date formats (such as ``YYYY-MM``) may *not* be
        combined with a time portion.

        Supported date formats are:

        Common:

        - ``YYYY``
        - ``YYYY-MM`` (the dash is required; ``YYYYMM`` is rejected)
        - ``YYYY-MM-DD`` or ``YYYYMMDD``

        Uncommon:

        - ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to 1, Monday)
        - ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day
        - ``YYYY-DDD`` or ``YYYYDDD`` - ordinal day of the year

        The ISO week and day numbering follows the same logic as
        :func:`datetime.date.isocalendar`.

        Supported time formats are:

        - ``hh``
        - ``hh:mm`` or ``hhmm``
        - ``hh:mm:ss`` or ``hhmmss``
        - ``hh:mm:ss.ssssss`` (sub-second digits beyond the sixth are
          truncated)

        Midnight is a special case for `hh`, as the standard supports both
        00:00 and 24:00 as a representation; ``24:00`` is returned as 00:00
        of the following day. The decimal separator can be either a dot or a
        comma.

        .. caution::

            Support for fractional components other than seconds is part of the
            ISO-8601 standard, but is not currently implemented in this parser.

        Supported time zone offset formats are:

        - `Z` (UTC)
        - `±HH:MM`
        - `±HHMM`
        - `±HH`

        Offsets will be represented as :class:`dateutil.tz.tzoffset` objects,
        with the exception of UTC, which will be represented as
        :class:`dateutil.tz.tzutc`. Time zone offsets equivalent to UTC (such
        as `+00:00`) will also be represented as :class:`dateutil.tz.tzutc`.

        :param dt_str:
            A string or stream containing only an ISO-8601 datetime string

        :return:
            Returns a :class:`datetime.datetime` representing the string.
            Unspecified components default to their lowest value.

        :raises ValueError:
            If the string cannot be parsed, or if a fixed separator was
            configured and the character after the date does not match it.

        .. warning::

            As of version 2.7.0, the strictness of the parser should not be
            considered a stable part of the contract. Any valid ISO-8601 string
            that parses correctly with the default settings will continue to
            parse correctly in future versions, but invalid strings that
            currently fail (e.g. ``2017-01-01T00:00+00:00:00``) are not
            guaranteed to continue failing in future versions if they encode
            a valid date.

        .. versionadded:: 2.7.0
        """
        components, pos = self._parse_isodate(dt_str)

        if pos < len(dt_str):
            # Exactly one separator character sits between date and time.
            sep_ok = self._sep is None or dt_str[pos:pos + 1] == self._sep
            if not sep_ok:
                raise ValueError('String contains unknown ISO components')
            components += self._parse_isotime(dt_str[pos + 1:])

        # An hour of 24 means midnight at the end of the given day: build the
        # datetime at 00:00 and move it forward one day.
        ends_at_24 = len(components) > 3 and components[3] == 24
        if ends_at_24:
            components[3] = 0
            return datetime(*components) + timedelta(days=1)

        return datetime(*components)
147

148 84
    @_takes_ascii
    def parse_isodate(self, datestr):
        """
        Parse the date portion of an ISO string.

        :param datestr:
            The string portion of an ISO string, without a separator

        :return:
            Returns a :class:`datetime.date` object

        :raises ValueError:
            If the string is not a supported ISO-8601 date, or if anything
            follows the date portion.
        """
        components, pos = self._parse_isodate(datestr)
        if pos < len(datestr):
            # ``_takes_ascii`` hands this method a byte string; decode it so
            # the message shows the text rather than a ``b'...'`` repr.
            # ``'replace'`` keeps message construction from failing on
            # non-ASCII bytes.
            shown = datestr.decode('ascii', 'replace')
            raise ValueError('String contains unknown ISO ' +
                             'components: {!r}'.format(shown))
        return date(*components)
164

165 84
    @_takes_ascii
    def parse_isotime(self, timestr):
        """
        Parse the time portion of an ISO string.

        :param timestr:
            The time portion of an ISO string, without a separator

        :return:
            Returns a :class:`datetime.time` object. An hour of 24 is
            returned as hour 0.
        """
        fields = self._parse_isotime(timestr)
        # A bare time has no date to roll over, so 24:00 becomes 00:00.
        hour = 0 if fields[0] == 24 else fields[0]
        return time(hour, *fields[1:])
180

181 84
    @_takes_ascii
    def parse_tzstr(self, tzstr, zero_as_utc=True):
        """
        Parse a valid ISO time zone string.

        See :func:`isoparser.isoparse` for details on supported formats.

        :param tzstr:
            A string representing an ISO time zone offset

        :param zero_as_utc:
            Whether to return :class:`dateutil.tz.tzutc` for zero-offset zones

        :return:
            Returns :class:`dateutil.tz.tzoffset` for offsets and
            :class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is
            specified) offsets equivalent to UTC.
        """
        tzinfo = self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
        return tzinfo
200

201
    # Constants
202 84
    _DATE_SEP = b'-'
203 84
    _TIME_SEP = b':'
204 84
    _FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')
205

206 84
    def _parse_isodate(self, dt_str):
207 84
        try:
208 84
            return self._parse_isodate_common(dt_str)
209 84
        except ValueError:
210 84
            return self._parse_isodate_uncommon(dt_str)
211

212 84
    def _parse_isodate_common(self, dt_str):
213 84
        len_str = len(dt_str)
214 84
        components = [1, 1, 1]
215

216 84
        if len_str < 4:
217 84
            raise ValueError('ISO string too short')
218

219
        # Year
220 84
        components[0] = int(dt_str[0:4])
221 84
        pos = 4
222 84
        if pos >= len_str:
223 84
            return components, pos
224

225 84
        has_sep = dt_str[pos:pos + 1] == self._DATE_SEP
226 84
        if has_sep:
227 84
            pos += 1
228

229
        # Month
230 84
        if len_str - pos < 2:
231 84
            raise ValueError('Invalid common month')
232

233 84
        components[1] = int(dt_str[pos:pos + 2])
234 84
        pos += 2
235

236 84
        if pos >= len_str:
237 84
            if has_sep:
238 84
                return components, pos
239
            else:
240 84
                raise ValueError('Invalid ISO format')
241

242 84
        if has_sep:
243 84
            if dt_str[pos:pos + 1] != self._DATE_SEP:
244 84
                raise ValueError('Invalid separator in ISO string')
245 84
            pos += 1
246

247
        # Day
248 84
        if len_str - pos < 2:
249 84
            raise ValueError('Invalid common day')
250 84
        components[2] = int(dt_str[pos:pos + 2])
251 84
        return components, pos + 2
252

253 84
    def _parse_isodate_uncommon(self, dt_str):
254 84
        if len(dt_str) < 4:
255 84
            raise ValueError('ISO string too short')
256

257
        # All ISO formats start with the year
258 84
        year = int(dt_str[0:4])
259

260 84
        has_sep = dt_str[4:5] == self._DATE_SEP
261

262 84
        pos = 4 + has_sep       # Skip '-' if it's there
263 84
        if dt_str[pos:pos + 1] == b'W':
264
            # YYYY-?Www-?D?
265 84
            pos += 1
266 84
            weekno = int(dt_str[pos:pos + 2])
267 84
            pos += 2
268

269 84
            dayno = 1
270 84
            if len(dt_str) > pos:
271 84
                if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep:
272 84
                    raise ValueError('Inconsistent use of dash separator')
273

274 84
                pos += has_sep
275

276 84
                dayno = int(dt_str[pos:pos + 1])
277 84
                pos += 1
278

279 84
            base_date = self._calculate_weekdate(year, weekno, dayno)
280
        else:
281
            # YYYYDDD or YYYY-DDD
282 84
            if len(dt_str) - pos < 3:
283 84
                raise ValueError('Invalid ordinal day')
284

285 84
            ordinal_day = int(dt_str[pos:pos + 3])
286 84
            pos += 3
287

288 84
            if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)):
289 84
                raise ValueError('Invalid ordinal day' +
290
                                 ' {} for year {}'.format(ordinal_day, year))
291

292 84
            base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1)
293

294 84
        components = [base_date.year, base_date.month, base_date.day]
295 84
        return components, pos
296

297 84
    def _calculate_weekdate(self, year, week, day):
298
        """
299
        Calculate the day of corresponding to the ISO year-week-day calendar.
300

301
        This function is effectively the inverse of
302
        :func:`datetime.date.isocalendar`.
303

304
        :param year:
305
            The year in the ISO calendar
306

307
        :param week:
308
            The week in the ISO calendar - range is [1, 53]
309

310
        :param day:
311
            The day in the ISO calendar - range is [1 (MON), 7 (SUN)]
312

313
        :return:
314
            Returns a :class:`datetime.date`
315
        """
316 84
        if not 0 < week < 54:
317 84
            raise ValueError('Invalid week: {}'.format(week))
318

319 84
        if not 0 < day < 8:     # Range is 1-7
320 84
            raise ValueError('Invalid weekday: {}'.format(day))
321

322
        # Get week 1 for the specific year:
323 84
        jan_4 = date(year, 1, 4)   # Week 1 always has January 4th in it
324 84
        week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1)
325

326
        # Now add the specific number of weeks and days to get what we want
327 84
        week_offset = (week - 1) * 7 + (day - 1)
328 84
        return week_1 + timedelta(days=week_offset)
329

330 84
    def _parse_isotime(self, timestr):
331 84
        len_str = len(timestr)
332 84
        components = [0, 0, 0, 0, None]
333 84
        pos = 0
334 84
        comp = -1
335

336 84
        if len(timestr) < 2:
337 84
            raise ValueError('ISO time too short')
338

339 84
        has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
340

341 84
        while pos < len_str and comp < 5:
342 84
            comp += 1
343

344 84
            if timestr[pos:pos + 1] in b'-+Zz':
345
                # Detect time zone boundary
346 84
                components[-1] = self._parse_tzstr(timestr[pos:])
347 84
                pos = len_str
348 84
                break
349

350 84
            if comp < 3:
351
                # Hour, minute, second
352 84
                components[comp] = int(timestr[pos:pos + 2])
353 84
                pos += 2
354 84
                if (has_sep and pos < len_str and
355
                        timestr[pos:pos + 1] == self._TIME_SEP):
356 84
                    pos += 1
357

358 84
            if comp == 3:
359
                # Fraction of a second
360 84
                frac = self._FRACTION_REGEX.match(timestr[pos:])
361 84
                if not frac:
362 84
                    continue
363

364 84
                us_str = frac.group(1)[:6]  # Truncate to microseconds
365 84
                components[comp] = int(us_str) * 10**(6 - len(us_str))
366 84
                pos += len(frac.group())
367

368 84
        if pos < len_str:
369 84
            raise ValueError('Unused components in ISO string')
370

371 84
        if components[0] == 24:
372
            # Standard supports 00:00 and 24:00 as representations of midnight
373 84
            if any(component != 0 for component in components[1:4]):
374 84
                raise ValueError('Hour may only be 24 at 24:00:00.000')
375

376 84
        return components
377

378 84
    def _parse_tzstr(self, tzstr, zero_as_utc=True):
379 84
        if tzstr == b'Z' or tzstr == b'z':
380 84
            return tz.UTC
381

382 84
        if len(tzstr) not in {3, 5, 6}:
383 84
            raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
384

385 84
        if tzstr[0:1] == b'-':
386 84
            mult = -1
387 84
        elif tzstr[0:1] == b'+':
388 84
            mult = 1
389
        else:
390 84
            raise ValueError('Time zone offset requires sign')
391

392 84
        hours = int(tzstr[1:3])
393 84
        if len(tzstr) == 3:
394 84
            minutes = 0
395
        else:
396 84
            minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
397

398 84
        if zero_as_utc and hours == 0 and minutes == 0:
399 84
            return tz.UTC
400
        else:
401 84
            if minutes > 59:
402 84
                raise ValueError('Invalid minutes in time zone offset')
403

404 84
            if hours > 23:
405 84
                raise ValueError('Invalid hours in time zone offset')
406

407 84
            return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60)
408

409

410 84
# Shared parser built with the default settings (any single character is
# accepted as the date/time separator).
DEFAULT_ISOPARSER = isoparser()

# Module-level shortcut bound to the shared parser's ``isoparse`` method.
isoparse = DEFAULT_ISOPARSER.isoparse

Read our documentation on viewing source code .

Loading