Skip to content
Open

V3.0 #267

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
3431f12
Added simple logging in nlp to identify which expressions produce a l…
idpaterson Sep 10, 2016
cc3663d
Added failing tests for nlp taken directly from simple date time test…
idpaterson Sep 9, 2016
c0a88d0
Removed transformations to remove quotes and periods in nlp and parse
idpaterson Sep 10, 2016
28ea99b
Allow nlp to find P.M. and A.M. formatted meridians
idpaterson Sep 10, 2016
ab951d5
Allow standard characters between nlp prefix and date expression.
idpaterson Sep 10, 2016
43521cb
Allow months to be abbreviated with a period
idpaterson Sep 10, 2016
02f35aa
Allow dates to begin with a word boundary rather than a space.
idpaterson Sep 10, 2016
e7ebb05
Merge pull request #195 from idpaterson/pull-requests/obsolete-transf…
idpaterson Sep 27, 2016
81e0dac
allow day start hour to be configurable - by default 9 as now
Nov 9, 2016
68e3bfe
added parameter specs
Nov 9, 2016
eb13b63
Merge pull request #203 from nishantd/v3.0
bear Nov 14, 2016
4af6b6a
Fixed code formatting that was causing pycodestyle 2.2.0 to fail unit…
idpaterson Nov 27, 2016
5b4fd8e
Merge pull request #204 from idpaterson/pull-requests/tox-pycodestyle
bear Nov 28, 2016
369a760
Merge pull request #194 from idpaterson/pull-requests/nlp-logging
bear Nov 19, 2019
165bbe5
Merge branch 'master' of github.com:bear/parsedatetime into v3.0
bear Oct 9, 2021
bdcb0c6
Removed the setting of DEFAULT_DAY_START_HOUR as it is
bear Oct 11, 2021
7e45f77
Removed explicitly setting "day start hour" in tests except in
bear Oct 11, 2021
7d67829
Merge branch 'master' of github.com:bear/parsedatetime into v3.0
bear Oct 11, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 52 additions & 28 deletions parsedatetime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

Requires Python 2.7 or later
"""

from __future__ import with_statement, absolute_import, unicode_literals

import re
Expand All @@ -44,7 +43,7 @@
__email__ = '[email protected]'
__copyright__ = 'Copyright (c) 2017-2021 Mike Taylor'
__license__ = 'Apache License 2.0'
__version__ = '2.7'
__version__ = '3.0'
__url__ = 'https://github.com/bear/parsedatetime'
__download_url__ = 'https://pypi.python.org/pypi/parsedatetime'
__description__ = 'Parse human-readable date/time text.'
Expand Down Expand Up @@ -236,7 +235,8 @@ class Calendar(object):
The text can either be 'normal' date values or it can be human readable.
"""

def __init__(self, constants=None, version=VERSION_CONTEXT_STYLE):
def __init__(self, constants=None, version=VERSION_CONTEXT_STYLE,
day_start_hour=None):
"""
Default constructor for the L{Calendar} class.

Expand All @@ -246,6 +246,9 @@ def __init__(self, constants=None, version=VERSION_CONTEXT_STYLE):
@param version: Default style version of current Calendar instance.
Valid value can be 1 (L{VERSION_FLAG_STYLE}) or
2 (L{VERSION_CONTEXT_STYLE}). See L{parse()}.
@type day_start_hour: int
@param day_start_hour: Hour to set a datetime when no time has been
specified.

@rtype: object
@return: L{Calendar} instance
Expand All @@ -255,6 +258,8 @@ def __init__(self, constants=None, version=VERSION_CONTEXT_STYLE):
self.ptc = Constants()
else:
self.ptc = constants
if day_start_hour is not None:
self.ptc.StartHour = day_start_hour

self.version = version
if version == VERSION_FLAG_STYLE:
Expand Down Expand Up @@ -767,9 +772,9 @@ def _evalModifier(self, modifier, chunk1, chunk2, sourceTime):
startMinute = mn
startSecond = sec
else:
startHour = self.ptc.StartHour
startMinute = 0
startSecond = 0
startHour = self.ptc.StartHour

# capture the units after the modifier and the remaining
# string after the unit
Expand Down Expand Up @@ -1836,10 +1841,6 @@ def parse(self, datetimeString, sourceTime=None, version=None):
"""
debug and logging.debug('parse()')

datetimeString = re.sub(r'(\w)\.(\s)', r'\1\2', datetimeString)
datetimeString = re.sub(r'(\w)[\'"](\s|$)', r'\1 \2', datetimeString)
datetimeString = re.sub(r'(\s|^)[\'"](\w)', r'\1 \2', datetimeString)

if sourceTime:
if isinstance(sourceTime, datetime.datetime):
debug and logging.debug('coercing datetime to timetuple')
Expand Down Expand Up @@ -1983,15 +1984,11 @@ def nlp(self, inputString, sourceTime=None, version=None):
were no matches
"""

debug and log.debug('nlp()')

orig_inputstring = inputString

# replace periods at the end of sentences w/ spaces
# opposed to removing them altogether in order to
# retain relative positions (identified by alpha, period, space).
# this is required for some of the regex patterns to match
inputString = re.sub(r'(\w)(\.)(\s)', r'\1 \3', inputString).lower()
inputString = re.sub(r'(\w)(\'|")(\s|$)', r'\1 \3', inputString)
inputString = re.sub(r'(\s|^)(\'|")(\w)', r'\1 \3', inputString)
inputString = inputString.lower()

startpos = 0 # the start position in the inputString during the loop

Expand All @@ -2015,6 +2012,9 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[3] = 0
leftmost_match[4] = 'modifier'

debug and log.debug('CRE_MODIFIER matched [%s]',
leftmost_match[2])

# Quantity + Units
m = self.ptc.CRE_UNITS.search(inputString[startpos:])
if m is not None:
Expand All @@ -2036,6 +2036,9 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[0] = leftmost_match[0] - 1
leftmost_match[2] = '-' + leftmost_match[2]

debug and log.debug('CRE_UNITS matched [%s]',
leftmost_match[2])

# Quantity + Units
m = self.ptc.CRE_QUNITS.search(inputString[startpos:])
if m is not None:
Expand All @@ -2056,6 +2059,9 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[0] = leftmost_match[0] - 1
leftmost_match[2] = '-' + leftmost_match[2]

debug and log.debug('CRE_QUNITS matched [%s]',
leftmost_match[2])

m = self.ptc.CRE_DATE3.search(inputString[startpos:])
# NO LONGER NEEDED, THE REGEXP HANDLES MTHNAME NOW
# for match in self.ptc.CRE_DATE3.finditer(inputString[startpos:]):
Expand All @@ -2077,6 +2083,8 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[2] = m.group('date')
leftmost_match[3] = 1
leftmost_match[4] = 'dateStr'
debug and log.debug('CRE_DATE3 matched [%s]',
leftmost_match[2])

# Standard date format
m = self.ptc.CRE_DATE.search(inputString[startpos:])
Expand All @@ -2088,6 +2096,8 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[2] = m.group('date')
leftmost_match[3] = 1
leftmost_match[4] = 'dateStd'
debug and log.debug('CRE_DATE matched [%s]',
leftmost_match[2])

# Natural language day strings
m = self.ptc.CRE_DAY.search(inputString[startpos:])
Expand All @@ -2099,6 +2109,8 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[2] = m.group()
leftmost_match[3] = 1
leftmost_match[4] = 'dayStr'
debug and log.debug('CRE_DAY matched [%s]',
leftmost_match[2])

# Weekday
m = self.ptc.CRE_WEEKDAY.search(inputString[startpos:])
Expand All @@ -2111,6 +2123,8 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[2] = m.group()
leftmost_match[3] = 1
leftmost_match[4] = 'weekdy'
debug and log.debug('CRE_WEEKDAY matched [%s]',
leftmost_match[2])

# Natural language time strings
m = self.ptc.CRE_TIME.search(inputString[startpos:])
Expand All @@ -2122,6 +2136,8 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[2] = m.group()
leftmost_match[3] = 2
leftmost_match[4] = 'timeStr'
debug and log.debug('CRE_TIME matched [%s]',
leftmost_match[2])

# HH:MM(:SS) am/pm time strings
m = self.ptc.CRE_TIMEHMS2.search(inputString[startpos:])
Expand All @@ -2134,6 +2150,8 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[1]]
leftmost_match[3] = 2
leftmost_match[4] = 'meridian'
debug and log.debug('CRE_TIMEHMS2 matched [%s]',
leftmost_match[2])

# HH:MM(:SS) time strings
m = self.ptc.CRE_TIMEHMS.search(inputString[startpos:])
Expand All @@ -2149,6 +2167,8 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[1]]
leftmost_match[3] = 2
leftmost_match[4] = 'timeStd'
debug and log.debug('CRE_TIMEHMS matched [%s]',
leftmost_match[2])

# Units only; must be preceded by a modifier
if len(matches) > 0 and matches[-1][3] == 0:
Expand All @@ -2166,6 +2186,8 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[2] = m.group()
leftmost_match[3] = 3
leftmost_match[4] = 'unitsOnly'
debug and log.debug('CRE_UNITS_ONLY matched [%s]',
leftmost_match[2])

# set the start position to the end pos of the leftmost match
startpos = leftmost_match[1]
Expand All @@ -2182,6 +2204,8 @@ def nlp(self, inputString, sourceTime=None, version=None):
leftmost_match[0] = m.start('nlp_prefix')
leftmost_match[2] = inputString[leftmost_match[0]:
leftmost_match[1]]
debug and log.debug('CRE_NLP_PREFIX matched [%s]',
leftmost_match[2])
matches.append(leftmost_match)

# find matches in proximity with one another and
Expand Down Expand Up @@ -2503,9 +2527,9 @@ def _buildOffsets(offsetDict, localeData, indexStart):
(,)?
(\s)*
)
(?P<mthname>
\b({months}|{shortmonths})\b
)\s*
\b(?P<mthname>
{months}|{shortmonths}
)\b\.?\s*
(?P<year>\d\d
(\d\d)?
)?
Expand All @@ -2521,12 +2545,12 @@ def _buildOffsets(offsetDict, localeData, indexStart):
# when the day is absent from the string
self.RE_DATE3 = r'''(?P<date>
(?:
(?:^|\s+)
(?:^|\s+|\b)
(?P<mthname>
{months}|{shortmonths}
)\b
)\b\.?
|
(?:^|\s+)
(?:^|\s+|\b)
(?P<day>[1-9]|[012]\d|3[01])
(?P<suffix>{daysuffix}|)\b
(?!\s*(?:{timecomponents}))
Expand All @@ -2543,9 +2567,9 @@ def _buildOffsets(offsetDict, localeData, indexStart):
self.RE_MONTH = r'''(\s+|^)
(?P<month>
(
(?P<mthname>
\b({months}|{shortmonths})\b
)
\b(?P<mthname>
{months}|{shortmonths}
)\b\.?
(\s*
(?P<year>(\d{{4}}))
)?
Expand Down Expand Up @@ -2609,17 +2633,17 @@ def _buildOffsets(offsetDict, localeData, indexStart):
# 1, 2, and 3 here refer to the type of match: date, time, or units
self.RE_NLP_PREFIX = r'''\b(?P<nlp_prefix>
(on)
(\s)+1
[\s(\["'-]+1
|
(at|in)
(\s)+2
[\s(\["'-]+2
|
(in)
(\s)+3
[\s(\["'-]+3
)'''

if 'meridian' in self.locale.re_values:
self.RE_TIMEHMS2 += (r'\s*(?P<meridian>{meridian})\b'
self.RE_TIMEHMS2 += (r'\s*(?P<meridian>{meridian})'
.format(**self.locale.re_values))
else:
self.RE_TIMEHMS2 += r'\b'
Expand Down
2 changes: 1 addition & 1 deletion parsedatetime/pdt_locales/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
'timeseparator': ':',
'rangeseparator': '-',
'daysuffix': 'rd|st|nd|th',
'meridian': r'am|pm|a\.m\.|p\.m\.|a|p',
'meridian': r'a\.m\.|p\.m\.|(?:am|pm|a|p)\b',
'qunits': 'h|m|s|d|w|y',
'now': ['now', 'right now'],
}
Expand Down
4 changes: 2 additions & 2 deletions parsedatetime/pdt_locales/icu.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,10 @@ def _sanitize_key(k):

result['dateSep'] = [ds]
s = result['dateFormats']['short']
ll = s.lower().split(ds)
formats = s.lower().split(ds)
dp_order = []

for s in ll:
for s in formats:
if len(s) > 0:
dp_order.append(s[:1])

Expand Down
60 changes: 60 additions & 0 deletions tests/TestDayStartHour.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
"""
Test that Calendar honours a configurable day start hour
"""
from __future__ import unicode_literals

import sys
import time
import datetime
import parsedatetime as pdt
from . import utils

if sys.version_info < (2, 7):
import unittest2 as unittest
else:
import unittest


class test(unittest.TestCase):
    """
    Verify that C{Calendar(day_start_hour=...)} controls the hour used for
    day-only expressions such as 'today', 'tomorrow' and 'yesterday'.
    """

    @utils.assertEqualWithComparator
    def assertExpectedResult(self, result, check, **kwargs):
        """
        Compare a parse result against the expected value.

        Delegates to C{utils.compareResultByTimeTuplesAndFlags}; the
        assertion behaviour itself comes from the
        C{utils.assertEqualWithComparator} decorator.
        """
        return utils.compareResultByTimeTuplesAndFlags(result, check, **kwargs)

    def setUp(self):
        # Capture a single instant and derive every "now" value from it.
        # Reading the clock twice (once here, once inside the test) lets the
        # two readings straddle midnight, which would put the expected
        # targets and the parse source time on different days.
        timestamp = time.time()
        (self.yr, self.mth, self.dy, self.hr,
         self.mn, self.sec, self.wd, self.yd,
         self.isdst) = time.localtime(timestamp)
        self.now = datetime.datetime.fromtimestamp(timestamp)

    def testDifferentDayStartHours(self):
        """
        For several start hours, day-only expressions must resolve to the
        matching calendar day at exactly that hour (minutes/seconds zero).
        """
        # The source time is the same for every start hour, so build it once.
        start = self.now.timetuple()
        one_day = datetime.timedelta(days=1)

        for day_start_hour in (0, 6, 9, 12):
            cal = pdt.Calendar(day_start_hour=day_start_hour)

            # Today's date at the configured start hour; the other targets
            # are offsets from this.
            today = datetime.datetime(
                self.yr, self.mth, self.dy,
                day_start_hour, 0, 0)

            # The second tuple element is the expected parse flag
            # (presumably "date only" -- confirm against parse() docs).
            target = (today + one_day).timetuple()
            self.assertExpectedResult(
                cal.parse('tomorrow', start), (target, 1))
            self.assertExpectedResult(
                cal.parse('next day', start), (target, 1))

            target = (today - one_day).timetuple()
            self.assertExpectedResult(
                cal.parse('yesterday', start), (target, 1))

            target = today.timetuple()
            self.assertExpectedResult(
                cal.parse('today', start), (target, 1))
2 changes: 1 addition & 1 deletion tests/TestLocaleBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def setUp(self):
self.cal = pdt.Calendar(self.ptc)

def test_dayoffsets(self):
start = datetime.datetime(self.yr, self.mth, self.dy, 9)
start = datetime.datetime(self.yr, self.mth, self.dy, self.ptc.StartHour)
for date_string, expected_day_offset in [
("Aujourd'hui", 0),
("aujourd'hui", 0),
Expand Down
Loading