custom timeparser, 130 lines of the most annoying code.
parent
b9ef486448
commit
3e71ac6421
@ -0,0 +1,219 @@
|
||||
"""
|
||||
Test Cases:
|
||||
|
||||
1 in the afternoon
|
||||
147 in the afternoon
|
||||
1223 in the morning
|
||||
1 pm
|
||||
132 pm
|
||||
1426
|
||||
half past 1 in the afternoon
|
||||
quarter past 12 in the afternoon
|
||||
tomorrow at 3 in the afternoon
|
||||
tomorrow morning
|
||||
yesterday morning
|
||||
this afternoon
|
||||
this morning
|
||||
monday at 3 in the afternoon
|
||||
wednesday at 930
|
||||
1030
|
||||
10 after 10 in the evening
|
||||
10 before 10 in the evening
|
||||
"""
|
||||
|
||||
"""
|
||||
Expected Test Case Outputs (run on monday sept 11 at 7:37pm):
|
||||
|
||||
['11-09-2023 13:00', [['1'], [], ['afternoon'], [], [], []]]
|
||||
['11-09-2023 13:47', [['147'], [], ['afternoon'], [], [], []]]
|
||||
['11-09-2023 00:23', [['1223'], [], ['morning'], [], [], []]]
|
||||
['11-09-2023 13:00', [['1'], [], ['pm'], [], [], []]]
|
||||
['11-09-2023 13:32', [['132'], [], ['pm'], [], [], []]]
|
||||
['11-09-2023 14:26', [['1426'], [], [], [], [], []]]
|
||||
['11-09-2023 13:30', [['1'], [], ['afternoon'], ['half past'], [], []]]
|
||||
['11-09-2023 12:15', [['12'], [], ['afternoon'], ['quarter past'], [], []]]
|
||||
['12-09-2023 15:00', [['3'], [], ['afternoon'], [], ['tomorrow'], []]]
|
||||
['12-09-2023 08:00', [[], [], ['morning'], [], ['tomorrow'], []]]
|
||||
['10-09-2023 08:00', [[], [], ['morning'], [], ['yesterday'], []]]
|
||||
['11-09-2023 13:00', [[], [], ['afternoon'], [], ['this'], []]]
|
||||
['11-09-2023 08:00', [[], [], ['morning'], [], ['this'], []]]
|
||||
['11-09-2023 15:00', [['3'], [], ['afternoon'], [], [], ['monday']]]
|
||||
['13-09-2023 09:30', [['930'], [], [], [], [], ['wednesday']]]
|
||||
['11-09-2023 10:30', [['1030'], [], [], [], [], []]]
|
||||
['11-09-2023 10:10', [['10', '10'], ['after'], ['evening'], [], [], []]]
|
||||
['11-09-2023 10:50', [['10', '10'], ['before'], ['evening'], [], [], []]]
|
||||
"""
|
||||
|
||||
"""
|
||||
Regex Breakdown:
|
||||
|
||||
|
||||
([0-9]+)
|
||||
(before|after)
|
||||
(afternoon|morning|pm|am|evening)
|
||||
(half past|quarter past|quarter to)
|
||||
(today|yesterday|tomorrow|tonight|this)
|
||||
(monday|tuesday|wednesday|thursday|friday|saturday|sunday)
|
||||
|
||||
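Each alternative above is followed by \b (a word boundary) in the real pattern, so a number or keyword only matches when it ends a word.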
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
import time
|
||||
|
||||
if __name__ == '__main__':
|
||||
from config import default_morning_time, default_afternoon_time, default_evening_time
|
||||
else:
|
||||
from skills.config import default_morning_time, default_afternoon_time, default_evening_time
|
||||
|
||||
|
||||
# Configured "HH:MM" default strings parsed once at import time; only the
# .hour and .minute of the resulting datetimes are read by the parser.
morning_datetime, afternoon_datetime, evening_datetime = (
    datetime.strptime(configured_clock, "%H:%M")
    for configured_clock in (
        default_morning_time,
        default_afternoon_time,
        default_evening_time,
    )
)
|
||||
|
||||
|
||||
# Lowercase weekday name -> index using the same numbering as
# datetime.weekday() (monday == 0 ... sunday == 6).
dayofweek_to_number = {
    weekday_name: weekday_index
    for weekday_index, weekday_name in enumerate(
        (
            "monday",
            "tuesday",
            "wednesday",
            "thursday",
            "friday",
            "saturday",
            "sunday",
        )
    )
}
|
||||
|
||||
class RegexTimeParser:
|
||||
def __init__(self):
|
||||
self.regex_string = r"([0-9]+)\b|(before\b|after\b)|(afternoon\b|morning\b|pm\b|am\b|evening\b)|(half past\b|quarter past\b||quarter to\b)|(today\b|yesterday\b|tomorrow\b|tonight\b|this\b)|(monday\b|tuesday\b|wednesday\b|thursday\b|friday\b|saturday\b|sunday\b)"
|
||||
# self.regex_string = "([0-9]+)|(before|after)|(afternoon|morning|pm|am|evening)|(half past|quarter past)|(today|yesterday|tomorrow|tonight)|(monday|tuesday|wednesday|thursday|friday|saturday|sunday)"
|
||||
self.regex_exp = re.compile(self.regex_string, re.IGNORECASE)
|
||||
|
||||
def _merge_findall(self, list_of_lists):
|
||||
out = [[], [], [], [], [], []]
|
||||
for pos in range(6):
|
||||
for group in list_of_lists:
|
||||
if group[pos] != '':
|
||||
out[pos].append(group[pos])
|
||||
return out
|
||||
|
||||
def _parse_time_string(self, phrase):
|
||||
"""Takes time string and parses hours/minutes into dict"""
|
||||
hours = 0
|
||||
minutes = 0
|
||||
match len(phrase):
|
||||
case 1:
|
||||
#hour
|
||||
hours = int(phrase)
|
||||
case 2:
|
||||
#hour
|
||||
hours = int(phrase)
|
||||
case 3:
|
||||
#hour first digit, last two digits minutes
|
||||
hours = int(phrase[:1])
|
||||
minutes = int(phrase[1:3])
|
||||
case 4:
|
||||
#hour first two digits, last two digits minutes
|
||||
hours = int(phrase[:2])
|
||||
minutes = int(phrase[2:4])
|
||||
|
||||
return {"hours": hours, "minutes": minutes}
|
||||
|
||||
def parse_time(self, phrase):
|
||||
|
||||
matches = self._merge_findall(self.regex_exp.findall(phrase))
|
||||
date = datetime.now()
|
||||
|
||||
if matches[2]:
|
||||
if "afternoon" in matches[2]:
|
||||
#default afternoon time from config
|
||||
date = date.replace(hour=afternoon_datetime.hour, minute=afternoon_datetime.minute)
|
||||
elif "evening" in matches[2]:
|
||||
#default evening time from config
|
||||
date = date.replace(hour=evening_datetime.hour, minute=evening_datetime.minute)
|
||||
elif "pm" in matches[2]:
|
||||
#default afternoon time from config
|
||||
date = date.replace(hour=afternoon_datetime.hour, minute=afternoon_datetime.minute)
|
||||
elif "morning" in matches[2]:
|
||||
#default morning time from config
|
||||
date = date.replace(hour=morning_datetime.hour, minute=morning_datetime.minute)
|
||||
elif "am" in matches[2]:
|
||||
#default morning time from config
|
||||
date = date.replace(hour=morning_datetime.hour, minute=morning_datetime.minute)
|
||||
|
||||
|
||||
if len(matches[0]) > 1: #uses only first two number groups ([0-9]+)
|
||||
t = self._parse_time_string(matches[0][0])
|
||||
t2 = self._parse_time_string(matches[0][1])
|
||||
if "after" in matches[1]:
|
||||
date = date.replace(hour = t2['hours'], minute = int(matches[0][1]))
|
||||
elif "before" in matches[1]:
|
||||
date = date.replace(hour = t['hours'], minute = 60-int(matches[0][0]))
|
||||
|
||||
elif len(matches[0]) > 0: #([0-9]+)
|
||||
t = self._parse_time_string(matches[0][0])
|
||||
if "afternoon" in matches[2] or "evening" in matches[2] or "pm" in matches[2]:
|
||||
if t["hours"] < 12:
|
||||
t["hours"] = t["hours"]+12
|
||||
elif "morning" in matches[2] or "am" in matches[2]:
|
||||
if t["hours"] >= 12:
|
||||
t["hours"] = t["hours"]-12
|
||||
date = date.replace(hour = t["hours"], minute = t["minutes"])
|
||||
|
||||
|
||||
if matches[3]: #(half past|quarter past|quarter to)
|
||||
match matches[3][0]:
|
||||
case "quarter past":
|
||||
date = date.replace(minute = 15)
|
||||
case "half past":
|
||||
date = date.replace(minute = 30)
|
||||
case "quarter to":
|
||||
date = date.replace(minute = 45)
|
||||
|
||||
|
||||
if matches[4]: #(today|yesterday|tomorrow|tonight|this)
|
||||
match matches[4][0]:
|
||||
case "yesterday":
|
||||
yesterday_date = datetime.now() - timedelta(days = 1)
|
||||
# print(yesterday_date)
|
||||
# print(timedelta(1))
|
||||
# print(datetime.now())
|
||||
date = date.replace(day=yesterday_date.day, month=yesterday_date.month, year=yesterday_date.year)
|
||||
case "tomorrow":
|
||||
tomorrow_date = datetime.now() + timedelta(1)
|
||||
date = date.replace(day=tomorrow_date.day, month=tomorrow_date.month, year=tomorrow_date.year)
|
||||
|
||||
if matches[5]: #(monday|tuesday|wednesday|thursday|friday|saturday|sunday)
|
||||
date = date + timedelta(days = (dayofweek_to_number[matches[5][0]] - date.weekday() + 7) % 7)
|
||||
|
||||
return [date.strftime("%d-%m-%Y %H:%M"), matches]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: print the parser's result for each sample phrase.
    parser = RegexTimeParser()

    sample_phrases = (
        "1 in the afternoon",
        "147 in the afternoon",
        "1223 in the morning",
        "1 pm",
        "132 pm",
        "1426",
        "half past 1 in the afternoon",
        "quarter past 12 in the afternoon",
        "tomorrow at 3 in the afternoon",
        "tomorrow morning",
        "yesterday morning",
        "this afternoon",
        "this morning",
        "monday at 3 in the afternoon",
        "wednesday at 930",
        "1030",
        "10 after 10 in the evening",
        "10 before 10 in the evening",
    )

    for sample in sample_phrases:
        print(parser.parse_time(sample))
|
Loading…
Reference in New Issue