Compare commits

...

15 Commits

Author SHA1 Message Date
samerbam
8c7d8f8f1b remove config.py 2023-09-05 11:55:32 -04:00
samerbam
57245ddefc reading material 2023-09-05 11:50:38 -04:00
samerbam
f21b0ad8d2 added in some comments 2023-09-05 11:11:21 -04:00
samerbam
ca7996cdbd basic boilerplate for translate skill 2023-09-05 11:07:38 -04:00
samerbam
1d88e949c9 timer skill 2023-09-03 22:42:54 -04:00
samerbam
666fbac971 finish initial timer implementation, still need to add timer sounds. 2023-06-22 14:56:11 -04:00
samerbam
747d34ec49 fix readme typo 2023-06-22 13:32:50 -04:00
samerbam
8768080ce0 update readme 2023-06-22 13:32:29 -04:00
samerbam
7d431b0b51 updates 2023-06-22 13:14:13 -04:00
samerbam
4fefe79304 added starts of NLP processing using NLI model 2023-06-21 10:59:27 -04:00
samerbam
f349068ed2 added auth.py. renamed setup.sh to setup-mac.sh 2023-06-14 12:04:04 -04:00
samerbam
ba2776933d remove. 2023-06-14 11:59:42 -04:00
samerbam
389ee034fc remove tmpdir 2023-06-14 11:58:48 -04:00
samerbam
78b673730d remove tempdir 2023-06-14 11:58:25 -04:00
samerbam
eab4a90c80 reorganize codebase. Add in more of the framework for adding skills. 2023-06-14 11:57:12 -04:00
44 changed files with 553 additions and 57 deletions

11
.gitignore vendored
View File

@ -356,8 +356,8 @@ fabric.properties
#################### NotepadPP.gitignore ####################
# Notepad++ backups #
*.bak
# Notepad++ backups #
*.bak
#################### PuTTY.gitignore ####################
@ -485,7 +485,8 @@ $RECYCLE.BIN/
.LSOverride
# Icon must end with two \r
Icon
Icon
# Thumbnails
._*
@ -511,4 +512,6 @@ Temporary Items
# add your custom gitignore here:
!.gitignore
!.gitsubmodules
skills/config.py
backend/skills/config.py
tempdir*/
tmpdir*/

43
README.md Normal file
View File

@ -0,0 +1,43 @@
# A Siri like AI Assistant
* Uses ChatGPT for general queries
* Uses Wolfram Alpha for anything math related
* Has built-in NLP (using an NLI model) for determining if we can process a query locally
* Frontend/Backend architecture for ability to deploy lightweight clients
## Skills
- [ ] Translations
- [ ] Alarms
- [ ] Calendar
- [ ] Gmail
- [ ] ChatGPT
- [ ] Reminders
- [x] Timers - TODO: Adding in sound notifications.
- [ ] Todos
- [ ] Weather
- [ ] Wolfram
- [x] NLP
- [x] Speech to Text (frontend for sure)
- [ ] API
- [ ] Authentication
- [ ] General API
- [ ] TTS
- generate audio on backend or frontend?
- Perks of backend is fast generation
- Cons of backend is large file transfers between devices, lots of internet usage
- Perks of frontend is less data transfer between devices requiring less internet usage
- Cons of frontend is slower generation
- Current Solution: https://github.com/synesthesiam/opentts
## API Specs
Using websockets for communication allows for two way communication where the server can send the client info at any point
Link for example: https://stackoverflow.com/questions/53331127/python-websockets-send-to-client-and-keep-connection-alive
More examples (includes jwt authentication, though this is in node.js, still useful for figuring out how to do this stuff): https://www.linode.com/docs/guides/authenticating-over-websockets-with-jwt/
## Ideas
* Dashboard with api call counts

133
backend/NLP.py Normal file
View File

@ -0,0 +1,133 @@
# Natural Language Processing using something like https://spacy.io
# THIS WORKS REALLY WELL FOR WHAT WE NEED: https://huggingface.co/facebook/bart-large-mnli
# Zero Shot Classification - Natural Language Inference
# basically this means we can list all the different skill names and the model will give us a
# percentage probability that we are talking about each of them. We should be able to take the top
# value and pass valid information from the query into the skill class after extracting information
# using another model for tokenization. This took me an entire week of research to figure out -_-
# this will allow us to figure out what the query means
# i.e we might not have to add the word "wolfram" into a query to send it to wolfram...
# import spacy
# nlp = spacy.load("en_core_web_sm")
# doc = nlp("What is the weather in toronto")
# for token in doc.ents:
# print(token)
# # if token.like_num:
# # print(tokenx)
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")
# model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")
from transformers import pipeline
import spacy
from datetime import datetime
import time
# text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
# doc = nlp(text)
class NLP:
    """Intent routing helper: zero-shot classification plus named-entity extraction.

    Uses facebook/bart-large-mnli (via the transformers pipeline) to score a
    sentence against the candidate skill labels, and spaCy's small English
    model for named-entity recognition.
    """

    def __init__(self, candidate_labels=None):
        # Bug fix: the original used a mutable default argument
        # (candidate_labels=[]), which is shared across all calls. A None
        # sentinel with a fresh list preserves the old default behavior.
        self.candidate_labels = list(candidate_labels) if candidate_labels else []
        # NOTE: downloads/loads the model on first use — construction is slow.
        self.classifier = pipeline("zero-shot-classification",
                                   model="facebook/bart-large-mnli")
        self.tokenclass = spacy.load("en_core_web_sm")

    def get_skill(self, sentence):
        """Return the zero-shot classification of `sentence` over the candidate labels."""
        return self.classifier(sentence, self.candidate_labels)

    def get_named_entities(self, sentence):
        """Return [text, start_char, end_char, label] for each entity in `sentence`."""
        return [[ent.text, ent.start_char, ent.end_char, ent.label_]
                for ent in self.tokenclass(sentence).ents]
if __name__ == "__main__":
    # Manual smoke test: time model load, then a batch of classification and
    # NER queries. Banner strings mark sections of the original ad-hoc script.
    t0 = time.time()
    nlp = NLP(['weather', 'timer', 'physics', 'mathematics'])
    print(f"Init: {time.time()-t0}")

    # (banner lines, method, query) — timed per entry, banners included in timing.
    demos = [
        (('==',), nlp.get_skill, "one day I will see the world"),
        (('yay!',), nlp.get_skill, "What is the weather today?"),
        (('==',), nlp.get_skill, "What is air resistance of a spaceship with a mass of 1000kg"),
        ((), nlp.get_skill, "What is five plus five"),
        (('====',), nlp.get_named_entities, "one day I will see the world"),
        (('yay!',), nlp.get_named_entities, "What is the weather today in london?"),
        (('====', '======'), nlp.get_named_entities, "set a timer for 1 minute and 15 seconds"),
        ((), nlp.get_named_entities, "remind me at May 5th at 2:30 in the afternoon to wash the dog"),
        ((), nlp.get_skill, "remind me at May 5th at 2:30 in the afternoon to wash the dog"),
    ]
    for banners, method, query in demos:
        t0 = time.time()
        for banner in banners:
            print(banner)
        print(method(query))
        print(f"Took: {time.time()-t0}")
    print('======')
# sequence_to_classify = "one day I will see the world"
# candidate_labels = ['travel', 'cooking', 'dancing']
# print(classifier(sequence_to_classify, candidate_labels))
# import spacy
# from spacy.matcher import Matcher
# nlp = spacy.load("en_core_web_sm")
# matcher = Matcher(nlp.vocab)
# # Add match ID "HelloWorld" with no callback and one pattern
# pattern = [{"LOWER": "hello"}, {"IS_PUNCT": True}, {"LOWER": "world"}]
# matcher.add("HelloWorld", [pattern])
# doc = nlp("Hello, world! Hello world!")
# matches = matcher(doc)
# for match_id, start, end in matches:
# string_id = nlp.vocab.strings[match_id] # Get string representation
# span = doc[start:end] # The matched span
# print(match_id, string_id, start, end, span.text)

1
backend/api.py Normal file
View File

@ -0,0 +1 @@
#using FastAPI because I used that for ThermalTodos with auth0 auth and dont want to relearn auth0

8
backend/auth.py Normal file
View File

@ -0,0 +1,8 @@
# TODO: Handle all authentication stuff for verifying client is who we think it is in here.
# Example: https://github.com/miguelgrinberg/REST-auth
# User/Pass for initial token and refresh token generation, this should
# OAuth2 style authentication
# Flask-RESTFul with
#

31
backend/main.py Normal file
View File

@ -0,0 +1,31 @@
from skills.alarms import Alarms
from skills.cal import Cal
from skills.gmail import Gmail
from skills.gpt import GPT
from skills.reminders import Reminders
from skills.timers import Timers
from skills.todos import Todos
from skills.weather import Weather
from skills.wolfram import Wolfram
from NLP import NLP

import sys

print(sys.version)

# Instantiate every available skill; each exposes a `trigger_phrase` that the
# NLP router uses as a zero-shot classification label.
skills = [GPT(), Alarms(), Cal(), Gmail(), Reminders(), Timers(), Todos(), Weather(), Wolfram()]
skill_names = [skill.trigger_phrase for skill in skills]

if __name__ == "__main__":
    print(f"Active Skills: {skill_names}")
    # Bug fix: NLP() was constructed without candidate labels, so get_skill()
    # would classify against an empty label list. Route over the active skills.
    nlp = NLP(skill_names)

4
backend/requirements.txt Normal file
View File

@ -0,0 +1,4 @@
transformers
spacy
schedule
ctparse

11
backend/skills/alarms.py Normal file
View File

@ -0,0 +1,11 @@
"""
Basically going to be the same as timers.py
Maybe we just call timers.py from here?
"""
class Alarms:
    """Alarm skill stub (likely to share logic with timers.py eventually)."""

    def __init__(self):
        # Phrase the NLP router matches to dispatch queries to this skill.
        self.trigger_phrase = "alarms"

12
backend/skills/auth.py Normal file
View File

@ -0,0 +1,12 @@
# TODO: Handle all authentication stuff for verifying client is who we think it is in here.
"""
Reading material for this:
https://auth0.com/blog/using-m2m-authorization/
https://git.imsam.ca/sam/ThermalTodos/src/branch/main/application/main.py (FastAPI + Auth0)
(previous link is based on this) https://auth0.com/blog/build-and-secure-fastapi-server-with-auth0/
THIS IS GOING TO BE AN ABSOLUTE PAIN TO WORK ON, I SPENT LITERALLY ABOUT 8 HOURS JUST ON THERMALTODOS AUTH
I HOPE THIS ISNT AS PAINFUL
"""

11
backend/skills/cal.py Normal file
View File

@ -0,0 +1,11 @@
"""
Reading Material:
https://developers.google.com/calendar/api/quickstart/python
https://git.imsam.ca/sam/ThermalTodos/src/branch/main/application/sync_calendar.py (readonly application of previous link)
"""
class Cal:
    """Calendar skill stub (Google Calendar API integration planned)."""

    def __init__(self):
        # Phrase the NLP router matches to dispatch queries to this skill.
        self.trigger_phrase = "calendar"

View File

@ -0,0 +1,4 @@
# Copy & Rename this file to config.py and fill in data
ntfy_url="" # Obtained from ntfy.sh app (choose a random string of numbers/letters for better security)
deepl_api_key="" # Obtained from https://www.deepl.com/en/docs-api
google_api_key="" #Obtained from https://cloud.google.com/translate/pricing

11
backend/skills/gmail.py Normal file
View File

@ -0,0 +1,11 @@
"""
Reading Material:
https://developers.google.com/gmail/api/quickstart/python
https://git.imsam.ca/sam/ThermalTodos/src/branch/main/application/sync_calendar.py (for authorizing user)
"""
class Gmail:
    """Gmail skill stub (Gmail API integration planned)."""

    def __init__(self):
        # Phrase the NLP router matches to dispatch queries to this skill.
        self.trigger_phrase = "gmail"

5
backend/skills/gpt.py Normal file
View File

@ -0,0 +1,5 @@
class GPT:
    """ChatGPT fallback skill stub for general queries."""

    def __init__(self):
        # Phrase the NLP router matches to dispatch queries to this skill.
        self.trigger_phrase = "gpt"

View File

@ -0,0 +1,12 @@
"""
Using notification logic from timers.py to notify at specified time
have web app to access list of reminders? (notion api?)
look into location based reminders and see if thats possible (maybe ntfy.sh supports this?)
"""
class Reminders:
    """Reminder skill stub (notification delivery via ntfy.sh planned)."""

    def __init__(self):
        # Phrase the NLP router matches to dispatch queries to this skill.
        self.trigger_phrase = "reminders"

116
backend/skills/timers.py Normal file
View File

@ -0,0 +1,116 @@
import requests
if __name__ == "__main__": # Handle running this script directly vs as a project
from config import ntfy_url
from utility import parsetime2
else:
from skills.config import ntfy_url
from skills.utility import parsetime2
import threading
import schedule
import time
# def job_that_executes_once():
# Do some work that only needs to happen once...
# return schedule.CancelJob
def run_continuously(schedule, interval=1):
    """Run `schedule.run_pending()` in a background thread every `interval` seconds.

    Adapted from the `schedule` library documentation. Returns a
    threading.Event; set it to stop the background thread. Note that missed
    jobs are *not* replayed: with a long interval, each pending job runs at
    most once per wake-up, not once per missed slot.
    """
    stop_event = threading.Event()

    def _pump():
        # Poll the scheduler until asked to stop.
        while not stop_event.is_set():
            schedule.run_pending()
            time.sleep(interval)

    threading.Thread(target=_pump).start()
    return stop_event
class Timers:
    """Timer skill: schedules named timers and pushes an ntfy.sh notification
    when each one fires.

    Timers live in `self.timers` as name -> absolute epoch seconds. A
    background thread (started lazily with the first timer, stopped when the
    last is removed) drives a private schedule.Scheduler instance.
    """

    def __init__(self):
        self.trigger_phrase = "timer"  # phrase the NLP router dispatches on
        self.timers = {}  # name -> epoch seconds at which the timer fires
        # Private scheduler so our jobs don't mix with the module-global one.
        self.schedule = schedule.Scheduler()

    def _notify(self, device_id, timer_name):
        """POST a push notification to the ntfy.sh topic `device_id`.

        Returns the requests.Response so callers can inspect/log it.
        """
        r = requests.post(f"https://ntfy.sh/{device_id}",
                          data=f"{timer_name}",
                          headers={
                              "Title": "Your timer is going off!",
                              "Priority": "default",
                              "Tags": "bell"
                          })
        return r

    def _add_timer(self, duration, name):
        """Register a timer named `name` firing at datetime `duration`."""
        if len(self.timers) == 0:
            # First active timer: start the background polling thread.
            self.stop_run_continuously = run_continuously(self.schedule)
        self.timers[name] = time.mktime(duration.timetuple())
        # Scheduled daily at the target wall-clock time; _trigger_timer cancels
        # the job after its first run so it effectively fires once.
        self.schedule.every().day.at(time.strftime("%H:%M:%S", duration.timetuple())).do(self._trigger_timer, name).tag(name)

    def _remove_timer(self, name):
        """Forget timer `name`; stop the polling thread if none remain."""
        del self.timers[name]
        if len(self.timers) == 0:
            self.stop_run_continuously.set()

    def _trigger_timer(self, name):
        """Scheduler callback: notify, clean up, then cancel the daily job."""
        if name in self.timers:
            res = self._notify(ntfy_url, name).text
            print(res)
            self._remove_timer(name)
        # TODO: play timer-done sound; signal clients to listen on ntfy.sh.
        # Better option for two-way communication: websockets.
        return schedule.CancelJob

    def get_remaining_time(self, name=""):
        """Return seconds remaining until timer `name` fires.

        Bug fix: the original computed time.mktime(datetime.now().timetuple()),
        but `datetime` is never imported in this module, so the call raised
        NameError; time.time() yields the same epoch value directly.
        Raises KeyError if `name` is not a known timer.
        """
        return self.timers[name] - time.time()

    def run(self, query="", duration_string="", name=""):
        """Skill entry point. Returns True on success, False on failure."""
        if "add" in query:
            # parsetime2 turns e.g. "15 seconds" into an absolute datetime.
            duration = parsetime2(duration_string)
            self._add_timer(duration, name)
            return True
        if "remove" in query:
            self._remove_timer(name)
            return True
        return False

    def _disable_timer_check_thread(self):
        # Manual escape hatch to stop the background scheduler thread.
        self.stop_run_continuously.set()
if __name__ == "__main__":
    # Manual smoke test: schedule a short timer and let the background
    # scheduler fire it (the notification POST needs network access).
    demo = Timers()
    demo.run("add", "15 seconds", "test timer")

5
backend/skills/todos.py Normal file
View File

@ -0,0 +1,5 @@
class Todos:
    """Todo-list skill stub (Notion API is a candidate backend)."""

    def __init__(self):
        # Phrase the NLP router matches to dispatch queries to this skill.
        self.trigger_phrase = "todos"

View File

@ -0,0 +1,70 @@
import requests
from skills.config import deepl_api_key
from skills.config import google_api_key
"""
Reading material for this:
https://www.deepl.com/en/docs-api
https://cloud.google.com/translate/docs/overview
"""
class Translations:
    """Translation skill: prefers DeepL, falls back to Google Translate."""

    def __init__(self):
        self.trigger_phrase = "translate"
        # These counters would be good candidates for a future monitoring dashboard.
        self.total_chars_translated = 0
        self.deepl_chars_translated = 0
        # Bug fix: this was `googl_chars_translated`, but translate_google()
        # increments `google_chars_translated`, so every Google call raised
        # AttributeError. Unified on the full spelling.
        self.google_chars_translated = 0  # only increment if api charged for the characters (read docs to figure this out)
        self.free_monthly_char_limit_google = 500000
        self.free_monthly_char_limit_deepl = 500000
        self.supported_deepl_langs = ["list of supported languages"]  # TODO: real DeepL language codes

    def _get_language_code(self, query):
        # TODO: parse the target language from the query and convert it to the
        # code each API expects. Return None if the language is not recognized.
        return "FR"

    def _clean_up_text(self, query):
        # TODO: remove characters that are not needed to translate text
        # (APIs charge per char sent, not per char translated).
        return "cleaned up text"

    def translate_deepl(self, text, language):
        """Translate `text` to `language` via the DeepL free API.

        Returns the raw requests.Response. Also updates the character counters.
        """
        self.total_chars_translated += len(text)
        self.deepl_chars_translated += len(text)
        headers = {
            'Authorization': 'DeepL-Auth-Key ' + deepl_api_key,
            'Content-Type': 'application/json',
        }
        json_data = {
            'text': [text],
            'target_lang': language,
        }
        response = requests.post('https://api-free.deepl.com/v2/translate', headers=headers, json=json_data)
        return response

    def translate_google(self, text, language):
        """Translate `text` via Google Translate (not implemented yet)."""
        self.total_chars_translated += len(text)
        self.google_chars_translated += len(text)
        return ""  # TODO: add in google translate api, probably using python client library for google api

    def translate(self, text):
        """Route a request to DeepL when the target language is supported,
        otherwise to Google (auto-detect as the last resort)."""
        parsed_text = self._clean_up_text(text)  # TODO: also extract the query proper from the text
        target_language = self._get_language_code(text)
        if target_language in self.supported_deepl_langs:
            return self.translate_deepl(parsed_text, target_language)
        if target_language is not None:
            return self.translate_google(text, target_language)
        return self.translate_google(text, "auto")

51
backend/skills/utility.py Normal file
View File

@ -0,0 +1,51 @@
from ctparse import ctparse #Used for parsing time (parsetime), https://github.com/comtravo/ctparse
import parsedatetime #Used for parsing time (parsetime2), https://github.com/bear/parsedatetime
from datetime import datetime
import time
def parsetime(phrase):
    """Parse a natural-language time phrase into a datetime via ctparse.

    Returns None when ctparse cannot resolve the phrase.
    """
    result = ctparse(phrase, ts=datetime.now())
    if result is None:
        return None
    return result.resolution.dt
def parsetime2(phrase):
    """Parse a natural-language time phrase into a datetime via parsedatetime.

    Handles bare seconds better than parsetime(); understands
    'this afternoon' but not 'in the afternoon'.
    """
    parsed_struct, _status = parsedatetime.Calendar().parse(phrase)
    return datetime(*parsed_struct[:6])
if __name__ == "__main__":
    # Manual smoke test of both parsers.
    print(parsetime('May 5th 2:30 in the afternoon'))

    now_dt = parsetime2('now')
    print(time.mktime(now_dt.timetuple()))

    future_dt = parsetime2('in 5 minutes 30 seconds')
    print(time.mktime(future_dt.timetuple()))
    print(time.strftime("%H:%M:%S", future_dt.timetuple()))

View File

@ -0,0 +1,5 @@
class Weather:
    """Weather skill stub (OpenWeatherMap API is the planned backend)."""

    def __init__(self):
        # Phrase the NLP router matches to dispatch queries to this skill.
        self.trigger_phrase = "weather"

View File

@ -0,0 +1,5 @@
class Wolfram:
    """Wolfram Alpha skill stub for math/science queries."""

    def __init__(self):
        # Phrase the NLP router matches to dispatch queries to this skill.
        self.trigger_phrase = "wolfram"

1
frontend/auth.py Normal file
View File

@ -0,0 +1 @@
# TODO: Handle all authentication stuff for verifying server is who we think it is in here.

View File

@ -22,3 +22,9 @@ stop_listening = r.listen_in_background(m, callback)
print("Listening...")
while True:
time.sleep(0.1)
# TODO: Make api request to backend with resulting text in callback function.
# Use to connect to backend with bi-directional communication, allows for alarms/timers/etc to work:
# https://pypi.org/project/websocket-client/

1
frontend/tts.py Normal file
View File

@ -0,0 +1 @@
# make requests to https://github.com/synesthesiam/opentts and play resulting wav file

View File

@ -1,2 +0,0 @@
# Copy & Rename this file to config.py and fill in data
ntfy_url="" # Obtained from ntfy.sh app (choose a random string of numbers/letters for better security)

View File

View File

View File

View File

@ -1,47 +0,0 @@
from config import ntfy_url
import requests
class Timers:
# NOTE(review): legacy timer skill shown as *deleted* in this diff; superseded
# by backend/skills/timers.py. Annotated for context only — see defect notes below.
def __init__(self):
self.trigger_phrase = "timer"
self.timers = {}
# Stores a raw time value per timer name; nothing is actually scheduled yet.
def _add_timer(self, time, name):
self.timers[name] = time
# use https://schedule.readthedocs.io/en/stable/examples.html#run-a-job-once to trigger self._trigger_timer()
def _remove_timer(self, name):
del self.timers[name]
# Pushes an ntfy.sh notification when a timer with this name exists.
def _trigger_timer(self, name):
if name in self.timers:
r = requests.post(f"https://ntfy.sh/{ntfy_url}",
data=f"{name}",
headers={
"Title": "Your timer is going off!",
"Priority": "default",
"Tags": "bell"
})
print(r.text)
#TODO: send ntfy.sh to device
#TODO: play timer done sound
# BUG(review): _add_timer requires (time, name) but is called with a single
# argument, and _remove_timer is passed the numeric 0 rather than a timer
# name — both branches would raise. Moot: this file is deleted by this commit.
def run(self, query):
if "add" in query:
time = 0 #TODO: Natural Language parse time out of phrase
self._add_timer(time)
return True # Return true to indicate success
if "remove" in query:
time = 0 #TODO: Natural Language parse time out of phrase
self._remove_timer(time)
return True
return False # Return false to indicate failure
if __name__ == "__main__":
# Manual smoke test of the legacy skill. NOTE(review): `time` here is a
# (confusingly named) Timers instance, not the time module.
time = Timers()
time._add_timer(123, "123")
time._trigger_timer("123")

View File

View File

View File

View File

@ -1 +0,0 @@
/Users/sam/.cache/huggingface/hub/models--speechbrain--tts-tacotron2-ljspeech/snapshots/f0c9855a337493070f576ef94dacd0ed407e04f2/hyperparams.yaml

View File

@ -1 +0,0 @@
/Users/sam/.cache/huggingface/hub/models--speechbrain--tts-tacotron2-ljspeech/snapshots/f0c9855a337493070f576ef94dacd0ed407e04f2/model.ckpt

View File

@ -1 +0,0 @@
/Users/sam/.cache/huggingface/hub/models--speechbrain--tts-hifigan-ljspeech/snapshots/e0cc1f9be5b65d4612013f26867ca600e98bc1b6/generator.ckpt

View File

@ -1 +0,0 @@
/Users/sam/.cache/huggingface/hub/models--speechbrain--tts-hifigan-ljspeech/snapshots/e0cc1f9be5b65d4612013f26867ca600e98bc1b6/hyperparams.yaml