2023-06-14 15:57:12 +00:00
|
|
|
# Natural Language Processing using something like https://spacy.io
|
|
|
|
|
2023-06-21 14:59:27 +00:00
|
|
|
# THIS WORKS REALLY WELL FOR WHAT WE NEED: https://huggingface.co/facebook/bart-large-mnli
|
|
|
|
# Zero Shot Classification - Natural Language Inference
|
|
|
|
# basically this means we can list all the different skill names and the model will give us a
|
|
|
|
# percentage probability that we are talking about each of them. We should be able to take the top
|
|
|
|
# value and pass valid information from the query into the skill class after extracting information
|
|
|
|
# using another model for tokenization. This took me an entire week of research to figure out -_-
|
|
|
|
|
2023-06-14 15:57:12 +00:00
|
|
|
# this will allow us to figure out what the query means
|
|
|
|
# i.e we might not have to add the word "wolfram" into a query to send it to wolfram...
|
2023-06-21 14:59:27 +00:00
|
|
|
|
|
|
|
# import spacy
|
|
|
|
|
|
|
|
|
|
|
|
# nlp = spacy.load("en_core_web_sm")
|
|
|
|
|
|
|
|
# doc = nlp("What is the weather in toronto")
|
|
|
|
|
|
|
|
# for token in doc.ents:
|
|
|
|
# print(token)
|
|
|
|
# # if token.like_num:
|
|
|
|
# # print(tokenx)
|
|
|
|
|
|
|
|
|
|
|
|
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
|
|
|
|
|
|
# tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")
|
|
|
|
|
|
|
|
# model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")
|
|
|
|
|
|
|
|
from transformers import pipeline
|
|
|
|
import spacy
|
|
|
|
|
2023-09-04 02:42:54 +00:00
|
|
|
from datetime import datetime
|
|
|
|
import time
|
|
|
|
|
2023-06-21 14:59:27 +00:00
|
|
|
# text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
|
|
|
|
|
|
|
|
# doc = nlp(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class NLP:
    """Intent classification and named-entity extraction for assistant queries.

    Combines a zero-shot classifier (facebook/bart-large-mnli) that scores a
    sentence against the configured ``candidate_labels`` (skill names) with a
    small spaCy pipeline used for named-entity recognition, so the caller can
    pick the most likely skill and pull structured values out of the query.
    """

    def __init__(self, candidate_labels=None):
        """Load the classification and NER models.

        candidate_labels: list of skill-name strings the classifier ranks;
        defaults to an empty list.  A ``None`` sentinel is used instead of a
        mutable ``[]`` default so instances never share one list object.
        """
        self.candidate_labels = [] if candidate_labels is None else candidate_labels

        # Zero-shot classification via natural-language inference: no
        # task-specific training needed, labels are supplied at call time.
        self.classifier = pipeline("zero-shot-classification",
                                   model="facebook/bart-large-mnli")

        # Small English spaCy model; only its NER output is used here.
        self.tokenclass = spacy.load("en_core_web_sm")

    def get_skill(self, sentence):
        """Return the zero-shot classification result for ``sentence``.

        The result ranks every label in ``self.candidate_labels`` with a
        probability score (highest first, per the transformers pipeline).
        """
        return self.classifier(sentence, self.candidate_labels)

    def get_named_entities(self, sentence):
        """Return ``[[text, start_char, end_char, label], ...]`` for each
        named entity spaCy finds in ``sentence``."""
        return [[ent.text, ent.start_char, ent.end_char, ent.label_]
                for ent in self.tokenclass(sentence).ents]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Quick manual smoke test: time model loading, then run a batch of
    # classification and NER queries, printing each result and its latency.
    t0 = time.time()
    nlp = NLP(['weather', 'timer', 'physics', 'mathematics'])
    print(f"Init: {time.time()-t0}")

    def timed(call, *banners):
        # Print any banner lines, then the call's result, then elapsed time.
        t0 = time.time()
        for banner in banners:
            print(banner)
        print(call())
        print(f"Took: {time.time()-t0}")

    # Skill (zero-shot) classification examples.
    timed(lambda: nlp.get_skill("one day I will see the world"), '==')
    timed(lambda: nlp.get_skill("What is the weather today?"), "yay!")
    timed(lambda: nlp.get_skill("What is air resistance of a spaceship with a mass of 1000kg"), '==')
    timed(lambda: nlp.get_skill("What is five plus five"))

    # Named-entity extraction examples.
    timed(lambda: nlp.get_named_entities("one day I will see the world"), '====')
    timed(lambda: nlp.get_named_entities("What is the weather today in london?"), "yay!")
    timed(lambda: nlp.get_named_entities("set a timer for 1 minute and 15 seconds"), '====', '======')
    timed(lambda: nlp.get_named_entities("remind me at May 5th at 2:30 in the afternoon to wash the dog"))
    timed(lambda: nlp.get_skill("remind me at May 5th at 2:30 in the afternoon to wash the dog"))
    print('======')
|
2023-06-21 14:59:27 +00:00
|
|
|
# sequence_to_classify = "one day I will see the world"
|
|
|
|
# candidate_labels = ['travel', 'cooking', 'dancing']
|
|
|
|
# print(classifier(sequence_to_classify, candidate_labels))
|
|
|
|
|
|
|
|
|
|
|
|
# import spacy
|
|
|
|
# from spacy.matcher import Matcher
|
|
|
|
|
|
|
|
# nlp = spacy.load("en_core_web_sm")
|
|
|
|
# matcher = Matcher(nlp.vocab)
|
|
|
|
# # Add match ID "HelloWorld" with no callback and one pattern
|
|
|
|
# pattern = [{"LOWER": "hello"}, {"IS_PUNCT": True}, {"LOWER": "world"}]
|
|
|
|
# matcher.add("HelloWorld", [pattern])
|
|
|
|
|
|
|
|
# doc = nlp("Hello, world! Hello world!")
|
|
|
|
# matches = matcher(doc)
|
|
|
|
# for match_id, start, end in matches:
|
|
|
|
# string_id = nlp.vocab.strings[match_id] # Get string representation
|
|
|
|
# span = doc[start:end] # The matched span
|
|
|
|
# print(match_id, string_id, start, end, span.text)
|