From 4fefe793049fd6df44ba82a8ee946ab1310ace04 Mon Sep 17 00:00:00 2001 From: samerbam Date: Wed, 21 Jun 2023 10:59:27 -0400 Subject: [PATCH] added starts of NLP processing using NLI model --- backend/NLP.py | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/backend/NLP.py b/backend/NLP.py index 8609335..99b72b1 100644 --- a/backend/NLP.py +++ b/backend/NLP.py @@ -1,4 +1,98 @@ # Natural Language Processing using something like https://spacy.io +# THIS WORKS REALLY WELL FOR WHAT WE NEED: https://huggingface.co/facebook/bart-large-mnli +# Zero Shot Classification - Natural Language Inference +# basically this means we can list all the different skill names and the model will give us a +# percentage probability that we are talking about each of them. We should be able to take the top +# value and pass valid information from the query into the skill class after extracting information +# using another model for tokenization. This took me an entire week of research to figure out -_- + # this will allow us to figure out what the query means # i.e we might not have to add the word "wolfram" into a query to send it to wolfram... + +# import spacy + + +# nlp = spacy.load("en_core_web_sm") + +# doc = nlp("What is the weather in toronto") + +# for token in doc.ents: +# print(token) +# # if token.like_num: +# # print(tokenx) + + +# from transformers import AutoTokenizer, AutoModelForSequenceClassification + +# tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli") + +# model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli") + +from transformers import pipeline +import spacy + +# text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously." 
# doc = nlp(text)


class NLP:
    """Map a free-text query onto a skill label and extract its named entities.

    Two models are loaded once per instance, in ``__init__``:

    * ``classifier`` -- the Hugging Face ``zero-shot-classification``
      pipeline backed by ``facebook/bart-large-mnli``.
    * ``tokenclass`` -- the spaCy ``en_core_web_sm`` pipeline, used here only
      for its named-entity spans (``doc.ents``).

    Relies on the file-level ``from transformers import pipeline`` and
    ``import spacy``.
    """

    def __init__(self, candidate_labels=None):
        """Load both models and remember the labels to classify against.

        Args:
            candidate_labels: The skill names handed to the zero-shot
                classifier on every ``get_skill`` call. Omitting it (or
                passing ``None``) gives this instance its own empty list.
        """
        # A literal ``[]`` default is created once at definition time and
        # would be shared by every instance constructed without labels, so a
        # mutation through one instance would show up in all the others.
        # The value is stored as given (not copied or coerced) so whatever
        # the caller passes reaches the pipeline unchanged.
        self.candidate_labels = (
            [] if candidate_labels is None else candidate_labels
        )

        self.classifier = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
        )

        self.tokenclass = spacy.load("en_core_web_sm")

    def get_skill(self, sentence):
        """Classify ``sentence`` against ``self.candidate_labels``.

        Returns:
            Whatever the zero-shot pipeline returns, passed through untouched.
            NOTE(review): per the Hugging Face docs this should be a dict with
            ``sequence``, ``labels`` and ``scores`` (best label first) --
            confirm against the installed transformers version.
        """
        return self.classifier(sentence, self.candidate_labels)

    def get_named_entities(self, sentence):
        """Return the named entities spaCy finds in ``sentence``.

        Returns:
            One ``[text, start_char, end_char, label]`` list per entity, in
            the order spaCy reports them; an empty list when there are none.
        """
        doc = self.tokenclass(sentence)
        return [
            [ent.text, ent.start_char, ent.end_char, ent.label_]
            for ent in doc.ents
        ]


def _demo():
    """Smoke-test both models from the command line (downloads/loads them)."""
    nlp = NLP(['travel', 'cooking', 'dancing', 'weather'])
    print('==')
    print(nlp.get_skill("one day I will see the world"))
    print("yay!")
    print(nlp.get_skill("What is the weather today?"))
    print('==')

    print('====')
    print(nlp.get_named_entities("one day I will see the world"))
    print("yay!")
    print(nlp.get_named_entities("What is the weather today in london?"))
    print('====')


if __name__ == "__main__":
    _demo()

# --- scratch notes kept from earlier experiments -----------------------------
# sequence_to_classify = "one day I will see the world"
# candidate_labels = ['travel', 'cooking', 'dancing']
# print(classifier(sequence_to_classify, candidate_labels))


# import spacy
# from spacy.matcher import Matcher

# nlp = spacy.load("en_core_web_sm")
# matcher = Matcher(nlp.vocab)
# # Add match ID "HelloWorld" with no callback and one pattern
# pattern = [{"LOWER": "hello"}, {"IS_PUNCT": True}, {"LOWER": "world"}]
# matcher.add("HelloWorld", [pattern])

# doc = nlp("Hello, world! Hello world!")
# matches = matcher(doc)
# for match_id, start, end in matches:
#     string_id = nlp.vocab.strings[match_id]  # Get string representation
#     span = doc[start:end]  # The matched span
#     print(match_id, string_id, start, end, span.text)