From 15d1a0b58babd1812e8c2bdd3b32e3cd1423d461 Mon Sep 17 00:00:00 2001
From: samerbam
Date: Mon, 12 Jun 2023 12:06:24 -0400
Subject: [PATCH] Upload

---
 ai.py                           | 16 ++++++++++++++
 main.py                         | 24 ++++++++++++++++++++
 setup.sh                        | 20 +++++++++++++++++
 talk.py                         | 17 ++++++++++++++
 test.html                       | 14 ++++++++++++
 tmpdir_tts/hyperparams.yaml     |  1 +
 tmpdir_tts/model.ckpt           |  1 +
 tmpdir_vocoder/generator.ckpt   |  1 +
 tmpdir_vocoder/hyperparams.yaml |  1 +
 tts.py                          | 20 +++++++++++++++++
 untitled.py                     | 28 +++++++++++++++++++++++
 voice.py                        | 39 +++++++++++++++++++++++++++++++
 12 files changed, 182 insertions(+)
 create mode 100644 ai.py
 create mode 100644 main.py
 create mode 100755 setup.sh
 create mode 100644 talk.py
 create mode 100644 test.html
 create mode 120000 tmpdir_tts/hyperparams.yaml
 create mode 120000 tmpdir_tts/model.ckpt
 create mode 120000 tmpdir_vocoder/generator.ckpt
 create mode 120000 tmpdir_vocoder/hyperparams.yaml
 create mode 100644 tts.py
 create mode 100644 untitled.py
 create mode 100644 voice.py

diff --git a/ai.py b/ai.py
new file mode 100644
index 0000000..c1635ca
--- /dev/null
+++ b/ai.py
@@ -0,0 +1,16 @@
+from revChatGPT.V1 import Chatbot
+
+chatbot = Chatbot(config={
+    "email": "pushes_mouthy_0x@icloud.com",
+    "password": "9hCyLiGWUdr9MX2mepTk"
+})
+
+prompt = "how many beaches does portugal have?"
+response = ""
+
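+# chatbot.ask() yields the reply incrementally; keep overwriting response so it
+# ends up holding the final, complete message.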
+for data in chatbot.ask(
+    prompt
+):
+    response = data["message"]
+
+print(response)
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..2b4281b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,24 @@
+import time
+import speech_recognition as sr
+
+def callback(recognizer, audio):
+    try:
+        print("Processing...")
+        text = r.recognize_whisper(audio, language="english")
+        print(f"Apollo (Whisper Model) thinks you said: {text}")
+    except sr.UnknownValueError:
+        print("Could not understand audio.")
+    except sr.RequestError as e:
+        print("Could not request result from Whisper")
+
+r = sr.Recognizer()
+m = sr.Microphone()
+
+with m as source:
+    r.adjust_for_ambient_noise(source)
+
+stop_listening = r.listen_in_background(m, callback)
+
+print("Listening...")
+while True:
+    time.sleep(0.1)
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..982a928
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,20 @@
+
+# Switch to the virtual Python environment
+source .venv/bin/activate
+
+# Install Homebrew (https://brew.sh)
+/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+
+# Install brew dependencies
+brew install portaudio
+brew install ffmpeg
+
+# Install Python dependencies
+pip install -U pyaudio  # Microphone access
+pip install numpy
+pip install pillow
+pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
+pip install -U openai-whisper  # Speech-to-text model (needed by SpeechRecognition)
+pip install -U SpeechRecognition  # Speech-to-text logic
+pip install -U revChatGPT  # Interacts with OpenAI ChatGPT
+pip install speechbrain  # Text-to-speech (Tacotron2 + HiFi-GAN)
\ No newline at end of file
diff --git a/talk.py b/talk.py
new file mode 100644
index 0000000..6646926
--- /dev/null
+++ b/talk.py
@@ -0,0 +1,17 @@
+import pyttsx3
+
+engine = pyttsx3.init()
+voices = engine.getProperty('voices')
+
+# engine.setProperty('voice', voices[3].id)
+# engine.say("I will speak this text")
+
+# engine.runAndWait()
+# engine = pyttsx3.init()
+# voices = engine.getProperty('voices')
+# print(voices)
+for voice in voices:
+    engine.setProperty('voice', voice.id)
+    print(voice)
+    engine.say('The quick brown fox jumped over the lazy dog.')
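+# runAndWait() processes all of the setProperty()/say() commands queued above, in order.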
+engine.runAndWait()
\ No newline at end of file
diff --git a/test.html b/test.html
new file mode 100644
index 0000000..3103bd1
--- /dev/null
+++ b/test.html
@@ -0,0 +1,14 @@
+
+
+
+
+
+    test
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tmpdir_tts/hyperparams.yaml b/tmpdir_tts/hyperparams.yaml
new file mode 120000
index 0000000..e6e4f35
--- /dev/null
+++ b/tmpdir_tts/hyperparams.yaml
@@ -0,0 +1 @@
+/Users/sam/.cache/huggingface/hub/models--speechbrain--tts-tacotron2-ljspeech/snapshots/f0c9855a337493070f576ef94dacd0ed407e04f2/hyperparams.yaml
\ No newline at end of file
diff --git a/tmpdir_tts/model.ckpt b/tmpdir_tts/model.ckpt
new file mode 120000
index 0000000..97b3610
--- /dev/null
+++ b/tmpdir_tts/model.ckpt
@@ -0,0 +1 @@
+/Users/sam/.cache/huggingface/hub/models--speechbrain--tts-tacotron2-ljspeech/snapshots/f0c9855a337493070f576ef94dacd0ed407e04f2/model.ckpt
\ No newline at end of file
diff --git a/tmpdir_vocoder/generator.ckpt b/tmpdir_vocoder/generator.ckpt
new file mode 120000
index 0000000..84def2c
--- /dev/null
+++ b/tmpdir_vocoder/generator.ckpt
@@ -0,0 +1 @@
+/Users/sam/.cache/huggingface/hub/models--speechbrain--tts-hifigan-ljspeech/snapshots/e0cc1f9be5b65d4612013f26867ca600e98bc1b6/generator.ckpt
\ No newline at end of file
diff --git a/tmpdir_vocoder/hyperparams.yaml b/tmpdir_vocoder/hyperparams.yaml
new file mode 120000
index 0000000..c048ff9
--- /dev/null
+++ b/tmpdir_vocoder/hyperparams.yaml
@@ -0,0 +1 @@
+/Users/sam/.cache/huggingface/hub/models--speechbrain--tts-hifigan-ljspeech/snapshots/e0cc1f9be5b65d4612013f26867ca600e98bc1b6/hyperparams.yaml
\ No newline at end of file
diff --git a/tts.py b/tts.py
new file mode 100644
index 0000000..e341bab
--- /dev/null
+++ b/tts.py
@@ -0,0 +1,20 @@
+import torchaudio
+from speechbrain.pretrained import Tacotron2
+from speechbrain.pretrained import HIFIGAN
+
+import sounddevice as sd
+
+# Initialize the TTS model (Tacotron2) and the vocoder (HiFi-GAN)
+tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
+hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
+
+# Run the TTS model (text -> mel spectrogram)
+mel_output, mel_length, alignment = tacotron2.encode_text("This is an open-source toolkit for the development of speech technologies.")
+
+# Run the vocoder (mel spectrogram -> waveform)
+waveforms = hifi_gan.decode_batch(mel_output)
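+# decode_batch() returns waveforms shaped (batch, 1, samples) at 22.05 kHz (the LJSpeech rate).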
+print(waveforms)
+
+# Play the generated audio through the default output device.
+sd.play(waveforms.detach().cpu().squeeze().numpy(), samplerate=22050)
+sd.wait()
diff --git a/untitled.py b/untitled.py
new file mode 100644
index 0000000..c16a633
--- /dev/null
+++ b/untitled.py
@@ -0,0 +1,28 @@
+import time
+import speech_recognition as sr
+
+
+def callback(recognizer, audio):
+    try:
+        print("Processing...")
+        text = r.recognize_whisper(audio, language="english")
+        print(f"Whisper thinks you said {text}")
+
+        # TODO: Check for Apollo
+    except sr.UnknownValueError:
+        print("Whisper could not understand audio")
+    except sr.RequestError as e:
+        print("Could not request results from Whisper")
+
+
+r = sr.Recognizer()
+m = sr.Microphone()
+with m as source:
+    r.adjust_for_ambient_noise(source)  # we only need to calibrate once, before we start listening
+
+stop_listening = r.listen_in_background(m, callback)
+
+print("Listening...")
+while True:
+    time.sleep(0.1)  # keep the main thread alive while the background listener runs
+    # print('1')
diff --git a/voice.py b/voice.py
new file mode 100644
index 0000000..615a599
--- /dev/null
+++ b/voice.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+import time
+import speech_recognition as sr
+
+# this is called from the background thread
+def callback(recognizer, audio):
+    # received audio data, now we'll recognize it using Whisper
+    try:
+        print("Processing...")
+        text = r.recognize_whisper(audio, language="english")
+        print(f"Whisper thinks you said {text}")
+
+    except sr.UnknownValueError:
+        print("Whisper could not understand audio")
+    except sr.RequestError as e:
+        print("Could not request results from Whisper")
+
+r = sr.Recognizer()
+m = sr.Microphone()
+with m as source:
+    r.adjust_for_ambient_noise(source)  # we only need to calibrate once, before we start listening
+
+# start listening in the background (note that we don't have to do this inside a `with` statement)
+stop_listening = r.listen_in_background(m, callback)
+# `stop_listening` is now a function that, when called, stops background listening
+
+# do some unrelated computations for 5 seconds
+# for _ in range(50):
+#     time.sleep(0.1)  # we're still listening even though the main thread is doing other things
+#     print('0')
+
+# calling this function requests that the background listener stop listening
+# stop_listening(wait_for_stop=False)
+# do some more unrelated things
+
+print("Listening...")
+while True:
+    time.sleep(0.1)  # keep the main thread alive; the background listener is still running
+    # print('1')
\ No newline at end of file
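
The files in this patch are still separate experiments: main.py and voice.py transcribe microphone audio with Whisper, ai.py sends a prompt to ChatGPT through revChatGPT, and tts.py synthesizes speech with SpeechBrain and plays it. Below is a minimal, untested sketch (not part of the patch) of how a single listen / ask / speak turn could be glued together using only the calls that already appear above; the chatbot credentials are left as placeholders.

import sounddevice as sd
import speech_recognition as sr
from revChatGPT.V1 import Chatbot
from speechbrain.pretrained import Tacotron2, HIFIGAN

# Same configuration as ai.py (credentials omitted here).
chatbot = Chatbot(config={"email": "...", "password": "..."})
tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")

# 1. Listen for a single utterance and transcribe it with Whisper.
r = sr.Recognizer()
with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source)
    print("Listening...")
    audio = r.listen(source)
text = r.recognize_whisper(audio, language="english")
print(f"You said: {text}")

# 2. Ask ChatGPT; keep the last streamed message as the full reply.
reply = ""
for data in chatbot.ask(text):
    reply = data["message"]
print(f"Apollo: {reply}")

# 3. Synthesize the reply and play it at 22.05 kHz.
mel_output, mel_length, alignment = tacotron2.encode_text(reply)
waveforms = hifi_gan.decode_batch(mel_output)
sd.play(waveforms.detach().cpu().squeeze().numpy(), samplerate=22050)
sd.wait()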