You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
21 lines
863 B
Python
21 lines
863 B
Python
# Text-to-speech demo: Tacotron2 turns text into a mel spectrogram, HiFi-GAN
# turns the spectrogram into a waveform, and the result is played back.

import torchaudio
# NOTE(review): newer SpeechBrain releases appear to expose these classes under
# `speechbrain.inference`; the original import path is kept here — confirm
# against the installed version.
from speechbrain.pretrained import Tacotron2
from speechbrain.pretrained import HIFIGAN

import sounddevice as sd

# Sample rate handed to the playback call. Presumably matches the LJSpeech
# checkpoints loaded below — verify against the model cards.
SAMPLE_RATE = 22050

# Initialize TTS (Tacotron2) and vocoder (HiFi-GAN).
# `savedir` is where the downloaded checkpoints are cached locally.
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tts",
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_vocoder",
)

# Running the TTS (text-to-spectrogram).
mel_output, mel_length, alignment = tacotron2.encode_text(
    "This is an open-source toolkit for the development of speech technologies."
)

# Running the vocoder (spectrogram-to-waveform).
waveforms = hifi_gan.decode_batch(mel_output)
print(waveforms)

# `play_audio` only accepts a 2D (time, channels) tensor, whereas the vocoder
# output is documented as [batch, 1, time]. Take the first (only) utterance,
# drop autograd/device state, and transpose to (time, channels) for playback.
playable = waveforms[0].detach().cpu().t().contiguous()

# NOTE(review): torchaudio documents `play_audio` as macOS-only. A portable
# alternative using the already-imported sounddevice would be:
#     sd.play(playable.numpy(), SAMPLE_RATE); sd.wait()
torchaudio.io.play_audio(waveform=playable, sample_rate=SAMPLE_RATE)