"""Synthesize a sentence with SpeechBrain (Tacotron2 + HiFi-GAN) and play it.

Pipeline:
    1. Tacotron2 converts the input text into a mel spectrogram.
    2. HiFi-GAN (the vocoder) converts the mel spectrogram into a waveform.
    3. The waveform is played on the default audio output device.
"""

import platform

import sounddevice as sd
import torchaudio
from speechbrain.pretrained import HIFIGAN
from speechbrain.pretrained import Tacotron2

# Playback rate passed to the audio backend; the pretrained models loaded
# below are the LJSpeech ones.
SAMPLE_RATE = 22050

TEXT = "This is an open-source toolkit for the development of speech technologies."

# Initialize TTS (Tacotron2) and vocoder (HiFi-GAN).
# The pretrained checkpoints are fetched into the given `savedir` folders.
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tts",
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_vocoder",
)

# Running the TTS (text-to-spectrogram).
mel_output, mel_length, alignment = tacotron2.encode_text(TEXT)

# Running the vocoder (spectrogram-to-waveform).
waveforms = hifi_gan.decode_batch(mel_output)
print(waveforms)

# Per the SpeechBrain docs, `decode_batch` returns a [batch, 1, time] tensor,
# while the playback backends expect a 2-D CPU array shaped (time, channel).
# Take the first (only) batch item and transpose it; passing the raw 3-D
# tensor is rejected by `torchaudio.io.play_audio`.
playable = waveforms.detach().cpu()[0].transpose(0, 1).contiguous()

if platform.system() == "Darwin":
    # `torchaudio.io.play_audio` only supports macOS.
    torchaudio.io.play_audio(waveform=playable, sample_rate=SAMPLE_RATE)
else:
    # Cross-platform fallback. `sd.play` returns immediately, so block with
    # `sd.wait()` or the script would exit before the audio is heard.
    sd.play(playable.numpy(), SAMPLE_RATE)
    sd.wait()