40 lines
1.5 KiB
Python
40 lines
1.5 KiB
Python
from pathlib import Path
|
|
|
|
from loguru import logger
|
|
|
|
from EngineABC import EngineABC, ModelDescription, Argument
|
|
from to_wav import ndarray2wav
|
|
from .TeraTTS import TTS
|
|
|
|
|
|
class TeraTTSEngine(EngineABC):
    """Engine that exposes a fixed set of TeraTTS speakers.

    Every speaker is instantiated once in ``__init__`` and kept in
    ``self.speakers`` under its short name; ``discovery`` advertises those
    names and ``synth`` dispatches to the matching ``TTS`` instance.
    """

    def __init__(self, save_path: 'Path'):
        """Create one ``TTS`` instance per known speaker.

        Args:
            save_path: Base directory handed to the parent constructor; the
                ``tts`` sub-path of it is passed (as a string) to each
                ``TTS`` instance as its ``save_path``.
        """
        super().__init__(save_path)
        self.speakers: dict[str, TTS] = {}

        for speaker_name in (
            'natasha-g2p-vits',
            'glados2-g2p-vits',
            'glados-g2p-vits',
            'girl_nice-g2p-vits',
        ):
            logger.debug(f"Loading speaker: {speaker_name}")
            model_id = f"TeraTTS/{speaker_name}"
            storage_dir = str(save_path / 'tts')
            self.speakers[speaker_name] = TTS(
                model_id,
                add_time_to_end=1.0,
                save_path=storage_dir,
            )

    def discovery(self) -> tuple[ModelDescription, ...]:
        """Describe every loaded speaker.

        Returns:
            A tuple with one ``ModelDescription`` per key of
            ``self.speakers``, each declaring a single float argument.
        """
        engine_name = self.__class__.__name__
        found = []
        for model_name in self.speakers:
            # NOTE(review): the 'lenght_scale' spelling is kept as-is; synth()
            # forwards caller kwargs straight into the TTS call, so this key
            # presumably has to match that callable's keyword -- confirm
            # before "correcting" it.
            arguments = {
                'lenght_scale': Argument(
                    type='float',
                    description="'length_scale' можно использовать для замедления аудио для лучшего звучания, по умолчанию 1.1",
                ),
            }
            found.append(
                ModelDescription(
                    engine=engine_name,
                    name=model_name,
                    arguments=arguments,
                    description='Вроде ru',
                )
            )
        return tuple(found)

    def synth(self, text: str, model: str, **kwargs) -> bytes:
        """Synthesize ``text`` with the speaker registered as ``model``.

        Args:
            text: Text to be spoken.
            model: Key into ``self.speakers`` selecting the voice.
            **kwargs: Passed through unchanged to the ``TTS`` call.

        Returns:
            The result of ``ndarray2wav`` applied to the synthesized audio
            with ``sample_rate=22050``.

        Raises:
            KeyError: If ``model`` is not a key of ``self.speakers``.
        """
        speaker = self.speakers[model]
        audio = speaker(text, **kwargs)
        return ndarray2wav(audio, sample_rate=22050)
|