language-learning-app/api/app/services/tts.py

39 lines
1.2 KiB
Python

import asyncio
from google import genai
from google.genai import types as genai_types
from ..config import settings
from ..storage import pcm_to_wav
# Prebuilt voice name keyed by two-letter language code.
# Note that "fr" and "en" deliberately share the same voice ("Kore").
# NOTE(review): presumably the values are what callers pass as `voice`
# to generate_audio() -- confirm against the call sites.
VOICE_BY_LANGUAGE: dict[str, str] = {
    "fr": "Kore",
    "es": "Charon",
    "it": "Aoede",
    "de": "Fenrir",
    "en": "Kore",
}
def _extract_pcm(response) -> bytes:
    """Return the first inline audio payload found in a Gemini response.

    Every level of the response (candidates, content, parts, inline_data,
    data) is optional in the google-genai SDK, so each one is checked
    instead of being indexed blindly.

    Raises:
        RuntimeError: If no part of any candidate carries inline data.
    """
    for candidate in response.candidates or []:
        content = candidate.content
        parts = content.parts if content is not None else None
        for part in parts or []:
            blob = part.inline_data
            if blob is not None and blob.data:
                return blob.data
    raise RuntimeError("Gemini TTS response contained no audio data")


async def generate_audio(text: str, voice: str) -> bytes:
    """Generate TTS audio and return WAV bytes.

    The google-genai client call is synchronous, so it is executed in a
    worker thread via ``asyncio.to_thread`` to avoid blocking the event loop.

    Args:
        text: The text to synthesize; sent as the request ``contents``.
        voice: Name of a prebuilt Gemini voice, forwarded as ``voice_name``
            (e.g. one of the values in ``VOICE_BY_LANGUAGE``).

    Returns:
        The result of ``pcm_to_wav`` applied to the audio payload returned
        by the model.

    Raises:
        RuntimeError: If the model response contains no audio payload
            (for example when the request was blocked or came back empty).
    """

    def _call() -> bytes:
        # A client is created per call, using the API key from settings.
        client = genai.Client(api_key=settings.gemini_api_key)
        speech_config = genai_types.SpeechConfig(
            voice_config=genai_types.VoiceConfig(
                prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(
                    voice_name=voice,
                )
            )
        )
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=genai_types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=speech_config,
            ),
        )
        # Fail with a clear message rather than an opaque
        # TypeError/AttributeError/IndexError when no audio came back.
        return pcm_to_wav(_extract_pcm(response))

    return await asyncio.to_thread(_call)