# language-learning-app/api/app/domain/services/summarise_service.py
# 2026-06-02 21:02:50 +01:00
#
# 107 lines
# 4 KiB
# Python

import logging
import re
import uuid
from opentelemetry.trace import get_tracer
from sqlalchemy.ext.asyncio import AsyncSession
from app.outbound.postgres.repositories.article_repository import ArticleRepository
from ...languages import SUPPORTED_LANGUAGES
from ...outbound.anthropic.anthropic_client import AnthropicClient
from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
from ...outbound.deepl.deepl_client import DeepLClient
from ...outbound.gemini.gemini_client import GeminiClient
from ...outbound.spacy.spacy_client import SpacyClient
from ...outbound.storage_client import get_storage_client
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# OpenTelemetry tracer used to open the spans created in this module.
tracer = get_tracer(__name__)
class SummariseService:
    """Generate a summary article and persist the data derived from it.

    The pipeline in :meth:`summarise_article` asks the Anthropic client for
    a summary, stores its title and body, stores the spaCy part-of-speech
    data for the body, and finally generates and uploads an audio rendering.
    """

    def __init__(
        self,
        anthropic_client: AnthropicClient,
        deepgram_client: LocalDeepgramClient,
        deepl_client: DeepLClient,
        gemini_client: GeminiClient,
        spacy_client: SpacyClient,
        article_repository: ArticleRepository,
    ) -> None:
        """Store the outbound clients and the repository used by the service."""
        self.anthropic_client = anthropic_client
        self.deepgram_client = deepgram_client
        self.deepl_client = deepl_client
        self.gemini_client = gemini_client
        self.spacy_client = spacy_client
        self.article_repository = article_repository

    def _split_title_and_body(self, text: str) -> tuple[str, str]:
        """Split generated text into ``(title, body)``.

        The first non-blank line is the title, with any leading ``#``
        heading markers and surrounding whitespace removed; everything
        after it is the body. Empty or whitespace-only input yields
        ``("", "")``.
        """
        # Strip first so that leading blank lines in the generated text do
        # not produce an empty title with the heading left in the body.
        lines = text.strip().splitlines()
        if not lines:
            return "", ""
        title = lines[0].lstrip("#").strip()
        body = "\n".join(lines[1:]).strip()
        return title, body

    async def summarise_article(
        self,
        article_id: uuid.UUID,
        target_language: str,
        complexity_level: str,
        input_text: str,
    ) -> None:
        """Summarise ``input_text`` and store the results against the article.

        Steps, each wrapped in its own tracing span:

        1. Generate the summary and save its title and body.
        2. Compute part-of-speech data for the body and save it.
        3. Generate audio, upload it as ``audio/<article_id>.wav`` and save
           the storage key.

        Args:
            article_id: Identifier of the article row to update.
            target_language: Key into ``SUPPORTED_LANGUAGES``.
            complexity_level: Passed through to the summary generation call.
            input_text: Source content to summarise.

        Raises:
            RuntimeError: If the summary generation returns ``None``.
            Exception: Any other failure (including an unknown
                ``target_language``) is logged and re-raised unchanged.
        """
        logger.info(
            "Summarising article %s with target language %s "
            "and complexity level %s...",
            article_id,
            target_language,
            complexity_level,
        )
        with tracer.start_as_current_span("summarise_article"):
            try:
                with tracer.start_as_current_span("generate_title_and_text"):
                    language_name = SUPPORTED_LANGUAGES[target_language]
                    generated_text = await AnthropicClient.retry(
                        self.anthropic_client.create_summary_article,
                        content_to_summarise=input_text,
                        complexity_level=complexity_level,
                        to_language=language_name,
                        length_preference="200-400 words",
                    )
                    if generated_text is None:
                        # A bare ``raise`` here had no active exception and
                        # surfaced as an opaque "No active exception to
                        # re-raise"; raise the same type with a real message.
                        raise RuntimeError(
                            f"No text was generated for article {article_id}"
                        )
                    generated_title, generated_text_without_title = (
                        self._split_title_and_body(generated_text)
                    )
                    await self.article_repository.update_title_and_text(
                        article_id, generated_title, generated_text_without_title
                    )

                with tracer.start_as_current_span("generate_linguistic_data"):
                    text_linguistic_data = self.spacy_client.get_parts_of_speech(
                        generated_text_without_title, target_language
                    )
                    await self.article_repository.update_linguistic_data(
                        article_id, text_linguistic_data
                    )

                with tracer.start_as_current_span("generate_voice"):
                    voice = self.gemini_client.get_voice_by_language(target_language)
                    # NOTE(review): audio is generated from the full text,
                    # title line included (with any leading '#') -- confirm
                    # this is intended rather than the title-less body.
                    wav_bytes = await self.gemini_client.generate_audio(
                        generated_text, voice
                    )
                    audio_key = f"audio/{article_id}.wav"
                    # NOTE(review): upload is called synchronously inside a
                    # coroutine; if it does blocking I/O it will stall the
                    # event loop -- confirm against the storage client.
                    get_storage_client().upload(audio_key, wav_bytes)
                    await self.article_repository.update_audio_key(
                        article_id, audio_key
                    )
            except Exception:
                # Log with traceback, then re-raise the original exception
                # untouched so callers see the real failure.
                logger.exception("Failed to summarise article %s", article_id)
                raise