import logging
import re
import uuid

from opentelemetry.trace import get_tracer
from sqlalchemy.ext.asyncio import AsyncSession

from app.outbound.postgres.repositories.article_repository import ArticleRepository

from ...languages import SUPPORTED_LANGUAGES
from ...outbound.anthropic.anthropic_client import AnthropicClient
from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
from ...outbound.deepl.deepl_client import DeepLClient
from ...outbound.gemini.gemini_client import GeminiClient
from ...outbound.spacy.spacy_client import SpacyClient
from ...outbound.storage_client import get_storage_client

logger = logging.getLogger(__name__)
tracer = get_tracer(__name__)


class SummariseService:
    """Generate a summary article and persist its text, linguistic data and audio.

    The service holds the outbound clients and the article repository it is
    constructed with. Within this class only the Anthropic, spaCy and Gemini
    clients and the repository are used; the Deepgram and DeepL clients are
    stored but not referenced by any method defined here.
    """

    def __init__(
        self,
        anthropic_client: AnthropicClient,
        deepgram_client: LocalDeepgramClient,
        deepl_client: DeepLClient,
        gemini_client: GeminiClient,
        spacy_client: SpacyClient,
        article_repository: ArticleRepository,
    ) -> None:
        """Store the injected clients and repository on the instance."""
        self.anthropic_client = anthropic_client
        self.deepgram_client = deepgram_client
        self.deepl_client = deepl_client
        self.gemini_client = gemini_client
        self.spacy_client = spacy_client
        self.article_repository = article_repository

    def _split_title_and_body(self, text: str) -> tuple[str, str]:
        """Split *text* into a title (first line) and a body (the rest).

        Leading ``#`` characters and surrounding whitespace are removed from
        the first line to form the title. The remaining lines are re-joined
        with newlines and stripped to form the body.

        Returns:
            ``(title, body)``; ``("", "")`` when *text* contains no lines.
        """
        lines = text.splitlines()
        if not lines:
            return "", ""
        title = lines[0].lstrip("#").strip()
        body = "\n".join(lines[1:]).strip()
        return title, body

    async def summarise_article(
        self,
        article_id: uuid.UUID,
        target_language: str,
        complexity_level: str,
        input_text: str,
    ) -> None:
        """Summarise *input_text* and persist the results for *article_id*.

        Steps, each in its own tracing span:

        1. Generate the summary text, split it into title and body, and store
           both through the repository.
        2. Compute parts-of-speech data for the body and store it.
        3. Generate audio for the generated text, upload it under
           ``audio/<article_id>.wav`` and store that key.

        Args:
            article_id: Identifier of the article being updated.
            target_language: Key looked up in ``SUPPORTED_LANGUAGES`` and also
                passed to the spaCy and Gemini clients.
            complexity_level: Forwarded to the summary generation call.
            input_text: The content to summarise.

        Raises:
            RuntimeError: If the summary generation call returns ``None``.
            Exception: Any other failure (including a failed
                ``SUPPORTED_LANGUAGES`` lookup) is logged and re-raised.
        """
        logger.info(
            "Summarising article %s with target language %s and complexity level %s...",
            article_id,
            target_language,
            complexity_level,
        )

        with tracer.start_as_current_span("summarise_article"):
            try:
                with tracer.start_as_current_span("generate_title_and_text"):
                    language_name = SUPPORTED_LANGUAGES[target_language]
                    generated_text = await AnthropicClient.retry(
                        self.anthropic_client.create_summary_article,
                        content_to_summarise=input_text,
                        complexity_level=complexity_level,
                        to_language=language_name,
                        length_preference="200-400 words",
                    )

                    if generated_text is None:
                        # A bare `raise` here had no active exception to
                        # re-raise and surfaced as an opaque RuntimeError;
                        # raise the same type with a meaningful message.
                        raise RuntimeError(
                            f"No summary text was generated for article {article_id}"
                        )

                    generated_title, generated_text_without_title = (
                        self._split_title_and_body(generated_text)
                    )
                    await self.article_repository.update_title_and_text(
                        article_id, generated_title, generated_text_without_title
                    )

                with tracer.start_as_current_span("generate_linguistic_data"):
                    text_linguistic_data = self.spacy_client.get_parts_of_speech(
                        generated_text_without_title, target_language
                    )
                    await self.article_repository.update_linguistic_data(
                        article_id, text_linguistic_data
                    )

                with tracer.start_as_current_span("generate_voice"):
                    voice = self.gemini_client.get_voice_by_language(target_language)
                    # The full generated text (title line included) is sent
                    # for audio generation, not just the body.
                    wav_bytes = await self.gemini_client.generate_audio(
                        generated_text, voice
                    )
                    audio_key = f"audio/{article_id}.wav"
                    # NOTE(review): upload is not awaited; if it performs
                    # blocking I/O it will stall the event loop - confirm.
                    get_storage_client().upload(audio_key, wav_bytes)
                    await self.article_repository.update_audio_key(
                        article_id, audio_key
                    )
            except Exception:
                # Log with traceback, then propagate unchanged to the caller.
                logger.exception("Failed to summarise article %s", article_id)
                raise