Add a persisted `translated_articles` table plus the entity, repository,
service and BFF endpoints that read it.

Review notes on this revision of the patch:
  * list_articles now hands the AsyncSession to ArticleService. The service
    wraps its argument in a TranslatedArticleRepository itself, so passing an
    already-built repository made list_all() fail at `self.db.execute`.
  * _audio_url tolerates a trailing slash in settings.api_base_url.
  * Hunk line counts are unchanged; blob hashes on the "index" lines are
    carried over from the original patch and were not recomputed.

diff --git a/api/alembic/versions/20260327_0005_add_translated_articles.py b/api/alembic/versions/20260327_0005_add_translated_articles.py
new file mode 100644
index 0000000..961a837
--- /dev/null
+++ b/api/alembic/versions/20260327_0005_add_translated_articles.py
@@ -0,0 +1,43 @@
+"""add translated_articles table
+
+Revision ID: 0005
+Revises: 0004
+Create Date: 2026-03-27
+
+"""
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+revision: str = "0005"
+down_revision: Union[str, None] = "0004"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:  # create the table, then the indexes the list query filters/sorts on
+    op.create_table(
+        "translated_articles",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("published_at", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("source_language", sa.String(10), nullable=False),
+        sa.Column("source_title", sa.Text(), nullable=False),
+        sa.Column("source_body", sa.Text(), nullable=False),
+        sa.Column("target_language", sa.String(10), nullable=False),
+        sa.Column("target_complexities", postgresql.ARRAY(sa.String(5)), nullable=False),
+        sa.Column("target_title", sa.Text(), nullable=False),
+        sa.Column("target_body", sa.Text(), nullable=False),
+        sa.Column("audio_url", sa.Text(), nullable=True),
+        sa.Column("target_body_pos", postgresql.JSONB(), nullable=True),
+        sa.Column("target_body_transcript", postgresql.JSONB(), nullable=True),
+    )
+    op.create_index("ix_translated_articles_published_at", "translated_articles", ["published_at"])
+    op.create_index("ix_translated_articles_target_language", "translated_articles", ["target_language"])
+
+
+def downgrade() -> None:  # exact reverse of upgrade(): indexes first, then the table
+    op.drop_index("ix_translated_articles_target_language", table_name="translated_articles")
+    op.drop_index("ix_translated_articles_published_at", table_name="translated_articles")
+    op.drop_table("translated_articles")
diff --git a/api/app/config.py b/api/app/config.py
index 8a55b25..886b52d 100644
--- a/api/app/config.py
+++ b/api/app/config.py
@@ -9,6 +9,7 @@ class Settings(BaseSettings):
     deepgram_api_key: str
     gemini_api_key: str
     admin_user_emails: str = ""  # comma-separated list of admin email addresses
+    api_base_url: str = "http://localhost:8000"  # prefix used by the BFF to build /media/<key> audio URLs
     storage_endpoint_url: str
     storage_access_key: str
     storage_secret_key: str
diff --git a/api/app/domain/models/translated_article.py b/api/app/domain/models/translated_article.py
index 600aace..2800839 100644
--- a/api/app/domain/models/translated_article.py
+++ b/api/app/domain/models/translated_article.py
@@ -1,15 +1,18 @@
 from dataclasses import dataclass
+from datetime import datetime
 
-@dataclass
+
+@dataclass
 class TranslatedArticle:
-    id: str
-
-    source_lang: str
-    source_title: str
-    source_text: str
-
-    target_lang: str
+    id: str  # string form of the row's UUID primary key
+    published_at: datetime
+    source_language: str
+    source_title: str
+    source_body: str
+    target_language: str
+    target_complexities: list[str]
     target_title: str
-    target_text: str
-
-
+    target_body: str
+    audio_url: str | None  # value stored by the generation job (an audio key); the BFF prefixes it with api_base_url
+    target_body_pos: dict | None
+    target_body_transcript: dict | None
diff --git a/api/app/domain/services/article_service.py b/api/app/domain/services/article_service.py
index 4c63e71..f525e6e 100644
--- a/api/app/domain/services/article_service.py
+++ b/api/app/domain/services/article_service.py
@@ -1,30 +1,19 @@
 import re
 
+from sqlalchemy.ext.asyncio import AsyncSession
 from ..models.summarise_job import SummariseJob
 from ..models.translated_article import TranslatedArticle
+from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
 
 def first_heading(md: str) -> str | None:
     m = re.search(r'^#{1,2}\s+(.+)', md, re.MULTILINE)
     return m.group(1).strip() if m else None
 
 class ArticleService:
-    def __init__(self, summarise_job_repository):
-        self.summarise_job_repository = summarise_job_repository
+    def __init__(self, db: AsyncSession) -> None:  # takes the session, NOT a repository
+        self.translated_articles_repository = TranslatedArticleRepository(db)
 
-    async def get_all_articles(self) -> list[TranslatedArticle]:
-        summarise_jobs = await self.summarise_job_repository.list_all()
-        return summarise_jobs.map(self.summarise_job_to_translated_article)
-
-    def summarise_job_to_translated_article(
-        self,
-        summarise_job: SummariseJob,
-    ) -> TranslatedArticle:
-        return TranslatedArticle(
-            id=summarise_job.id,
-            source_lang=summarise_job.target_language,  # The source language for the article is the target language of the job
-            source_title=first_heading(summarise_job.translated_text) or "",
-            source_text=summarise_job.translated_text,
-            target_lang=summarise_job.source_language,  # The target language for the article is the source language of the job
-            target_title=first_heading(summarise_job.generated_text) or "",
-            target_text=summarise_job.generated_text,
-        )
+    async def get_all_articles(self, target_language: str) -> list[TranslatedArticle]:
+        """Return the translated articles whose target language is ``target_language``, newest first."""
+        articles = await self.translated_articles_repository.list_all(target_language)
+        return articles
diff --git a/api/app/outbound/postgres/entities/translated_article_entity.py b/api/app/outbound/postgres/entities/translated_article_entity.py
new file mode 100644
index 0000000..9d14eb2
--- /dev/null
+++ b/api/app/outbound/postgres/entities/translated_article_entity.py
@@ -0,0 +1,30 @@
+import uuid
+from datetime import datetime, timezone
+
+from sqlalchemy import String, Text, DateTime
+from sqlalchemy.orm import Mapped, mapped_column
+from sqlalchemy.dialects.postgresql import UUID, ARRAY, JSONB
+
+from ..database import Base
+
+
+class TranslatedArticleEntity(Base):  # ORM mapping for the table created by Alembic revision 0005
+    __tablename__ = "translated_articles"
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
+    published_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True),
+        default=lambda: datetime.now(timezone.utc),  # timezone-aware UTC timestamp when none is supplied
+    )
+    source_language: Mapped[str] = mapped_column(String(10), nullable=False)
+    source_title: Mapped[str] = mapped_column(Text, nullable=False)
+    source_body: Mapped[str] = mapped_column(Text, nullable=False)
+    target_language: Mapped[str] = mapped_column(String(10), nullable=False)
+    target_complexities: Mapped[list[str]] = mapped_column(ARRAY(String(5)), nullable=False)
+    target_title: Mapped[str] = mapped_column(Text, nullable=False)
+    target_body: Mapped[str] = mapped_column(Text, nullable=False)
+    audio_url: Mapped[str | None] = mapped_column(Text, nullable=True)
+    target_body_pos: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
+    target_body_transcript: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
diff --git a/api/app/outbound/postgres/repositories/translated_article_repository.py b/api/app/outbound/postgres/repositories/translated_article_repository.py
new file mode 100644
index 0000000..daf9513
--- /dev/null
+++ b/api/app/outbound/postgres/repositories/translated_article_repository.py
@@ -0,0 +1,72 @@
+import uuid
+from datetime import datetime, timezone
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from ..entities.translated_article_entity import TranslatedArticleEntity
+from ....domain.models.translated_article import TranslatedArticle
+
+
+class TranslatedArticleRepository:  # persistence for TranslatedArticle on top of an AsyncSession
+    def __init__(self, db: AsyncSession):
+        self.db = db
+
+    def _to_model(self, entity: TranslatedArticleEntity) -> TranslatedArticle:  # ORM row -> domain dataclass
+        return TranslatedArticle(
+            id=str(entity.id),  # the domain model exposes the UUID as a string
+            published_at=entity.published_at,
+            source_language=entity.source_language,
+            source_title=entity.source_title,
+            source_body=entity.source_body,
+            target_language=entity.target_language,
+            target_complexities=list(entity.target_complexities),
+            target_title=entity.target_title,
+            target_body=entity.target_body,
+            audio_url=entity.audio_url,
+            target_body_pos=entity.target_body_pos,
+            target_body_transcript=entity.target_body_transcript,
+        )
+
+    async def create(  # inserts one row, commits, and returns it as a domain model
+        self,
+        source_language: str,
+        source_title: str,
+        source_body: str,
+        target_language: str,
+        target_complexities: list[str],
+        target_title: str,
+        target_body: str,
+        audio_url: str | None,
+        target_body_pos: dict | None,
+        target_body_transcript: dict | None,
+    ) -> TranslatedArticle:
+        entity = TranslatedArticleEntity(
+            published_at=datetime.now(timezone.utc),
+            source_language=source_language,
+            source_title=source_title,
+            source_body=source_body,
+            target_language=target_language,
+            target_complexities=target_complexities,
+            target_title=target_title,
+            target_body=target_body,
+            audio_url=audio_url,
+            target_body_pos=target_body_pos,
+            target_body_transcript=target_body_transcript,
+        )
+        self.db.add(entity)
+        await self.db.commit()  # the repository commits itself; callers do not manage the transaction
+        await self.db.refresh(entity)  # reload the row's state after the commit before mapping it
+        return self._to_model(entity)
+
+    async def list_all(self, target_language: str) -> list[TranslatedArticle]:
+        result = await self.db.execute(
+            select(TranslatedArticleEntity)
+            .where(TranslatedArticleEntity.target_language == target_language)
+            .order_by(TranslatedArticleEntity.published_at.desc())  # newest first
+        )
+        return [self._to_model(e) for e in result.scalars().all()]
+
+    async def get_by_id(self, article_id: uuid.UUID) -> TranslatedArticle | None:
+        entity = await self.db.get(TranslatedArticleEntity, article_id)
+        return self._to_model(entity) if entity else None  # None when no row has this primary key
diff --git a/api/app/routers/api/generation.py b/api/app/routers/api/generation.py
index 1320122..d721256 100644
--- a/api/app/routers/api/generation.py
+++ b/api/app/routers/api/generation.py
@@ -14,6 +14,8 @@ from ...auth import require_admin
 from ...storage import upload_audio
 from ...outbound.postgres.database import get_db, AsyncSessionLocal
 from ...outbound.postgres.repositories import summarise_job_repository
+from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
+from ...domain.services.article_service import first_heading
 from ...outbound.anthropic.anthropic_client import AnthropicClient
 from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
 from ...outbound.deepl.deepl_client import DeepLClient
@@ -117,6 +119,19 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
 
             await summarise_job_repository.mark_succeeded(db, job, audio_key)
 
+            await TranslatedArticleRepository(db).create(  # NOTE(review): runs after mark_succeeded in the same try; a failure here also triggers mark_failed
+                source_language=request.source_language,  # NOTE(review): the removed ArticleService mapping paired translated_text with the job's *target* language; confirm source/target are not swapped
+                source_title=first_heading(translated_text) or "",
+                source_body=translated_text,
+                target_language=request.target_language,
+                target_complexities=[request.complexity_level],
+                target_title=first_heading(generated_text) or "",
+                target_body=generated_text,
+                audio_url=audio_key,  # stored as the key; the BFF router builds the public URL
+                target_body_pos=target_pos_data,
+                target_body_transcript=transcript,
+            )
+
         except Exception as exc:
             await summarise_job_repository.mark_failed(db, job, str(exc))
 
diff --git a/api/app/routers/bff/articles.py b/api/app/routers/bff/articles.py
index 5145bd8..0c32c06 100644
--- a/api/app/routers/bff/articles.py
+++ b/api/app/routers/bff/articles.py
@@ -1,30 +1,104 @@
+import uuid
+from datetime import datetime
+
 from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel
+from sqlalchemy.ext.asyncio import AsyncSession
 
+from ...auth import verify_token
+from ...config import settings
 from ...domain.services.article_service import ArticleService
-from ...outbound.postgres.database import get_db, AsyncSessionLocal
-from ...outbound.postgres.repositories.summarise_job_repository import PostgresSummariseJobRepository
+from ...outbound.postgres.database import get_db
+from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
+
+router = APIRouter(prefix="/articles", tags=["bff", "articles"])
 
-router = APIRouter(prefix="/articles", tags=["articles"])
-
-
-class ArticleResponse(BaseModel):
+class ArticleItem(BaseModel):  # list-view shape: titles and metadata only, no bodies
+    id: str
+    published_at: datetime
+    source_language: str
+    source_title: str
     target_language: str
-    complexity_level: str
-    input_texts: list[str]
+    target_complexities: list[str]
+    target_title: str
 
 
-class ArticlesResponse(BaseModel):
-    articles: list[ArticleResponse]
 
 
-@router.get("", response_model=ArticlesResponse, status_code=200)
-async def get_articles(
-    db = Depends(get_db),
-) -> ArticlesResponse:
-    service = ArticleService(PostgresSummariseJobRepository(db))
+class ArticleListResponse(BaseModel):
+    articles: list[ArticleItem]
+
+class ArticleDetail(BaseModel):  # detail-view shape: full bodies plus audio URL, POS data and transcript
+    id: str
+    published_at: datetime
+    source_language: str
+    source_title: str
+    source_body: str
+    target_language: str
+    target_complexities: list[str]
+    target_title: str
+    target_body: str
+    target_audio_url: str | None
+    target_body_pos: dict | None
+    target_body_transcript: dict | None
+
+
+def _audio_url(key: str | None) -> str | None:  # stored audio key -> absolute /media URL (None stays None)
+    if key is None:
+        return None
+    return f"{settings.api_base_url.rstrip('/')}/media/{key}"  # rstrip avoids '//media' when the base URL ends in '/'
+
+
+@router.get("", response_model=ArticleListResponse, status_code=200)
+async def list_articles(
+    target_language: str = 'fr',
+    db: AsyncSession = Depends(get_db),
+    _: dict = Depends(verify_token),
+) -> ArticleListResponse:
+    service = ArticleService(db)  # the service builds its own TranslatedArticleRepository from the session
+    articles = await service.get_all_articles(target_language=target_language)
+    return ArticleListResponse(
+        articles=[
+            ArticleItem(
+                id=a.id,
+                published_at=a.published_at,
+                source_language=a.source_language,
+                source_title=a.source_title,
+                target_language=a.target_language,
+                target_complexities=a.target_complexities,
+                target_title=a.target_title,
+            )
+            for a in articles
+        ]
+    )
+
+
+@router.get("/{article_id}", response_model=ArticleDetail, status_code=200)
+async def get_article(
+    article_id: str,
+    db: AsyncSession = Depends(get_db),
+    _: dict = Depends(verify_token),
+) -> ArticleDetail:
     try:
-        articles = await service.get_all_articles()
-        return ArticlesResponse(articles=articles)
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        uid = uuid.UUID(article_id)  # malformed IDs become a 400 below instead of reaching the database
+    except ValueError:
+        raise HTTPException(status_code=400, detail="Invalid article ID") from None
+
+    article = await TranslatedArticleRepository(db).get_by_id(uid)
+    if article is None:
+        raise HTTPException(status_code=404, detail="Article not found")
+
+    return ArticleDetail(
+        id=article.id,
+        published_at=article.published_at,
+        source_language=article.source_language,
+        source_title=article.source_title,
+        source_body=article.source_body,
+        target_language=article.target_language,
+        target_complexities=article.target_complexities,
+        target_title=article.target_title,
+        target_body=article.target_body,
+        target_audio_url=_audio_url(article.audio_url),
+        target_body_pos=article.target_body_pos,
+        target_body_transcript=article.target_body_transcript,
+    )