feat: Add the TranslatedArticle entity

This commit is contained in:
wilson 2026-03-27 11:04:05 +00:00
parent e05a62cda9
commit dbc921d98a
8 changed files with 276 additions and 49 deletions

View file

@ -0,0 +1,43 @@
"""add translated_articles table
Revision ID: 0005
Revises: 0004
Create Date: 2026-03-27
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# Revision identifiers read by Alembic to place this migration in the chain.
revision: str = "0005"
# Parent revision; this migration is applied on top of 0004.
down_revision: Union[str, None] = "0004"
# No branch labels or cross-revision dependencies are declared here.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``translated_articles`` table and its two single-column indexes."""
    table_name = "translated_articles"

    # Columns are listed in the order they are passed to create_table.
    columns = [
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("published_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("source_language", sa.String(10), nullable=False),
        sa.Column("source_title", sa.Text(), nullable=False),
        sa.Column("source_body", sa.Text(), nullable=False),
        sa.Column("target_language", sa.String(10), nullable=False),
        sa.Column("target_complexities", postgresql.ARRAY(sa.String(5)), nullable=False),
        sa.Column("target_title", sa.Text(), nullable=False),
        sa.Column("target_body", sa.Text(), nullable=False),
        # The three columns below are the only nullable ones.
        sa.Column("audio_url", sa.Text(), nullable=True),
        sa.Column("target_body_pos", postgresql.JSONB(), nullable=True),
        sa.Column("target_body_transcript", postgresql.JSONB(), nullable=True),
    ]
    op.create_table(table_name, *columns)

    # (index name, indexed column) pairs, created in this order.
    indexes = (
        ("ix_translated_articles_published_at", "published_at"),
        ("ix_translated_articles_target_language", "target_language"),
    )
    for index_name, column_name in indexes:
        op.create_index(index_name, table_name, [column_name])
def downgrade() -> None:
    """Drop both indexes and then the ``translated_articles`` table."""
    table = "translated_articles"

    # The target_language index is dropped first, then the published_at one.
    for index_name in (
        "ix_translated_articles_target_language",
        "ix_translated_articles_published_at",
    ):
        op.drop_index(index_name, table_name=table)

    op.drop_table(table)

View file

@ -9,6 +9,7 @@ class Settings(BaseSettings):
deepgram_api_key: str
gemini_api_key: str
admin_user_emails: str = "" # comma-separated list of admin email addresses
api_base_url: str = "http://localhost:8000"
storage_endpoint_url: str
storage_access_key: str
storage_secret_key: str

View file

@ -1,15 +1,18 @@
from dataclasses import dataclass
from datetime import datetime
@dataclass
class TranslatedArticle:
id: str
source_lang: str
published_at: datetime
source_language: str
source_title: str
source_text: str
target_lang: str
source_body: str
target_language: str
target_complexities: list[str]
target_title: str
target_text: str
target_body: str
audio_url: str | None
target_body_pos: dict | None
target_body_transcript: dict | None

View file

@ -1,30 +1,19 @@
import re
from sqlalchemy.ext.asyncio import AsyncSession
from ..models.summarise_job import SummariseJob
from ..models.translated_article import TranslatedArticle
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
def first_heading(md: str) -> str | None:
m = re.search(r'^#{1,2}\s+(.+)', md, re.MULTILINE)
return m.group(1).strip() if m else None
class ArticleService:
    """Read-side service for translated articles.

    The service owns its ``TranslatedArticleRepository``; callers hand it the
    database session, not a repository instance.
    """

    def __init__(self, db: AsyncSession) -> None:
        """Build the repository this service reads from on top of *db*."""
        self.translated_articles_repository = TranslatedArticleRepository(db)

    async def get_all_articles(self, target_language: str) -> list[TranslatedArticle]:
        """Return the articles whose target language equals *target_language*.

        The result is whatever the repository's ``list_all`` yields for that
        language, in the repository's order.
        """
        return await self.translated_articles_repository.list_all(target_language)

View file

@ -0,0 +1,30 @@
import uuid
from datetime import datetime, timezone
from sqlalchemy import String, Text, DateTime
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID, ARRAY, JSONB
from ..database import Base
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class TranslatedArticleEntity(Base):
    """ORM mapping for one row of the ``translated_articles`` table."""

    __tablename__ = "translated_articles"

    # Primary key; a random UUID is generated on the Python side when omitted.
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Defaults to the insertion time in UTC when the caller supplies nothing.
    published_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utc_now)

    # Source-language side.
    source_language: Mapped[str] = mapped_column(String(10), nullable=False)
    source_title: Mapped[str] = mapped_column(Text, nullable=False)
    source_body: Mapped[str] = mapped_column(Text, nullable=False)

    # Target-language side.
    target_language: Mapped[str] = mapped_column(String(10), nullable=False)
    target_complexities: Mapped[list[str]] = mapped_column(ARRAY(String(5)), nullable=False)
    target_title: Mapped[str] = mapped_column(Text, nullable=False)
    target_body: Mapped[str] = mapped_column(Text, nullable=False)

    # Optional extras.
    audio_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    target_body_pos: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    target_body_transcript: Mapped[dict | None] = mapped_column(JSONB, nullable=True)

View file

@ -0,0 +1,72 @@
import uuid
from datetime import datetime, timezone
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from ..entities.translated_article_entity import TranslatedArticleEntity
from ....domain.models.translated_article import TranslatedArticle
class TranslatedArticleRepository:
    """Stores and loads translated articles through an ``AsyncSession``."""

    def __init__(self, db: AsyncSession):
        self.db = db

    def _to_model(self, entity: TranslatedArticleEntity) -> TranslatedArticle:
        """Convert an ORM row into the domain ``TranslatedArticle``."""
        values = {
            # The domain model carries the UUID as a string.
            "id": str(entity.id),
            "published_at": entity.published_at,
            "source_language": entity.source_language,
            "source_title": entity.source_title,
            "source_body": entity.source_body,
            "target_language": entity.target_language,
            # Copied into a fresh list.
            "target_complexities": list(entity.target_complexities),
            "target_title": entity.target_title,
            "target_body": entity.target_body,
            "audio_url": entity.audio_url,
            "target_body_pos": entity.target_body_pos,
            "target_body_transcript": entity.target_body_transcript,
        }
        return TranslatedArticle(**values)

    async def create(
        self,
        source_language: str,
        source_title: str,
        source_body: str,
        target_language: str,
        target_complexities: list[str],
        target_title: str,
        target_body: str,
        audio_url: str | None,
        target_body_pos: dict | None,
        target_body_transcript: dict | None,
    ) -> TranslatedArticle:
        """Insert a new article stamped with the current UTC time and return it.

        The session is committed and the row refreshed before conversion.
        """
        now = datetime.now(timezone.utc)
        row = TranslatedArticleEntity(
            published_at=now,
            source_language=source_language,
            source_title=source_title,
            source_body=source_body,
            target_language=target_language,
            target_complexities=target_complexities,
            target_title=target_title,
            target_body=target_body,
            audio_url=audio_url,
            target_body_pos=target_body_pos,
            target_body_transcript=target_body_transcript,
        )
        session = self.db
        session.add(row)
        await session.commit()
        await session.refresh(row)
        return self._to_model(row)

    async def list_all(self, target_language: str) -> list[TranslatedArticle]:
        """Return all articles for *target_language*, newest ``published_at`` first."""
        statement = (
            select(TranslatedArticleEntity)
            .where(TranslatedArticleEntity.target_language == target_language)
            .order_by(TranslatedArticleEntity.published_at.desc())
        )
        result = await self.db.execute(statement)
        rows = result.scalars().all()
        return list(map(self._to_model, rows))

    async def get_by_id(self, article_id: uuid.UUID) -> TranslatedArticle | None:
        """Return the article with primary key *article_id*, or ``None`` if absent."""
        row = await self.db.get(TranslatedArticleEntity, article_id)
        if not row:
            return None
        return self._to_model(row)

View file

@ -14,6 +14,8 @@ from ...auth import require_admin
from ...storage import upload_audio
from ...outbound.postgres.database import get_db, AsyncSessionLocal
from ...outbound.postgres.repositories import summarise_job_repository
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
from ...domain.services.article_service import first_heading
from ...outbound.anthropic.anthropic_client import AnthropicClient
from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
from ...outbound.deepl.deepl_client import DeepLClient
@ -117,6 +119,19 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
await summarise_job_repository.mark_succeeded(db, job, audio_key)
await TranslatedArticleRepository(db).create(
source_language=request.source_language,
source_title=first_heading(translated_text) or "",
source_body=translated_text,
target_language=request.target_language,
target_complexities=[request.complexity_level],
target_title=first_heading(generated_text) or "",
target_body=generated_text,
audio_url=audio_key,
target_body_pos=target_pos_data,
target_body_transcript=transcript,
)
except Exception as exc:
await summarise_job_repository.mark_failed(db, job, str(exc))

View file

@ -1,30 +1,104 @@
import uuid
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from ...auth import verify_token
from ...config import settings
from ...domain.services.article_service import ArticleService
from ...outbound.postgres.database import get_db, AsyncSessionLocal
from ...outbound.postgres.repositories.summarise_job_repository import PostgresSummariseJobRepository
from ...outbound.postgres.database import get_db
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
# Router for the article endpoints; every route below is mounted under /articles.
# A single assignment: an earlier binding with the extra "bff" tag was
# immediately overwritten and therefore never took effect.
router = APIRouter(prefix="/articles", tags=["articles"])
class ArticleItem(BaseModel):
    """One entry of the article list: identifiers and titles, without bodies."""

    id: str
    published_at: datetime
    source_language: str
    source_title: str
    target_language: str
    target_complexities: list[str]
    target_title: str


class ArticleListResponse(BaseModel):
    """Response envelope of the article list endpoint."""

    articles: list[ArticleItem]
class ArticleDetail(BaseModel):
    """Full article payload returned by the single-article endpoint."""

    id: str
    published_at: datetime

    # Source-language side.
    source_language: str
    source_title: str
    source_body: str

    # Target-language side.
    target_language: str
    target_complexities: list[str]
    target_title: str
    target_body: str

    # Optional extras; each may be null in the response.
    target_audio_url: str | None
    target_body_pos: dict | None
    target_body_transcript: dict | None
def _audio_url(key: str | None) -> str | None:
    """Build the ``/media/`` URL for a stored audio key.

    Args:
        key: Storage key of the audio file, or ``None`` when there is none.

    Returns:
        ``<api_base_url>/media/<key>``, or ``None`` when *key* is ``None``.
    """
    if key is None:
        return None
    # Drop any trailing slash from the configured base so that a value such
    # as "http://host/" does not produce "http://host//media/...".
    base = settings.api_base_url.rstrip("/")
    return f"{base}/media/{key}"
@router.get("", response_model=ArticleListResponse, status_code=200)
async def list_articles(
    target_language: str = 'fr',
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> ArticleListResponse:
    """List the articles available for *target_language* (default ``fr``).

    Requires a verified token. Each entry carries identifiers and titles
    only; article bodies are not included in the list.
    """
    # ArticleService builds its own TranslatedArticleRepository from the
    # session, so it must receive the session itself. Passing a repository
    # here would leave the inner repository calling ``execute`` on another
    # repository instead of on the session.
    service = ArticleService(db)
    articles = await service.get_all_articles(target_language=target_language)

    items = [
        ArticleItem(
            id=a.id,
            published_at=a.published_at,
            source_language=a.source_language,
            source_title=a.source_title,
            target_language=a.target_language,
            target_complexities=a.target_complexities,
            target_title=a.target_title,
        )
        for a in articles
    ]
    return ArticleListResponse(articles=items)
@router.get("/{article_id}", response_model=ArticleDetail, status_code=200)
async def get_article(
    article_id: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> ArticleDetail:
    """Return one article in full, looked up by its UUID.

    Requires a verified token.

    Raises:
        HTTPException: 400 when *article_id* is not a valid UUID string,
            404 when no article has that id.
    """
    # The try block holds only the UUID parse; leftover list-endpoint
    # statements that referenced an undefined ``service`` are gone.
    try:
        uid = uuid.UUID(article_id)
    except ValueError:
        # The parse error adds nothing for the client, so it is not chained.
        raise HTTPException(status_code=400, detail="Invalid article ID") from None

    article = await TranslatedArticleRepository(db).get_by_id(uid)
    if article is None:
        raise HTTPException(status_code=404, detail="Article not found")

    return ArticleDetail(
        id=article.id,
        published_at=article.published_at,
        source_language=article.source_language,
        source_title=article.source_title,
        source_body=article.source_body,
        target_language=article.target_language,
        target_complexities=article.target_complexities,
        target_title=article.target_title,
        target_body=article.target_body,
        # The stored value is a storage key; expose it as a URL.
        target_audio_url=_audio_url(article.audio_url),
        target_body_pos=article.target_body_pos,
        target_body_transcript=article.target_body_transcript,
    )