feat: Add the TranslatedArticle entity

This commit is contained in:
wilson 2026-03-27 11:04:05 +00:00
parent e05a62cda9
commit dbc921d98a
8 changed files with 276 additions and 49 deletions

View file

@ -0,0 +1,43 @@
"""add translated_articles table
Revision ID: 0005
Revises: 0004
Create Date: 2026-03-27
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0005"
down_revision: Union[str, None] = "0004"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``translated_articles`` table and its two lookup indexes.

    Every text column is NOT NULL; the audio URL and the two JSONB payloads
    (part-of-speech data and transcript) are optional.
    """
    table_name = "translated_articles"

    columns = (
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("published_at", sa.DateTime(timezone=True), nullable=False),
        # Source side of the article.
        sa.Column("source_language", sa.String(10), nullable=False),
        sa.Column("source_title", sa.Text(), nullable=False),
        sa.Column("source_body", sa.Text(), nullable=False),
        # Target side of the article.
        sa.Column("target_language", sa.String(10), nullable=False),
        sa.Column("target_complexities", postgresql.ARRAY(sa.String(5)), nullable=False),
        sa.Column("target_title", sa.Text(), nullable=False),
        sa.Column("target_body", sa.Text(), nullable=False),
        # Optional enrichments.
        sa.Column("audio_url", sa.Text(), nullable=True),
        sa.Column("target_body_pos", postgresql.JSONB(), nullable=True),
        sa.Column("target_body_transcript", postgresql.JSONB(), nullable=True),
    )
    op.create_table(table_name, *columns)

    # One single-column index per attribute; created in the same order as before.
    indexes = (
        ("ix_translated_articles_published_at", "published_at"),
        ("ix_translated_articles_target_language", "target_language"),
    )
    for index_name, column_name in indexes:
        op.create_index(index_name, table_name, [column_name])
def downgrade() -> None:
    """Undo ``upgrade``: drop both indexes, then the ``translated_articles`` table."""
    table_name = "translated_articles"

    # Indexes are dropped in the reverse of their creation order.
    for index_name in (
        "ix_translated_articles_target_language",
        "ix_translated_articles_published_at",
    ):
        op.drop_index(index_name, table_name=table_name)

    op.drop_table(table_name)

View file

@ -9,6 +9,7 @@ class Settings(BaseSettings):
deepgram_api_key: str deepgram_api_key: str
gemini_api_key: str gemini_api_key: str
admin_user_emails: str = "" # comma-separated list of admin email addresses admin_user_emails: str = "" # comma-separated list of admin email addresses
api_base_url: str = "http://localhost:8000"
storage_endpoint_url: str storage_endpoint_url: str
storage_access_key: str storage_access_key: str
storage_secret_key: str storage_secret_key: str

View file

@ -1,15 +1,18 @@
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime
@dataclass
@dataclass
class TranslatedArticle: class TranslatedArticle:
id: str id: str
published_at: datetime
source_lang: str source_language: str
source_title: str source_title: str
source_text: str source_body: str
target_language: str
target_lang: str target_complexities: list[str]
target_title: str target_title: str
target_text: str target_body: str
audio_url: str | None
target_body_pos: dict | None
target_body_transcript: dict | None

View file

@ -1,30 +1,19 @@
import re import re
from sqlalchemy.ext.asyncio import AsyncSession
from ..models.summarise_job import SummariseJob from ..models.summarise_job import SummariseJob
from ..models.translated_article import TranslatedArticle from ..models.translated_article import TranslatedArticle
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
def first_heading(md: str) -> str | None: def first_heading(md: str) -> str | None:
m = re.search(r'^#{1,2}\s+(.+)', md, re.MULTILINE) m = re.search(r'^#{1,2}\s+(.+)', md, re.MULTILINE)
return m.group(1).strip() if m else None return m.group(1).strip() if m else None
class ArticleService:
    """Domain service exposing read access to translated articles."""

    def __init__(self, db: AsyncSession) -> None:
        # The service owns its repository; callers hand over only the session.
        self.translated_articles_repository = TranslatedArticleRepository(db)

    async def get_all_articles(self, target_language: str) -> list[TranslatedArticle]:
        """Return every translated article whose target language is *target_language*."""
        return await self.translated_articles_repository.list_all(target_language)

View file

@ -0,0 +1,30 @@
import uuid
from datetime import datetime, timezone
from sqlalchemy import String, Text, DateTime
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID, ARRAY, JSONB
from ..database import Base
class TranslatedArticleEntity(Base):
    """ORM mapping for one row of the ``translated_articles`` table."""

    __tablename__ = "translated_articles"

    # Identity and publication time; each has a Python-side default callable.
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    published_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
    )

    # Source-language side.
    source_language: Mapped[str] = mapped_column(String(10), nullable=False)
    source_title: Mapped[str] = mapped_column(Text, nullable=False)
    source_body: Mapped[str] = mapped_column(Text, nullable=False)

    # Target-language side.
    target_language: Mapped[str] = mapped_column(String(10), nullable=False)
    target_complexities: Mapped[list[str]] = mapped_column(ARRAY(String(5)), nullable=False)
    target_title: Mapped[str] = mapped_column(Text, nullable=False)
    target_body: Mapped[str] = mapped_column(Text, nullable=False)

    # Optional enrichments.
    audio_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    target_body_pos: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    target_body_transcript: Mapped[dict | None] = mapped_column(JSONB, nullable=True)

View file

@ -0,0 +1,72 @@
import uuid
from datetime import datetime, timezone
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from ..entities.translated_article_entity import TranslatedArticleEntity
from ....domain.models.translated_article import TranslatedArticle
class TranslatedArticleRepository:
    """Stores and loads ``TranslatedArticle`` domain objects through an ``AsyncSession``."""

    def __init__(self, db: AsyncSession):
        self.db = db

    def _to_model(self, entity: TranslatedArticleEntity) -> TranslatedArticle:
        """Convert an ORM row into the domain dataclass.

        The UUID primary key becomes a string and the complexities are copied
        into a fresh list; all other attributes are passed through unchanged.
        """
        return TranslatedArticle(
            id=str(entity.id),
            published_at=entity.published_at,
            source_language=entity.source_language,
            source_title=entity.source_title,
            source_body=entity.source_body,
            target_language=entity.target_language,
            target_complexities=list(entity.target_complexities),
            target_title=entity.target_title,
            target_body=entity.target_body,
            audio_url=entity.audio_url,
            target_body_pos=entity.target_body_pos,
            target_body_transcript=entity.target_body_transcript,
        )

    async def create(
        self,
        source_language: str,
        source_title: str,
        source_body: str,
        target_language: str,
        target_complexities: list[str],
        target_title: str,
        target_body: str,
        audio_url: str | None,
        target_body_pos: dict | None,
        target_body_transcript: dict | None,
    ) -> TranslatedArticle:
        """Insert a new article stamped with the current UTC time and return it.

        The session is committed and the row refreshed before conversion to
        the domain model.
        """
        row = TranslatedArticleEntity(
            published_at=datetime.now(timezone.utc),
            source_language=source_language,
            source_title=source_title,
            source_body=source_body,
            target_language=target_language,
            target_complexities=target_complexities,
            target_title=target_title,
            target_body=target_body,
            audio_url=audio_url,
            target_body_pos=target_body_pos,
            target_body_transcript=target_body_transcript,
        )
        self.db.add(row)
        await self.db.commit()
        await self.db.refresh(row)
        return self._to_model(row)

    async def list_all(self, target_language: str) -> list[TranslatedArticle]:
        """Return all articles for *target_language*, newest ``published_at`` first."""
        statement = (
            select(TranslatedArticleEntity)
            .where(TranslatedArticleEntity.target_language == target_language)
            .order_by(TranslatedArticleEntity.published_at.desc())
        )
        result = await self.db.execute(statement)
        rows = result.scalars().all()
        return list(map(self._to_model, rows))

    async def get_by_id(self, article_id: uuid.UUID) -> TranslatedArticle | None:
        """Look up one article by primary key; ``None`` when no row is found."""
        row = await self.db.get(TranslatedArticleEntity, article_id)
        if not row:
            return None
        return self._to_model(row)

View file

@ -14,6 +14,8 @@ from ...auth import require_admin
from ...storage import upload_audio from ...storage import upload_audio
from ...outbound.postgres.database import get_db, AsyncSessionLocal from ...outbound.postgres.database import get_db, AsyncSessionLocal
from ...outbound.postgres.repositories import summarise_job_repository from ...outbound.postgres.repositories import summarise_job_repository
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
from ...domain.services.article_service import first_heading
from ...outbound.anthropic.anthropic_client import AnthropicClient from ...outbound.anthropic.anthropic_client import AnthropicClient
from ...outbound.deepgram.deepgram_client import LocalDeepgramClient from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
from ...outbound.deepl.deepl_client import DeepLClient from ...outbound.deepl.deepl_client import DeepLClient
@ -117,6 +119,19 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
await summarise_job_repository.mark_succeeded(db, job, audio_key) await summarise_job_repository.mark_succeeded(db, job, audio_key)
await TranslatedArticleRepository(db).create(
source_language=request.source_language,
source_title=first_heading(translated_text) or "",
source_body=translated_text,
target_language=request.target_language,
target_complexities=[request.complexity_level],
target_title=first_heading(generated_text) or "",
target_body=generated_text,
audio_url=audio_key,
target_body_pos=target_pos_data,
target_body_transcript=transcript,
)
except Exception as exc: except Exception as exc:
await summarise_job_repository.mark_failed(db, job, str(exc)) await summarise_job_repository.mark_failed(db, job, str(exc))

View file

@ -1,30 +1,104 @@
import uuid
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from ...auth import verify_token
from ...config import settings
from ...domain.services.article_service import ArticleService from ...domain.services.article_service import ArticleService
from ...outbound.postgres.database import get_db, AsyncSessionLocal from ...outbound.postgres.database import get_db
from ...outbound.postgres.repositories.summarise_job_repository import PostgresSummariseJobRepository from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
router = APIRouter(prefix="/articles", tags=["bff", "articles"])
router = APIRouter(prefix="/articles", tags=["articles"]) class ArticleItem(BaseModel):
id: str
published_at: datetime
class ArticleResponse(BaseModel): source_language: str
source_title: str
target_language: str target_language: str
complexity_level: str target_complexities: list[str]
input_texts: list[str] target_title: str
class ArticlesResponse(BaseModel):
articles: list[ArticleResponse]
@router.get("", response_model=ArticlesResponse, status_code=200) class ArticleListResponse(BaseModel):
async def get_articles( articles: list[ArticleItem]
db = Depends(get_db),
) -> ArticlesResponse:
service = ArticleService(PostgresSummariseJobRepository(db))
class ArticleDetail(BaseModel):
    """Response body for a single article, including both full texts."""

    # Identity and publication time.
    id: str
    published_at: datetime

    # Source-language side.
    source_language: str
    source_title: str
    source_body: str

    # Target-language side.
    target_language: str
    target_complexities: list[str]
    target_title: str
    target_body: str

    # Optional enrichments; ``None`` when the article has none.
    target_audio_url: str | None
    target_body_pos: dict | None
    target_body_transcript: dict | None
def _audio_url(key: str | None) -> str | None:
if key is None:
return None
return f"{settings.api_base_url}/media/{key}"
@router.get("", response_model=ArticleListResponse, status_code=200)
async def list_articles(
    target_language: str = 'fr',
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> ArticleListResponse:
    """List article summaries for one target language.

    Args:
        target_language: Language filter passed to the service; defaults to ``'fr'``.
        db: Database session provided by the ``get_db`` dependency.
        _: Result of the ``verify_token`` dependency; unused beyond guarding the route.

    Returns:
        An ``ArticleListResponse`` holding one ``ArticleItem`` (titles and
        metadata, no bodies) per article returned by the service.
    """
    # ArticleService takes the session and builds its own
    # TranslatedArticleRepository; handing it a repository would wrap a
    # repository inside another repository and fail on the first query.
    service = ArticleService(db)
    articles = await service.get_all_articles(target_language=target_language)

    items = [
        ArticleItem(
            id=a.id,
            published_at=a.published_at,
            source_language=a.source_language,
            source_title=a.source_title,
            target_language=a.target_language,
            target_complexities=a.target_complexities,
            target_title=a.target_title,
        )
        for a in articles
    ]
    return ArticleListResponse(articles=items)
@router.get("/{article_id}", response_model=ArticleDetail, status_code=200)
async def get_article(
    article_id: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> ArticleDetail:
    """Return the full detail of one article.

    Raises:
        HTTPException: 400 when *article_id* is not a valid UUID string,
            404 when no article with that id exists.
    """
    try:
        parsed_id = uuid.UUID(article_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid article ID")

    repository = TranslatedArticleRepository(db)
    found = await repository.get_by_id(parsed_id)
    if found is None:
        raise HTTPException(status_code=404, detail="Article not found")

    # The stored audio key is expanded to a URL; every other field is copied as-is.
    audio_link = _audio_url(found.audio_url)
    return ArticleDetail(
        id=found.id,
        published_at=found.published_at,
        source_language=found.source_language,
        source_title=found.source_title,
        source_body=found.source_body,
        target_language=found.target_language,
        target_complexities=found.target_complexities,
        target_title=found.target_title,
        target_body=found.target_body,
        target_audio_url=audio_link,
        target_body_pos=found.target_body_pos,
        target_body_transcript=found.target_body_transcript,
    )