feat: Add the TranslatedArticle entity
This commit is contained in:
parent
e05a62cda9
commit
dbc921d98a
8 changed files with 276 additions and 49 deletions
|
|
@ -0,0 +1,43 @@
|
||||||
|
"""add translated_articles table
|
||||||
|
|
||||||
|
Revision ID: 0005
|
||||||
|
Revises: 0004
|
||||||
|
Create Date: 2026-03-27
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
revision: str = "0005"
|
||||||
|
down_revision: Union[str, None] = "0004"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Create the ``translated_articles`` table and its two lookup indexes."""
    table_name = "translated_articles"

    # Column order here is the column order of the created table.
    columns = [
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("published_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("source_language", sa.String(10), nullable=False),
        sa.Column("source_title", sa.Text(), nullable=False),
        sa.Column("source_body", sa.Text(), nullable=False),
        sa.Column("target_language", sa.String(10), nullable=False),
        sa.Column("target_complexities", postgresql.ARRAY(sa.String(5)), nullable=False),
        sa.Column("target_title", sa.Text(), nullable=False),
        sa.Column("target_body", sa.Text(), nullable=False),
        sa.Column("audio_url", sa.Text(), nullable=True),
        sa.Column("target_body_pos", postgresql.JSONB(), nullable=True),
        sa.Column("target_body_transcript", postgresql.JSONB(), nullable=True),
    ]
    op.create_table(table_name, *columns)

    # Single-column indexes on the two columns that get their own index here.
    op.create_index("ix_translated_articles_published_at", table_name, ["published_at"])
    op.create_index("ix_translated_articles_target_language", table_name, ["target_language"])
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop both indexes and then the ``translated_articles`` table."""
    table_name = "translated_articles"

    # Indexes go first, in the reverse of the order in which upgrade() made them.
    index_names = (
        "ix_translated_articles_target_language",
        "ix_translated_articles_published_at",
    )
    for index_name in index_names:
        op.drop_index(index_name, table_name=table_name)

    op.drop_table(table_name)
|
||||||
|
|
@ -9,6 +9,7 @@ class Settings(BaseSettings):
|
||||||
deepgram_api_key: str
|
deepgram_api_key: str
|
||||||
gemini_api_key: str
|
gemini_api_key: str
|
||||||
admin_user_emails: str = "" # comma-separated list of admin email addresses
|
admin_user_emails: str = "" # comma-separated list of admin email addresses
|
||||||
|
api_base_url: str = "http://localhost:8000"
|
||||||
storage_endpoint_url: str
|
storage_endpoint_url: str
|
||||||
storage_access_key: str
|
storage_access_key: str
|
||||||
storage_secret_key: str
|
storage_secret_key: str
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,18 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
@dataclass
|
|
||||||
|
@dataclass
|
||||||
class TranslatedArticle:
|
class TranslatedArticle:
|
||||||
id: str
|
id: str
|
||||||
|
published_at: datetime
|
||||||
source_lang: str
|
source_language: str
|
||||||
source_title: str
|
source_title: str
|
||||||
source_text: str
|
source_body: str
|
||||||
|
target_language: str
|
||||||
target_lang: str
|
target_complexities: list[str]
|
||||||
target_title: str
|
target_title: str
|
||||||
target_text: str
|
target_body: str
|
||||||
|
audio_url: str | None
|
||||||
|
target_body_pos: dict | None
|
||||||
|
target_body_transcript: dict | None
|
||||||
|
|
|
||||||
|
|
@ -1,30 +1,19 @@
|
||||||
import re
|
import re
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from ..models.summarise_job import SummariseJob
|
from ..models.summarise_job import SummariseJob
|
||||||
from ..models.translated_article import TranslatedArticle
|
from ..models.translated_article import TranslatedArticle
|
||||||
|
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
||||||
|
|
||||||
def first_heading(md: str) -> str | None:
|
def first_heading(md: str) -> str | None:
|
||||||
m = re.search(r'^#{1,2}\s+(.+)', md, re.MULTILINE)
|
m = re.search(r'^#{1,2}\s+(.+)', md, re.MULTILINE)
|
||||||
return m.group(1).strip() if m else None
|
return m.group(1).strip() if m else None
|
||||||
|
|
||||||
class ArticleService:
    """Domain service that reads translated articles through the repository."""

    def __init__(self, db: AsyncSession) -> None:
        """Build the service on top of a database session.

        Args:
            db: Async SQLAlchemy session; it is handed to a
                ``TranslatedArticleRepository`` created here, so callers pass
                the session itself, not a repository.
        """
        self.translated_articles_repository = TranslatedArticleRepository(db)

    async def get_all_articles(self, target_language: str) -> list[TranslatedArticle]:
        """Fetch all translated articles for one target language.

        Args:
            target_language: Language code forwarded to the repository's
                ``list_all``.

        Returns:
            The articles as returned by the repository.
        """
        return await self.translated_articles_repository.list_all(target_language)
|
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from sqlalchemy import String, Text, DateTime
|
||||||
|
from sqlalchemy.orm import Mapped, mapped_column
|
||||||
|
from sqlalchemy.dialects.postgresql import UUID, ARRAY, JSONB
|
||||||
|
|
||||||
|
from ..database import Base
|
||||||
|
|
||||||
|
|
||||||
|
class TranslatedArticleEntity(Base):
    """ORM mapping for rows of the ``translated_articles`` table."""

    __tablename__ = "translated_articles"

    # Primary key; uuid.uuid4 supplies a value when none is given.
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Timezone-aware timestamp; defaults to the current UTC time.
    published_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
    )

    # Source side: language code (max 10 chars), title and body.
    source_language: Mapped[str] = mapped_column(String(10), nullable=False)
    source_title: Mapped[str] = mapped_column(Text, nullable=False)
    source_body: Mapped[str] = mapped_column(Text, nullable=False)

    # Target side: language code, complexity codes (max 5 chars each), title and body.
    target_language: Mapped[str] = mapped_column(String(10), nullable=False)
    target_complexities: Mapped[list[str]] = mapped_column(ARRAY(String(5)), nullable=False)
    target_title: Mapped[str] = mapped_column(Text, nullable=False)
    target_body: Mapped[str] = mapped_column(Text, nullable=False)

    # Optional extras stored alongside the target body.
    audio_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    target_body_pos: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    target_body_transcript: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from ..entities.translated_article_entity import TranslatedArticleEntity
|
||||||
|
from ....domain.models.translated_article import TranslatedArticle
|
||||||
|
|
||||||
|
|
||||||
|
class TranslatedArticleRepository:
    """Persistence gateway for translated articles, backed by an async session."""

    def __init__(self, db: AsyncSession):
        """Keep a reference to the session used for every query."""
        self.db = db

    def _to_model(self, entity: TranslatedArticleEntity) -> TranslatedArticle:
        """Convert an ORM row into the domain ``TranslatedArticle``."""
        # The UUID primary key is exposed as a string and the complexities are
        # copied into a fresh list; everything else is passed through as-is.
        article_id = str(entity.id)
        complexities = list(entity.target_complexities)
        return TranslatedArticle(
            id=article_id,
            published_at=entity.published_at,
            source_language=entity.source_language,
            source_title=entity.source_title,
            source_body=entity.source_body,
            target_language=entity.target_language,
            target_complexities=complexities,
            target_title=entity.target_title,
            target_body=entity.target_body,
            audio_url=entity.audio_url,
            target_body_pos=entity.target_body_pos,
            target_body_transcript=entity.target_body_transcript,
        )

    async def create(
        self,
        source_language: str,
        source_title: str,
        source_body: str,
        target_language: str,
        target_complexities: list[str],
        target_title: str,
        target_body: str,
        audio_url: str | None,
        target_body_pos: dict | None,
        target_body_transcript: dict | None,
    ) -> TranslatedArticle:
        """Insert a new article stamped with the current UTC time and return it.

        The session is committed here, then the row is refreshed before it is
        converted to the domain model.
        """
        now_utc = datetime.now(timezone.utc)
        row = TranslatedArticleEntity(
            published_at=now_utc,
            source_language=source_language,
            source_title=source_title,
            source_body=source_body,
            target_language=target_language,
            target_complexities=target_complexities,
            target_title=target_title,
            target_body=target_body,
            audio_url=audio_url,
            target_body_pos=target_body_pos,
            target_body_transcript=target_body_transcript,
        )
        self.db.add(row)
        await self.db.commit()
        await self.db.refresh(row)
        return self._to_model(row)

    async def list_all(self, target_language: str) -> list[TranslatedArticle]:
        """Return every article for *target_language*, newest first."""
        newest_first = TranslatedArticleEntity.published_at.desc()
        statement = (
            select(TranslatedArticleEntity)
            .where(TranslatedArticleEntity.target_language == target_language)
            .order_by(newest_first)
        )
        result = await self.db.execute(statement)
        rows = result.scalars().all()
        return [self._to_model(row) for row in rows]

    async def get_by_id(self, article_id: uuid.UUID) -> TranslatedArticle | None:
        """Look an article up by primary key; ``None`` when no row is found."""
        row = await self.db.get(TranslatedArticleEntity, article_id)
        if not row:
            return None
        return self._to_model(row)
|
||||||
|
|
@ -14,6 +14,8 @@ from ...auth import require_admin
|
||||||
from ...storage import upload_audio
|
from ...storage import upload_audio
|
||||||
from ...outbound.postgres.database import get_db, AsyncSessionLocal
|
from ...outbound.postgres.database import get_db, AsyncSessionLocal
|
||||||
from ...outbound.postgres.repositories import summarise_job_repository
|
from ...outbound.postgres.repositories import summarise_job_repository
|
||||||
|
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
||||||
|
from ...domain.services.article_service import first_heading
|
||||||
from ...outbound.anthropic.anthropic_client import AnthropicClient
|
from ...outbound.anthropic.anthropic_client import AnthropicClient
|
||||||
from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
|
from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
|
||||||
from ...outbound.deepl.deepl_client import DeepLClient
|
from ...outbound.deepl.deepl_client import DeepLClient
|
||||||
|
|
@ -117,6 +119,19 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
|
||||||
|
|
||||||
await summarise_job_repository.mark_succeeded(db, job, audio_key)
|
await summarise_job_repository.mark_succeeded(db, job, audio_key)
|
||||||
|
|
||||||
|
await TranslatedArticleRepository(db).create(
|
||||||
|
source_language=request.source_language,
|
||||||
|
source_title=first_heading(translated_text) or "",
|
||||||
|
source_body=translated_text,
|
||||||
|
target_language=request.target_language,
|
||||||
|
target_complexities=[request.complexity_level],
|
||||||
|
target_title=first_heading(generated_text) or "",
|
||||||
|
target_body=generated_text,
|
||||||
|
audio_url=audio_key,
|
||||||
|
target_body_pos=target_pos_data,
|
||||||
|
target_body_transcript=transcript,
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
await summarise_job_repository.mark_failed(db, job, str(exc))
|
await summarise_job_repository.mark_failed(db, job, str(exc))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,30 +1,104 @@
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from ...auth import verify_token
|
||||||
|
from ...config import settings
|
||||||
from ...domain.services.article_service import ArticleService
|
from ...domain.services.article_service import ArticleService
|
||||||
from ...outbound.postgres.database import get_db, AsyncSessionLocal
|
from ...outbound.postgres.database import get_db
|
||||||
from ...outbound.postgres.repositories.summarise_job_repository import PostgresSummariseJobRepository
|
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/articles", tags=["bff", "articles"])
|
||||||
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/articles", tags=["articles"])
|
class ArticleItem(BaseModel):
    """Summary of one article as returned by the list endpoint.

    Carries the identifying fields and both titles, but neither body.
    """

    id: str
    published_at: datetime
    source_language: str
    source_title: str
    target_language: str
    target_complexities: list[str]
    target_title: str
|
||||||
|
|
||||||
class ArticleListResponse(BaseModel):
    """Response envelope for the article list endpoint."""

    articles: list[ArticleItem]
|
|
||||||
|
|
||||||
|
|
||||||
|
class ArticleDetail(BaseModel):
    """Full representation of one article as returned by the detail endpoint."""

    id: str
    published_at: datetime

    # Source side.
    source_language: str
    source_title: str
    source_body: str

    # Target side.
    target_language: str
    target_complexities: list[str]
    target_title: str
    target_body: str

    # Optional extras; each is None when the article has no such data.
    target_audio_url: str | None
    target_body_pos: dict | None
    target_body_transcript: dict | None
|
||||||
|
|
||||||
|
|
||||||
|
def _audio_url(key: str | None) -> str | None:
    """Build the media URL for a stored audio key.

    Args:
        key: Audio key of the article, or ``None`` when it has no audio.

    Returns:
        ``<api_base_url>/media/<key>``, or ``None`` when *key* is ``None``.
    """
    if key is None:
        return None
    # Tolerate a configured base URL that ends with "/" so the result never
    # contains a doubled slash before "media".
    base_url = settings.api_base_url.rstrip("/")
    return f"{base_url}/media/{key}"
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("", response_model=ArticleListResponse, status_code=200)
async def list_articles(
    target_language: str = 'fr',
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> ArticleListResponse:
    """List article summaries for one target language.

    Args:
        target_language: Query parameter selecting the target language;
            defaults to ``'fr'``.
        db: Request-scoped database session.
        _: Result of ``verify_token``; declared only so the dependency runs.

    Returns:
        The matching articles, in the order the service returns them.
    """
    # ArticleService takes the session and creates its own repository.
    # Handing it a repository would nest one inside another and fail on the
    # first query.
    service = ArticleService(db)
    articles = await service.get_all_articles(target_language=target_language)

    items = [
        ArticleItem(
            id=a.id,
            published_at=a.published_at,
            source_language=a.source_language,
            source_title=a.source_title,
            target_language=a.target_language,
            target_complexities=a.target_complexities,
            target_title=a.target_title,
        )
        for a in articles
    ]
    return ArticleListResponse(articles=items)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{article_id}", response_model=ArticleDetail, status_code=200)
async def get_article(
    article_id: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> ArticleDetail:
    """Return the full detail of one article.

    Args:
        article_id: Article identifier from the path; must parse as a UUID.
        db: Request-scoped database session.
        _: Result of ``verify_token``; declared only so the dependency runs.

    Returns:
        The article, with its stored audio key expanded by ``_audio_url``.

    Raises:
        HTTPException: 400 when *article_id* is not a valid UUID, 404 when no
            article has that ID.
    """
    try:
        uid = uuid.UUID(article_id)
    except ValueError:
        # The parse error adds nothing for the client; drop it from the chain.
        raise HTTPException(status_code=400, detail="Invalid article ID") from None

    article = await TranslatedArticleRepository(db).get_by_id(uid)
    if article is None:
        raise HTTPException(status_code=404, detail="Article not found")

    return ArticleDetail(
        id=article.id,
        published_at=article.published_at,
        source_language=article.source_language,
        source_title=article.source_title,
        source_body=article.source_body,
        target_language=article.target_language,
        target_complexities=article.target_complexities,
        target_title=article.target_title,
        target_body=article.target_body,
        target_audio_url=_audio_url(article.audio_url),
        target_body_pos=article.target_body_pos,
        target_body_transcript=article.target_body_transcript,
    )
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue