Compare commits

..

No commits in common. "37570e9c58f5da0c35a3c7d4155e25b4d26bf1a3" and "fecb5839ea0666fde9c7c79a805ad5b3ae16eddc" have entirely different histories.

38 changed files with 1604 additions and 2058 deletions

1
.gitignore vendored
View file

@ -1,7 +1,6 @@
todo.md
.env
.env.prod
.env.test
.codegraph
/Language*Learning*API/

View file

@ -1,19 +1,19 @@
import asyncio
from logging.config import fileConfig
from alembic import context
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import async_engine_from_config
import app.outbound.postgres.entities.adventure_entities
import app.outbound.postgres.entities.article_entities
import app.outbound.postgres.entities.dictionary_entities
import app.outbound.postgres.entities.pack_entities
import app.outbound.postgres.entities.summarise_job_entity
import app.outbound.postgres.entities.user_entity
from alembic import context
from app.config import settings
from app.outbound.postgres.database import Base
import app.outbound.postgres.entities.summarise_job_entity
import app.outbound.postgres.entities.user_entity
import app.outbound.postgres.entities.dictionary_entities
import app.outbound.postgres.entities.pack_entities
import app.outbound.postgres.entities.adventure_entities
config = context.config
config.set_main_option("sqlalchemy.url", settings.database_url)

View file

@ -1,77 +0,0 @@
"""add article tables
Revision ID: 0020
Revises: 0019
Create Date: 2026-05-31
"""
from typing import Sequence, Union
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from alembic import op
revision: str = "0020"
down_revision: Union[str, None] = "0019"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``article`` and ``article_ownership`` tables plus their indexes."""
    # Article content table. ``text_linguistic_data``, ``audio_key``,
    # ``published_at`` and ``deleted_at`` are nullable; ``created_at`` is
    # filled by the database via ``now()``.
    op.create_table(
        "article",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("article_type", sa.Text(), nullable=False),
        sa.Column("language", sa.Text(), nullable=False),
        sa.Column("target_complexity", sa.Text(), nullable=False),
        sa.Column("title", sa.Text(), nullable=False),
        sa.Column("text", sa.Text(), nullable=False),
        sa.Column("text_linguistic_data", postgresql.JSONB(), nullable=True),
        sa.Column("audio_key", sa.Text(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.Column("published_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
    )
    # Link table between articles and users. Both foreign keys cascade on
    # delete, so it must be created after ``article``.
    op.create_table(
        "article_ownership",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "article_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("article.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("ownership_role", sa.Text(), nullable=False),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("users.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
    )
    # Indexes on both foreign-key columns of the link table.
    op.create_index(
        "ix_article_ownership_article_id", "article_ownership", ["article_id"]
    )
    op.create_index("ix_article_ownership_user_id", "article_ownership", ["user_id"])
def downgrade() -> None:
    """Drop the indexes and tables created by ``upgrade``, in reverse order."""
    op.drop_index("ix_article_ownership_user_id", table_name="article_ownership")
    op.drop_index("ix_article_ownership_article_id", table_name="article_ownership")
    # The link table goes first because it holds a foreign key to ``article``.
    op.drop_table("article_ownership")
    op.drop_table("article")

View file

@ -1,18 +0,0 @@
def summarise_article_system_prompt(
    to_language: str = "french",
    complexity_level: str = "B1",
    length_preference: str = "300 words",
) -> str:
    """Build the system prompt used to generate a summary article.

    Args:
        to_language: Name of the language the article is written in.
        complexity_level: CEFR level label inserted into the prompt.
        length_preference: Free-text length hint (e.g. ``"300 words"``).

    Returns:
        The prompt as newline-separated instructions, with no trailing newline.
    """
    instructions = [
        f"You are a {to_language} language learning content creator, tutoring someone at {complexity_level} proficiency level on the CEFR scale.",
        "Generate level-appropriate content from a source.",
        "Your response will:",
        "- Start with a level-one markdown title .",
        f"- Then contain only the article, in {to_language}, as plain-text. ",
        "- Separate each paragraph (and the title) with two new line characters.",
        "- Speak directly to the reader in a semi-formal, modern media tone.",
        f"- Occasionally, where natural, include idiomatic expressions appropriate to {complexity_level} level.",
        "- Vary gramatical tenses, but naturally — do not restrict the piece to a single tense.",
        f"- Be around {length_preference} long.",
        "- Be inspired by the content, but not the tone, of the source material.",
    ]
    return "\n".join(instructions)

View file

@ -1,36 +0,0 @@
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
class ArticleTypeEnum(str, Enum):
    """String-valued enumeration of the supported article types."""

    # Take the input text, and summarise it.
    summary = "summary"
@dataclass
class Article:
    """Domain model for an article and its optional derived data."""

    id: str
    article_type: ArticleTypeEnum
    # Language of the article, e.g. "fr".
    language: str
    # Complexity the article targets, e.g. "B1".
    target_complexity: str
    title: str
    text: str
    text_linguistic_data: dict | None
    audio_key: str | None
    created_at: datetime
    published_at: datetime | None
    deleted_at: datetime | None
class ArticleOwnershipRoleEnum(str, Enum):
    """String-valued enumeration of the roles a user can hold on an article."""

    # Person for whom the Article was created.
    owner = "owner"
@dataclass
class ArticleOwnership:
    """Domain model linking a user to an article under a given role."""

    id: str
    article_id: str
    ownership_role: ArticleOwnershipRoleEnum
    user_id: str
    created_at: datetime
    deleted_at: datetime | None

View file

@ -14,6 +14,7 @@ from ...outbound.anthropic.adventure_prompts import (
parse_entry_response,
parse_title_response,
)
from ...outbound.anthropic.anthropic_client import AnthropicClient
from ...outbound.deepl.deepl_client import DeepLClient
from ...outbound.gemini.gemini_client import GeminiClient
from ...outbound.postgres.repositories.adventure_repository import (

View file

@ -1,73 +0,0 @@
import logging
from uuid import UUID, uuid4
from app.domain.models.article import Article, ArticleOwnershipRoleEnum, ArticleTypeEnum
from app.outbound.postgres.repositories.article_repository import (
ArticleOwnershipRepository,
ArticleRepository,
)
logger = logging.getLogger(__name__)
class ArticleService:
    """Application service for creating and reading user-owned articles."""

    def __init__(
        self,
        article_repository: ArticleRepository,
        article_ownership_repository: ArticleOwnershipRepository,
    ) -> None:
        self.article_repository = article_repository
        self.article_ownership_repository = article_ownership_repository

    async def create_article_as_user(
        self,
        article_type: ArticleTypeEnum,
        language: str,
        target_complexity: str,
        title: str,
        text: str,
        user_id: str,
    ) -> Article:
        """Create an article and register ``user_id`` as its owner.

        Raises:
            ValueError: If ``user_id`` (or the created article id) is not a
                valid UUID string.
        """
        article = await self.article_repository.create(
            article_type=article_type,
            language=language,
            target_complexity=target_complexity,
            title=title,
            text=text,
        )
        await self.article_ownership_repository.create(
            article_id=UUID(article.id),
            ownership_role=ArticleOwnershipRoleEnum.owner,
            user_id=UUID(user_id),
        )
        return article

    async def get_articles_for_user(self, user_id: str) -> list[Article]:
        """Return the non-deleted articles owned by ``user_id``.

        Raises:
            ValueError: If ``user_id`` is not a valid UUID string.
        """
        return await self.article_repository.get_non_deleted_articles_for_owner(
            UUID(user_id)
        )

    async def get_article_as_user(
        self, article_id: str, user_id: str
    ) -> Article | None:
        """Return the article if it exists and ``user_id`` has an active ownership.

        Soft-deleted articles and soft-deleted ownership rows are ignored, in
        line with ``get_articles_for_user``.

        Returns:
            The article, or ``None`` when it is missing, deleted, or the user
            has no active ownership of it.

        Raises:
            ValueError: If ``article_id`` is not a valid UUID string.
        """
        aid = UUID(article_id)
        # Compare in canonical UUID form so that case or formatting variants
        # of the same id still match. An unparsable user id simply never
        # matches, as before.
        try:
            canonical_user_id = str(UUID(user_id))
        except ValueError:
            canonical_user_id = user_id

        article = await self.article_repository.get_by_id(aid)
        if article is None or article.deleted_at is not None:
            logger.info(f"Article with id {article_id} not found")
            return None

        ownerships = await self.article_ownership_repository.get_by_article_id(aid)
        has_access = any(
            ownership.deleted_at is None
            and str(ownership.user_id) == canonical_user_id
            for ownership in ownerships
        )
        if not has_access:
            logger.info(
                f"User with id {user_id} does not have access to article with id {article_id}"
            )
            return None
        return article

View file

@ -0,0 +1 @@
# TODO: Implement this service, taking the code currently placed in app/routes/api/generation.py

View file

@ -1,23 +1,55 @@
import logging
import asyncio
import random
import re
import uuid
from typing import Any, Callable, Coroutine
from opentelemetry.trace import get_tracer
import anthropic
from sqlalchemy.ext.asyncio import AsyncSession
from app.outbound.postgres.repositories.article_repository import ArticleRepository
from ...languages import SUPPORTED_LANGUAGES
from ...outbound.postgres.repositories import summarise_job_repository
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
from ...outbound.anthropic.anthropic_client import AnthropicClient
from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
from ...outbound.deepl.deepl_client import DeepLClient
from ...outbound.gemini.gemini_client import GeminiClient
from ...outbound.spacy.spacy_client import SpacyClient
from ...outbound.storage_client import get_storage_client
from ...languages import SUPPORTED_LANGUAGES
logger = logging.getLogger(__name__)
tracer = get_tracer(__name__)
_ANTHROPIC_RETRYABLE = (
anthropic.RateLimitError,
anthropic.InternalServerError,
anthropic.APITimeoutError,
anthropic.APIConnectionError,
)
_MAX_RETRIES = 4
_BASE_DELAY = 1.0
_MAX_DELAY = 60.0
async def _anthropic_with_backoff(
    coro_fn: Callable[..., Coroutine[Any, Any, Any]],
    *args: Any,
    **kwargs: Any,
) -> Any:
    """Await ``coro_fn(*args, **kwargs)``, retrying on retryable Anthropic errors.

    Up to ``_MAX_RETRIES`` retries are made. The delay before each retry is
    the ``retry-after`` header of a rate-limit response when it is a usable
    number of seconds, otherwise an exponential backoff capped at
    ``_MAX_DELAY``. The delay is then scaled by a random factor in
    [0.8, 1.2).

    Raises:
        The last retryable error once all retries are exhausted; any
        non-retryable error immediately.
    """
    for attempt in range(_MAX_RETRIES + 1):
        try:
            return await coro_fn(*args, **kwargs)
        except _ANTHROPIC_RETRYABLE as exc:
            if attempt == _MAX_RETRIES:
                raise
            retry_after: float | None = None
            if isinstance(exc, anthropic.RateLimitError):
                raw = exc.response.headers.get("retry-after")
                if raw is not None:
                    # The header may also be an HTTP-date; fall back to the
                    # exponential schedule instead of crashing on it.
                    try:
                        retry_after = float(raw)
                    except ValueError:
                        retry_after = None
                    else:
                        # Reject negative, NaN and infinite values.
                        if not (0 <= retry_after < float("inf")):
                            retry_after = None
            if retry_after is None:
                retry_after = min(_BASE_DELAY * (2 ** attempt), _MAX_DELAY)
            jittered = retry_after * (0.8 + random.random() * 0.4)
            await asyncio.sleep(jittered)
class SummariseService:
@ -28,80 +60,90 @@ class SummariseService:
deepl_client: DeepLClient,
gemini_client: GeminiClient,
spacy_client: SpacyClient,
article_repository: ArticleRepository,
) -> None:
self.anthropic_client = anthropic_client
self.deepgram_client = deepgram_client
self.deepl_client = deepl_client
self.gemini_client = gemini_client
self.spacy_client = spacy_client
self.article_repository = article_repository
def _first_heading(self, md: str) -> str | None:
m = re.search(r'^#{1,2}\s+(.+)', md, re.MULTILINE)
return m.group(1).strip() if m else None
def _split_title_and_body(self, text: str) -> tuple[str, str]:
lines = text.splitlines()
if not lines:
return "", ""
title = lines[0].lstrip("#").strip()
body = "\n".join(lines[1:]).strip()
return title, body
"""Splits the text into a title (first heading) and body (the rest)."""
title = self._first_heading(text) or ""
body = text[len(title):].lstrip() if title else text
if title == "":
title = "Untitled Article"
async def summarise_article(
return title, body
async def run(
self,
db: AsyncSession,
job_id: uuid.UUID,
article_id: uuid.UUID,
source_language: str,
target_language: str,
complexity_level: str,
input_text: str,
input_texts: list[str],
) -> None:
print(f"Summarising article {article_id} with target language {target_language} and complexity level {complexity_level}...")
with tracer.start_as_current_span("summarise_article"):
try:
with tracer.start_as_current_span("generate_title_and_text"):
language_name = SUPPORTED_LANGUAGES[target_language]
article_repo = TranslatedArticleRepository(db)
job = await summarise_job_repository.get_by_id(db, job_id)
await summarise_job_repository.mark_processing(db, job)
generated_text = await AnthropicClient.retry(
self.anthropic_client.create_summary_article,
content_to_summarise=input_text,
complexity_level=complexity_level,
to_language=language_name,
length_preference="200-400 words",
)
try:
language_name = SUPPORTED_LANGUAGES[target_language]
source_material = "\n\n".join(input_texts[:3])
if generated_text is None:
print(f"Text generated to summarise article {article_id}...")
raise
generated_text = await _anthropic_with_backoff(
self.anthropic_client.generate_summary_text,
content_to_summarise=source_material,
complexity_level=complexity_level,
from_language=language_name,
to_language=language_name,
length_preference="200-400 words",
)
generated_title, generated_text_without_title = self._split_title_and_body(generated_text)
generated_title, generated_text_without_title = (
self._split_title_and_body(generated_text)
)
await article_repo.update_content(
article_id,
target_title=generated_title,
target_body=generated_text_without_title,
source_title="",
source_body="",
)
translated_text = await self.deepl_client.translate(generated_text, source_language)
await self.article_repository.update_title_and_text(
article_id, generated_title, generated_text_without_title
)
translated_title, translated_text_without_title = self._split_title_and_body(translated_text)
with tracer.start_as_current_span("generate_linguistic_data"):
text_linguistic_data = self.spacy_client.get_parts_of_speech(
generated_text_without_title, target_language
)
await article_repo.update_content(
article_id,
target_title=generated_title,
target_body=generated_text_without_title,
source_title=translated_title,
source_body=translated_text_without_title,
)
await self.article_repository.update_linguistic_data(
article_id, text_linguistic_data
)
target_pos_data = self.spacy_client.get_parts_of_speech(generated_text_without_title, target_language)
source_pos_data = self.spacy_client.get_parts_of_speech(translated_text_without_title, source_language)
with tracer.start_as_current_span("generate_voice"):
voice = self.gemini_client.get_voice_by_language(target_language)
wav_bytes = await self.gemini_client.generate_audio(
generated_text, voice
)
audio_key = f"audio/{article_id}.wav"
get_storage_client().upload(audio_key, wav_bytes)
await article_repo.update_pos(article_id, target_pos_data, source_pos_data)
await self.article_repository.update_audio_key(
article_id, audio_key
)
voice = self.gemini_client.get_voice_by_language(target_language)
wav_bytes = await self.gemini_client.generate_audio(generated_text, voice)
audio_key = f"audio/{job_id}.wav"
get_storage_client().upload(audio_key, wav_bytes)
except Exception as exc:
print(f"Failed to summarise an article: {exc}")
raise exc
transcript = await self.deepgram_client.transcribe_bytes(wav_bytes, target_language)
await article_repo.update_audio(article_id, audio_key, transcript)
await summarise_job_repository.mark_succeeded(db, job)
except Exception as exc:
await summarise_job_repository.mark_failed(db, job, str(exc))

View file

@ -14,14 +14,16 @@ from opentelemetry.sdk.trace.export import (
)
from prometheus_client import start_http_server
_observability_initialized = False
_observability_initialized = False
def setup_observability(app: FastAPI) -> None:
global _observability_initialized
if _observability_initialized:
return
service_name = os.getenv("OTEL_SERVICE_NAME", "language-learning-api")
metrics_host = os.getenv("OTEL_EXPORTER_PROMETHEUS_HOST", "0.0.0.0")
metrics_port = int(os.getenv("OTEL_EXPORTER_PROMETHEUS_PORT", "9464"))
@ -38,9 +40,10 @@ def setup_observability(app: FastAPI) -> None:
metrics.set_meter_provider(meter_provider)
LoggingInstrumentor().instrument(set_logging_format=True)
FastAPIInstrumentor.instrument_app(app)
# Expose OTel metrics for Prometheus scraping on the standard endpoint.
start_http_server(port=metrics_port, addr=metrics_host)
_observability_initialized = True
_observability_initialized = True

View file

@ -1,24 +1,9 @@
import asyncio
from random import random
from typing import Any, Callable, Coroutine
import anthropic
from app.domain.ai_prompts.summarise_article_ai_prompt import (
summarise_article_system_prompt,
)
from app.domain.models.gen_ai import GenAiChatMessage
_ANTHROPIC_RETRYABLE = (
anthropic.RateLimitError,
anthropic.InternalServerError,
anthropic.APITimeoutError,
anthropic.APIConnectionError,
)
_MAX_RETRIES = 4
_BASE_DELAY = 1.0
_MAX_DELAY = 60.0
class AnthropicClient:
def __init__(self, api_key: str):
@ -28,33 +13,27 @@ class AnthropicClient:
def new(cls, api_key: str) -> "AnthropicClient":
return cls(api_key)
@classmethod
async def retry(
    cls,
    callable_function: Callable[..., Coroutine[Any, Any, Any]],
    *args: Any,
    **kwargs: Any,
) -> Any:
    """Await ``callable_function(*args, **kwargs)``, retrying retryable errors.

    Up to ``_MAX_RETRIES`` retries are made. The delay before each retry is
    the ``retry-after`` header of a rate-limit response when it is a usable
    number of seconds, otherwise an exponential backoff capped at
    ``_MAX_DELAY``. The delay is then scaled by a random factor in
    [0.8, 1.2).

    Raises:
        The last retryable error once all retries are exhausted; any
        non-retryable error immediately.
    """
    for attempt in range(_MAX_RETRIES + 1):
        try:
            return await callable_function(*args, **kwargs)
        except _ANTHROPIC_RETRYABLE as exception:
            if attempt == _MAX_RETRIES:
                raise
            retry_after: float | None = None
            if isinstance(exception, anthropic.RateLimitError):
                # The response object exposes ``headers`` (plural).
                raw = exception.response.headers.get("retry-after")
                if raw is not None:
                    # The header may also be an HTTP-date; fall back to the
                    # exponential schedule instead of crashing on it.
                    try:
                        retry_after = float(raw)
                    except ValueError:
                        retry_after = None
                    else:
                        if not (0 <= retry_after < float("inf")):
                            retry_after = None
            if retry_after is None:
                retry_after = min(_BASE_DELAY * (2**attempt), _MAX_DELAY)
            # ``random`` is the function itself (``from random import random``).
            # The factor is 0.8 plus up to 0.4 of jitter, not a product.
            jittered = retry_after * (0.8 + random() * 0.4)
            await asyncio.sleep(jittered)
def _create_summarise_text_system_prompt(
    self,
    complexity_level: str,
    from_language: str,
    to_language: str,
    length_preference="200-400 words",
) -> str:
    """Build the system prompt for generating a spoken-style summary text.

    ``from_language`` is accepted but does not appear in the prompt.

    Returns:
        The prompt as newline-separated instructions, with no trailing newline.
    """
    prompt_lines = (
        "You are a language learning content creator.",
        "You generate original, level-appropriate content from a source.",
        f"The content will be spoken aloud in {to_language}, write it accordingly.",
        f"You will provide content in {to_language} at {complexity_level} proficiency level on the CEFR scale.",
        "The text you generate will:",
        f"- Contain ONLY the generated summary text in {to_language}.",
        "- Speak directly to the reader/listener, adopting the tone and style of a semi-formal news reporter or podcaster.",
        f"- Occasionally, where natural, include idiomatic expressions appropriate to {complexity_level} level.",
        "- Vary tense usage naturally — do not restrict the piece to a single tense.",
        "- Contain only plain text. The piece should start with a title prefaced like a level-1 markdown title (#), but all other text should be plain. ",
        f"- Be around {length_preference} long.",
        "- Be inspired by the content, but not the tone, of the source material.",
    )
    return "\n".join(prompt_lines)
def _create_prompt_summarise_text(
self,
@ -100,24 +79,24 @@ class AnthropicClient:
return await asyncio.to_thread(_call)
async def create_summary_article(
async def generate_summary_text(
self,
content_to_summarise: str,
complexity_level: str,
from_language: str,
to_language: str,
length_preference="200-400 words",
) -> str:
"""
Generate text, and title, for a summary article using Anthropic.
"""
"""Generate text using Anthropic."""
def _call() -> str:
message = self._client.messages.create(
model="claude-sonnet-4-6",
max_tokens=1024,
system=summarise_article_system_prompt(
to_language=to_language,
system=self._create_summarise_text_system_prompt(
complexity_level=complexity_level,
from_language=from_language,
to_language=to_language,
length_preference=length_preference,
),
messages=[

View file

@ -1,64 +0,0 @@
import uuid
from datetime import datetime, timezone
from sqlalchemy import DateTime, ForeignKey, Text
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.dialects.postgresql.json import JSONB
from sqlalchemy.orm import Mapped, mapped_column
from ..database import Base
class ArticleEntity(Base):
    """ORM mapping for the ``article`` table."""

    __tablename__ = "article"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    article_type: Mapped[str] = mapped_column(Text, nullable=False)
    language: Mapped[str] = mapped_column(Text, nullable=False)
    target_complexity: Mapped[str] = mapped_column(Text, nullable=False)
    title: Mapped[str] = mapped_column(Text, nullable=False)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    # Both columns are nullable, so the Python-side type must admit None.
    # The explicit ``nullable=True`` keeps the column definition unchanged.
    text_linguistic_data: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    audio_key: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )
    published_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    deleted_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
class ArticleOwnershipEntity(Base):
    """ORM mapping for the ``article_ownership`` table."""

    __tablename__ = "article_ownership"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Indexed foreign key to the article; rows cascade when it is deleted.
    article_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("article.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    ownership_role: Mapped[str] = mapped_column(Text, nullable=False)
    # Indexed foreign key to the user; rows cascade when it is deleted.
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )
    deleted_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

View file

@ -1,206 +0,0 @@
import logging
import uuid
from datetime import datetime, timezone
from typing import Protocol
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from ....domain.models.article import (
Article,
ArticleOwnership,
ArticleOwnershipRoleEnum,
ArticleTypeEnum,
)
from ..entities.article_entities import ArticleEntity, ArticleOwnershipEntity
class ArticleRepository(Protocol):
    """Structural interface for article persistence."""

    async def create(
        self,
        article_type: ArticleTypeEnum,
        language: str,
        target_complexity: str,
        title: str,
        text: str,
    ) -> Article:
        """Persist a new article and return it."""
        ...

    async def get_by_id(self, article_id: uuid.UUID) -> Article | None:
        """Return the article with this id, or ``None``."""
        ...

    async def update_title_and_text(
        self, id: uuid.UUID, title: str, text: str
    ) -> Article:
        """Replace the article's title and text and return the article."""
        ...

    async def update_linguistic_data(
        self, id: uuid.UUID, linguistic_data: dict
    ) -> Article:
        """Replace the article's linguistic data and return the article."""
        ...

    async def update_audio_key(self, id: uuid.UUID, audio_key: str) -> Article:
        """Replace the article's audio key and return the article."""
        ...

    async def get_non_deleted_articles_for_owner(
        self, owner_id: uuid.UUID
    ) -> list[Article]:
        """Return the non-deleted articles owned by ``owner_id``."""
        ...
class ArticleOwnershipRepository(Protocol):
    """Structural interface for article-ownership persistence."""

    async def create(
        self,
        article_id: uuid.UUID,
        ownership_role: ArticleOwnershipRoleEnum,
        user_id: uuid.UUID,
    ) -> ArticleOwnership:
        """Persist a new ownership record and return it."""
        ...

    async def get_by_article_id(
        self, article_id: uuid.UUID
    ) -> list[ArticleOwnership]:
        """Return the ownership records attached to ``article_id``."""
        ...
def _article_to_model(entity: ArticleEntity) -> Article:
    """Convert an ``ArticleEntity`` row into the ``Article`` domain model."""
    # The id becomes a string and the stored type becomes its enum member;
    # every other field is copied across unchanged.
    article_id = str(entity.id)
    kind = ArticleTypeEnum(entity.article_type)
    return Article(
        id=article_id,
        article_type=kind,
        language=entity.language,
        target_complexity=entity.target_complexity,
        title=entity.title,
        text=entity.text,
        text_linguistic_data=entity.text_linguistic_data,
        audio_key=entity.audio_key,
        created_at=entity.created_at,
        published_at=entity.published_at,
        deleted_at=entity.deleted_at,
    )
def _ownership_to_model(entity: ArticleOwnershipEntity) -> ArticleOwnership:
    """Convert an ``ArticleOwnershipEntity`` row into the domain model."""
    # The three ids become strings and the stored role becomes its enum member.
    ownership_id = str(entity.id)
    article_id = str(entity.article_id)
    role = ArticleOwnershipRoleEnum(entity.ownership_role)
    owner_id = str(entity.user_id)
    return ArticleOwnership(
        id=ownership_id,
        article_id=article_id,
        ownership_role=role,
        user_id=owner_id,
        created_at=entity.created_at,
        deleted_at=entity.deleted_at,
    )
logger = logging.getLogger(__name__)
class PostgresArticleRepository:
    """SQLAlchemy-backed article repository working on one ``AsyncSession``."""

    def __init__(self, db: AsyncSession) -> None:
        self.db = db

    async def _find_entity(self, article_id: uuid.UUID) -> ArticleEntity | None:
        """Load the article row with this id, or ``None`` if there is none."""
        result = await self.db.execute(
            select(ArticleEntity).where(ArticleEntity.id == article_id)
        )
        return result.scalar_one_or_none()

    async def _require_entity(
        self, article_id: uuid.UUID, operation: str
    ) -> ArticleEntity:
        """Load the article row for an update, logging and raising if missing."""
        entity = await self._find_entity(article_id)
        if entity is None:
            message = f"{operation} failed, cannot find article with id '{article_id}'"
            logger.error(message)
            # A bare ``raise`` here has no active exception and would only
            # produce "RuntimeError: No active exception to reraise".
            raise LookupError(message)
        return entity

    async def _commit_and_convert(
        self, entity: ArticleEntity, article_id: uuid.UUID, operation: str
    ) -> Article:
        """Commit the pending change and return the refreshed domain model."""
        await self.db.commit()
        # Refresh as ``create`` does, so attribute reads after the commit do
        # not depend on the session's expire-on-commit setting.
        await self.db.refresh(entity)
        logger.info(f"{operation} for article '{article_id}' successful")
        return _article_to_model(entity)

    async def create(
        self,
        article_type: ArticleTypeEnum,
        language: str,
        target_complexity: str,
        title: str,
        text: str,
    ) -> Article:
        """Insert a new article, commit, and return it as a domain model."""
        entity = ArticleEntity(
            article_type=article_type.value,
            language=language,
            target_complexity=target_complexity,
            title=title,
            text=text,
            created_at=datetime.now(timezone.utc),
        )
        self.db.add(entity)
        await self.db.commit()
        await self.db.refresh(entity)
        return _article_to_model(entity)

    async def get_by_id(self, article_id: uuid.UUID) -> Article | None:
        """Return the article with this id, or ``None`` when it does not exist."""
        entity = await self._find_entity(article_id)
        return _article_to_model(entity) if entity else None

    async def get_non_deleted_articles_for_owner(
        self, owner_id: uuid.UUID
    ) -> list[Article]:
        """Return articles owned by ``owner_id`` where neither the article nor
        the ownership row has ``deleted_at`` set."""
        result = await self.db.execute(
            select(ArticleEntity)
            .join(
                ArticleOwnershipEntity,
                ArticleEntity.id == ArticleOwnershipEntity.article_id,
            )
            .where(
                ArticleOwnershipEntity.user_id == owner_id,
                ArticleOwnershipEntity.deleted_at.is_(None),
                ArticleEntity.deleted_at.is_(None),
                ArticleOwnershipEntity.ownership_role
                == ArticleOwnershipRoleEnum.owner.value,
            )
        )
        return [_article_to_model(entity) for entity in result.scalars().all()]

    async def update_title_and_text(
        self, id: uuid.UUID, title: str, text: str
    ) -> Article:
        """Set the article's title and text and commit.

        Raises:
            LookupError: If no article with this id exists.
        """
        entity = await self._require_entity(id, "update_title_and_text")
        entity.title = title
        entity.text = text
        return await self._commit_and_convert(entity, id, "update_title_and_text")

    async def update_linguistic_data(
        self, id: uuid.UUID, linguistic_data: dict
    ) -> Article:
        """Set the article's linguistic data and commit.

        Raises:
            LookupError: If no article with this id exists.
        """
        entity = await self._require_entity(id, "update_linguistic_data")
        entity.text_linguistic_data = linguistic_data
        return await self._commit_and_convert(entity, id, "update_linguistic_data")

    async def update_audio_key(self, id: uuid.UUID, audio_key: str) -> Article:
        """Set the article's audio key and commit.

        Raises:
            LookupError: If no article with this id exists.
        """
        entity = await self._require_entity(id, "update_audio_key")
        entity.audio_key = audio_key
        return await self._commit_and_convert(entity, id, "update_audio_key")
class PostgresArticleOwnershipRepository:
    """SQLAlchemy-backed ownership repository working on one ``AsyncSession``."""

    def __init__(self, db: AsyncSession) -> None:
        self.db = db

    async def create(
        self,
        article_id: uuid.UUID,
        ownership_role: ArticleOwnershipRoleEnum,
        user_id: uuid.UUID,
    ) -> ArticleOwnership:
        """Insert an ownership row, commit, refresh it, and return the model."""
        record = ArticleOwnershipEntity(
            article_id=article_id,
            ownership_role=ownership_role.value,
            user_id=user_id,
            created_at=datetime.now(timezone.utc),
        )
        self.db.add(record)
        await self.db.commit()
        await self.db.refresh(record)
        return _ownership_to_model(record)

    async def get_by_article_id(self, article_id: uuid.UUID) -> list[ArticleOwnership]:
        """Return every ownership row whose ``article_id`` matches."""
        query = select(ArticleOwnershipEntity).where(
            ArticleOwnershipEntity.article_id == article_id
        )
        rows = (await self.db.execute(query)).scalars().all()
        return list(map(_ownership_to_model, rows))

View file

@ -1,82 +0,0 @@
import uuid
from enum import Enum
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy.ext.asyncio.session import AsyncSession
from starlette.status import HTTP_201_CREATED
from app.auth import verify_token
from app.domain.models.article import Article, ArticleTypeEnum
from app.domain.services.article_service import ArticleService
from app.outbound.postgres.database import get_db
from app.outbound.postgres.repositories.article_repository import (
PostgresArticleOwnershipRepository,
PostgresArticleRepository,
)
from app.tasks.create_summary_article import create_summary_article
router = APIRouter(prefix="/articles", tags=["adventures"])
def _make_article_service(db) -> ArticleService:
    """Build an ``ArticleService`` whose Postgres repositories both use ``db``."""
    articles = PostgresArticleRepository(db)
    ownerships = PostgresArticleOwnershipRepository(db)
    return ArticleService(
        article_repository=articles,
        article_ownership_repository=ownerships,
    )
class CreateArticleBody(BaseModel):
    """Request body for creating an article."""

    article_type: ArticleTypeEnum
    language: str
    target_complexity: str
    text: str
class CreateArticleResponse(BaseModel):
    """Response body returned by the article-creation endpoint."""

    id: str
class ArticleItem(BaseModel):
    """API representation of a single article; it carries only the id."""

    id: str
def _to_article_item(article: Article) -> ArticleItem:
    """Map a domain ``Article`` onto its API item, stringifying the id."""
    item_id = str(article.id)
    return ArticleItem(id=item_id)
@router.post("", response_model=CreateArticleResponse, status_code=HTTP_201_CREATED)
async def create_article(
    body: CreateArticleBody,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> CreateArticleResponse:
    """Create an article for the authenticated user and queue its summary task.

    The article is stored with an empty title, owned by the token's ``sub``,
    and ``create_summary_article`` is then deferred with the request text.

    Returns:
        The id of the article that was created.
    """
    service = _make_article_service(db)
    article = await service.create_article_as_user(
        article_type=body.article_type,
        language=body.language,
        target_complexity=body.target_complexity,
        text=body.text,
        title="",
        user_id=token_data["sub"],
    )
    await create_summary_article.defer_async(
        article_id=article.id,
        target_language=body.language,
        complexity_level=body.target_complexity,
        input_text=body.text,
    )
    # Return the id of the stored article, not a freshly generated UUID,
    # so the client can look the article up again.
    return CreateArticleResponse(id=str(article.id))
@router.get("/{article_id}", response_model=ArticleItem, status_code=200)
async def get_article(
    article_id: str,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> ArticleItem:
    """Return the article if the authenticated user has access to it.

    Raises:
        HTTPException: 400 when ``article_id`` is not a valid UUID; 404 when
            the service reports no accessible article for this user.
    """
    # Imported locally because this module's import block only brings in
    # APIRouter and Depends from fastapi.
    from fastapi import HTTPException

    service = _make_article_service(db)
    try:
        article = await service.get_article_as_user(
            article_id=article_id, user_id=token_data["sub"]
        )
    except ValueError:
        raise HTTPException(
            status_code=400, detail="Invalid article ID format"
        ) from None
    if article is None:
        # One response for "missing" and "not yours", so the endpoint does
        # not reveal which ids exist.
        raise HTTPException(status_code=404, detail="Article not found")
    return _to_article_item(article)

View file

@ -1,3 +1,5 @@
import uuid
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
@ -5,10 +7,9 @@ from sqlalchemy.ext.asyncio import AsyncSession
from ...auth import require_admin
from ...languages import SUPPORTED_LANGUAGES, SUPPORTED_LEVELS
from ...outbound.postgres.database import get_db
from ...outbound.postgres.repositories.translated_article_repository import (
TranslatedArticleRepository,
)
from ...tasks import create_summary_article
from ...outbound.postgres.repositories import summarise_job_repository
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
from ...tasks import summarise_article
router = APIRouter(prefix="/generate", tags=["api"])
@ -16,12 +17,12 @@ router = APIRouter(prefix="/generate", tags=["api"])
class GenerationRequest(BaseModel):
target_language: str
complexity_level: str
text: str
input_texts: list[str]
source_language: str = "en"
class GenerationResponse(BaseModel):
article_id: str
job_id: str
@router.post("", response_model=GenerationResponse, status_code=202)
@ -49,12 +50,19 @@ async def create_generation_job(
target_complexities=[request.complexity_level],
)
await create_summary_article.defer_async(
job = await summarise_job_repository.create(
db,
user_id=uuid.UUID(token_data["sub"]),
translated_article_id=uuid.UUID(article.id),
)
await summarise_article.defer_async(
job_id=str(job.id),
article_id=str(article.id),
source_language=request.source_language,
target_language=request.target_language,
complexity_level=request.complexity_level,
input_text=request.text,
input_texts=request.input_texts,
)
return GenerationResponse(article_id=str(article.id))
return GenerationResponse(job_id=str(job.id))

View file

@ -7,9 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from ...auth import require_admin
from ...outbound.postgres.database import get_db
from ...outbound.postgres.entities.translated_article_entity import (
TranslatedArticleEntity,
)
from ...outbound.postgres.entities.translated_article_entity import TranslatedArticleEntity
from ...outbound.postgres.repositories import summarise_job_repository
from ...tasks import regenerate_audio_for_job
@ -43,7 +41,7 @@ class JobListResponse(BaseModel):
@router.get("/", response_model=JobListResponse)
async def get_jobs(
db: AsyncSession = Depends(get_db),
db: AsyncSession = Depends(get_db),
) -> JobListResponse:
try:
jobs = await summarise_job_repository.list_all(db)
@ -75,3 +73,39 @@ async def get_job(
completed_at=job.completed_at,
error_message=job.error_message,
)
@router.post("/{job_id}/regenerate-audio", status_code=202)
async def regenerate_audio(
    job_id: str,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(require_admin),
) -> dict:
    """Queue audio regeneration for a job after validating the request.

    Checks run in order: id format (400), job exists (404), caller owns the
    job (403), job has an article (400), article has generated text (400),
    article has no audio yet (409), job is not currently processing (409).

    Returns:
        ``{"job_id": job_id}`` once the task has been deferred.
    """
    try:
        job_uuid = uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job ID format")

    job = await summarise_job_repository.get_by_id(db, job_uuid)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")

    caller_id = token_data["sub"]
    if caller_id != str(job.user_id):
        raise HTTPException(status_code=403, detail="Not authorized to modify this job")

    linked_article_id = job.translated_article_id
    if linked_article_id is None:
        raise HTTPException(status_code=400, detail="Job has no associated article")

    article = await db.get(TranslatedArticleEntity, linked_article_id)
    if not (article and article.target_body):
        raise HTTPException(status_code=400, detail="Job has no generated text to synthesize")

    if article.audio_url:
        raise HTTPException(status_code=409, detail="Job already has audio")

    if job.status == "processing":
        raise HTTPException(status_code=409, detail="Job is already processing")

    await regenerate_audio_for_job.defer_async(job_id=str(job_uuid))
    return {"job_id": job_id}

View file

@ -1,34 +1,31 @@
from fastapi import APIRouter
from .articles import router as article_router
from .account import router as account_router
from .admin.packs import router as admin_packs_router
from .adventures import router as adventures_router
from .auth import router as auth_router
from .dictionary import router as dictionary_router
from .flashcards import router as flashcards_router
from .pos import router as pos_router
from .translate import router as translate_router
from .generation import router as generation_router
from .jobs import router as jobs_router
from .learnable_languages import router as learnable_languages_router
from .packs import router as packs_router
from .pos import router as pos_router
from .translate import router as translate_router
from .vocab import router as vocab_router
from .packs import router as packs_router
from .admin.packs import router as admin_packs_router
from .adventures import router as adventures_router
from fastapi import APIRouter
api_router = APIRouter(prefix="/api", tags=["api"])
api_router.include_router(account_router)
api_router.include_router(admin_packs_router)
api_router.include_router(adventures_router)
api_router.include_router(article_router)
api_router.include_router(auth_router)
api_router.include_router(account_router)
api_router.include_router(dictionary_router)
api_router.include_router(flashcards_router)
api_router.include_router(pos_router)
api_router.include_router(translate_router)
api_router.include_router(generation_router)
api_router.include_router(jobs_router)
api_router.include_router(learnable_languages_router)
api_router.include_router(packs_router)
api_router.include_router(pos_router)
api_router.include_router(translate_router)
api_router.include_router(vocab_router)
api_router.include_router(packs_router)
api_router.include_router(admin_packs_router)
api_router.include_router(adventures_router)

View file

@ -1,35 +1,26 @@
import uuid
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from app.domain.services.article_service import ArticleService
from app.outbound.postgres.repositories.article_repository import (
PostgresArticleOwnershipRepository,
PostgresArticleRepository,
)
from ...auth import verify_token
from ...outbound.postgres.database import get_db
from ...outbound.storage_client import get_storage_client
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
router = APIRouter(prefix="/articles", tags=["bff", "articles"])
def _make_article_service(db) -> ArticleService:
return ArticleService(
article_repository=PostgresArticleRepository(db),
article_ownership_repository=PostgresArticleOwnershipRepository(db),
)
class ArticleItem(BaseModel):
id: str
published_at: datetime | None
language: str
title: str
complexity: str
published_at: datetime
source_language: str
source_title: str
target_language: str
target_complexities: list[str]
target_title: str
class ArticleListResponse(BaseModel):
@ -38,13 +29,18 @@ class ArticleListResponse(BaseModel):
class ArticleDetail(BaseModel):
id: str
published_at: datetime | None
language: str
complexity: str
title: str
body: str
audio_url: str | None
body_pos: dict | None
published_at: datetime
source_language: str
source_title: str
source_body: str
source_body_pos: dict
target_language: str
target_complexities: list[str]
target_title: str
target_body: str
target_audio_url: str | None
target_body_pos: dict
target_body_transcript: dict | None
def _audio_url(key: str | None) -> str | None:
@ -55,20 +51,21 @@ def _audio_url(key: str | None) -> str | None:
@router.get("", response_model=ArticleListResponse, status_code=200)
async def list_articles(
target_language: str = 'fr',
db: AsyncSession = Depends(get_db),
token_data: dict = Depends(verify_token),
_: dict = Depends(verify_token),
) -> ArticleListResponse:
service = _make_article_service(db)
user_id = token_data["sub"]
articles = await service.get_articles_for_user(user_id=user_id)
articles = await TranslatedArticleRepository(db).list_complete(target_language=target_language)
return ArticleListResponse(
articles=[
ArticleItem(
id=a.id,
published_at=a.published_at,
language=a.language,
title=a.title,
complexity=a.target_complexity,
source_language=a.source_language,
source_title=a.source_title,
target_language=a.target_language,
target_complexities=a.target_complexities,
target_title=a.target_title,
)
for a in articles
]
@ -79,22 +76,29 @@ async def list_articles(
async def get_article(
article_id: str,
db: AsyncSession = Depends(get_db),
token_data: dict = Depends(verify_token),
_: dict = Depends(verify_token),
) -> ArticleDetail:
uid: str = token_data["sub"]
service = _make_article_service(db)
article = await service.get_article_as_user(article_id, uid)
try:
uid = uuid.UUID(article_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid article ID")
article = await TranslatedArticleRepository(db).get_complete_by_id(uid)
if article is None:
raise HTTPException(status_code=404, detail="Article not found")
return ArticleDetail(
id=article.id,
published_at=article.published_at,
language=article.language,
complexity=article.target_complexity,
title=article.title,
body=article.text,
body_pos=article.text_linguistic_data,
audio_url=_audio_url(article.audio_key),
source_language=article.source_language,
source_title=article.source_title,
source_body=article.source_body,
source_body_pos=article.source_body_pos,
target_language=article.target_language,
target_complexities=article.target_complexities,
target_title=article.target_title,
target_body=article.target_body,
target_audio_url=_audio_url(article.audio_url),
target_body_pos=article.target_body_pos,
target_body_transcript=article.target_body_transcript,
)

View file

@ -1,11 +1,11 @@
from .adventure import generate_adventure_entry
from .app import procrastinate_app
from .create_summary_article import create_summary_article
from .adventure import generate_adventure_entry
from .summarise import summarise_article
from .regenerate_audio import regenerate_audio_for_job
__all__ = [
"procrastinate_app",
"generate_adventure_entry",
"create_summary_article",
"summarise_article",
"regenerate_audio_for_job",
]

View file

@ -8,7 +8,7 @@ procrastinate_app = App(
import_paths=[
"app.tasks.adventure",
"app.tasks.regenerate_audio",
"app.tasks.create_summary_article",
"app.tasks.summarise",
],
)

View file

@ -1,12 +1,6 @@
import logging
import uuid
from sqlalchemy.ext.asyncio import AsyncSession
from app.outbound.postgres.repositories.article_repository import (
PostgresArticleRepository,
)
from ..config import settings
from ..domain.services.summarise_service import SummariseService
from ..outbound.anthropic.anthropic_client import AnthropicClient
@ -20,27 +14,32 @@ from .app import procrastinate_app
logger = logging.getLogger(__name__)
def _make_summarise_service(db: AsyncSession) -> SummariseService:
def _make_summarise_service() -> SummariseService:
return SummariseService(
anthropic_client=AnthropicClient.new(settings.anthropic_api_key),
deepgram_client=LocalDeepgramClient(settings.deepgram_api_key),
deepl_client=DeepLClient(settings.deepl_api_key),
gemini_client=GeminiClient(settings.gemini_api_key),
spacy_client=SpacyClient(),
article_repository=PostgresArticleRepository(db),
)
@procrastinate_app.task(queue="default")
async def create_summary_article(
article_id: str, target_language: str, complexity_level: str, input_text: str
async def summarise_article(
job_id: str,
article_id: str,
source_language: str,
target_language: str,
complexity_level: str,
input_texts: list[str],
) -> None:
print(f"Starting summarisation task for article_id={article_id}")
async with AsyncSessionLocal() as db:
print("Session opened, calling summarise service...")
await _make_summarise_service(db).summarise_article(
await _make_summarise_service().run(
db=db,
job_id=uuid.UUID(job_id),
article_id=uuid.UUID(article_id),
source_language=source_language,
target_language=target_language,
complexity_level=complexity_level,
input_text=input_text,
input_texts=input_texts,
)

View file

@ -1,66 +0,0 @@
"""
Session-scoped fixtures that spin up and tear down the test stack.
The test stack uses docker-compose.test.yml which:
- Runs on port 18000 (won't collide with the dev stack on 8000)
- Uses tmpfs for all storage (no data survives after `down`)
- Uses project name "langlearn-test" to stay isolated from dev containers
"""
import pathlib
import subprocess
from dotenv import load_dotenv
import uuid
import httpx
import pytest
PROJECT_ROOT = pathlib.Path(__file__).parent.parent
COMPOSE_FILE = str(PROJECT_ROOT / "docker-compose.test.yml")
ENV_FILE = str(PROJECT_ROOT / ".env.test")
COMPOSE_PROJECT = "langlearn-test"
API_BASE_URL = "http://localhost:18000"
load_dotenv(PROJECT_ROOT / ".env.test")
def _compose(*args: str) -> None:
    """Run ``docker compose`` for the test project with the given arguments.

    The project name and compose file are always supplied; ``args`` are
    appended after them. The command runs from the project root.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    command = ["docker", "compose", "-p", COMPOSE_PROJECT, "-f", COMPOSE_FILE, *args]
    subprocess.run(command, cwd=PROJECT_ROOT, check=True)
@pytest.fixture(scope="session", autouse=True)
def docker_stack():
    """Bring the test stack up before the session; tear it down (including volumes) after."""
    try:
        _compose("--env-file", ENV_FILE, "up", "--build", "--wait", "-d")
        yield
    finally:
        # Also runs when `up --build --wait` itself fails, so containers and
        # volumes from a half-started stack are not left behind.
        _compose("down", "-v")
@pytest.fixture
def client() -> httpx.Client:
    """A plain httpx client pointed at the test API. Not authenticated."""
    # Generator fixture: the `with` block closes the client after the test
    # that requested it has finished.
    with httpx.Client(base_url=API_BASE_URL) as http:
        yield http
def _random_email() -> str:
return f"user-{uuid.uuid4()}@example.com"
@pytest.fixture
def authd_client() -> httpx.Client:
    """Yield an httpx client authenticated as a freshly registered user.

    Registers a random user, logs in, and installs the returned token as a
    ``Bearer`` Authorization header on the client before yielding it.
    """
    email = _random_email()
    password = "password1234"

    # Named `http` rather than `client` so it does not shadow the sibling fixture.
    with httpx.Client(base_url=API_BASE_URL) as http:
        register_response = http.post("/api/auth/register", json={"email": email, "password": password})
        assert register_response.json().get("success") is True, f"Failed to register test user: {register_response.text}"

        login_response = http.post("/api/auth/login", json={"email": email, "password": password})
        assert login_response.status_code == 200, f"Failed to log in test user: {login_response.text}"

        token = login_response.json().get("access_token")
        # Without this check a missing token would be sent as "Bearer None"
        # and surface later as confusing authorization failures.
        assert token, f"Login response contained no access token: {login_response.text}"

        http.headers["Authorization"] = f"Bearer {token}"
        yield http

View file

@ -27,7 +27,6 @@ dependencies = [
"prometheus-fastapi-instrumentator>=7.1.0",
"procrastinate>=3.8.1",
"watchfiles>=1.0.0",
"python-dotenv>=1.2.2",
]
[build-system]
@ -43,9 +42,3 @@ dev = [
"pytest>=9.0.3",
"pytest-asyncio>=1.3.0",
]
[tool.pytest.ini_options]
testpaths = ["."]
[pytest]
addopts = ["--import-mode=importlib"]

View file

View file

@ -1,20 +0,0 @@
import httpx
from fastapi.testclient import TestClient
from app.main import app
def test_create_article(authd_client: httpx.Client):
    """Create a summary article, then fetch it back by the returned id."""
    create_summary_article_response = authd_client.post("/api/articles", json={
        "article_type": "summary",
        "target_language": "fr",
        "competency_level": "B2",
        "target_word_count_range": "250-300",
        "input_text": "This is an example of a very long text"
    })
    assert create_summary_article_response.status_code == 201

    article_id = create_summary_article_response.json().get("id")
    # Fail here rather than requesting /api/articles/None below.
    assert article_id, f"Create response contained no article id: {create_summary_article_response.text}"

    get_response = authd_client.get(f"/api/articles/{article_id}")
    assert get_response.status_code == 200

View file

@ -417,7 +417,7 @@ requests = [
[[package]]
name = "google-genai"
version = "2.7.0"
version = "1.70.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@ -431,9 +431,9 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "websockets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a7/7b/6eb3b3d545b6bb4c374acba1ccf91b0f33b605e551536a6243cfcef2f07f/google_genai-2.7.0.tar.gz", hash = "sha256:3c6f32f5ced9877ededd1b384b5e5b7f09c20046ec3390b662b16d8cd1882ac5", size = 555853, upload-time = "2026-05-28T15:39:24.58Z" }
sdist = { url = "https://files.pythonhosted.org/packages/74/dd/28e4682904b183acbfad3fe6409f13a42f69bb8eab6e882d3bcbea1dde01/google_genai-1.70.0.tar.gz", hash = "sha256:36b67b0fc6f319e08d1f1efd808b790107b1809c8743a05d55dfcf9d9fad7719", size = 519550, upload-time = "2026-04-01T10:52:46.487Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3c/dd/7a8be39e9d698e80e9db796514efbc6083dbd787bdb9a101e8ba47248e5e/google_genai-2.7.0-py3-none-any.whl", hash = "sha256:21cac381e09a869151706aba797b6a4f96cfe92c484e13204d092caee7ff11cb", size = 822545, upload-time = "2026-05-28T15:39:22.907Z" },
{ url = "https://files.pythonhosted.org/packages/36/a3/d4564c8a9beaf6a3cef8d70fa6354318572cebfee65db4f01af0d41f45ba/google_genai-1.70.0-py3-none-any.whl", hash = "sha256:b74c24549d8b4208f4c736fd11857374788e1ffffc725de45d706e35c97fceee", size = 760584, upload-time = "2026-04-01T10:52:44.349Z" },
]
[[package]]
@ -595,7 +595,6 @@ dependencies = [
{ name = "prometheus-fastapi-instrumentator" },
{ name = "pydantic-settings" },
{ name = "pyjwt" },
{ name = "python-dotenv" },
{ name = "spacy" },
{ name = "sqlalchemy", extra = ["asyncio"] },
{ name = "uvicorn", extra = ["standard"] },
@ -618,7 +617,7 @@ requires-dist = [
{ name = "deepgram-sdk", specifier = ">=6.1.0" },
{ name = "email-validator", specifier = ">=2.0.0" },
{ name = "fastapi", specifier = ">=0.115.0" },
{ name = "google-genai", specifier = ">=2.6.0" },
{ name = "google-genai", specifier = ">=1.0.0" },
{ name = "httpx", specifier = ">=0.28.1" },
{ name = "opentelemetry-api", specifier = ">=1.42.1" },
{ name = "opentelemetry-exporter-prometheus", specifier = ">=0.63b1" },
@ -631,7 +630,6 @@ requires-dist = [
{ name = "prometheus-fastapi-instrumentator", specifier = ">=7.1.0" },
{ name = "pydantic-settings", specifier = ">=2.0.0" },
{ name = "pyjwt", specifier = ">=2.10.0" },
{ name = "python-dotenv", specifier = ">=1.2.2" },
{ name = "spacy", specifier = ">=3.8.0" },
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.0" },
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.30.0" },

View file

@ -103,7 +103,6 @@ services:
context: ./frontend
args:
PUBLIC_API_BASE_URL: ${PUBLIC_API_BASE_URL:-http://api:8000}
command: sh -c "npm install && npm run dev"
ports:
- "${FRONTEND_PORT:-3001}:3001"
environment:

View file

@ -67,8 +67,6 @@ services:
worker:
build: ./api
volumes:
- ./api:/app:z
command: python -m worker.main
environment:
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn}

View file

@ -37,7 +37,6 @@ services:
- "18000:8000"
environment:
DATABASE_URL: postgresql+asyncpg://langlearn_test:testpassword@db:5432/langlearn_test
PROCRASTINATE_DATABASE_URL: postgresql://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn}
JWT_SECRET: test-jwt-secret-not-for-production
ANTHROPIC_API_KEY: test-key
DEEPL_API_KEY: test-key
@ -50,7 +49,6 @@ services:
STORAGE_SECRET_KEY: testpassword123
STORAGE_BUCKET: langlearn-test
STUB_GENERATION: "true"
TRANSACTIONAL_EMAIL_PROVIDER: stub
depends_on:
db:
condition: service_healthy
@ -67,12 +65,9 @@ services:
worker:
build: ./api
volumes:
- ./api:/app:z
command: python -m worker.main
environment:
DATABASE_URL: postgresql+asyncpg://langlearn_test:testpassword@db:5432/langlearn_test
PROCRASTINATE_DATABASE_URL: postgresql://langlearn_test:testpassword@db:5432/langlearn_test
JWT_SECRET: test-jwt-secret-not-for-production
ANTHROPIC_API_KEY: test-key
DEEPL_API_KEY: test-key

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -37,19 +37,17 @@
<li class="article-item">
<a href="/app/articles/{article.id}" class="article-link">
<div class="article-meta">
<span class="label-md meta-lang">{lang(article.language)}</span>
<span class="complexity-tag label-md">{article.complexity}</span>
<h2 class="article-title">{article.title}</h2>
{#if article.published_at}
<time class="article-date label-md" datetime={article.published_at}
>{fmt(article.published_at)}</time
>
{/if}
</div></a
>
<span class="label-md meta-lang">{lang(article.target_language)}</span>
{#each article.target_complexities as level}
<span class="complexity-tag label-md">{level}</span>
{/each}
</div>
<h2 class="article-title">{article.target_title}</h2>
<p class="article-source">{article.source_title}</p>
<time class="article-date label-md" datetime={article.published_at}
>{fmt(article.published_at)}</time
>
</a>
</li>
{/each}
</ul>

View file

@ -7,17 +7,171 @@
import TranslationPanel from './TranslationPanel.svelte';
const { data }: PageProps = $props();
const {
article: { published_at, language, title, audio_url, body, body_pos, complexity, id }
} = data;
const { article } = data;
const publishedDate = published_at
? new Intl.DateTimeFormat('en-GB', {
year: 'numeric',
month: 'long',
day: 'numeric'
}).format(new Date(published_at))
: 'Unpublished';
// -------------------------------------------------------------------------
// Body parsing: split into paragraphs → sentences → tokens
// -------------------------------------------------------------------------
/**
 * Group the sentences of a parts-of-speech payload into paragraphs and give
 * every sentence and token a running index.
 *
 * A sentence whose text ends with '\n' closes the current paragraph; the next
 * sentence starts a new one.
 *
 * @param text Parts-of-speech data whose `sentences` each carry `text` and `tokens`.
 * @returns The paragraphs, plus `totalWords`: the number of tokens across all sentences.
 */
function extractParagraphsAndWordCount(text: PartsOfSpeechData): {
paragraphs: Paragraph[];
totalWords: number;
} {
const paragraphs: Paragraph[] = [{ index: 0, sentences: [] }];
let wordIdx = 0;
let sentenceIdx = 0;
text.sentences.forEach((s) => {
const sentence: Sentence = {
idx: sentenceIdx++,
text: s.text,
// Both bounds are read before the map below advances wordIdx, so they
// describe this sentence's inclusive token range. A sentence with no
// tokens gets endWordIdx === startWordIdx - 1.
startWordIdx: wordIdx,
endWordIdx: wordIdx + s.tokens.length - 1,
tokens: s.tokens.map((t) => ({
...t,
// Token index counted across the whole text, not per sentence.
idx: wordIdx++
})) as SentenceToken[]
};
const sentenceEndsWithNewLine = s.text.endsWith('\n');
paragraphs[paragraphs.length - 1].sentences.push(sentence);
if (sentenceEndsWithNewLine) {
// NOTE(review): if the final sentence ends with '\n', this leaves a
// trailing paragraph with no sentences.
paragraphs.push({ index: paragraphs.length, sentences: [] });
}
});
return { paragraphs, totalWords: wordIdx };
}
const { paragraphs } = extractParagraphsAndWordCount(
article.target_body_pos as Record<string, any> as PartsOfSpeechData
);
// Flat source-sentence list, aligned by sentence index to the target sentences.
// Used by TranslationPanel to show the source-language context for guessing.
const sourceSentences: Array<{ text: string; tokens: PartOfSpeechToken[] }> = (() => {
try {
return (article.source_body_pos as Record<string, any> as PartsOfSpeechData).sentences ?? [];
} catch {
return [];
}
})();
// Flat sentence list for O(n) audio-time lookup
const allSentences: Array<{ idx: number; startWordIdx: number; endWordIdx: number }> = [];
for (const para of paragraphs) {
for (const s of para.sentences) {
allSentences.push({ idx: s.idx, startWordIdx: s.startWordIdx, endWordIdx: s.endWordIdx });
}
}
// -------------------------------------------------------------------------
// Transcript: extract per-word timings from Deepgram response
// -------------------------------------------------------------------------
type WordTiming = { start: number; end: number };
/**
 * Flatten a transcript into one list of `{ start, end }` timings, in
 * utterance order and then word order.
 *
 * Returns an empty list when there is no transcript, or when reading it throws.
 */
function extractWordTimings(transcript: Transcript | null): WordTiming[] {
	if (!transcript) return [];

	const collected: WordTiming[] = [];
	try {
		for (const { words } of transcript.utterances) {
			for (const { start, end } of words) {
				collected.push({ start, end });
			}
		}
	} catch {
		// A malformed transcript yields no timings at all, not a partial list.
		return [];
	}
	return collected;
}
const wordTimings = extractWordTimings(
article.target_body_transcript as unknown as Transcript | null
);
// -------------------------------------------------------------------------
// Reactive state
// -------------------------------------------------------------------------
let audioEl: HTMLAudioElement | null = $state(null);
let activeSentenceIdx = $state(-1);
let selectedTokens: SentenceToken[] = $state([]);
let selectedSentence: Sentence | null = $state(null);
const selectedTokenIndices = $derived(new Set(selectedTokens.map((t) => t.idx)));
// -------------------------------------------------------------------------
// Audio: sentence highlighting
// -------------------------------------------------------------------------
/**
 * `timeupdate` handler: find the word being spoken at the current playback
 * time and mark the sentence containing that word index as active.
 *
 * Does nothing when there is no audio element, no word timings, the playback
 * time is before the first word or after the last one, or no sentence covers
 * the word index.
 */
function handleTimeUpdate() {
	if (!audioEl || wordTimings.length === 0) return;

	const now = audioEl.currentTime;
	const isSpokenNow = (w: WordTiming) => w.start <= now && now <= w.end;

	// First timing that either contains `now` or has not started yet.
	const hit = wordTimings.findIndex((w) => isSpokenNow(w) || w.start > now);
	if (hit === -1) return;

	// Between words: fall back to the most recently started word.
	const wordIdx = isSpokenNow(wordTimings[hit]) ? hit : hit - 1;
	if (wordIdx < 0) return;

	const active = allSentences.find((s) => s.startWordIdx <= wordIdx && wordIdx <= s.endWordIdx);
	if (active) {
		activeSentenceIdx = active.idx;
	}
}
// -------------------------------------------------------------------------
// Word selection: open panel with sentence context
// -------------------------------------------------------------------------
/**
 * Record the selected tokens together with the sentence they belong to, and
 * mark that sentence as the active one.
 */
function handleSelection(tokens: SentenceToken[], sentence: Sentence) {
selectedTokens = tokens;
selectedSentence = sentence;
// Same state that handleTimeUpdate sets during audio playback.
activeSentenceIdx = sentence.idx;
}
/** Clear the current selection (tokens and sentence); activeSentenceIdx is left as it is. */
function closePanel() {
selectedTokens = [];
selectedSentence = null;
}
// -------------------------------------------------------------------------
// Display helpers
// -------------------------------------------------------------------------
const languageNames: Record<string, string> = {
en: 'English',
fr: 'French',
es: 'Spanish',
it: 'Italian',
de: 'German',
pt: 'Portuguese',
ja: 'Japanese',
zh: 'Chinese',
ko: 'Korean'
};
const targetLang =
languageNames[article.target_language] ?? article.target_language.toUpperCase();
const publishedDate = new Intl.DateTimeFormat('en-GB', {
year: 'numeric',
month: 'long',
day: 'numeric'
}).format(new Date(article.published_at));
</script>
<!-- Reading progress bar (CSS scroll-driven animation) -->
@ -29,25 +183,56 @@
</nav>
<header class="article-header">
<p class="article-eyebrow label-md">{language} · {publishedDate}</p>
<h1 class="article-title">{title}</h1>
<p class="article-eyebrow label-md">{targetLang} · {publishedDate}</p>
<h1 class="article-title">{article.target_title}</h1>
</header>
<div class="article-main">
{#if audio_url}
<div class="audio-section">
<audio src={audio_url} controls class="audio-player">
Your browser does not support the audio element.
</audio>
</div>
{/if}
<div class="article-layout">
<!-- Main content: audio + body -->
<div class="article-main">
{#if article.target_audio_url}
<div class="audio-section">
<audio
bind:this={audioEl}
src={article.target_audio_url}
controls
ontimeupdate={handleTimeUpdate}
class="audio-player"
>
Your browser does not support the audio element.
</audio>
</div>
{/if}
{#each body.split('\n\n') as paragraph}
<p class="article-paragraph">{paragraph}</p>
{/each}
<TargetLanguageBody
lang={article.source_language}
{paragraphs}
{activeSentenceIdx}
onSelection={handleSelection}
{selectedTokenIndices}
/>
</div>
<TranslationPanel
{closePanel}
{selectedTokens}
targetSentence={selectedSentence}
sourceTokens={selectedSentence !== null
? (sourceSentences[selectedSentence.idx]?.tokens ?? null)
: null}
/>
</div>
</div>
<!-- Mobile backdrop: closes the drawer when tapped outside -->
<!-- svelte-ignore a11y_click_events_have_key_events a11y_no_static_element_interactions -->
<div
class="drawer-backdrop"
class:is-visible={selectedTokens.length > 0}
onclick={closePanel}
aria-hidden="true"
></div>
<style>
/* --- Reading progress bar (CSS scroll-driven animation) --- */
/* Sits at the bottom edge of the sticky topnav (3.25rem) */
@ -113,17 +298,18 @@
max-width: 38rem;
}
.article-main {
display: flex;
flex-direction: column;
/* --- Two-column layout --- */
.article-layout {
display: grid;
grid-template-columns: 1fr;
gap: var(--space-6);
align-items: start;
}
.article-paragraph {
font-size: var(--text-body-lg);
line-height: var(--leading-relaxed);
color: var(--color-on-surface);
max-width: 65ch;
@media (min-width: 768px) {
.article-layout {
grid-template-columns: 1fr 22rem;
}
}
/* --- Audio --- */

View file

@ -1,5 +1,5 @@
import { fail, redirect, type Actions } from '@sveltejs/kit';
import { createArticleApiArticlesPost } from '../../../../client/sdk.gen.ts';
import { createGenerationJobApiGeneratePost } from '../../../../client/sdk.gen.ts';
import type { HttpValidationError } from '../../../../client/types.gen.ts';
export const actions = {
@ -8,20 +8,22 @@ export const actions = {
const target_language = formData.get('target_language') as string;
const source_language = formData.get('source_language') as string;
const complexity_level = formData.get('complexity_level') as string;
const text = formData.get('text') as string;
const input_texts_raw = formData.get('input_texts') as string;
const { response, data } = await createArticleApiArticlesPost({
const input_texts = input_texts_raw
.split(/\n?---\n?/)
.map((t) => t.trim())
.filter(Boolean);
const values = { target_language, source_language, complexity_level, input_texts_raw };
const { response, data } = await createGenerationJobApiGeneratePost({
headers: { Authorization: `Bearer ${locals.authToken}` },
body: {
article_type: 'summary',
language: target_language,
target_complexity: complexity_level,
text: text
}
body: { target_language, source_language, complexity_level, input_texts }
});
if (response.ok) {
return redirect(303, `/app/articles`);
if (response.status === 202 && data) {
return redirect(303, `/app/jobs/${data.job_id}`);
}
let error = 'Something went wrong. Please try again.';
@ -32,14 +34,6 @@ export const actions = {
}
}
return fail(response.status, {
error,
values: {
target_language,
source_language,
complexity_level,
text
}
});
return fail(response.status, { error, values });
}
} satisfies Actions;

View file

@ -29,53 +29,66 @@
<div class="field-row">
<div class="field">
<label for="source_language" class="field-label">Source Language</label>
<select id="source_language" name="source_language" class="field-select" required>
<select
id="source_language"
name="source_language"
class="field-select"
required
>
{#each languages as lang}
<option
value={lang.value}
selected={form?.values?.source_language === lang.value ||
(!form && lang.value === 'en')}>{lang.label}</option
>
selected={form?.values?.source_language === lang.value || (!form && lang.value === 'en')}
>{lang.label}</option>
{/each}
</select>
</div>
<div class="field">
<label for="target_language" class="field-label">Target Language</label>
<select id="target_language" name="target_language" class="field-select" required>
<select
id="target_language"
name="target_language"
class="field-select"
required
>
{#each languages as lang}
<option
value={lang.value}
selected={form?.values?.target_language === lang.value ||
(!form && lang.value === 'fr')}>{lang.label}</option
>
selected={form?.values?.target_language === lang.value || (!form && lang.value === 'fr')}
>{lang.label}</option>
{/each}
</select>
</div>
<div class="field">
<label for="complexity_level" class="field-label">Complexity Level</label>
<select id="complexity_level" name="complexity_level" class="field-select" required>
<select
id="complexity_level"
name="complexity_level"
class="field-select"
required
>
{#each complexityLevels as level}
<option
value={level}
selected={form?.values?.complexity_level === level || (!form && level === 'B1')}
>{level}</option
>
>{level}</option>
{/each}
</select>
</div>
</div>
<div class="field">
<label for="text" class="field-label">Input Texts</label>
<label for="input_texts" class="field-label">Input Texts</label>
<textarea
id="text"
name="text"
id="input_texts"
name="input_texts"
class="field-textarea"
placeholder="Paste your source text here…"
required>{form?.values?.text ?? ''}</textarea
>
required
>{form?.values?.input_texts_raw ?? ''}</textarea>
<p class="field-hint">Separate multiple texts with a line containing only <code>---</code></p>
</div>
<div class="form-actions">