Create the /api router

This commit is contained in:
Thomas 2026-03-21 20:47:15 +00:00
parent afe3b63fa5
commit 5aebb0fd7f
No known key found for this signature in database
14 changed files with 289 additions and 184 deletions

View file

@ -26,3 +26,5 @@ migration:
# Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally)
lock:
cd api && uv pip compile pyproject.toml -o requirements.txt
rebuild: down build up

9
api/app/languages.py Normal file
View file

@ -0,0 +1,9 @@
# Languages the generation pipeline accepts, keyed by ISO 639-1 code.
# The values are human-readable names interpolated into LLM prompts.
SUPPORTED_LANGUAGES: dict[str, str] = {
    "en": "English",
    "fr": "French",
    "es": "Spanish",
    "it": "Italian",
    "de": "German",
}

# CEFR proficiency levels accepted by the generation endpoints.
SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"}

View file

@ -1,11 +1,13 @@
import asyncio
from contextlib import asynccontextmanager
from .routers.api import generation, pos
from fastapi import FastAPI
from .routers import pos, generation, jobs
from .routers.api import jobs
from .routers import auth as auth_router
from .routers import media as media_router
from .routers.api.main import api_router
from .storage import ensure_bucket_exists
from . import worker
@ -24,10 +26,8 @@ async def lifespan(app: FastAPI):
app = FastAPI(title="Language Learning API", lifespan=lifespan)
app.include_router(api_router)
app.include_router(auth_router.router)
app.include_router(pos.router)
app.include_router(generation.router)
app.include_router(jobs.router)
app.include_router(media_router.router)

View file

View file

@ -0,0 +1,104 @@
import uuid
from functools import partial
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from ...languages import SUPPORTED_LANGUAGES, SUPPORTED_LEVELS
from ...auth import verify_token
from ...database import get_db, AsyncSessionLocal
from ...models import Job
from ...storage import upload_audio
from ...services import llm, tts, job_repo
from ...services.tts import VOICE_BY_LANGUAGE
from ...services.translate import translate
from ... import worker
router = APIRouter(prefix="/generate", tags=["api"])
class GenerationRequest(BaseModel):
    """Payload for POST /generate: what to write, in which language and level."""

    # Key into SUPPORTED_LANGUAGES; validated in create_generation_job.
    target_language: str
    # CEFR level; validated against SUPPORTED_LEVELS.
    complexity_level: str
    # Source texts the summary is based on (only the first three are used).
    input_texts: list[str]
    # NOTE(review): topic is accepted but not read by _run_generation — confirm intended.
    topic: str | None = None
    # Language of the input texts and of the back-translation.
    source_language: str = "en"


class GenerationResponse(BaseModel):
    """Returned with 202: the id of the queued generation job."""

    job_id: str
async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
    """Background task: generate summary text, translate it, then synthesize audio.

    Runs outside the request cycle (enqueued by create_generation_job), so it
    opens its own DB session and records progress/failure on the Job row
    instead of raising to a caller.
    """
    async with AsyncSessionLocal() as db:
        job = await db.get(Job, job_id)
        await job_repo.mark_processing(db, job)
        try:
            # The summary is written in the target language from material
            # supplied in the request's source language.
            from_language = SUPPORTED_LANGUAGES[request.source_language]
            to_language = SUPPORTED_LANGUAGES[request.target_language]
            source_material = "\n\n".join(request.input_texts[:3])
            generated_text = await llm.generate_summary_text(
                content_to_summarise=source_material,
                complexity_level=request.complexity_level,
                # Fix: the prompt previously claimed the source material was in
                # the target language; it is in the source language.
                from_language=from_language,
                to_language=to_language,
                length_preference="200-400 words",
            )
            # Persist LLM output before translation/TTS so it survives a
            # failure in either of those later steps.
            await job_repo.save_generated_text(
                db, job, generated_text, source_material[:500]
            )
            translated_text = await translate(generated_text, request.source_language)
            # Fix: the translation was previously discarded — the old second
            # save_generated_text call was also missing its input_summary
            # argument and would have raised TypeError.
            await job_repo.save_translated_text(db, job, translated_text)
            voice = VOICE_BY_LANGUAGE.get(request.target_language, "Kore")
            wav_bytes = await tts.generate_audio(generated_text, voice)
            audio_key = f"audio/{job_id}.wav"
            upload_audio(audio_key, wav_bytes)
            await job_repo.mark_succeeded(db, job, audio_key)
        except Exception as exc:
            # Best-effort pipeline: record the failure on the job row rather
            # than crashing the worker.
            await job_repo.mark_failed(db, job, str(exc))
@router.post("", response_model=GenerationResponse, status_code=202)
async def create_generation_job(
    request: GenerationRequest,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> GenerationResponse:
    """Validate the request, persist a new Job row, and queue background generation.

    Returns 202 with the job id; the heavy lifting happens in _run_generation
    on the background worker.
    """
    # Reject unknown language codes / CEFR levels up front with a 400.
    if request.target_language not in SUPPORTED_LANGUAGES:
        detail = (
            f"Unsupported language '{request.target_language}'. "
            f"Supported: {list(SUPPORTED_LANGUAGES)}"
        )
        raise HTTPException(status_code=400, detail=detail)
    if request.complexity_level not in SUPPORTED_LEVELS:
        detail = (
            f"Unsupported level '{request.complexity_level}'. "
            f"Supported: {sorted(SUPPORTED_LEVELS)}"
        )
        raise HTTPException(status_code=400, detail=detail)

    new_job = Job(
        # "sub" carries the authenticated user's id from the verified token.
        user_id=uuid.UUID(token_data["sub"]),
        source_language=request.source_language,
        target_language=request.target_language,
        complexity_level=request.complexity_level,
    )
    db.add(new_job)
    await db.commit()
    await db.refresh(new_job)  # load DB-generated fields (id) before enqueueing

    await worker.enqueue(partial(_run_generation, new_job.id, request))
    return GenerationResponse(job_id=str(new_job.id))

View file

@ -7,15 +7,15 @@ from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from ..auth import verify_token
from ..database import get_db, AsyncSessionLocal
from ..models import Job
from ..storage import upload_audio
from ..services import tts, job_repo
from ..services.tts import VOICE_BY_LANGUAGE
from .. import worker
from ...auth import verify_token
from ...database import get_db, AsyncSessionLocal
from ...models import Job
from ...storage import upload_audio
from ...services import tts, job_repo
from ...services.tts import VOICE_BY_LANGUAGE
from ... import worker
router = APIRouter(prefix="/jobs", tags=["jobs"])
router = APIRouter(prefix="/jobs", dependencies=[Depends(verify_token)])
class JobResponse(BaseModel):
@ -34,27 +34,28 @@ class JobResponse(BaseModel):
audio_url: str | None = None
# only present on failure
error_message: str | None = None
model_config = { "from_attributes": True }
model_config = {"from_attributes": True}
class JobSummary(BaseModel):
id: uuid.UUID
status: str
created_at: datetime
class JobListResponse(BaseModel):
jobs: list[JobSummary]
model_config = { "from_attributes": True }
model_config = {"from_attributes": True}
@router.get("/", response_model=JobListResponse)
async def get_jobs(
db: AsyncSession = Depends(get_db),
_: dict = Depends(verify_token)
) -> JobListResponse:
try:
result = await db.execute(select(Job).order_by(Job.created_at.desc()))
jobs = result.scalars().all()
return { "jobs": jobs }
return {"jobs": jobs}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@ -63,7 +64,6 @@ async def get_jobs(
async def get_job(
job_id: str,
db: AsyncSession = Depends(get_db),
_: dict = Depends(verify_token),
) -> JobResponse:
try:
uid = uuid.UUID(job_id)
@ -129,16 +129,19 @@ async def regenerate_audio(
raise HTTPException(status_code=404, detail="Job not found")
if str(job.user_id) != token_data["sub"]:
raise HTTPException(status_code=403, detail="Not authorized to modify this job")
raise HTTPException(
status_code=403, detail="Not authorized to modify this job")
if not job.generated_text:
raise HTTPException(status_code=400, detail="Job has no generated text to synthesize")
raise HTTPException(
status_code=400, detail="Job has no generated text to synthesize")
if job.audio_url:
raise HTTPException(status_code=409, detail="Job already has audio")
if job.status == "processing":
raise HTTPException(status_code=409, detail="Job is already processing")
raise HTTPException(
status_code=409, detail="Job is already processing")
await worker.enqueue(partial(_run_regenerate_audio, uid))
return {"job_id": job_id}

View file

@ -0,0 +1,13 @@
from fastapi import APIRouter

from .generation import router as generation_router
from .jobs import router as jobs_router
from .pos import router as pos_router
from .translate import router as translate_router

# Aggregate router: every sub-router below is served under the /api prefix.
api_router = APIRouter(prefix="/api", tags=["api"])

# Mount each feature router; their prefixes are disjoint so order is irrelevant.
for _sub_router in (pos_router, translate_router, generation_router, jobs_router):
    api_router.include_router(_sub_router)

View file

@ -2,9 +2,9 @@ from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
import spacy
from ..auth import verify_token
from ...auth import verify_token
router = APIRouter(prefix="/analyze", tags=["analysis"])
router = APIRouter(prefix="/pos", tags=["api", "pos"])
LANGUAGE_MODELS: dict[str, str] = {
"en": "en_core_web_sm",
@ -47,7 +47,7 @@ class POSResponse(BaseModel):
tokens: list[TokenInfo]
@router.post("/pos", response_model=POSResponse)
@router.post("/", response_model=POSResponse)
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
nlp = _get_nlp(request.language)
doc = nlp(request.text)

View file

@ -0,0 +1,35 @@
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from ...auth import verify_token
from ...services.translate import DEEPL_LANGUAGE_CODES, translate
router = APIRouter(
    prefix="/translate",
    tags=["api", "translate"],
    dependencies=[Depends(verify_token)],
)


class TranslationResponse(BaseModel):
    """Echo of the request plus the translated text."""

    text: str
    target_language: str
    translated_text: str


@router.get("", response_model=TranslationResponse, summary="Translate text to a target language")
async def translate_text(
    text: str,
    target_language: str,
    context: str | None = None,
) -> TranslationResponse:
    """Translate ``text`` into ``target_language``, optionally using ``context``."""
    # DeepL only supports a fixed set of target codes — reject the rest with 400.
    if target_language not in DEEPL_LANGUAGE_CODES:
        detail = (
            f"Unsupported target language '{target_language}'. "
            f"Supported: {list(DEEPL_LANGUAGE_CODES)}"
        )
        raise HTTPException(status_code=400, detail=detail)

    translated = await translate(text, target_language, context)
    return TranslationResponse(
        text=text,
        target_language=target_language,
        translated_text=str(translated),
    )

View file

@ -1,137 +0,0 @@
import uuid
from functools import partial
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from ..auth import verify_token
from ..database import get_db, AsyncSessionLocal
from ..models import Job
from ..storage import upload_audio
from ..services import llm, tts, job_repo
from ..services.tts import VOICE_BY_LANGUAGE
from .. import worker
router = APIRouter(prefix="/generate", tags=["generation"])
# Human-readable names for supported ISO 639-1 codes; interpolated into prompts.
SUPPORTED_LANGUAGES: dict[str, str] = {
    "en": "English",
    "fr": "French",
    "es": "Spanish",
    "it": "Italian",
    "de": "German",
}

# CEFR proficiency levels accepted by the endpoint.
SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"}
class GenerationRequest(BaseModel):
    """Payload for POST /generate."""

    # Key into SUPPORTED_LANGUAGES (language the text is generated in).
    target_language: str
    # CEFR level, validated against SUPPORTED_LEVELS.
    complexity_level: str
    # Source texts the generated piece is based on.
    input_texts: list[str]
    # Optional topic hint woven into the prompt.
    topic: str | None = None
    # Language of the inputs and of the back-translation.
    source_language: str = "en"


class GenerationResponse(BaseModel):
    """202 response body: id of the queued job."""

    job_id: str
async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
    """Background task: generate target-language text, translate it back, then TTS.

    Opens its own DB session (it runs on the worker, outside the request
    cycle) and records success/failure on the Job row instead of raising.
    """
    async with AsyncSessionLocal() as db:
        job = await db.get(Job, job_id)
        await job_repo.mark_processing(db, job)
        try:
            from_language = SUPPORTED_LANGUAGES[request.source_language]
            language_name = SUPPORTED_LANGUAGES[request.target_language]
            topic_part = f"Topic: {request.topic}. " if request.topic else ""
            # Short human-readable summary of the inputs, stored on the job row.
            combined_preview = " ".join(request.input_texts)[:300]
            input_summary = (
                f"{topic_part}Based on {len(request.input_texts)} input text(s): "
                f"{combined_preview}..."
            )
            # Only the first three input texts are fed to the LLM.
            source_material = "\n\n".join(request.input_texts[:3])
            topic_line = f"\nTopic focus: {request.topic}" if request.topic else ""
            prompt = (
                f"You are a language learning content creator. "
                f"Using the input provided, you generate engaging realistic text in {language_name} "
                f"at {request.complexity_level} proficiency level (CEFR scale).\n\n"
                f"The text should:\n"
                f"- Be appropriate for a {request.complexity_level} learner\n"
                f"- Maintain a similar tone to the input text. Where appropriate, use idioms\n"
                f"- Feel natural and authentic, like content a native speaker would read\n"
                f"- Be formatted in markdown with paragraphs and line breaks\n"
                f"- Be 200400 words long\n"  # NOTE(review): "200400" looks garbled — presumably "200-400"; confirm
                f"- Be inspired by the following source material "
                f"(but written originally in {language_name}):\n\n"
                f"{source_material}"
                f"{topic_line}\n\n"
                f"Respond with ONLY the generated text in {language_name}, "
                f"no explanations or translations.\n"
                f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}."
            )
            generated_text = await llm.generate_text(prompt)
            # Second LLM turn: translate the generated text back into the
            # reader's source language for side-by-side display.
            translate_prompt = (
                f"You are a helpful assistant that translates text. Translate just the previous summary "
                f"content in {language_name} text you generated based on the input I gave you. Translate "
                f"it back into {from_language}.\n"
                f"- Keep the translation as close as possible to the original meaning and tone\n"
                f"- Send through only the translated text, no explanations or notes\n"
            )
            translated_text = await llm.translate_text(prompt, generated_text, translate_prompt)
            # Save LLM results before attempting TTS so they're preserved on failure
            await job_repo.save_llm_results(
                db, job, generated_text, translated_text, input_summary[:500]
            )
            # "Kore" is the fallback voice when the language has no mapping.
            voice = VOICE_BY_LANGUAGE.get(request.target_language, "Kore")
            wav_bytes = await tts.generate_audio(generated_text, voice)
            audio_key = f"audio/{job_id}.wav"
            upload_audio(audio_key, wav_bytes)
            await job_repo.mark_succeeded(db, job, audio_key)
        except Exception as exc:
            # Best-effort pipeline: record the failure rather than crash the worker.
            await job_repo.mark_failed(db, job, str(exc))
@router.post("", response_model=GenerationResponse, status_code=202)
async def create_generation_job(
    request: GenerationRequest,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> GenerationResponse:
    """Validate input, create a Job row, and enqueue background generation.

    Returns 202 with the new job's id; progress is tracked on the Job row.
    Raises HTTPException(400) for unknown language codes or CEFR levels.
    """
    if request.target_language not in SUPPORTED_LANGUAGES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language '{request.target_language}'. "
            f"Supported: {list(SUPPORTED_LANGUAGES)}",
        )
    if request.complexity_level not in SUPPORTED_LEVELS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported level '{request.complexity_level}'. "
            f"Supported: {sorted(SUPPORTED_LEVELS)}",
        )
    job = Job(
        # "sub" carries the authenticated user's id from the verified token.
        user_id=uuid.UUID(token_data["sub"]),
        source_language=request.source_language,
        target_language=request.target_language,
        complexity_level=request.complexity_level,
    )
    db.add(job)
    await db.commit()
    # Refresh to load DB-generated fields (the job id) before enqueueing.
    await db.refresh(job)
    await worker.enqueue(partial(_run_generation, job.id, request))
    return GenerationResponse(job_id=str(job.id))

View file

@ -12,19 +12,26 @@ async def mark_processing(db: AsyncSession, job: Job) -> None:
await db.commit()
async def save_llm_results(
async def save_generated_text(
db: AsyncSession,
job: Job,
generated_text: str,
translated_text: str,
input_summary: str,
) -> None:
job.generated_text = generated_text
job.translated_text = translated_text
job.input_summary = input_summary
await db.commit()
async def save_translated_text(db: AsyncSession, job: Job, translated_text: str) -> None:
    """Store the translated text on *job* and commit the session."""
    job.translated_text = translated_text
    await db.commit()
async def mark_succeeded(db: AsyncSession, job: Job, audio_url: str) -> None:
job.status = "succeeded"
job.audio_url = audio_url

View file

@ -1,37 +1,73 @@
import asyncio
import anthropic
from ..config import settings
def _create_client() -> anthropic.Anthropic:
def _create_anthropic_client() -> anthropic.Anthropic:
return anthropic.Anthropic(api_key=settings.anthropic_api_key)
async def generate_text(prompt: str) -> str:
def _create_system_prompt_summarise_text(
complexity_level: str,
from_language: str,
to_language: str,
length_preference="200-400 words",
) -> str:
return (
f"You are a language learning content creator.\n"
f"The user will provide input, you will generate an engaging realistic summary text in {to_language} "
f"at {complexity_level} proficiency level (CEFR scale).\n\n"
f"The text you generate will:\n"
f"- Contain ONLY the generated text in {to_language}.\n"
f"- Be appropriate for a {complexity_level} {to_language} speaker.\n"
f"- Never generate inappropriate (hateful, sexual, violent) content. It is preferable to return no text than to generate such content.\n"
f"- Speak directly to the reader/listener, adopting the tone and style of a semi-formal news reporter or podcaster.\n"
f"- Where appropriate (fluency level, content), use a small number of idiomatic expressions.\n"
f"- Be formatted in markdown with paragraphs and line breaks.\n"
f"- Be {length_preference} long.\n"
f"- Be inspired by the following source material "
f"(but written originally in {from_language}):\n\n"
)
def _create_prompt_summarise_text(
source_material: str,
) -> str:
return (
f"Source material follows: \n\n"
f"{source_material}"
)
async def generate_summary_text(
content_to_summarise: str,
complexity_level: str,
from_language: str,
to_language: str,
length_preference="200-400 words",) -> str:
"""Generate text using Anthropic."""
def _call() -> str:
client = _create_client()
message = client.messages.create(
model="claude-sonnet-4-6",
max_tokens=1024,
messages=[{"role": "user", "content": prompt}],
)
return message.content[0].text
return await asyncio.to_thread(_call)
async def translate_text(original_prompt: str, generated_text: str, translate_prompt: str) -> str:
def _call() -> str:
client = _create_client()
client = _create_anthropic_client()
message = client.messages.create(
model="claude-sonnet-4-6",
max_tokens=1024,
messages=[
{"role": "user", "content": original_prompt},
{"role": "assistant", "content": generated_text},
{"role": "user", "content": translate_prompt},
{
"role": "system",
"content": _create_system_prompt_summarise_text(
complexity_level=complexity_level,
from_language=from_language,
to_language=to_language,
length_preference=length_preference,
)
},
{
"role": "user",
"content": _create_prompt_summarise_text(
content_to_summarise
)
}
],
)
return message.content[0].text

View file

@ -0,0 +1,33 @@
import httpx
from ..config import settings
# DeepL free-tier REST endpoint for text translation.
DEEPL_API_URL = "https://api-free.deepl.com/v2/translate"

# Maps our ISO 639-1 language codes to DeepL target_lang codes.
# NOTE(review): English is pinned to British English ("EN-GB") — confirm intended.
DEEPL_LANGUAGE_CODES = {
    "en": "EN-GB",
    "fr": "FR",
    "es": "ES",
    "it": "IT",
    "de": "DE",
}
async def translate(text: str, to_language: str, context: str | None = None) -> str:
    """Translate *text* into *to_language* using the DeepL REST API.

    Raises KeyError for a language code outside DEEPL_LANGUAGE_CODES and
    httpx.HTTPStatusError for a non-2xx API response.
    """
    headers = {
        "Authorization": f"DeepL-Auth-Key {settings.deepl_api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "text": [text],
        "target_lang": DEEPL_LANGUAGE_CODES[to_language],
        "context": context or None,
    }
    async with httpx.AsyncClient() as client:
        response = await client.post(DEEPL_API_URL, headers=headers, json=payload)
        response.raise_for_status()
        body = response.json()
    return body["translations"][0]["text"]

View file

@ -14,9 +14,9 @@ dependencies = [
"email-validator>=2.0.0",
"alembic>=1.13.0",
"pydantic-settings>=2.0.0",
"deepl>=1.18.0",
"google-genai>=1.0.0",
"boto3>=1.35.0",
"httpx>=0.28.1",
]
[build-system]