language-learning-app/api/app/routers/generation.py
2026-03-18 20:55:02 +00:00

178 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import uuid
from datetime import datetime, timezone
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
import anthropic
import deepl
from ..auth import verify_token
from ..database import get_db, AsyncSessionLocal
from ..models import Job
from ..config import settings
router = APIRouter(prefix="/generate", tags=["generation"])

# Language codes accepted for both source and target, mapped to the English
# display names that are interpolated verbatim into the LLM prompts below.
SUPPORTED_LANGUAGES: dict[str, str] = {
    "en": "English",
    "fr": "French",
    "es": "Spanish",
    "it": "Italian",
    "de": "German",
}

# CEFR proficiency levels accepted for complexity_level.
SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"}

# Maps our language codes to DeepL source/target language codes
# NOTE: currently unused — the DeepL translation path further down is
# commented out pending account review; kept for when it is re-enabled.
DEEPL_SOURCE_LANG: dict[str, str] = {
    "en": "EN",
    "fr": "FR",
    "es": "ES",
    "it": "IT",
    "de": "DE",
}

# DeepL target codes (English needs a regional variant)
DEEPL_TARGET_LANG: dict[str, str] = {
    "en": "EN-US",
    "fr": "FR",
    "es": "ES",
    "it": "IT",
    "de": "DE",
}
class GenerationRequest(BaseModel):
    """Request body for POST /generate."""

    target_language: str  # code from SUPPORTED_LANGUAGES; language to generate in
    complexity_level: str  # CEFR level from SUPPORTED_LEVELS, e.g. "B1"
    input_texts: list[str]  # source material the generated text is inspired by
    topic: str | None = None  # optional topic focus woven into the prompt
    source_language: str = "en"  # learner's own language, used for the translation
class GenerationResponse(BaseModel):
    """Response for POST /generate: the id of the queued background job."""

    job_id: str  # UUID of the Job row, as a string
async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
    """Background task that generates learner text and its translation.

    Marks the job as processing, prompts Claude for text in the target
    language at the requested CEFR level, continues the same conversation
    to translate the result back into the learner's source language, and
    stores both texts on the job. Any exception is recorded on the job as
    a failure instead of propagating out of the background task.

    Args:
        job_id: Primary key of the Job row created by the endpoint.
        request: The validated generation request.
    """
    async with AsyncSessionLocal() as db:
        job = await db.get(Job, job_id)
        if job is None:
            # The job row disappeared before we started (e.g. deleted);
            # without it we would crash on attribute access — bail out.
            return
        job.status = "processing"
        job.started_at = datetime.now(timezone.utc)
        await db.commit()
        try:
            from_language = SUPPORTED_LANGUAGES[request.source_language]
            language_name = SUPPORTED_LANGUAGES[request.target_language]
            # Build a short summary of the input to store (not the full text)
            topic_part = f"Topic: {request.topic}. " if request.topic else ""
            combined_preview = " ".join(request.input_texts)[:300]
            input_summary = (
                f"{topic_part}Based on {len(request.input_texts)} input text(s): "
                f"{combined_preview}..."
            )
            # Cap the source material at three texts to bound prompt size.
            source_material = "\n\n".join(request.input_texts[:3])
            topic_line = f"\nTopic focus: {request.topic}" if request.topic else ""
            prompt = (
                f"You are a language learning content creator. "
                f"Using the input provided, you generate engaging realistic text in {language_name} "
                f"at {request.complexity_level} proficiency level (CEFR scale).\n\n"
                f"The text should:\n"
                f"- Be appropriate for a {request.complexity_level} learner\n"
                f"- Maintain a similar tone to the input text. Where appropriate, use idioms\n"
                f"- Feel natural and authentic, like content a native speaker would read\n"
                f"- Be formatted in markdown with paragraphs and line breaks\n"
                # Was "200400" — the range separator had been lost.
                f"- Be 200-400 words long\n"
                f"- Be inspired by the following source material "
                f"(but written originally in {language_name}):\n\n"
                f"{source_material}"
                f"{topic_line}\n\n"
                f"Respond with ONLY the generated text in {language_name}, "
                f"no explanations or translations.\n"
                f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}."
            )
            # NOTE(review): the final sentence above conflicts with the
            # "Respond with ONLY the generated text" instruction, and
            # 'Topic focus' is an *input* line here — confirm it is intended.
            # NOTE(review): this synchronous client blocks the event loop for
            # the duration of each API call; consider anthropic.AsyncAnthropic
            # or asyncio.to_thread if request throughput matters.
            client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
            message = client.messages.create(
                model="claude-sonnet-4-6",
                max_tokens=1024,
                messages=[{"role": "user", "content": prompt}],
            )
            generated_text = message.content[0].text
            # TODO: Come back to this when DeepL unblock my account for being "high risk"
            # Translate generated text back into the learner's source language via DeepL
            # translator = deepl.Translator(settings.deepl_api_key)
            # translation = translator.translate_text(
            #     generated_text,
            #     source_lang=DEEPL_SOURCE_LANG[request.target_language],
            #     target_lang=DEEPL_TARGET_LANG[request.source_language],
            # )
            translate_prompt = (
                f"You are a helpful assistant that translates text. Translate just the previous summary "
                f"content in {language_name} text you generated based on the input I gave you. Translate "
                f"it back into {from_language}.\n"
                f"- Keep the translation as close as possible to the original meaning and tone\n"
                f"- Send through only the translated text, no explanations or notes\n"
            )
            # Replay the first exchange so the model translates its own output
            # in context rather than receiving the text cold.
            translate_message = client.messages.create(
                model="claude-sonnet-4-6",
                max_tokens=1024,
                messages=[
                    {"role": "user", "content": prompt},
                    {"role": "assistant", "content": message.content},
                    {"role": "user", "content": translate_prompt},
                ],
            )
            job.status = "succeeded"
            job.generated_text = generated_text
            job.translated_text = translate_message.content[0].text
            job.input_summary = input_summary[:500]
            job.completed_at = datetime.now(timezone.utc)
        except Exception as exc:
            # Record the failure on the job so a status poller can surface it;
            # background tasks have no caller to raise to.
            job.status = "failed"
            job.error_message = str(exc)
            job.completed_at = datetime.now(timezone.utc)
        await db.commit()
@router.post("", response_model=GenerationResponse, status_code=202)
async def create_generation_job(
request: GenerationRequest,
background_tasks: BackgroundTasks,
db: AsyncSession = Depends(get_db),
_: dict = Depends(verify_token),
) -> GenerationResponse:
if request.target_language not in SUPPORTED_LANGUAGES:
raise HTTPException(
status_code=400,
detail=f"Unsupported language '{request.target_language}'. "
f"Supported: {list(SUPPORTED_LANGUAGES)}",
)
if request.complexity_level not in SUPPORTED_LEVELS:
raise HTTPException(
status_code=400,
detail=f"Unsupported level '{request.complexity_level}'. "
f"Supported: {sorted(SUPPORTED_LEVELS)}",
)
job = Job(
source_language=request.source_language,
target_language=request.target_language,
complexity_level=request.complexity_level,
)
db.add(job)
await db.commit()
await db.refresh(job)
background_tasks.add_task(_run_generation, job.id, request)
return GenerationResponse(job_id=str(job.id))