Create the /api router
This commit is contained in:
parent
afe3b63fa5
commit
5aebb0fd7f
14 changed files with 289 additions and 184 deletions
2
Makefile
2
Makefile
|
|
@ -26,3 +26,5 @@ migration:
|
||||||
# Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally)
|
# Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally)
|
||||||
lock:
|
lock:
|
||||||
cd api && uv pip compile pyproject.toml -o requirements.txt
|
cd api && uv pip compile pyproject.toml -o requirements.txt
|
||||||
|
|
||||||
|
rebuild: down build up
|
||||||
|
|
|
||||||
9
api/app/languages.py
Normal file
9
api/app/languages.py
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Two-letter language code -> English display name of the language.
# The keys are the values accepted for a request's target language; the
# display names are interpolated into the LLM prompts.
SUPPORTED_LANGUAGES: dict[str, str] = {
    "en": "English",
    "fr": "French",
    "es": "Spanish",
    "it": "Italian",
    "de": "German",
}

# Proficiency levels accepted for `complexity_level` (CEFR scale).
# A frozenset keeps this shared module-level constant from being mutated
# by accident; membership tests and `sorted(...)` work exactly as on a set.
SUPPORTED_LEVELS: frozenset[str] = frozenset(
    {"A1", "A2", "B1", "B2", "C1", "C2"}
)
|
||||||
|
|
@ -1,11 +1,13 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
|
from .routers.api import generation, pos
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
|
|
||||||
from .routers import pos, generation, jobs
|
from .routers.api import jobs
|
||||||
from .routers import auth as auth_router
|
from .routers import auth as auth_router
|
||||||
from .routers import media as media_router
|
from .routers import media as media_router
|
||||||
|
from .routers.api.main import api_router
|
||||||
from .storage import ensure_bucket_exists
|
from .storage import ensure_bucket_exists
|
||||||
from . import worker
|
from . import worker
|
||||||
|
|
||||||
|
|
@ -24,10 +26,8 @@ async def lifespan(app: FastAPI):
|
||||||
|
|
||||||
app = FastAPI(title="Language Learning API", lifespan=lifespan)
|
app = FastAPI(title="Language Learning API", lifespan=lifespan)
|
||||||
|
|
||||||
|
app.include_router(api_router)
|
||||||
app.include_router(auth_router.router)
|
app.include_router(auth_router.router)
|
||||||
app.include_router(pos.router)
|
|
||||||
app.include_router(generation.router)
|
|
||||||
app.include_router(jobs.router)
|
|
||||||
app.include_router(media_router.router)
|
app.include_router(media_router.router)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
0
api/app/routers/api/__init__.py
Normal file
0
api/app/routers/api/__init__.py
Normal file
104
api/app/routers/api/generation.py
Normal file
104
api/app/routers/api/generation.py
Normal file
|
|
@ -0,0 +1,104 @@
|
||||||
|
import uuid
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from ...languages import SUPPORTED_LANGUAGES, SUPPORTED_LEVELS
|
||||||
|
from ...auth import verify_token
|
||||||
|
from ...database import get_db, AsyncSessionLocal
|
||||||
|
from ...models import Job
|
||||||
|
from ...storage import upload_audio
|
||||||
|
from ...services import llm, tts, job_repo
|
||||||
|
from ...services.tts import VOICE_BY_LANGUAGE
|
||||||
|
from ...services.translate import translate
|
||||||
|
from ... import worker
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/generate", tags=["api"])
|
||||||
|
|
||||||
|
|
||||||
|
class GenerationRequest(BaseModel):
|
||||||
|
target_language: str
|
||||||
|
complexity_level: str
|
||||||
|
input_texts: list[str]
|
||||||
|
topic: str | None = None
|
||||||
|
source_language: str = "en"
|
||||||
|
|
||||||
|
|
||||||
|
class GenerationResponse(BaseModel):
|
||||||
|
job_id: str
|
||||||
|
|
||||||
|
|
||||||
|
async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
    """Run one generation job end to end in the background worker.

    Steps, each persisted as soon as it completes so partial results
    survive a later failure:

    1. mark the job as processing;
    2. generate the summary text with the LLM and save it;
    3. translate the generated text into the request's source language
       and save the translation;
    4. synthesise audio, upload it and mark the job as succeeded.

    Any exception raised after the job was marked as processing is
    recorded on the job via ``job_repo.mark_failed`` instead of
    propagating to the worker.

    Args:
        job_id: Primary key of the ``Job`` row created by the endpoint.
        request: The validated request the job was created from.
    """
    async with AsyncSessionLocal() as db:
        job = await db.get(Job, job_id)
        if job is None:
            # The row disappeared between enqueueing and execution; there
            # is nothing to update, so do not crash the worker on None.
            return

        await job_repo.mark_processing(db, job)

        try:
            language_name = SUPPORTED_LANGUAGES[request.target_language]

            # Only the first three input texts are used as source material.
            source_material = "\n\n".join(request.input_texts[:3])

            generated_text = await llm.generate_summary_text(
                content_to_summarise=source_material,
                complexity_level=request.complexity_level,
                from_language=language_name,
                to_language=language_name,
                length_preference="200-400 words",
            )

            # Save the LLM output before translation/TTS so it is
            # preserved if a later step fails.
            await job_repo.save_generated_text(
                db, job, generated_text, source_material[:500]
            )

            translated_text = await translate(generated_text, request.source_language)

            # Persist the translation. The previous code re-called
            # save_generated_text without its required input_summary
            # argument, which raised TypeError and dropped the translation.
            await job_repo.save_translated_text(db, job, translated_text)

            voice = VOICE_BY_LANGUAGE.get(request.target_language, "Kore")
            wav_bytes = await tts.generate_audio(generated_text, voice)
            audio_key = f"audio/{job_id}.wav"
            upload_audio(audio_key, wav_bytes)

            await job_repo.mark_succeeded(db, job, audio_key)

        except Exception as exc:
            # Top-level boundary of the background task: record the
            # failure on the job rather than losing it in the worker.
            await job_repo.mark_failed(db, job, str(exc))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("", response_model=GenerationResponse, status_code=202)
async def create_generation_job(
    request: GenerationRequest,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> GenerationResponse:
    """Validate a generation request, create its job and enqueue the work.

    Responds with 202 and the new job's id; the generation itself runs
    later in the background worker.

    Raises:
        HTTPException: 400 when the target language, the source language
            or the complexity level is not supported.
    """
    if request.target_language not in SUPPORTED_LANGUAGES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language '{request.target_language}'. "
            f"Supported: {list(SUPPORTED_LANGUAGES)}",
        )
    # The worker translates the generated text back into the source
    # language, so reject an unknown code here rather than accepting the
    # job and letting it fail asynchronously.
    if request.source_language not in SUPPORTED_LANGUAGES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported source language '{request.source_language}'. "
            f"Supported: {list(SUPPORTED_LANGUAGES)}",
        )
    if request.complexity_level not in SUPPORTED_LEVELS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported level '{request.complexity_level}'. "
            f"Supported: {sorted(SUPPORTED_LEVELS)}",
        )

    job = Job(
        user_id=uuid.UUID(token_data["sub"]),
        source_language=request.source_language,
        target_language=request.target_language,
        complexity_level=request.complexity_level,
    )
    db.add(job)
    await db.commit()
    # Reload the row so database-generated values (such as the id) are set.
    await db.refresh(job)

    await worker.enqueue(partial(_run_generation, job.id, request))

    return GenerationResponse(job_id=str(job.id))
|
||||||
|
|
@ -7,15 +7,15 @@ from pydantic import BaseModel
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
|
|
||||||
from ..auth import verify_token
|
from ...auth import verify_token
|
||||||
from ..database import get_db, AsyncSessionLocal
|
from ...database import get_db, AsyncSessionLocal
|
||||||
from ..models import Job
|
from ...models import Job
|
||||||
from ..storage import upload_audio
|
from ...storage import upload_audio
|
||||||
from ..services import tts, job_repo
|
from ...services import tts, job_repo
|
||||||
from ..services.tts import VOICE_BY_LANGUAGE
|
from ...services.tts import VOICE_BY_LANGUAGE
|
||||||
from .. import worker
|
from ... import worker
|
||||||
|
|
||||||
router = APIRouter(prefix="/jobs", tags=["jobs"])
|
router = APIRouter(prefix="/jobs", dependencies=[Depends(verify_token)])
|
||||||
|
|
||||||
|
|
||||||
class JobResponse(BaseModel):
|
class JobResponse(BaseModel):
|
||||||
|
|
@ -36,11 +36,13 @@ class JobResponse(BaseModel):
|
||||||
error_message: str | None = None
|
error_message: str | None = None
|
||||||
model_config = {"from_attributes": True}
|
model_config = {"from_attributes": True}
|
||||||
|
|
||||||
|
|
||||||
class JobSummary(BaseModel):
|
class JobSummary(BaseModel):
|
||||||
id: uuid.UUID
|
id: uuid.UUID
|
||||||
status: str
|
status: str
|
||||||
created_at: datetime
|
created_at: datetime
|
||||||
|
|
||||||
|
|
||||||
class JobListResponse(BaseModel):
|
class JobListResponse(BaseModel):
|
||||||
jobs: list[JobSummary]
|
jobs: list[JobSummary]
|
||||||
model_config = {"from_attributes": True}
|
model_config = {"from_attributes": True}
|
||||||
|
|
@ -49,7 +51,6 @@ class JobListResponse(BaseModel):
|
||||||
@router.get("/", response_model=JobListResponse)
|
@router.get("/", response_model=JobListResponse)
|
||||||
async def get_jobs(
|
async def get_jobs(
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
_: dict = Depends(verify_token)
|
|
||||||
) -> JobListResponse:
|
) -> JobListResponse:
|
||||||
try:
|
try:
|
||||||
result = await db.execute(select(Job).order_by(Job.created_at.desc()))
|
result = await db.execute(select(Job).order_by(Job.created_at.desc()))
|
||||||
|
|
@ -63,7 +64,6 @@ async def get_jobs(
|
||||||
async def get_job(
|
async def get_job(
|
||||||
job_id: str,
|
job_id: str,
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
_: dict = Depends(verify_token),
|
|
||||||
) -> JobResponse:
|
) -> JobResponse:
|
||||||
try:
|
try:
|
||||||
uid = uuid.UUID(job_id)
|
uid = uuid.UUID(job_id)
|
||||||
|
|
@ -129,16 +129,19 @@ async def regenerate_audio(
|
||||||
raise HTTPException(status_code=404, detail="Job not found")
|
raise HTTPException(status_code=404, detail="Job not found")
|
||||||
|
|
||||||
if str(job.user_id) != token_data["sub"]:
|
if str(job.user_id) != token_data["sub"]:
|
||||||
raise HTTPException(status_code=403, detail="Not authorized to modify this job")
|
raise HTTPException(
|
||||||
|
status_code=403, detail="Not authorized to modify this job")
|
||||||
|
|
||||||
if not job.generated_text:
|
if not job.generated_text:
|
||||||
raise HTTPException(status_code=400, detail="Job has no generated text to synthesize")
|
raise HTTPException(
|
||||||
|
status_code=400, detail="Job has no generated text to synthesize")
|
||||||
|
|
||||||
if job.audio_url:
|
if job.audio_url:
|
||||||
raise HTTPException(status_code=409, detail="Job already has audio")
|
raise HTTPException(status_code=409, detail="Job already has audio")
|
||||||
|
|
||||||
if job.status == "processing":
|
if job.status == "processing":
|
||||||
raise HTTPException(status_code=409, detail="Job is already processing")
|
raise HTTPException(
|
||||||
|
status_code=409, detail="Job is already processing")
|
||||||
|
|
||||||
await worker.enqueue(partial(_run_regenerate_audio, uid))
|
await worker.enqueue(partial(_run_regenerate_audio, uid))
|
||||||
return {"job_id": job_id}
|
return {"job_id": job_id}
|
||||||
13
api/app/routers/api/main.py
Normal file
13
api/app/routers/api/main.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
from .pos import router as pos_router
|
||||||
|
from .translate import router as translate_router
|
||||||
|
from .generation import router as generation_router
|
||||||
|
from .jobs import router as jobs_router
|
||||||
|
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
api_router = APIRouter(prefix="/api", tags=["api"])
|
||||||
|
|
||||||
|
api_router.include_router(pos_router)
|
||||||
|
api_router.include_router(translate_router)
|
||||||
|
api_router.include_router(generation_router)
|
||||||
|
api_router.include_router(jobs_router)
|
||||||
|
|
@ -2,9 +2,9 @@ from fastapi import APIRouter, Depends, HTTPException
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
import spacy
|
import spacy
|
||||||
|
|
||||||
from ..auth import verify_token
|
from ...auth import verify_token
|
||||||
|
|
||||||
router = APIRouter(prefix="/analyze", tags=["analysis"])
|
router = APIRouter(prefix="/pos", tags=["api", "pos"])
|
||||||
|
|
||||||
LANGUAGE_MODELS: dict[str, str] = {
|
LANGUAGE_MODELS: dict[str, str] = {
|
||||||
"en": "en_core_web_sm",
|
"en": "en_core_web_sm",
|
||||||
|
|
@ -47,7 +47,7 @@ class POSResponse(BaseModel):
|
||||||
tokens: list[TokenInfo]
|
tokens: list[TokenInfo]
|
||||||
|
|
||||||
|
|
||||||
@router.post("/pos", response_model=POSResponse)
|
@router.post("/", response_model=POSResponse)
|
||||||
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
|
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
|
||||||
nlp = _get_nlp(request.language)
|
nlp = _get_nlp(request.language)
|
||||||
doc = nlp(request.text)
|
doc = nlp(request.text)
|
||||||
35
api/app/routers/api/translate.py
Normal file
35
api/app/routers/api/translate.py
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from ...auth import verify_token
|
||||||
|
from ...services.translate import DEEPL_LANGUAGE_CODES, translate
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/translate",
|
||||||
|
tags=["api", "translate"],
|
||||||
|
dependencies=[Depends(verify_token)]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TranslationResponse(BaseModel):
|
||||||
|
text: str
|
||||||
|
target_language: str
|
||||||
|
translated_text: str
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
    "",
    response_model=TranslationResponse,
    summary="Translate text to a target language",
)
async def translate_text(
    text: str,
    target_language: str,
    context: str | None = None,
) -> TranslationResponse:
    """Translate ``text`` into ``target_language``.

    ``context`` is optional and is passed through unchanged to the
    translation service.

    Raises:
        HTTPException: 400 when ``target_language`` is not one of the
            keys of ``DEEPL_LANGUAGE_CODES``.
    """
    if target_language in DEEPL_LANGUAGE_CODES:
        result = await translate(text, target_language, context)
        return TranslationResponse(
            text=text,
            target_language=target_language,
            translated_text=str(result),
        )

    raise HTTPException(
        status_code=400,
        detail=f"Unsupported target language '{target_language}'. Supported: {list(DEEPL_LANGUAGE_CODES)}",
    )
|
||||||
|
|
@ -1,137 +0,0 @@
|
||||||
import uuid
|
|
||||||
from functools import partial
|
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
|
||||||
|
|
||||||
from ..auth import verify_token
|
|
||||||
from ..database import get_db, AsyncSessionLocal
|
|
||||||
from ..models import Job
|
|
||||||
from ..storage import upload_audio
|
|
||||||
from ..services import llm, tts, job_repo
|
|
||||||
from ..services.tts import VOICE_BY_LANGUAGE
|
|
||||||
from .. import worker
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/generate", tags=["generation"])
|
|
||||||
|
|
||||||
SUPPORTED_LANGUAGES: dict[str, str] = {
|
|
||||||
"en": "English",
|
|
||||||
"fr": "French",
|
|
||||||
"es": "Spanish",
|
|
||||||
"it": "Italian",
|
|
||||||
"de": "German",
|
|
||||||
}
|
|
||||||
SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"}
|
|
||||||
|
|
||||||
|
|
||||||
class GenerationRequest(BaseModel):
|
|
||||||
target_language: str
|
|
||||||
complexity_level: str
|
|
||||||
input_texts: list[str]
|
|
||||||
topic: str | None = None
|
|
||||||
source_language: str = "en"
|
|
||||||
|
|
||||||
|
|
||||||
class GenerationResponse(BaseModel):
|
|
||||||
job_id: str
|
|
||||||
|
|
||||||
|
|
||||||
async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
|
|
||||||
async with AsyncSessionLocal() as db:
|
|
||||||
job = await db.get(Job, job_id)
|
|
||||||
await job_repo.mark_processing(db, job)
|
|
||||||
|
|
||||||
try:
|
|
||||||
from_language = SUPPORTED_LANGUAGES[request.source_language]
|
|
||||||
language_name = SUPPORTED_LANGUAGES[request.target_language]
|
|
||||||
|
|
||||||
topic_part = f"Topic: {request.topic}. " if request.topic else ""
|
|
||||||
combined_preview = " ".join(request.input_texts)[:300]
|
|
||||||
input_summary = (
|
|
||||||
f"{topic_part}Based on {len(request.input_texts)} input text(s): "
|
|
||||||
f"{combined_preview}..."
|
|
||||||
)
|
|
||||||
|
|
||||||
source_material = "\n\n".join(request.input_texts[:3])
|
|
||||||
topic_line = f"\nTopic focus: {request.topic}" if request.topic else ""
|
|
||||||
|
|
||||||
prompt = (
|
|
||||||
f"You are a language learning content creator. "
|
|
||||||
f"Using the input provided, you generate engaging realistic text in {language_name} "
|
|
||||||
f"at {request.complexity_level} proficiency level (CEFR scale).\n\n"
|
|
||||||
f"The text should:\n"
|
|
||||||
f"- Be appropriate for a {request.complexity_level} learner\n"
|
|
||||||
f"- Maintain a similar tone to the input text. Where appropriate, use idioms\n"
|
|
||||||
f"- Feel natural and authentic, like content a native speaker would read\n"
|
|
||||||
f"- Be formatted in markdown with paragraphs and line breaks\n"
|
|
||||||
f"- Be 200–400 words long\n"
|
|
||||||
f"- Be inspired by the following source material "
|
|
||||||
f"(but written originally in {language_name}):\n\n"
|
|
||||||
f"{source_material}"
|
|
||||||
f"{topic_line}\n\n"
|
|
||||||
f"Respond with ONLY the generated text in {language_name}, "
|
|
||||||
f"no explanations or translations.\n"
|
|
||||||
f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}."
|
|
||||||
)
|
|
||||||
|
|
||||||
generated_text = await llm.generate_text(prompt)
|
|
||||||
|
|
||||||
translate_prompt = (
|
|
||||||
f"You are a helpful assistant that translates text. Translate just the previous summary "
|
|
||||||
f"content in {language_name} text you generated based on the input I gave you. Translate "
|
|
||||||
f"it back into {from_language}.\n"
|
|
||||||
f"- Keep the translation as close as possible to the original meaning and tone\n"
|
|
||||||
f"- Send through only the translated text, no explanations or notes\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
translated_text = await llm.translate_text(prompt, generated_text, translate_prompt)
|
|
||||||
|
|
||||||
# Save LLM results before attempting TTS so they're preserved on failure
|
|
||||||
await job_repo.save_llm_results(
|
|
||||||
db, job, generated_text, translated_text, input_summary[:500]
|
|
||||||
)
|
|
||||||
|
|
||||||
voice = VOICE_BY_LANGUAGE.get(request.target_language, "Kore")
|
|
||||||
wav_bytes = await tts.generate_audio(generated_text, voice)
|
|
||||||
audio_key = f"audio/{job_id}.wav"
|
|
||||||
upload_audio(audio_key, wav_bytes)
|
|
||||||
|
|
||||||
await job_repo.mark_succeeded(db, job, audio_key)
|
|
||||||
|
|
||||||
except Exception as exc:
|
|
||||||
await job_repo.mark_failed(db, job, str(exc))
|
|
||||||
|
|
||||||
|
|
||||||
@router.post("", response_model=GenerationResponse, status_code=202)
|
|
||||||
async def create_generation_job(
|
|
||||||
request: GenerationRequest,
|
|
||||||
db: AsyncSession = Depends(get_db),
|
|
||||||
token_data: dict = Depends(verify_token),
|
|
||||||
) -> GenerationResponse:
|
|
||||||
if request.target_language not in SUPPORTED_LANGUAGES:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail=f"Unsupported language '{request.target_language}'. "
|
|
||||||
f"Supported: {list(SUPPORTED_LANGUAGES)}",
|
|
||||||
)
|
|
||||||
if request.complexity_level not in SUPPORTED_LEVELS:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail=f"Unsupported level '{request.complexity_level}'. "
|
|
||||||
f"Supported: {sorted(SUPPORTED_LEVELS)}",
|
|
||||||
)
|
|
||||||
|
|
||||||
job = Job(
|
|
||||||
user_id=uuid.UUID(token_data["sub"]),
|
|
||||||
source_language=request.source_language,
|
|
||||||
target_language=request.target_language,
|
|
||||||
complexity_level=request.complexity_level,
|
|
||||||
)
|
|
||||||
db.add(job)
|
|
||||||
await db.commit()
|
|
||||||
await db.refresh(job)
|
|
||||||
|
|
||||||
await worker.enqueue(partial(_run_generation, job.id, request))
|
|
||||||
|
|
||||||
return GenerationResponse(job_id=str(job.id))
|
|
||||||
|
|
@ -12,19 +12,26 @@ async def mark_processing(db: AsyncSession, job: Job) -> None:
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
|
||||||
|
|
||||||
async def save_llm_results(
|
async def save_generated_text(
|
||||||
db: AsyncSession,
|
db: AsyncSession,
|
||||||
job: Job,
|
job: Job,
|
||||||
generated_text: str,
|
generated_text: str,
|
||||||
translated_text: str,
|
|
||||||
input_summary: str,
|
input_summary: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
job.generated_text = generated_text
|
job.generated_text = generated_text
|
||||||
job.translated_text = translated_text
|
|
||||||
job.input_summary = input_summary
|
job.input_summary = input_summary
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def save_translated_text(
    db: AsyncSession,
    job: Job,
    translated_text: str,
) -> None:
    """Store the translated text on the job and commit the session.

    Args:
        db: Session the commit is issued on.
        job: Job whose ``translated_text`` attribute is overwritten.
        translated_text: The translation to persist.
    """
    job.translated_text = translated_text
    await db.commit()
|
||||||
|
|
||||||
|
|
||||||
async def mark_succeeded(db: AsyncSession, job: Job, audio_url: str) -> None:
|
async def mark_succeeded(db: AsyncSession, job: Job, audio_url: str) -> None:
|
||||||
job.status = "succeeded"
|
job.status = "succeeded"
|
||||||
job.audio_url = audio_url
|
job.audio_url = audio_url
|
||||||
|
|
|
||||||
|
|
@ -1,37 +1,73 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
import anthropic
|
import anthropic
|
||||||
|
|
||||||
from ..config import settings
|
from ..config import settings
|
||||||
|
|
||||||
|
|
||||||
def _create_client() -> anthropic.Anthropic:
|
def _create_anthropic_client() -> anthropic.Anthropic:
|
||||||
return anthropic.Anthropic(api_key=settings.anthropic_api_key)
|
return anthropic.Anthropic(api_key=settings.anthropic_api_key)
|
||||||
|
|
||||||
|
|
||||||
async def generate_text(prompt: str) -> str:
|
def _create_system_prompt_summarise_text(
|
||||||
def _call() -> str:
|
complexity_level: str,
|
||||||
client = _create_client()
|
from_language: str,
|
||||||
message = client.messages.create(
|
to_language: str,
|
||||||
model="claude-sonnet-4-6",
|
length_preference="200-400 words",
|
||||||
max_tokens=1024,
|
) -> str:
|
||||||
messages=[{"role": "user", "content": prompt}],
|
return (
|
||||||
|
f"You are a language learning content creator.\n"
|
||||||
|
f"The user will provide input, you will generate an engaging realistic summary text in {to_language} "
|
||||||
|
f"at {complexity_level} proficiency level (CEFR scale).\n\n"
|
||||||
|
f"The text you generate will:\n"
|
||||||
|
f"- Contain ONLY the generated text in {to_language}.\n"
|
||||||
|
f"- Be appropriate for a {complexity_level} {to_language} speaker.\n"
|
||||||
|
f"- Never generate inappropriate (hateful, sexual, violent) content. It is preferable to return no text than to generate such content.\n"
|
||||||
|
f"- Speak directly to the reader/listener, adopting the tone and style of a semi-formal news reporter or podcaster.\n"
|
||||||
|
f"- Where appropriate (fluency level, content), use a small number of idiomatic expressions.\n"
|
||||||
|
f"- Be formatted in markdown with paragraphs and line breaks.\n"
|
||||||
|
f"- Be {length_preference} long.\n"
|
||||||
|
f"- Be inspired by the following source material "
|
||||||
|
f"(but written originally in {from_language}):\n\n"
|
||||||
)
|
)
|
||||||
return message.content[0].text
|
|
||||||
|
|
||||||
return await asyncio.to_thread(_call)
|
|
||||||
|
|
||||||
|
|
||||||
async def translate_text(original_prompt: str, generated_text: str, translate_prompt: str) -> str:
|
def _create_prompt_summarise_text(
|
||||||
|
source_material: str,
|
||||||
|
) -> str:
|
||||||
|
return (
|
||||||
|
f"Source material follows: \n\n"
|
||||||
|
f"{source_material}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_summary_text(
|
||||||
|
content_to_summarise: str,
|
||||||
|
complexity_level: str,
|
||||||
|
from_language: str,
|
||||||
|
to_language: str,
|
||||||
|
length_preference="200-400 words",) -> str:
|
||||||
|
"""Generate text using Anthropic."""
|
||||||
def _call() -> str:
|
def _call() -> str:
|
||||||
client = _create_client()
|
client = _create_anthropic_client()
|
||||||
message = client.messages.create(
|
message = client.messages.create(
|
||||||
model="claude-sonnet-4-6",
|
model="claude-sonnet-4-6",
|
||||||
max_tokens=1024,
|
max_tokens=1024,
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "user", "content": original_prompt},
|
{
|
||||||
{"role": "assistant", "content": generated_text},
|
"role": "system",
|
||||||
{"role": "user", "content": translate_prompt},
|
"content": _create_system_prompt_summarise_text(
|
||||||
|
complexity_level=complexity_level,
|
||||||
|
from_language=from_language,
|
||||||
|
to_language=to_language,
|
||||||
|
length_preference=length_preference,
|
||||||
|
)
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": _create_prompt_summarise_text(
|
||||||
|
content_to_summarise
|
||||||
|
)
|
||||||
|
}
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
return message.content[0].text
|
return message.content[0].text
|
||||||
|
|
|
||||||
33
api/app/services/translate.py
Normal file
33
api/app/services/translate.py
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from ..config import settings
|
||||||
|
|
||||||
|
DEEPL_API_URL = "https://api-free.deepl.com/v2/translate"
|
||||||
|
|
||||||
|
DEEPL_LANGUAGE_CODES = {
|
||||||
|
"en": "EN-GB",
|
||||||
|
"fr": "FR",
|
||||||
|
"es": "ES",
|
||||||
|
"it": "IT",
|
||||||
|
"de": "DE",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def translate(text: str, to_language: str, context: str | None = None) -> str:
    """Translate ``text`` with the DeepL HTTP API.

    Args:
        text: Text to translate.
        to_language: Key of ``DEEPL_LANGUAGE_CODES`` naming the target
            language.
        context: Optional extra context forwarded to DeepL; omitted from
            the request when empty or ``None``.

    Returns:
        The text of the first translation in the response.

    Raises:
        KeyError: If ``to_language`` is not in ``DEEPL_LANGUAGE_CODES``.
        httpx.HTTPStatusError: If DeepL answers with a 4xx/5xx status.
    """
    payload: dict[str, object] = {
        "text": [text],
        "target_lang": DEEPL_LANGUAGE_CODES[to_language],
    }
    # Only send `context` when there is one, instead of an explicit JSON
    # null for an optional field.
    if context:
        payload["context"] = context

    async with httpx.AsyncClient() as client:
        response = await client.post(
            DEEPL_API_URL,
            headers={
                "Authorization": f"DeepL-Auth-Key {settings.deepl_api_key}",
                "Content-Type": "application/json",
            },
            json=payload,
        )
        response.raise_for_status()
        data = response.json()
        return data["translations"][0]["text"]
|
||||||
|
|
@ -14,9 +14,9 @@ dependencies = [
|
||||||
"email-validator>=2.0.0",
|
"email-validator>=2.0.0",
|
||||||
"alembic>=1.13.0",
|
"alembic>=1.13.0",
|
||||||
"pydantic-settings>=2.0.0",
|
"pydantic-settings>=2.0.0",
|
||||||
"deepl>=1.18.0",
|
|
||||||
"google-genai>=1.0.0",
|
"google-genai>=1.0.0",
|
||||||
"boto3>=1.35.0",
|
"boto3>=1.35.0",
|
||||||
|
"httpx>=0.28.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue