Generate the audio; move to background task

This commit is contained in:
wilson 2026-03-19 10:51:10 +00:00
parent 6bc1efd333
commit afe3b63fa5
31 changed files with 813 additions and 78 deletions

View file

@ -14,3 +14,11 @@ ANTHROPIC_API_KEY=sk-ant-...
# DeepL (https://www.deepl.com/pro-api) # DeepL (https://www.deepl.com/pro-api)
DEEPL_API_KEY=your-deepl-api-key-here DEEPL_API_KEY=your-deepl-api-key-here
# Google Gemini (for text-to-speech)
GEMINI_API_KEY=your-gemini-api-key-here
# Object storage (MinIO)
STORAGE_ACCESS_KEY=langlearn
STORAGE_SECRET_KEY=changeme-use-a-long-random-string
STORAGE_BUCKET=langlearn

View file

@ -0,0 +1,19 @@
info:
name: Get audio
type: http
seq: 1
http:
method: GET
url: audio/:file_name
params:
- name: file_name
value: ""
type: path
auth: inherit
settings:
encodeUrl: true
timeout: 0
followRedirects: true
maxRedirects: 5

View file

@ -0,0 +1,7 @@
info:
name: audio
type: folder
seq: 6
request:
auth: inherit

View file

@ -0,0 +1,22 @@
info:
name: Login
type: http
seq: 2
http:
method: POST
url: "{{baseUrl}}/auth/register"
body:
type: json
data: |-
{
"email": "wilson@thomaswilson.xyz",
"password": "wilson@thomaswilson.xyz"
}
auth: inherit
settings:
encodeUrl: true
timeout: 0
followRedirects: true
maxRedirects: 5

View file

@ -0,0 +1,15 @@
info:
name: Register
type: http
seq: 1
http:
method: POST
url: ""
auth: inherit
settings:
encodeUrl: true
timeout: 0
followRedirects: true
maxRedirects: 5

View file

@ -0,0 +1,7 @@
info:
name: auth
type: folder
seq: 5
request:
auth: inherit

View file

@ -17,12 +17,12 @@ config:
request: request:
auth: auth:
type: bearer type: bearer
token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.sW8dZVeROpNxCHL2HEXym6aDzaobFW17mLPaYbtlyYs token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkNDIzODc1NC05ZDljLTRkOWMtYTM1OS1lMGQ4MDZhOGQ1Y2QiLCJlbWFpbCI6IndpbHNvbkB0aG9tYXN3aWxzb24ueHl6IiwiZXhwIjoxNzczOTkzNDcxfQ.J5SH93js-YrJThxGliOKIOV8kbdfP1qUu0rx3iBNX0A
variables: variables:
- name: baseUrl - name: baseUrl
value: http://localhost:8000 value: http://localhost:8000
- name: token - name: token
value: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.sW8dZVeROpNxCHL2HEXym6aDzaobFW17mLPaYbtlyYs value: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkNDIzODc1NC05ZDljLTRkOWMtYTM1OS1lMGQ4MDZhOGQ1Y2QiLCJlbWFpbCI6IndpbHNvbkB0aG9tYXN3aWxzb24ueHl6IiwiZXhwIjoxNzczOTkzNDcxfQ.J5SH93js-YrJThxGliOKIOV8kbdfP1qUu0rx3iBNX0A
bundled: false bundled: false
extensions: extensions:
bruno: bruno:

View file

@ -1,4 +1,4 @@
.PHONY: build up down logs shell lock .PHONY: down build up logs shell lock migrate migration
build: build:
docker compose build docker compose build
@ -15,6 +15,14 @@ logs:
shell: shell:
docker compose exec api bash docker compose exec api bash
# Run pending migrations against the running db container
migrate:
docker compose exec api alembic upgrade head
# Generate a new migration: make migration NAME="add foo table"
migration:
docker compose exec api alembic revision --autogenerate -m "$(NAME)"
# Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally) # Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally)
lock: lock:
cd api && uv pip compile pyproject.toml -o requirements.txt cd api && uv pip compile pyproject.toml -o requirements.txt

27
README.md Normal file
View file

@ -0,0 +1,27 @@
# Language learning app
## Description
This is an app designed to help people learn a second(+) language. Initially from English. The app will start with French, Spanish, Italian, and German as the target languages. With English as the only source language.
The thesis of the app is that spaced repetition and recall remain effective mechanisms for language acquisition, that exposure to appropriate, realistic text can make that process less repetitive or dull, and that mixing text and audio allows the user to know how words "sound".
At present, the app doesn't have a solution to recognising speech, another important part of language learning.
It improves learner proficiency by building a mechanism for generating realistic, level-appropriate text from user-specified inputs, generated by LLMs. There will also be an audio medium, similar to short podcasts, generated from the text.
The application has a back-end written in python (fastapi), because of the Python ecosystem around data and machine learning.
The application will have a web front end written in Svelte Kit. It will adopt progressive web app standards, to allow offline use.
Communication between the two is through HTTP, authenticated with JWT tokens.
## Technical Design
This application must remain self-hostable. It should not rely on proprietary infrastructure (e.g. AWS Lambda functions) to run. It should use Docker Compose and Makefiles to build projects and deploy them onto a local server or a VPS.
The main components so far are:
- Backend server (fastapi)
- Front end (SvelteKit), yet to be built
- Object storage (MinIO)

View file

@ -3,7 +3,7 @@ FROM python:3.11-slim
WORKDIR /app WORKDIR /app
# Install uv for fast, reproducible installs # Install uv for fast, reproducible installs
RUN pip install --no-cache-dir uv RUN pip install --no-cache-dir uv alembic
# Install Python dependencies from pyproject.toml # Install Python dependencies from pyproject.toml
COPY pyproject.toml . COPY pyproject.toml .
@ -16,8 +16,10 @@ RUN python -m spacy download en_core_web_sm && \
python -m spacy download it_core_news_sm && \ python -m spacy download it_core_news_sm && \
python -m spacy download de_core_news_sm python -m spacy download de_core_news_sm
# Copy application source # Copy application source and migrations
COPY app/ ./app/ COPY app/ ./app/
COPY alembic/ ./alembic/
COPY alembic.ini .
EXPOSE 8000 EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] CMD ["sh", "-c", "alembic upgrade head && uvicorn app.main:app --host 0.0.0.0 --port 8000"]

39
api/alembic.ini Normal file
View file

@ -0,0 +1,39 @@
[alembic]
script_location = alembic
file_template = %%(year)d%%(month).2d%%(day).2d_%%(rev)s_%%(slug)s
truncate_slug_length = 40
prepend_sys_path = .
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

38
api/alembic/env.py Normal file
View file

@ -0,0 +1,38 @@
"""Alembic migration environment (async engine variant).

Builds an async SQLAlchemy engine from the application's settings and runs
migrations with Base.metadata as the autogenerate target.
"""
import asyncio
from logging.config import fileConfig
from alembic import context
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import async_engine_from_config
from app.config import settings
from app.database import Base
import app.models  # noqa: F401 — register all models with Base.metadata

config = context.config
# Point Alembic at the same database URL the application uses, overriding
# whatever (if anything) alembic.ini declares.
config.set_main_option("sqlalchemy.url", settings.database_url)

if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata that `alembic revision --autogenerate` diffs against.
target_metadata = Base.metadata


def do_run_migrations(connection):
    """Run migrations on a sync connection (invoked via run_sync below)."""
    context.configure(connection=connection, target_metadata=target_metadata)
    with context.begin_transaction():
        context.run_migrations()


async def run_async_migrations():
    """Create an async engine from the [alembic] config section and migrate."""
    connectable = async_engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    async with connectable.connect() as connection:
        # Alembic's migration machinery is synchronous; bridge via run_sync.
        await connection.run_sync(do_run_migrations)
    await connectable.dispose()


asyncio.run(run_async_migrations())

View file

@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View file

@ -0,0 +1,55 @@
"""initial schema
Revision ID: 0001
Revises:
Create Date: 2026-03-18
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
revision: str = "0001"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.create_table(
"users",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("email", sa.String(255), nullable=False),
sa.Column("hashed_password", sa.String(255), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("is_email_verified", sa.Boolean(), nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(op.f("ix_users_email"), "users", ["email"], unique=True)
op.create_table(
"jobs",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("status", sa.String(20), nullable=False),
sa.Column("source_language", sa.String(10), nullable=False),
sa.Column("target_language", sa.String(10), nullable=False),
sa.Column("complexity_level", sa.String(5), nullable=False),
sa.Column("input_summary", sa.Text(), nullable=True),
sa.Column("generated_text", sa.Text(), nullable=True),
sa.Column("translated_text", sa.Text(), nullable=True),
sa.Column("error_message", sa.Text(), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("started_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
def downgrade() -> None:
op.drop_table("jobs")
op.drop_index(op.f("ix_users_email"), table_name="users")
op.drop_table("users")

View file

@ -0,0 +1,43 @@
"""add audio_url and user_id to jobs
Revision ID: 0002
Revises: 0001
Create Date: 2026-03-19
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
revision: str = "0002"
down_revision: Union[str, None] = "0001"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.add_column(
"jobs",
sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
)
op.create_index(op.f("ix_jobs_user_id"), "jobs", ["user_id"], unique=False)
op.create_foreign_key(
"fk_jobs_user_id_users",
"jobs",
"users",
["user_id"],
["id"],
)
op.add_column(
"jobs",
sa.Column("audio_url", sa.Text(), nullable=True),
)
def downgrade() -> None:
op.drop_column("jobs", "audio_url")
op.drop_constraint("fk_jobs_user_id_users", "jobs", type_="foreignkey")
op.drop_index(op.f("ix_jobs_user_id"), table_name="jobs")
op.drop_column("jobs", "user_id")

View file

@ -1,10 +1,36 @@
from datetime import datetime, timedelta, timezone
from fastapi import Depends, HTTPException, status from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import jwt import jwt
from passlib.context import CryptContext
from .config import settings from .config import settings
security = HTTPBearer() security = HTTPBearer()
# Passlib context used to hash and verify user passwords.
pwd_context = CryptContext(
    schemes=["pbkdf2_sha256"],
    deprecated="auto",
)

# Lifetime of issued access tokens, in hours.
TOKEN_EXPIRY_HOURS = 24


def hash_password(password: str) -> str:
    """Hash a plaintext password for persistent storage."""
    return pwd_context.hash(password)


def verify_password(plain: str, hashed: str) -> bool:
    """Return True if the plaintext password matches the stored hash."""
    return pwd_context.verify(plain, hashed)
def create_access_token(user_id: str, email: str) -> str:
    """Mint a signed HS256 access token carrying the user id and email.

    The token expires TOKEN_EXPIRY_HOURS after issuance.
    """
    expires_at = datetime.now(timezone.utc) + timedelta(hours=TOKEN_EXPIRY_HOURS)
    claims = {
        "sub": user_id,
        "email": email,
        "exp": expires_at,
    }
    return jwt.encode(claims, settings.jwt_secret, algorithm="HS256")
def verify_token( def verify_token(

View file

@ -6,6 +6,11 @@ class Settings(BaseSettings):
jwt_secret: str jwt_secret: str
anthropic_api_key: str anthropic_api_key: str
deepl_api_key: str deepl_api_key: str
gemini_api_key: str
storage_endpoint_url: str
storage_access_key: str
storage_secret_key: str
storage_bucket: str = "langlearn"
model_config = {"env_file": ".env"} model_config = {"env_file": ".env"}

View file

@ -1,23 +1,34 @@
import asyncio
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from fastapi import FastAPI from fastapi import FastAPI
from .database import engine, Base
from .routers import pos, generation, jobs from .routers import pos, generation, jobs
from .routers import auth as auth_router
from .routers import media as media_router
from .storage import ensure_bucket_exists
from . import worker
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
async with engine.begin() as conn: ensure_bucket_exists()
await conn.run_sync(Base.metadata.create_all) worker_task = asyncio.create_task(worker.worker_loop())
yield yield
worker_task.cancel()
try:
await worker_task
except asyncio.CancelledError:
pass
app = FastAPI(title="Language Learning API", lifespan=lifespan) app = FastAPI(title="Language Learning API", lifespan=lifespan)
app.include_router(auth_router.router)
app.include_router(pos.router) app.include_router(pos.router)
app.include_router(generation.router) app.include_router(generation.router)
app.include_router(jobs.router) app.include_router(jobs.router)
app.include_router(media_router.router)
@app.get("/health") @app.get("/health")

View file

@ -1,19 +1,39 @@
import uuid import uuid
from datetime import datetime, timezone from datetime import datetime, timezone
from sqlalchemy import String, Text, DateTime from sqlalchemy import String, Text, DateTime, Boolean, ForeignKey
from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.dialects.postgresql import UUID
from .database import Base from .database import Base
class User(Base):
    """Application user account (table: users)."""

    __tablename__ = "users"

    # Primary key, generated application-side.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    # Login identifier; unique and indexed.
    email: Mapped[str] = mapped_column(String(255), nullable=False, unique=True, index=True)
    # Passlib hash, never the plaintext password.
    hashed_password: Mapped[str] = mapped_column(String(255), nullable=False)
    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    # TODO(email-verification): set to False and require verification once transactional email is implemented
    is_email_verified: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
class Job(Base): class Job(Base):
__tablename__ = "jobs" __tablename__ = "jobs"
id: Mapped[uuid.UUID] = mapped_column( id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4 UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
) )
user_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True), ForeignKey("users.id"), nullable=True, index=True
)
status: Mapped[str] = mapped_column(String(20), nullable=False, default="pending") status: Mapped[str] = mapped_column(String(20), nullable=False, default="pending")
source_language: Mapped[str] = mapped_column(String(10), nullable=False, default="en") source_language: Mapped[str] = mapped_column(String(10), nullable=False, default="en")
target_language: Mapped[str] = mapped_column(String(10), nullable=False) target_language: Mapped[str] = mapped_column(String(10), nullable=False)
@ -22,6 +42,7 @@ class Job(Base):
generated_text: Mapped[str | None] = mapped_column(Text, nullable=True) generated_text: Mapped[str | None] = mapped_column(Text, nullable=True)
translated_text: Mapped[str | None] = mapped_column(Text, nullable=True) translated_text: Mapped[str | None] = mapped_column(Text, nullable=True)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True) error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
audio_url: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column( created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc), default=lambda: datetime.now(timezone.utc),

78
api/app/routers/auth.py Normal file
View file

@ -0,0 +1,78 @@
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, EmailStr
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from ..auth import create_access_token, hash_password, verify_password
from ..database import get_db
from ..models import User
router = APIRouter(prefix="/auth", tags=["auth"])
class RegisterRequest(BaseModel):
email: EmailStr
password: str
class LoginRequest(BaseModel):
email: EmailStr
password: str
class TokenResponse(BaseModel):
access_token: str
token_type: str = "bearer"
@router.post("/register", status_code=status.HTTP_201_CREATED)
async def register(body: RegisterRequest, db: AsyncSession = Depends(get_db)):
user = User(
email=body.email,
hashed_password=hash_password(body.password),
)
db.add(user)
try:
await db.commit()
await db.refresh(user)
except IntegrityError:
await db.rollback()
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="Email already registered",
)
# TODO(email-verification): send verification email here once transactional
# email is implemented. Set is_email_verified=False on the User model and
# require verification before allowing login.
return {"id": str(user.id), "email": user.email}
@router.post("/login", response_model=TokenResponse)
async def login(body: LoginRequest, db: AsyncSession = Depends(get_db)):
result = await db.execute(select(User).where(User.email == body.email))
user = result.scalar_one_or_none()
if user is None or not verify_password(body.password, user.hashed_password):
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid email or password",
)
if not user.is_active:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Account disabled",
)
# TODO(email-verification): uncomment once email verification is in place
# if not user.is_email_verified:
# raise HTTPException(
# status_code=status.HTTP_403_FORBIDDEN,
# detail="Email address not verified",
# )
token = create_access_token(str(user.id), user.email)
return TokenResponse(access_token=token)

View file

@ -1,16 +1,17 @@
import uuid import uuid
from datetime import datetime, timezone from functools import partial
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
import anthropic
import deepl
from ..auth import verify_token from ..auth import verify_token
from ..database import get_db, AsyncSessionLocal from ..database import get_db, AsyncSessionLocal
from ..models import Job from ..models import Job
from ..config import settings from ..storage import upload_audio
from ..services import llm, tts, job_repo
from ..services.tts import VOICE_BY_LANGUAGE
from .. import worker
router = APIRouter(prefix="/generate", tags=["generation"]) router = APIRouter(prefix="/generate", tags=["generation"])
@ -23,23 +24,6 @@ SUPPORTED_LANGUAGES: dict[str, str] = {
} }
SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"} SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"}
# Maps our language codes to DeepL source/target language codes
DEEPL_SOURCE_LANG: dict[str, str] = {
"en": "EN",
"fr": "FR",
"es": "ES",
"it": "IT",
"de": "DE",
}
# DeepL target codes (English needs a regional variant)
DEEPL_TARGET_LANG: dict[str, str] = {
"en": "EN-US",
"fr": "FR",
"es": "ES",
"it": "IT",
"de": "DE",
}
class GenerationRequest(BaseModel): class GenerationRequest(BaseModel):
target_language: str target_language: str
@ -56,15 +40,12 @@ class GenerationResponse(BaseModel):
async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None: async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
async with AsyncSessionLocal() as db: async with AsyncSessionLocal() as db:
job = await db.get(Job, job_id) job = await db.get(Job, job_id)
job.status = "processing" await job_repo.mark_processing(db, job)
job.started_at = datetime.now(timezone.utc)
await db.commit()
try: try:
from_language = SUPPORTED_LANGUAGES[request.source_language] from_language = SUPPORTED_LANGUAGES[request.source_language]
language_name = SUPPORTED_LANGUAGES[request.target_language] language_name = SUPPORTED_LANGUAGES[request.target_language]
# Build a short summary of the input to store (not the full text)
topic_part = f"Topic: {request.topic}. " if request.topic else "" topic_part = f"Topic: {request.topic}. " if request.topic else ""
combined_preview = " ".join(request.input_texts)[:300] combined_preview = " ".join(request.input_texts)[:300]
input_summary = ( input_summary = (
@ -94,23 +75,7 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}." f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}."
) )
client = anthropic.Anthropic(api_key=settings.anthropic_api_key) generated_text = await llm.generate_text(prompt)
message = client.messages.create(
model="claude-sonnet-4-6",
max_tokens=1024,
messages=[{"role": "user", "content": prompt}],
)
generated_text = message.content[0].text
# TODO: Come back to this when DeepL unblock my account for being "high risk"
# Translate generated text back into the learner's source language via DeepL
# translator = deepl.Translator(settings.deepl_api_key)
# translation = translator.translate_text(
# generated_text,
# source_lang=DEEPL_SOURCE_LANG[request.target_language],
# target_lang=DEEPL_TARGET_LANG[request.source_language],
#)
translate_prompt = ( translate_prompt = (
f"You are a helpful assistant that translates text. Translate just the previous summary " f"You are a helpful assistant that translates text. Translate just the previous summary "
@ -120,36 +85,29 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
f"- Send through only the translated text, no explanations or notes\n" f"- Send through only the translated text, no explanations or notes\n"
) )
translate_message = client.messages.create( translated_text = await llm.translate_text(prompt, generated_text, translate_prompt)
model="claude-sonnet-4-6",
max_tokens=1024, # Save LLM results before attempting TTS so they're preserved on failure
messages=[ await job_repo.save_llm_results(
{ "role": "user", "content": prompt }, db, job, generated_text, translated_text, input_summary[:500]
{ "role": "assistant", "content": message.content },
{ "role": "user", "content": translate_prompt }
],
) )
job.status = "succeeded" voice = VOICE_BY_LANGUAGE.get(request.target_language, "Kore")
job.generated_text = generated_text wav_bytes = await tts.generate_audio(generated_text, voice)
job.translated_text = translate_message.content[0].text audio_key = f"audio/{job_id}.wav"
job.input_summary = input_summary[:500] upload_audio(audio_key, wav_bytes)
job.completed_at = datetime.now(timezone.utc)
await job_repo.mark_succeeded(db, job, audio_key)
except Exception as exc: except Exception as exc:
job.status = "failed" await job_repo.mark_failed(db, job, str(exc))
job.error_message = str(exc)
job.completed_at = datetime.now(timezone.utc)
await db.commit()
@router.post("", response_model=GenerationResponse, status_code=202) @router.post("", response_model=GenerationResponse, status_code=202)
async def create_generation_job( async def create_generation_job(
request: GenerationRequest, request: GenerationRequest,
background_tasks: BackgroundTasks,
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
_: dict = Depends(verify_token), token_data: dict = Depends(verify_token),
) -> GenerationResponse: ) -> GenerationResponse:
if request.target_language not in SUPPORTED_LANGUAGES: if request.target_language not in SUPPORTED_LANGUAGES:
raise HTTPException( raise HTTPException(
@ -165,6 +123,7 @@ async def create_generation_job(
) )
job = Job( job = Job(
user_id=uuid.UUID(token_data["sub"]),
source_language=request.source_language, source_language=request.source_language,
target_language=request.target_language, target_language=request.target_language,
complexity_level=request.complexity_level, complexity_level=request.complexity_level,
@ -173,6 +132,6 @@ async def create_generation_job(
await db.commit() await db.commit()
await db.refresh(job) await db.refresh(job)
background_tasks.add_task(_run_generation, job.id, request) await worker.enqueue(partial(_run_generation, job.id, request))
return GenerationResponse(job_id=str(job.id)) return GenerationResponse(job_id=str(job.id))

View file

@ -1,5 +1,6 @@
import uuid import uuid
from datetime import datetime from datetime import datetime, timezone
from functools import partial
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
@ -7,8 +8,12 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select from sqlalchemy import select
from ..auth import verify_token from ..auth import verify_token
from ..database import get_db from ..database import get_db, AsyncSessionLocal
from ..models import Job from ..models import Job
from ..storage import upload_audio
from ..services import tts, job_repo
from ..services.tts import VOICE_BY_LANGUAGE
from .. import worker
router = APIRouter(prefix="/jobs", tags=["jobs"]) router = APIRouter(prefix="/jobs", tags=["jobs"])
@ -26,6 +31,7 @@ class JobResponse(BaseModel):
generated_text: str | None = None generated_text: str | None = None
translated_text: str | None = None translated_text: str | None = None
input_summary: str | None = None input_summary: str | None = None
audio_url: str | None = None
# only present on failure # only present on failure
error_message: str | None = None error_message: str | None = None
model_config = { "from_attributes": True } model_config = { "from_attributes": True }
@ -83,7 +89,56 @@ async def get_job(
response.generated_text = job.generated_text response.generated_text = job.generated_text
response.translated_text = job.translated_text response.translated_text = job.translated_text
response.input_summary = job.input_summary response.input_summary = job.input_summary
response.audio_url = job.audio_url
elif job.status == "failed": elif job.status == "failed":
response.error_message = job.error_message response.error_message = job.error_message
return response return response
async def _run_regenerate_audio(job_id: uuid.UUID) -> None:
    """Worker task: synthesize TTS audio for a job's existing generated text.

    Opens its own session because it runs outside the request lifecycle.
    Success/failure is recorded on the job row via job_repo helpers.
    """
    async with AsyncSessionLocal() as db:
        job = await db.get(Job, job_id)
        await job_repo.mark_processing(db, job)
        try:
            # Fall back to "Kore" for languages with no configured voice.
            voice = VOICE_BY_LANGUAGE.get(job.target_language, "Kore")
            wav_bytes = await tts.generate_audio(job.generated_text, voice)
            audio_key = f"audio/{job_id}.wav"
            upload_audio(audio_key, wav_bytes)
            await job_repo.mark_succeeded(db, job, audio_key)
        except Exception as exc:
            # Persist the failure so the client can see why regeneration failed.
            await job_repo.mark_failed(db, job, str(exc))
@router.post("/{job_id}/regenerate-audio", status_code=202)
async def regenerate_audio(
job_id: str,
db: AsyncSession = Depends(get_db),
token_data: dict = Depends(verify_token),
) -> dict:
try:
uid = uuid.UUID(job_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid job ID format")
job: Job | None = await db.get(Job, uid)
if job is None:
raise HTTPException(status_code=404, detail="Job not found")
if str(job.user_id) != token_data["sub"]:
raise HTTPException(status_code=403, detail="Not authorized to modify this job")
if not job.generated_text:
raise HTTPException(status_code=400, detail="Job has no generated text to synthesize")
if job.audio_url:
raise HTTPException(status_code=409, detail="Job already has audio")
if job.status == "processing":
raise HTTPException(status_code=409, detail="Job is already processing")
await worker.enqueue(partial(_run_regenerate_audio, uid))
return {"job_id": job_id}

39
api/app/routers/media.py Normal file
View file

@ -0,0 +1,39 @@
import uuid
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import Response
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from botocore.exceptions import ClientError
from ..auth import verify_token
from ..database import get_db
from ..models import Job
from ..storage import download_audio
router = APIRouter(prefix="/media", tags=["media"])
@router.get("/{filename:path}")
async def get_media_file(
filename: str,
db: AsyncSession = Depends(get_db),
token_data: dict = Depends(verify_token),
) -> Response:
user_id = uuid.UUID(token_data["sub"])
result = await db.execute(
select(Job).where(Job.audio_url == filename, Job.user_id == user_id)
)
job = result.scalar_one_or_none()
if job is None:
raise HTTPException(status_code=404, detail="File not found")
try:
audio_bytes, content_type = download_audio(filename)
except ClientError as e:
if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
raise HTTPException(status_code=404, detail="File not found")
raise HTTPException(status_code=500, detail="Storage error")
return Response(content=audio_bytes, media_type=content_type)

View file

View file

@ -0,0 +1,39 @@
from datetime import datetime, timezone
from sqlalchemy.ext.asyncio import AsyncSession
from ..models import Job
async def mark_processing(db: AsyncSession, job: Job) -> None:
    """Transition a job to "processing": stamp started_at and clear any
    error message left over from a previous failed attempt."""
    job.status = "processing"
    job.started_at = datetime.now(timezone.utc)
    job.error_message = None
    await db.commit()


async def save_llm_results(
    db: AsyncSession,
    job: Job,
    generated_text: str,
    translated_text: str,
    input_summary: str,
) -> None:
    """Persist LLM output immediately, so it survives a later TTS failure."""
    job.generated_text = generated_text
    job.translated_text = translated_text
    job.input_summary = input_summary
    await db.commit()


async def mark_succeeded(db: AsyncSession, job: Job, audio_url: str) -> None:
    """Mark a job succeeded, recording the stored audio object key."""
    job.status = "succeeded"
    job.audio_url = audio_url
    job.completed_at = datetime.now(timezone.utc)
    await db.commit()


async def mark_failed(db: AsyncSession, job: Job, error: str) -> None:
    """Mark a job failed with the given error message and completion time."""
    job.status = "failed"
    job.error_message = error
    job.completed_at = datetime.now(timezone.utc)
    await db.commit()

39
api/app/services/llm.py Normal file
View file

@ -0,0 +1,39 @@
import asyncio
import anthropic
from ..config import settings
def _create_client() -> anthropic.Anthropic:
    """Build a synchronous Anthropic client from configured credentials."""
    return anthropic.Anthropic(api_key=settings.anthropic_api_key)


async def _completion(messages: list[dict]) -> str:
    """Run a blocking Claude call on a worker thread and return its text."""
    def _call() -> str:
        response = _create_client().messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=messages,
        )
        return response.content[0].text

    return await asyncio.to_thread(_call)


async def generate_text(prompt: str) -> str:
    """Generate text for a single user prompt."""
    return await _completion([{"role": "user", "content": prompt}])


async def translate_text(original_prompt: str, generated_text: str, translate_prompt: str) -> str:
    """Translate previously generated text by replaying the original exchange
    (prompt + assistant reply) followed by the translation instruction."""
    return await _completion([
        {"role": "user", "content": original_prompt},
        {"role": "assistant", "content": generated_text},
        {"role": "user", "content": translate_prompt},
    ])

39
api/app/services/tts.py Normal file
View file

@ -0,0 +1,39 @@
import asyncio
from google import genai
from google.genai import types as genai_types
from ..config import settings
from ..storage import pcm_to_wav
# Preferred Gemini prebuilt TTS voice per ISO 639-1 language code.
# NOTE(review): the voice/language pairing is editorial — the API does not
# tie a voice to a language; callers presumably fall back for unlisted codes.
VOICE_BY_LANGUAGE: dict[str, str] = {
    "fr": "Kore",
    "es": "Charon",
    "it": "Aoede",
    "de": "Fenrir",
    "en": "Kore",  # same voice reused for English and French
}
async def generate_audio(text: str, voice: str) -> bytes:
    """Generate TTS audio for ``text`` and return it as WAV bytes.

    Runs the blocking Gemini SDK call on a worker thread, then wraps the
    raw PCM payload in a WAV container via ``pcm_to_wav``.

    Args:
        text: The text to speak.
        voice: Name of a Gemini prebuilt voice (see VOICE_BY_LANGUAGE).

    Returns:
        WAV-encoded audio bytes.

    Raises:
        RuntimeError: If the API response contains no audio data.
    """
    def _call() -> bytes:
        client = genai.Client(api_key=settings.gemini_api_key)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=genai_types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=genai_types.SpeechConfig(
                    voice_config=genai_types.VoiceConfig(
                        prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(
                            voice_name=voice,
                        )
                    )
                ),
            ),
        )
        # Fail loudly with context instead of an opaque IndexError /
        # AttributeError when the model returns no audio part.
        try:
            pcm_data = response.candidates[0].content.parts[0].inline_data.data
        except (IndexError, AttributeError, TypeError) as exc:
            raise RuntimeError("TTS response contained no audio data") from exc
        return pcm_to_wav(pcm_data)

    return await asyncio.to_thread(_call)

56
api/app/storage.py Normal file
View file

@ -0,0 +1,56 @@
import io
import wave
import boto3
from botocore.exceptions import ClientError
from .config import settings
def get_s3_client():
    """Create a boto3 S3 client pointed at the configured object store (MinIO)."""
    credentials = {
        "endpoint_url": settings.storage_endpoint_url,
        "aws_access_key_id": settings.storage_access_key,
        "aws_secret_access_key": settings.storage_secret_key,
    }
    return boto3.client("s3", **credentials)
def ensure_bucket_exists() -> None:
    """Create the configured bucket if it does not already exist.

    A missing-bucket error from ``head_bucket`` triggers creation; any
    other client error is re-raised unchanged.
    """
    client = get_s3_client()
    bucket = settings.storage_bucket
    try:
        client.head_bucket(Bucket=bucket)
    except ClientError as exc:
        code = exc.response["Error"]["Code"]
        if code not in ("404", "NoSuchBucket"):
            raise
        client.create_bucket(Bucket=bucket)
def pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:
    """Wrap raw 16-bit mono PCM data in a WAV container.

    Args:
        pcm_data: Raw little-endian signed 16-bit mono samples.
        sample_rate: Sample rate in Hz; defaults to 24000, matching the
            PCM this app receives from its TTS provider.

    Returns:
        The complete WAV file as bytes.

    Raises:
        ValueError: If ``pcm_data`` is not a whole number of 16-bit
            samples — writing a partial frame would silently produce a
            corrupt WAV file.
    """
    if len(pcm_data) % 2:
        raise ValueError("pcm_data length must be a multiple of 2 (16-bit samples)")
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(1)   # mono
        wf.setsampwidth(2)   # 16-bit
        wf.setframerate(sample_rate)
        wf.writeframes(pcm_data)
    return buf.getvalue()
def upload_audio(object_key: str, audio_bytes: bytes, content_type: str = "audio/wav") -> None:
    """Store an audio object in the configured bucket under ``object_key``."""
    get_s3_client().put_object(
        Bucket=settings.storage_bucket,
        Key=object_key,
        Body=audio_bytes,
        ContentType=content_type,
    )
def download_audio(object_key: str) -> tuple[bytes, str]:
    """Fetch an audio object from the configured bucket.

    Returns:
        A ``(file_bytes, content_type)`` pair; the content type falls
        back to ``"audio/wav"`` when the object has none stored.
    """
    response = get_s3_client().get_object(
        Bucket=settings.storage_bucket, Key=object_key
    )
    body = response["Body"].read()
    return body, response.get("ContentType", "audio/wav")

22
api/app/worker.py Normal file
View file

@ -0,0 +1,22 @@
import asyncio
import logging
from typing import Awaitable, Callable
logger = logging.getLogger(__name__)
_queue: asyncio.Queue[Callable[[], Awaitable[None]]] = asyncio.Queue()
async def enqueue(task: Callable[[], Awaitable[None]]) -> None:
    """Schedule a zero-argument coroutine factory for the background worker.

    The task is placed on the module-level in-process queue consumed by
    ``worker_loop``; it runs at most once and is not persisted — queued
    work is lost if the process restarts.
    """
    await _queue.put(task)
async def worker_loop() -> None:
    """Run forever, executing queued background tasks one at a time.

    Any exception escaping a task is logged and swallowed so a single
    failing task cannot kill the loop; ``task_done`` is always called so
    ``Queue.join`` stays accurate.
    """
    while True:
        job = await _queue.get()
        try:
            await job()
        except Exception:
            logger.exception("Unhandled error in worker task")
        finally:
            _queue.task_done()

View file

@ -10,8 +10,13 @@ dependencies = [
"spacy>=3.8.0", "spacy>=3.8.0",
"anthropic>=0.40.0", "anthropic>=0.40.0",
"pyjwt>=2.10.0", "pyjwt>=2.10.0",
"passlib>=1.7.4",
"email-validator>=2.0.0",
"alembic>=1.13.0",
"pydantic-settings>=2.0.0", "pydantic-settings>=2.0.0",
"deepl>=1.18.0", "deepl>=1.18.0",
"google-genai>=1.0.0",
"boto3>=1.35.0",
] ]
[build-system] [build-system]

View file

@ -13,6 +13,23 @@ services:
timeout: 5s timeout: 5s
retries: 10 retries: 10
storage:
image: minio/minio:latest
command: server /data --console-address ":9001"
environment:
MINIO_ROOT_USER: ${STORAGE_ACCESS_KEY:-langlearn}
MINIO_ROOT_PASSWORD: ${STORAGE_SECRET_KEY}
ports:
- "9000:9000"
- "9001:9001"
volumes:
- storagedata:/data
healthcheck:
test: ["CMD-SHELL", "curl -sf http://localhost:9000/minio/health/live || exit 1"]
interval: 5s
timeout: 5s
retries: 10
api: api:
build: ./api build: ./api
ports: ports:
@ -22,10 +39,18 @@ services:
JWT_SECRET: ${JWT_SECRET} JWT_SECRET: ${JWT_SECRET}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
DEEPL_API_KEY: ${DEEPL_API_KEY} DEEPL_API_KEY: ${DEEPL_API_KEY}
GEMINI_API_KEY: ${GEMINI_API_KEY}
STORAGE_ENDPOINT_URL: http://storage:9000
STORAGE_ACCESS_KEY: ${STORAGE_ACCESS_KEY:-langlearn}
STORAGE_SECRET_KEY: ${STORAGE_SECRET_KEY}
STORAGE_BUCKET: ${STORAGE_BUCKET:-langlearn}
depends_on: depends_on:
db: db:
condition: service_healthy condition: service_healthy
storage:
condition: service_healthy
restart: unless-stopped restart: unless-stopped
volumes: volumes:
pgdata: pgdata:
storagedata: