From afe3b63fa513bd39a7b7ae0850b589377267d702 Mon Sep 17 00:00:00 2001 From: wilson Date: Thu, 19 Mar 2026 10:51:10 +0000 Subject: [PATCH] Generate the audio; move to background task --- .env.example | 8 ++ Language Learning API/audio/Get audio.yml | 19 ++++ Language Learning API/audio/folder.yml | 7 ++ Language Learning API/auth/Login.yml | 22 +++++ Language Learning API/auth/Register.yml | 15 ++++ Language Learning API/auth/folder.yml | 7 ++ Language Learning API/opencollection.yml | 4 +- Makefile | 10 ++- README.md | 27 ++++++ api/Dockerfile | 8 +- api/alembic.ini | 39 +++++++++ api/alembic/env.py | 38 ++++++++ api/alembic/script.py.mako | 26 ++++++ .../versions/20260318_0001_initial_schema.py | 55 ++++++++++++ ...0260319_0002_add_audio_and_user_to_jobs.py | 43 +++++++++ api/app/auth.py | 26 ++++++ api/app/config.py | 5 ++ api/app/main.py | 17 +++- api/app/models.py | 23 ++++- api/app/routers/auth.py | 78 +++++++++++++++++ api/app/routers/generation.py | 87 +++++-------------- api/app/routers/jobs.py | 63 +++++++++++++- api/app/routers/media.py | 39 +++++++++ api/app/services/__init__.py | 0 api/app/services/job_repo.py | 39 +++++++++ api/app/services/llm.py | 39 +++++++++ api/app/services/tts.py | 39 +++++++++ api/app/storage.py | 56 ++++++++++++ api/app/worker.py | 22 +++++ api/pyproject.toml | 5 ++ docker-compose.yml | 25 ++++++ 31 files changed, 813 insertions(+), 78 deletions(-) create mode 100644 Language Learning API/audio/Get audio.yml create mode 100644 Language Learning API/audio/folder.yml create mode 100644 Language Learning API/auth/Login.yml create mode 100644 Language Learning API/auth/Register.yml create mode 100644 Language Learning API/auth/folder.yml create mode 100644 README.md create mode 100644 api/alembic.ini create mode 100644 api/alembic/env.py create mode 100644 api/alembic/script.py.mako create mode 100644 api/alembic/versions/20260318_0001_initial_schema.py create mode 100644 api/alembic/versions/20260319_0002_add_audio_and_user_to_jobs.py create mode 100644 api/app/routers/auth.py create mode 100644 api/app/routers/media.py create mode 100644 api/app/services/__init__.py create mode 100644 api/app/services/job_repo.py create mode 100644 api/app/services/llm.py create mode 100644 api/app/services/tts.py create mode 100644 api/app/storage.py create mode 100644 api/app/worker.py diff --git a/.env.example b/.env.example index e843bc9..5df3b06 100644 --- a/.env.example +++ b/.env.example @@ -14,3 +14,11 @@ ANTHROPIC_API_KEY=sk-ant-... # DeepL (https://www.deepl.com/pro-api) DEEPL_API_KEY=your-deepl-api-key-here + +# Google Gemini (for text-to-speech) +GEMINI_API_KEY=your-gemini-api-key-here + +# Object storage (MinIO) +STORAGE_ACCESS_KEY=langlearn +STORAGE_SECRET_KEY=changeme-use-a-long-random-string +STORAGE_BUCKET=langlearn diff --git a/Language Learning API/audio/Get audio.yml b/Language Learning API/audio/Get audio.yml new file mode 100644 index 0000000..79c3238 --- /dev/null +++ b/Language Learning API/audio/Get audio.yml @@ -0,0 +1,19 @@ +info: + name: Get audio + type: http + seq: 1 + +http: + method: GET + url: audio/:file_name + params: + - name: file_name + value: "" + type: path + auth: inherit + +settings: + encodeUrl: true + timeout: 0 + followRedirects: true + maxRedirects: 5 diff --git a/Language Learning API/audio/folder.yml b/Language Learning API/audio/folder.yml new file mode 100644 index 0000000..0e61e5a --- /dev/null +++ b/Language Learning API/audio/folder.yml @@ -0,0 +1,7 @@ +info: + name: audio + type: folder + seq: 6 + +request: + auth: inherit diff --git a/Language Learning API/auth/Login.yml b/Language Learning API/auth/Login.yml new file mode 100644 index 0000000..3b648d1 --- /dev/null +++ b/Language Learning API/auth/Login.yml @@ -0,0 +1,22 @@ +info: + name: Login + type: http + seq: 2 + +http: + method: POST + url: "{{baseUrl}}/auth/register" + body: + type: json + data: |- + { + "email": "wilson@thomaswilson.xyz", + "password": "wilson@thomaswilson.xyz" + } + auth: inherit + +settings: + encodeUrl: true + timeout: 0 + followRedirects: true + maxRedirects: 5 diff --git a/Language Learning API/auth/Register.yml b/Language Learning API/auth/Register.yml new file mode 100644 index 0000000..a6d9f8a --- /dev/null +++ b/Language Learning API/auth/Register.yml @@ -0,0 +1,15 @@ +info: + name: Register + type: http + seq: 1 + +http: + method: POST + url: "" + auth: inherit + +settings: + encodeUrl: true + timeout: 0 + followRedirects: true + maxRedirects: 5 diff --git a/Language Learning API/auth/folder.yml b/Language Learning API/auth/folder.yml new file mode 100644 index 0000000..fad5330 --- /dev/null +++ b/Language Learning API/auth/folder.yml @@ -0,0 +1,7 @@ +info: + name: auth + type: folder + seq: 5 + +request: + auth: inherit diff --git a/Language Learning API/opencollection.yml b/Language Learning API/opencollection.yml index cf11f5d..0e073e7 100644 --- a/Language Learning API/opencollection.yml +++ b/Language Learning API/opencollection.yml @@ -17,12 +17,12 @@ config: request: auth: type: bearer - token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.sW8dZVeROpNxCHL2HEXym6aDzaobFW17mLPaYbtlyYs + token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkNDIzODc1NC05ZDljLTRkOWMtYTM1OS1lMGQ4MDZhOGQ1Y2QiLCJlbWFpbCI6IndpbHNvbkB0aG9tYXN3aWxzb24ueHl6IiwiZXhwIjoxNzczOTkzNDcxfQ.J5SH93js-YrJThxGliOKIOV8kbdfP1qUu0rx3iBNX0A variables: - name: baseUrl value: http://localhost:8000 - name: token - value: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.sW8dZVeROpNxCHL2HEXym6aDzaobFW17mLPaYbtlyYs + value: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkNDIzODc1NC05ZDljLTRkOWMtYTM1OS1lMGQ4MDZhOGQ1Y2QiLCJlbWFpbCI6IndpbHNvbkB0aG9tYXN3aWxzb24ueHl6IiwiZXhwIjoxNzczOTkzNDcxfQ.J5SH93js-YrJThxGliOKIOV8kbdfP1qUu0rx3iBNX0A bundled: false extensions: bruno: diff --git a/Makefile b/Makefile index 5efae3f..7d54059 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: build up down logs shell lock +.PHONY: down build up logs shell lock migrate migration build: docker compose build @@ -15,6 +15,14 @@ logs: shell: docker compose exec api bash +# Run pending migrations against the running db container +migrate: + docker compose exec api alembic upgrade head + +# Generate a new migration: make migration NAME="add foo table" +migration: + docker compose exec api alembic revision --autogenerate -m "$(NAME)" + # Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally) lock: cd api && uv pip compile pyproject.toml -o requirements.txt diff --git a/README.md b/README.md new file mode 100644 index 0000000..741f7f6 --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# Language learning app + +## Description + +This is an app designed to help people learn a second(+) language. Initially from English. The app will start with French, Spanish, Italian, and German as the target languages. With English as the only source language. + +The thesis of the app is that spaced repetition and recall remain effective mechanisms for language acquisition, and that exposure to appropriate, realistic text, can make that process less repetitive or dull. And that mixing text and audio allows the user to know how words "sound". + +At present, the app doesn't have a solution to recognising speech, another important part of language learning. + +It improve learner proficiency by building a mechanism for generating realistic, level-appropriate text from user-specified inputs, generated by LLMs. There will also be audio medium, similar to short podcasts, generated from the text. + +The application has a back-end written in python (fastapi), because of the Python ecosystem around data and machine learning. + +The application will have a web front end written in Svelte Kit. It will adopt progressive web app standards, to allow offline use. + +Communication between the two is through HTTP, authenticated with JWT tokens. + +## Technical Design + +This application must remain self-hostable. It should not rely on proprietary infrastructure (e.g. AWS Lambda functions) to run. It should use Docker Compose and Makefiles to build projects and deploy them onto a local server or a VPS. + +The main components so far are: + +- Backend server (fastapi) +- Front end (SvelteKit), yet to be built +- Object storage (Ceph), yet to be built diff --git a/api/Dockerfile b/api/Dockerfile index 14c172d..4176c1c 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -3,7 +3,7 @@ FROM python:3.11-slim WORKDIR /app # Install uv for fast, reproducible installs -RUN pip install --no-cache-dir uv +RUN pip install --no-cache-dir uv alembic # Install Python dependencies from pyproject.toml COPY pyproject.toml . @@ -16,8 +16,10 @@ RUN python -m spacy download en_core_web_sm && \ python -m spacy download it_core_news_sm && \ python -m spacy download de_core_news_sm -# Copy application source +# Copy application source and migrations COPY app/ ./app/ +COPY alembic/ ./alembic/ +COPY alembic.ini . EXPOSE 8000 -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["sh", "-c", "alembic upgrade head && uvicorn app.main:app --host 0.0.0.0 --port 8000"] diff --git a/api/alembic.ini b/api/alembic.ini new file mode 100644 index 0000000..055120f --- /dev/null +++ b/api/alembic.ini @@ -0,0 +1,39 @@ +[alembic] +script_location = alembic +file_template = %%(year)d%%(month).2d%%(day).2d_%%(rev)s_%%(slug)s +truncate_slug_length = 40 +prepend_sys_path = . + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/api/alembic/env.py b/api/alembic/env.py new file mode 100644 index 0000000..f90fd9a --- /dev/null +++ b/api/alembic/env.py @@ -0,0 +1,38 @@ +import asyncio +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import pool +from sqlalchemy.ext.asyncio import async_engine_from_config + +from app.config import settings +from app.database import Base +import app.models # noqa: F401 — register all models with Base.metadata + +config = context.config +config.set_main_option("sqlalchemy.url", settings.database_url) + +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +target_metadata = Base.metadata + + +def do_run_migrations(connection): + context.configure(connection=connection, target_metadata=target_metadata) + with context.begin_transaction(): + context.run_migrations() + + +async def run_async_migrations(): + connectable = async_engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + await connectable.dispose() + + +asyncio.run(run_async_migrations()) diff --git a/api/alembic/script.py.mako b/api/alembic/script.py.mako new file mode 100644 index 0000000..fbc4b07 --- /dev/null +++ b/api/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/api/alembic/versions/20260318_0001_initial_schema.py b/api/alembic/versions/20260318_0001_initial_schema.py new file mode 100644 index 0000000..29cee8e --- /dev/null +++ b/api/alembic/versions/20260318_0001_initial_schema.py @@ -0,0 +1,55 @@ +"""initial schema + +Revision ID: 0001 +Revises: +Create Date: 2026-03-18 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +revision: str = "0001" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "users", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("email", sa.String(255), nullable=False), + sa.Column("hashed_password", sa.String(255), nullable=False), + sa.Column("is_active", sa.Boolean(), nullable=False), + sa.Column("is_email_verified", sa.Boolean(), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index(op.f("ix_users_email"), "users", ["email"], unique=True) + + op.create_table( + "jobs", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("status", sa.String(20), nullable=False), + sa.Column("source_language", sa.String(10), nullable=False), + sa.Column("target_language", sa.String(10), nullable=False), + sa.Column("complexity_level", sa.String(5), nullable=False), + sa.Column("input_summary", sa.Text(), nullable=True), + sa.Column("generated_text", sa.Text(), nullable=True), + sa.Column("translated_text", sa.Text(), nullable=True), + sa.Column("error_message", sa.Text(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + + +def downgrade() -> None: + op.drop_table("jobs") + op.drop_index(op.f("ix_users_email"), table_name="users") + op.drop_table("users") diff --git a/api/alembic/versions/20260319_0002_add_audio_and_user_to_jobs.py b/api/alembic/versions/20260319_0002_add_audio_and_user_to_jobs.py new file mode 100644 index 0000000..1875905 --- /dev/null +++ b/api/alembic/versions/20260319_0002_add_audio_and_user_to_jobs.py @@ -0,0 +1,43 @@ +"""add audio_url and user_id to jobs + +Revision ID: 0002 +Revises: 0001 +Create Date: 2026-03-19 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +revision: str = "0002" +down_revision: Union[str, None] = "0001" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "jobs", + sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True), + ) + op.create_index(op.f("ix_jobs_user_id"), "jobs", ["user_id"], unique=False) + op.create_foreign_key( + "fk_jobs_user_id_users", + "jobs", + "users", + ["user_id"], + ["id"], + ) + op.add_column( + "jobs", + sa.Column("audio_url", sa.Text(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("jobs", "audio_url") + op.drop_constraint("fk_jobs_user_id_users", "jobs", type_="foreignkey") + op.drop_index(op.f("ix_jobs_user_id"), table_name="jobs") + op.drop_column("jobs", "user_id") diff --git a/api/app/auth.py b/api/app/auth.py index 394a48c..bb41cb4 100644 --- a/api/app/auth.py +++ b/api/app/auth.py @@ -1,10 +1,36 @@ +from datetime import datetime, timedelta, timezone + from fastapi import Depends, HTTPException, status from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials import jwt +from passlib.context import CryptContext from .config import settings security = HTTPBearer() +pwd_context = CryptContext( + schemes=["pbkdf2_sha256"], + deprecated="auto", +) + +TOKEN_EXPIRY_HOURS = 24 + + +def hash_password(password: str) -> str: + return pwd_context.hash(password) + + +def verify_password(plain: str, hashed: str) -> bool: + return pwd_context.verify(plain, hashed) + + +def create_access_token(user_id: str, email: str) -> str: + payload = { + "sub": user_id, + "email": email, + "exp": datetime.now(timezone.utc) + timedelta(hours=TOKEN_EXPIRY_HOURS), + } + return jwt.encode(payload, settings.jwt_secret, algorithm="HS256") def verify_token( diff --git a/api/app/config.py b/api/app/config.py index a66b28f..de2b88a 100644 --- a/api/app/config.py +++ b/api/app/config.py @@ -6,6 +6,11 @@ class Settings(BaseSettings): jwt_secret: str anthropic_api_key: str deepl_api_key: str + gemini_api_key: str + storage_endpoint_url: str + storage_access_key: str + storage_secret_key: str + storage_bucket: str = "langlearn" model_config = {"env_file": ".env"} diff --git a/api/app/main.py b/api/app/main.py index f97c0db..f76b166 100644 --- a/api/app/main.py +++ b/api/app/main.py @@ -1,23 +1,34 @@ +import asyncio from contextlib import asynccontextmanager from fastapi import FastAPI -from .database import engine, Base from .routers import pos, generation, jobs +from .routers import auth as auth_router +from .routers import media as media_router +from .storage import ensure_bucket_exists +from . import worker @asynccontextmanager async def lifespan(app: FastAPI): - async with engine.begin() as conn: - await conn.run_sync(Base.metadata.create_all) + ensure_bucket_exists() + worker_task = asyncio.create_task(worker.worker_loop()) yield + worker_task.cancel() + try: + await worker_task + except asyncio.CancelledError: + pass app = FastAPI(title="Language Learning API", lifespan=lifespan) +app.include_router(auth_router.router) app.include_router(pos.router) app.include_router(generation.router) app.include_router(jobs.router) +app.include_router(media_router.router) @app.get("/health") diff --git a/api/app/models.py b/api/app/models.py index 201561a..32675d9 100644 --- a/api/app/models.py +++ b/api/app/models.py @@ -1,19 +1,39 @@ import uuid from datetime import datetime, timezone -from sqlalchemy import String, Text, DateTime +from sqlalchemy import String, Text, DateTime, Boolean, ForeignKey from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.dialects.postgresql import UUID from .database import Base +class User(Base): + __tablename__ = "users" + + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), primary_key=True, default=uuid.uuid4 + ) + email: Mapped[str] = mapped_column(String(255), nullable=False, unique=True, index=True) + hashed_password: Mapped[str] = mapped_column(String(255), nullable=False) + is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) + # TODO(email-verification): set to False and require verification once transactional email is implemented + is_email_verified: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + default=lambda: datetime.now(timezone.utc), + ) + + class Job(Base): __tablename__ = "jobs" id: Mapped[uuid.UUID] = mapped_column( UUID(as_uuid=True), primary_key=True, default=uuid.uuid4 ) + user_id: Mapped[uuid.UUID | None] = mapped_column( + UUID(as_uuid=True), ForeignKey("users.id"), nullable=True, index=True + ) status: Mapped[str] = mapped_column(String(20), nullable=False, default="pending") source_language: Mapped[str] = mapped_column(String(10), nullable=False, default="en") target_language: Mapped[str] = mapped_column(String(10), nullable=False) @@ -22,6 +42,7 @@ class Job(Base): generated_text: Mapped[str | None] = mapped_column(Text, nullable=True) translated_text: Mapped[str | None] = mapped_column(Text, nullable=True) error_message: Mapped[str | None] = mapped_column(Text, nullable=True) + audio_url: Mapped[str | None] = mapped_column(Text, nullable=True) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), diff --git a/api/app/routers/auth.py b/api/app/routers/auth.py new file mode 100644 index 0000000..fd32dd2 --- /dev/null +++ b/api/app/routers/auth.py @@ -0,0 +1,78 @@ +from fastapi import APIRouter, Depends, HTTPException, status +from pydantic import BaseModel, EmailStr +from sqlalchemy import select +from sqlalchemy.exc import IntegrityError +from sqlalchemy.ext.asyncio import AsyncSession + +from ..auth import create_access_token, hash_password, verify_password +from ..database import get_db +from ..models import User + +router = APIRouter(prefix="/auth", tags=["auth"]) + + +class RegisterRequest(BaseModel): + email: EmailStr + password: str + + +class LoginRequest(BaseModel): + email: EmailStr + password: str + + +class TokenResponse(BaseModel): + access_token: str + token_type: str = "bearer" + + +@router.post("/register", status_code=status.HTTP_201_CREATED) +async def register(body: RegisterRequest, db: AsyncSession = Depends(get_db)): + user = User( + email=body.email, + hashed_password=hash_password(body.password), + ) + db.add(user) + try: + await db.commit() + await db.refresh(user) + except IntegrityError: + await db.rollback() + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail="Email already registered", + ) + + # TODO(email-verification): send verification email here once transactional + # email is implemented. Set is_email_verified=False on the User model and + # require verification before allowing login. + + return {"id": str(user.id), "email": user.email} + + +@router.post("/login", response_model=TokenResponse) +async def login(body: LoginRequest, db: AsyncSession = Depends(get_db)): + result = await db.execute(select(User).where(User.email == body.email)) + user = result.scalar_one_or_none() + + if user is None or not verify_password(body.password, user.hashed_password): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid email or password", + ) + + if not user.is_active: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Account disabled", + ) + + # TODO(email-verification): uncomment once email verification is in place + # if not user.is_email_verified: + # raise HTTPException( + # status_code=status.HTTP_403_FORBIDDEN, + # detail="Email address not verified", + # ) + + token = create_access_token(str(user.id), user.email) + return TokenResponse(access_token=token) diff --git a/api/app/routers/generation.py b/api/app/routers/generation.py index 23e67f6..acdfd94 100644 --- a/api/app/routers/generation.py +++ b/api/app/routers/generation.py @@ -1,16 +1,17 @@ import uuid -from datetime import datetime, timezone +from functools import partial -from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncSession -import anthropic -import deepl from ..auth import verify_token from ..database import get_db, AsyncSessionLocal from ..models import Job -from ..config import settings +from ..storage import upload_audio +from ..services import llm, tts, job_repo +from ..services.tts import VOICE_BY_LANGUAGE +from .. import worker router = APIRouter(prefix="/generate", tags=["generation"]) @@ -23,23 +24,6 @@ SUPPORTED_LANGUAGES: dict[str, str] = { } SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"} -# Maps our language codes to DeepL source/target language codes -DEEPL_SOURCE_LANG: dict[str, str] = { - "en": "EN", - "fr": "FR", - "es": "ES", - "it": "IT", - "de": "DE", -} -# DeepL target codes (English needs a regional variant) -DEEPL_TARGET_LANG: dict[str, str] = { - "en": "EN-US", - "fr": "FR", - "es": "ES", - "it": "IT", - "de": "DE", -} - class GenerationRequest(BaseModel): target_language: str @@ -56,15 +40,12 @@ class GenerationResponse(BaseModel): async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None: async with AsyncSessionLocal() as db: job = await db.get(Job, job_id) - job.status = "processing" - job.started_at = datetime.now(timezone.utc) - await db.commit() + await job_repo.mark_processing(db, job) try: from_language = SUPPORTED_LANGUAGES[request.source_language] language_name = SUPPORTED_LANGUAGES[request.target_language] - # Build a short summary of the input to store (not the full text) topic_part = f"Topic: {request.topic}. " if request.topic else "" combined_preview = " ".join(request.input_texts)[:300] input_summary = ( @@ -94,23 +75,7 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}." ) - client = anthropic.Anthropic(api_key=settings.anthropic_api_key) - - message = client.messages.create( - model="claude-sonnet-4-6", - max_tokens=1024, - messages=[{"role": "user", "content": prompt}], - ) - generated_text = message.content[0].text - - # TODO: Come back to this when DeepL unblock my account for being "high risk" - # Translate generated text back into the learner's source language via DeepL - # translator = deepl.Translator(settings.deepl_api_key) - # translation = translator.translate_text( - # generated_text, - # source_lang=DEEPL_SOURCE_LANG[request.target_language], - # target_lang=DEEPL_TARGET_LANG[request.source_language], - #) + generated_text = await llm.generate_text(prompt) translate_prompt = ( f"You are a helpful assistant that translates text. Translate just the previous summary " @@ -120,36 +85,29 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None f"- Send through only the translated text, no explanations or notes\n" ) - translate_message = client.messages.create( - model="claude-sonnet-4-6", - max_tokens=1024, - messages=[ - { "role": "user", "content": prompt }, - { "role": "assistant", "content": message.content }, - { "role": "user", "content": translate_prompt } - ], + translated_text = await llm.translate_text(prompt, generated_text, translate_prompt) + + # Save LLM results before attempting TTS so they're preserved on failure + await job_repo.save_llm_results( + db, job, generated_text, translated_text, input_summary[:500] ) - job.status = "succeeded" - job.generated_text = generated_text - job.translated_text = translate_message.content[0].text - job.input_summary = input_summary[:500] - job.completed_at = datetime.now(timezone.utc) + voice = VOICE_BY_LANGUAGE.get(request.target_language, "Kore") + wav_bytes = await tts.generate_audio(generated_text, voice) + audio_key = f"audio/{job_id}.wav" + upload_audio(audio_key, wav_bytes) + + await job_repo.mark_succeeded(db, job, audio_key) except Exception as exc: - job.status = "failed" - job.error_message = str(exc) - job.completed_at = datetime.now(timezone.utc) - - await db.commit() + await job_repo.mark_failed(db, job, str(exc)) @router.post("", response_model=GenerationResponse, status_code=202) async def create_generation_job( request: GenerationRequest, - background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db), - _: dict = Depends(verify_token), + token_data: dict = Depends(verify_token), ) -> GenerationResponse: if request.target_language not in SUPPORTED_LANGUAGES: raise HTTPException( @@ -165,6 +123,7 @@ async def create_generation_job( ) job = Job( + user_id=uuid.UUID(token_data["sub"]), source_language=request.source_language, target_language=request.target_language, complexity_level=request.complexity_level, @@ -173,6 +132,6 @@ async def create_generation_job( await db.commit() await db.refresh(job) - background_tasks.add_task(_run_generation, job.id, request) + await worker.enqueue(partial(_run_generation, job.id, request)) return GenerationResponse(job_id=str(job.id)) diff --git a/api/app/routers/jobs.py b/api/app/routers/jobs.py index d6d42a8..cf33b7b 100644 --- a/api/app/routers/jobs.py +++ b/api/app/routers/jobs.py @@ -1,5 +1,6 @@ import uuid -from datetime import datetime +from datetime import datetime, timezone +from functools import partial from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel @@ -7,8 +8,12 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select from ..auth import verify_token -from ..database import get_db +from ..database import get_db, AsyncSessionLocal from ..models import Job +from ..storage import upload_audio +from ..services import tts, job_repo +from ..services.tts import VOICE_BY_LANGUAGE +from .. import worker router = APIRouter(prefix="/jobs", tags=["jobs"]) @@ -26,12 +31,13 @@ class JobResponse(BaseModel): generated_text: str | None = None translated_text: str | None = None input_summary: str | None = None + audio_url: str | None = None # only present on failure error_message: str | None = None model_config = { "from_attributes": True } class JobSummary(BaseModel): - id: uuid.UUID + id: uuid.UUID status: str created_at: datetime @@ -44,7 +50,7 @@ class JobListResponse(BaseModel): async def get_jobs( db: AsyncSession = Depends(get_db), _: dict = Depends(verify_token) -) -> JobListResponse: +) -> JobListResponse: try: result = await db.execute(select(Job).order_by(Job.created_at.desc())) jobs = result.scalars().all() @@ -83,7 +89,56 @@ async def get_job( response.generated_text = job.generated_text response.translated_text = job.translated_text response.input_summary = job.input_summary + response.audio_url = job.audio_url elif job.status == "failed": response.error_message = job.error_message return response + + +async def _run_regenerate_audio(job_id: uuid.UUID) -> None: + async with AsyncSessionLocal() as db: + job = await db.get(Job, job_id) + await job_repo.mark_processing(db, job) + + try: + voice = VOICE_BY_LANGUAGE.get(job.target_language, "Kore") + wav_bytes = await tts.generate_audio(job.generated_text, voice) + audio_key = f"audio/{job_id}.wav" + upload_audio(audio_key, wav_bytes) + + await job_repo.mark_succeeded(db, job, audio_key) + + except Exception as exc: + await job_repo.mark_failed(db, job, str(exc)) + + +@router.post("/{job_id}/regenerate-audio", status_code=202) +async def regenerate_audio( + job_id: str, + db: AsyncSession = Depends(get_db), + token_data: dict = Depends(verify_token), +) -> dict: + try: + uid = uuid.UUID(job_id) + except ValueError: + raise HTTPException(status_code=400, detail="Invalid job ID format") + + job: Job | None = await db.get(Job, uid) + if job is None: + raise HTTPException(status_code=404, detail="Job not found") + + if str(job.user_id) != token_data["sub"]: + raise HTTPException(status_code=403, detail="Not authorized to modify this job") + + if not job.generated_text: + raise HTTPException(status_code=400, detail="Job has no generated text to synthesize") + + if job.audio_url: + raise HTTPException(status_code=409, detail="Job already has audio") + + if job.status == "processing": + raise HTTPException(status_code=409, detail="Job is already processing") + + await worker.enqueue(partial(_run_regenerate_audio, uid)) + return {"job_id": job_id} diff --git a/api/app/routers/media.py b/api/app/routers/media.py new file mode 100644 index 0000000..b1829e5 --- /dev/null +++ b/api/app/routers/media.py @@ -0,0 +1,39 @@ +import uuid + +from fastapi import APIRouter, Depends, HTTPException +from fastapi.responses import Response +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from botocore.exceptions import ClientError + +from ..auth import verify_token +from ..database import get_db +from ..models import Job +from ..storage import download_audio + +router = APIRouter(prefix="/media", tags=["media"]) + + +@router.get("/{filename:path}") +async def get_media_file( + filename: str, + db: AsyncSession = Depends(get_db), + token_data: dict = Depends(verify_token), +) -> Response: + user_id = uuid.UUID(token_data["sub"]) + + result = await db.execute( + select(Job).where(Job.audio_url == filename, Job.user_id == user_id) + ) + job = result.scalar_one_or_none() + if job is None: + raise HTTPException(status_code=404, detail="File not found") + + try: + audio_bytes, content_type = download_audio(filename) + except ClientError as e: + if e.response["Error"]["Code"] in ("NoSuchKey", "404"): + raise HTTPException(status_code=404, detail="File not found") + raise HTTPException(status_code=500, detail="Storage error") + + return Response(content=audio_bytes, media_type=content_type) diff --git a/api/app/services/__init__.py b/api/app/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/app/services/job_repo.py b/api/app/services/job_repo.py new file mode 100644 index 0000000..e1b14bc --- /dev/null +++ b/api/app/services/job_repo.py @@ -0,0 +1,39 @@ +from datetime import datetime, timezone + +from sqlalchemy.ext.asyncio import AsyncSession + +from ..models import Job + + +async def mark_processing(db: AsyncSession, job: Job) -> None: + job.status = "processing" + job.started_at = datetime.now(timezone.utc) + job.error_message = None + await db.commit() + + +async def save_llm_results( + db: AsyncSession, + job: Job, + generated_text: str, + translated_text: str, + input_summary: str, +) -> None: + job.generated_text = generated_text + job.translated_text = translated_text + job.input_summary = input_summary + await db.commit() + + +async def mark_succeeded(db: AsyncSession, job: Job, audio_url: str) -> None: + job.status = "succeeded" + job.audio_url = audio_url + job.completed_at = datetime.now(timezone.utc) + await db.commit() + + +async def mark_failed(db: AsyncSession, job: Job, error: str) -> None: + job.status = "failed" + job.error_message = error + job.completed_at = datetime.now(timezone.utc) + await db.commit() diff --git a/api/app/services/llm.py b/api/app/services/llm.py new file mode 100644 index 0000000..b6ef7be --- /dev/null +++ b/api/app/services/llm.py @@ -0,0 +1,39 @@ +import asyncio + +import anthropic + +from ..config import settings + + +def _create_client() -> anthropic.Anthropic: + return anthropic.Anthropic(api_key=settings.anthropic_api_key) + + +async def generate_text(prompt: str) -> str: + def _call() -> str: + client = _create_client() + message = client.messages.create( + model="claude-sonnet-4-6", + max_tokens=1024, + messages=[{"role": "user", "content": prompt}], + ) + return message.content[0].text + + return await asyncio.to_thread(_call) + + +async def translate_text(original_prompt: str, generated_text: str, translate_prompt: str) -> str: + def _call() -> str: + client = _create_client() + message = client.messages.create( + model="claude-sonnet-4-6", + max_tokens=1024, + messages=[ + {"role": "user", "content": original_prompt}, + {"role": "assistant", "content": generated_text}, + {"role": "user", "content": translate_prompt}, + ], + ) + return message.content[0].text + + return await asyncio.to_thread(_call) diff --git a/api/app/services/tts.py b/api/app/services/tts.py new file mode 100644 index 0000000..c4f42f4 --- /dev/null +++ b/api/app/services/tts.py @@ -0,0 +1,39 @@ +import asyncio + +from google import genai +from google.genai import types as genai_types + +from ..config import settings +from ..storage import pcm_to_wav + +VOICE_BY_LANGUAGE: dict[str, str] = { + "fr": "Kore", + "es": "Charon", + "it": "Aoede", + "de": "Fenrir", + "en": "Kore", +} + + +async def generate_audio(text: str, voice: str) -> bytes: + """Generate TTS audio and return WAV bytes.""" + def _call() -> bytes: + client = genai.Client(api_key=settings.gemini_api_key) + response = client.models.generate_content( + model="gemini-2.5-flash-preview-tts", + contents=text, + config=genai_types.GenerateContentConfig( + response_modalities=["AUDIO"], + speech_config=genai_types.SpeechConfig( + voice_config=genai_types.VoiceConfig( + prebuilt_voice_config=genai_types.PrebuiltVoiceConfig( + voice_name=voice, + ) + ) + ), + ), + ) + pcm_data = response.candidates[0].content.parts[0].inline_data.data + return pcm_to_wav(pcm_data) + + return await asyncio.to_thread(_call) diff --git a/api/app/storage.py b/api/app/storage.py new file mode 100644 index 0000000..854e07c --- /dev/null +++ b/api/app/storage.py @@ -0,0 +1,56 @@ +import io +import wave + +import boto3 +from botocore.exceptions import ClientError + +from .config import settings + + +def get_s3_client(): + return boto3.client( + "s3", + endpoint_url=settings.storage_endpoint_url, + aws_access_key_id=settings.storage_access_key, + aws_secret_access_key=settings.storage_secret_key, + ) + + +def ensure_bucket_exists() -> None: + client = get_s3_client() + try: + client.head_bucket(Bucket=settings.storage_bucket) + except ClientError as e: + if e.response["Error"]["Code"] in ("404", "NoSuchBucket"): + client.create_bucket(Bucket=settings.storage_bucket) + else: + raise + + +def pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes: + """Wrap raw 16-bit mono PCM data in a WAV container.""" + buf = io.BytesIO() + with wave.open(buf, "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) # 16-bit + wf.setframerate(sample_rate) + wf.writeframes(pcm_data) + return buf.getvalue() + + +def upload_audio(object_key: str, audio_bytes: bytes, content_type: str = "audio/wav") -> None: + client = get_s3_client() + client.put_object( + Bucket=settings.storage_bucket, + Key=object_key, + Body=audio_bytes, + ContentType=content_type, + ) + + +def download_audio(object_key: str) -> tuple[bytes, str]: + """Return (file_bytes, content_type).""" + client = get_s3_client() + response = client.get_object(Bucket=settings.storage_bucket, Key=object_key) + content_type = response.get("ContentType", "audio/wav") + return response["Body"].read(), content_type diff --git a/api/app/worker.py b/api/app/worker.py new file mode 100644 index 0000000..c0f82ef --- /dev/null +++ b/api/app/worker.py @@ -0,0 +1,22 @@ +import asyncio +import logging +from typing import Awaitable, Callable + +logger = logging.getLogger(__name__) + +_queue: asyncio.Queue[Callable[[], Awaitable[None]]] = asyncio.Queue() + + +async def enqueue(task: Callable[[], Awaitable[None]]) -> None: + await _queue.put(task) + + +async def worker_loop() -> None: + while True: + task = await _queue.get() + try: + await task() + except Exception: + logger.exception("Unhandled error in worker task") + finally: + _queue.task_done() diff --git a/api/pyproject.toml b/api/pyproject.toml index bc1f049..4e57afc 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -10,8 +10,13 @@ dependencies = [ "spacy>=3.8.0", "anthropic>=0.40.0", "pyjwt>=2.10.0", + "passlib>=1.7.4", + "email-validator>=2.0.0", + "alembic>=1.13.0", "pydantic-settings>=2.0.0", "deepl>=1.18.0", + "google-genai>=1.0.0", + "boto3>=1.35.0", ] [build-system] diff --git a/docker-compose.yml b/docker-compose.yml index eac01e5..ce3571d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,6 +13,23 @@ services: timeout: 5s retries: 10 + storage: + image: minio/minio:latest + command: server /data --console-address ":9001" + environment: + MINIO_ROOT_USER: ${STORAGE_ACCESS_KEY:-langlearn} + MINIO_ROOT_PASSWORD: ${STORAGE_SECRET_KEY} + ports: + - "9000:9000" + - "9001:9001" + volumes: + - storagedata:/data + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:9000/minio/health/live || exit 1"] + interval: 5s + timeout: 5s + retries: 10 + api: build: ./api ports: @@ -22,10 +39,18 @@ services: JWT_SECRET: ${JWT_SECRET} ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} DEEPL_API_KEY: ${DEEPL_API_KEY} + GEMINI_API_KEY: ${GEMINI_API_KEY} + STORAGE_ENDPOINT_URL: http://storage:9000 + STORAGE_ACCESS_KEY: ${STORAGE_ACCESS_KEY:-langlearn} + STORAGE_SECRET_KEY: ${STORAGE_SECRET_KEY} + STORAGE_BUCKET: ${STORAGE_BUCKET:-langlearn} depends_on: db: condition: service_healthy + storage: + condition: service_healthy restart: unless-stopped volumes: pgdata: + storagedata: