feat: Change out storage from local to Bunny (via env param)

fix: Commit various features relating to CYOA
2026-05-18 21:18:19 +01:00 · 2026-05-17 13:36:21 +01:00
24 changed files with 858 additions and 123 deletions
--- a/api/app/config.py
+++ b/api/app/config.py
@ -15,10 +15,16 @@ class Settings(BaseSettings):
    scaleway_tem_project_id: str = ""
    scaleway_tem_from_address: str = ""
    scaleway_tem_region: str = "fr-par"
-    storage_endpoint_url: str
+    storage_provider: str = "local"  # or 'bunny'
-    storage_access_key: str
+    storage_endpoint_url: str = ""
-    storage_secret_key: str
+    storage_access_key: str = ""
    storage_secret_key: str = ""
    storage_bucket: str = "langlearn"
    bunny_zone: str = "languagelearningapp"
    bunny_api_key: str = ""
    bunny_cdn_base_url: str = ""
    bunny_token_auth_key: str = ""
    bunny_storage_endpoint: str = "https://storage.bunnycdn.com"
    stub_generation: bool = False
    model_config = {"env_file": ".env"}
--- a/api/app/domain/services/adventure_service.py
+++ b/api/app/domain/services/adventure_service.py
@ -24,7 +24,7 @@ from ...outbound.postgres.repositories.adventure_repository import (
    PostgresAdventureRepository,
 )
 from ...outbound.spacy.spacy_client import SpacyClient
-from ...storage import upload_audio
+from ...outbound.storage_client import get_storage_client
 from ..models.adventure import (
    Adventure,
    AdventureEntry,
@ -251,7 +251,7 @@ class AdventureService:
                for sent_idx, target_sent in enumerate(target_nlp["sentences"]):
                    t0 = time.monotonic()
                    translated_sentence = await self.deepl_client.translate(
-                        target_sent["text"], adventure.source_language
+                        target_sent["text"], adventure.source_language, paragraph_text
                    )
                    timing_translations += time.monotonic() - t0
@ -315,7 +315,7 @@ class AdventureService:
            # ── File upload ───────────────────────────────────────────────────
            t0 = time.monotonic()
            audio_key = f"adventure-audio/{entry_id}.wav"
-            upload_audio(audio_key, wav_bytes)
+            get_storage_client().upload(audio_key, wav_bytes)
            timing_file_uploading = time.monotonic() - t0
            await self.audio_repo.create(
--- a/api/app/domain/services/summarise_service.py
+++ b/api/app/domain/services/summarise_service.py
@ -14,7 +14,7 @@ from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
 from ...outbound.deepl.deepl_client import DeepLClient
 from ...outbound.gemini.gemini_client import GeminiClient
 from ...outbound.spacy.spacy_client import SpacyClient
-from ...storage import upload_audio
+from ...outbound.storage_client import get_storage_client
 from ...languages import SUPPORTED_LANGUAGES
@ -137,7 +137,7 @@ class SummariseService:
            voice = self.gemini_client.get_voice_by_language(target_language)
            wav_bytes = await self.gemini_client.generate_audio(generated_text, voice)
            audio_key = f"audio/{job_id}.wav"
-            upload_audio(audio_key, wav_bytes)
+            get_storage_client().upload(audio_key, wav_bytes)
            transcript = await self.deepgram_client.transcribe_bytes(wav_bytes, target_language)
--- a/api/app/main.py
+++ b/api/app/main.py
@ -8,13 +8,13 @@ from .routers.api import jobs
 from .routers import media as media_router
 from .routers.api.main import api_router
 from .routers.bff.main import bff_router
-from .storage import ensure_bucket_exists
+from .outbound.storage_factory import init_storage
 from . import worker
@asynccontextmanager
 async def lifespan(app: FastAPI):
-    ensure_bucket_exists()
+    init_storage()
    worker_task = asyncio.create_task(worker.worker_loop())
    yield
    worker_task.cancel()
--- a/api/app/outbound/bunny/init.py
+++ b/api/app/outbound/bunny/init.py
--- a/api/app/outbound/bunny/bunny_client.py
+++ b/api/app/outbound/bunny/bunny_client.py
@ -0,0 +1,77 @@
 import base64
 import hashlib
 import logging
 import time
 import urllib.error
 import urllib.request
 _SIGNED_URL_EXPIRY_SECONDS = 3600
 # Upper bound for a single storage API call; urlopen() waits indefinitely
 # when no timeout is given.
 _REQUEST_TIMEOUT_SECONDS = 30
 _logger = logging.getLogger(__name__)


 class BunnyClient:
    """Storage adapter for a Bunny storage zone and its CDN hostname.

    ``upload`` and ``delete`` call the storage endpoint, authenticated with
    the ``AccessKey`` header. ``get_url`` and ``get_public_url`` only build
    CDN URLs and perform no network I/O.
    """

    def __init__(
        self,
        zone: str,
        api_key: str,
        cdn_base_url: str,
        token_auth_key: str,
        storage_endpoint: str = "https://storage.bunnycdn.com",
    ) -> None:
        self._zone = zone
        self._api_key = api_key
        # Trailing slashes are dropped so URL building can always add one.
        self._cdn_base_url = cdn_base_url.rstrip("/")
        self._token_auth_key = token_auth_key
        self._storage_endpoint = storage_endpoint.rstrip("/")

    def _storage_url(self, path: str) -> str:
        """Return the storage endpoint URL for ``path`` inside the zone."""
        return f"{self._storage_endpoint}/{self._zone}/{path.lstrip('/')}"

    def _send(self, req: urllib.request.Request, expected_status: int) -> bool:
        """Send ``req``; return True only if it answers ``expected_status``.

        Every transport-level failure is logged and reported as ``False`` so
        the bool contract of ``upload``/``delete`` holds for network errors
        as well as HTTP error responses.
        """
        try:
            with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT_SECONDS) as resp:
                if resp.status == expected_status:
                    return True
                _logger.warning(
                    "Bunny %s %s answered %s (expected %s)",
                    req.get_method(),
                    req.full_url,
                    resp.status,
                    expected_status,
                )
                return False
        except OSError as exc:
            # HTTPError, URLError and socket timeouts are all OSError subclasses.
            _logger.warning(
                "Bunny %s %s failed: %s", req.get_method(), req.full_url, exc
            )
            return False

    def upload(self, path: str, data: bytes) -> bool:
        """PUT ``data`` to ``path``; True when the endpoint answers 201."""
        req = urllib.request.Request(
            self._storage_url(path),
            data=data,
            method="PUT",
            headers={
                "AccessKey": self._api_key,
                "Content-Type": "audio/wav",
            },
        )
        return self._send(req, 201)

    def get_url(self, path: str) -> str:
        """Return a signed CDN URL for ``path``.

        The token is the URL-safe, unpadded base64 of
        ``sha256(token_auth_key + "/" + path + expires)`` and the URL expires
        ``_SIGNED_URL_EXPIRY_SECONDS`` after it is generated.
        """
        url_path = f"/{path.lstrip('/')}"
        expiration = int(time.time()) + _SIGNED_URL_EXPIRY_SECONDS
        digest = hashlib.sha256(
            (self._token_auth_key + url_path + str(expiration)).encode()
        ).digest()
        # '=' only ever appears as trailing padding, so rstrip matches the
        # previous replace("=", "").
        token = base64.urlsafe_b64encode(digest).decode().rstrip("=")
        return f"{self._cdn_base_url}{url_path}?token={token}&expires={expiration}"

    def get_public_url(self, path: str) -> str:
        """Return the unsigned CDN URL for ``path``."""
        return f"{self._cdn_base_url}/{path.lstrip('/')}"

    def delete(self, path: str) -> bool:
        """DELETE ``path``; True when the endpoint answers 200."""
        req = urllib.request.Request(
            self._storage_url(path),
            method="DELETE",
            headers={"AccessKey": self._api_key},
        )
        return self._send(req, 200)

    def download(self, path: str) -> tuple[bytes, str]:
        """Not supported for Bunny; always raises ``NotImplementedError``."""
        raise NotImplementedError(
            "Direct download not available with Bunny — use get_url() to obtain a signed CDN URL"
        )
--- a/api/app/outbound/gemini/gemini_client.py
+++ b/api/app/outbound/gemini/gemini_client.py
@ -1,9 +1,19 @@
 import asyncio
 import io
 import wave
 from google import genai
 from google.genai import types as genai_types
-from ...storage import pcm_to_wav
+
 def _pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:
    """Wrap raw PCM frames in a WAV container and return the file bytes.

    The header is written for one channel with 2-byte samples at
    ``sample_rate`` Hz.
    """
    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, "wb") as wav_writer:
        wav_writer.setnchannels(1)  # mono
        wav_writer.setsampwidth(2)  # 2 bytes per sample
        wav_writer.setframerate(sample_rate)
        wav_writer.writeframes(pcm_data)
    wav_bytes = wav_buffer.getvalue()
    return wav_bytes
 VOICE_BY_LANGUAGE: dict[str, str] = {
    "fr": "Kore",
@ -47,6 +57,6 @@ class GeminiClient():
                ),
            )
            pcm_data = response.candidates[0].content.parts[0].inline_data.data
-            return pcm_to_wav(pcm_data)
+            return _pcm_to_wav(pcm_data)
        return await asyncio.to_thread(_call)
--- a/api/app/outbound/minio/init.py
+++ b/api/app/outbound/minio/init.py
--- a/api/app/outbound/minio/minio_client.py
+++ b/api/app/outbound/minio/minio_client.py
@ -0,0 +1,70 @@
 import boto3
 from botocore.exceptions import ClientError
 class MinioClient:
    """S3-compatible storage adapter (MinIO) backed by a single bucket.

    Objects are written and read through boto3. URLs handed to clients point
    at this API's own ``/media/`` routes rather than at the bucket.
    """

    def __init__(
        self,
        endpoint_url: str,
        access_key: str,
        secret_key: str,
        bucket: str,
        api_base_url: str,
    ) -> None:
        self._endpoint_url = endpoint_url
        self._access_key = access_key
        self._secret_key = secret_key
        self._bucket = bucket
        self._api_base_url = api_base_url.rstrip("/")
        # Built lazily by _s3() and then reused: constructing a boto3 client
        # is comparatively expensive and the connection settings never change.
        self._client = None

    def _s3(self):
        """Return the boto3 S3 client, creating it on first use."""
        if self._client is None:
            self._client = boto3.client(
                "s3",
                endpoint_url=self._endpoint_url,
                aws_access_key_id=self._access_key,
                aws_secret_access_key=self._secret_key,
            )
        return self._client

    def ensure_bucket_exists(self) -> None:
        """Create the bucket if ``head_bucket`` reports it missing.

        Raises:
            ClientError: for any ``head_bucket`` failure other than a
                404 / ``NoSuchBucket`` code.
        """
        client = self._s3()
        try:
            client.head_bucket(Bucket=self._bucket)
        except ClientError as e:
            if e.response["Error"]["Code"] in ("404", "NoSuchBucket"):
                client.create_bucket(Bucket=self._bucket)
            else:
                raise

    def upload(self, path: str, data: bytes) -> bool:
        """Store ``data`` under key ``path``; False if S3 rejects the call."""
        try:
            self._s3().put_object(
                Bucket=self._bucket,
                Key=path,
                Body=data,
                ContentType="audio/wav",
            )
            return True
        except ClientError:
            return False

    def get_url(self, path: str) -> str:
        """Return the API ``/media/`` URL that serves ``path``."""
        return f"{self._api_base_url}/media/{path}"

    def get_public_url(self, path: str) -> str:
        """Return the same ``/media/`` URL as ``get_url``."""
        return f"{self._api_base_url}/media/{path}"

    def delete(self, path: str) -> bool:
        """Delete key ``path``; False if S3 rejects the call."""
        try:
            self._s3().delete_object(Bucket=self._bucket, Key=path)
            return True
        except ClientError:
            return False

    def download(self, path: str) -> tuple[bytes, str]:
        """Return ``(object bytes, content type)`` for key ``path``.

        Raises:
            FileNotFoundError: when S3 answers ``NoSuchKey`` / 404.
            ClientError: for any other S3 failure.
        """
        try:
            response = self._s3().get_object(Bucket=self._bucket, Key=path)
            return response["Body"].read(), response.get("ContentType", "audio/wav")
        except ClientError as e:
            if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
                # Keep the S3 error attached as the cause for debugging.
                raise FileNotFoundError(path) from e
            raise
--- a/api/app/outbound/storage_client.py
+++ b/api/app/outbound/storage_client.py
@ -0,0 +1,21 @@
 from typing import Protocol
 _client: "StorageClient | None" = None
 class StorageClient(Protocol):
    """Structural interface for the object returned by get_storage_client().

    Paths are plain strings supplied by the caller; this protocol only fixes
    the method names and signatures.
    """
    # Store `data` at `path`; the bool is the implementation's success flag.
    def upload(self, path: str, data: bytes) -> bool: ...
    # URL for `path` (presumably the access-controlled variant when the
    # backend distinguishes one — verify against the concrete client).
    def get_url(self, path: str) -> str: ...
    # URL for `path` intended for public content.
    def get_public_url(self, path: str) -> str: ...
    # Remove `path`; the bool is the implementation's success flag.
    def delete(self, path: str) -> bool: ...
    # Return a (bytes, str) pair for `path` — presumably (content, content type).
    def download(self, path: str) -> tuple[bytes, str]: ...
 def get_storage_client() -> "StorageClient":
    """Return the module-level storage client.

    Raises:
        RuntimeError: if no client has been stored in ``_client`` yet. An
            explicit raise is used instead of ``assert`` because assertions
            are removed when Python runs with ``-O``.
    """
    if _client is None:
        raise RuntimeError("Storage client not initialised — call init_storage() at startup")
    return _client
 def _set_storage_client(c: "StorageClient") -> None:
    """Store ``c`` in the module-level ``_client`` slot read by get_storage_client()."""
    # Rebinds the module global rather than creating a local name.
    global _client
    _client = c
--- a/api/app/outbound/storage_factory.py
+++ b/api/app/outbound/storage_factory.py
@ -0,0 +1,27 @@
 from ..config import settings
 from .storage_client import StorageClient, _set_storage_client
 from .minio.minio_client import MinioClient
 from .bunny.bunny_client import BunnyClient
 def init_storage() -> None:
    """Build the storage client chosen by ``settings.storage_provider`` and register it.

    ``"bunny"`` selects ``BunnyClient``. ``"local"`` selects ``MinioClient``
    and makes sure its bucket exists before registering it. The value is
    compared case-insensitively with surrounding whitespace ignored.

    Raises:
        ValueError: if the provider is neither ``"local"`` nor ``"bunny"``.
            Previously any unrecognised value silently selected MinIO, so a
            typo in a deployed environment went unnoticed.
    """
    provider = settings.storage_provider.strip().lower()
    client: StorageClient
    if provider == "bunny":
        client = BunnyClient(
            zone=settings.bunny_zone,
            api_key=settings.bunny_api_key,
            cdn_base_url=settings.bunny_cdn_base_url,
            token_auth_key=settings.bunny_token_auth_key,
            storage_endpoint=settings.bunny_storage_endpoint,
        )
    elif provider == "local":
        minio = MinioClient(
            endpoint_url=settings.storage_endpoint_url,
            access_key=settings.storage_access_key,
            secret_key=settings.storage_secret_key,
            bucket=settings.storage_bucket,
            api_base_url=settings.api_base_url,
        )
        minio.ensure_bucket_exists()
        client = minio
    else:
        raise ValueError(
            f"Unknown storage provider {settings.storage_provider!r}; expected 'local' or 'bunny'"
        )
    _set_storage_client(client)
--- a/api/app/routers/api/jobs.py
+++ b/api/app/routers/api/jobs.py
@ -12,7 +12,7 @@ from ...outbound.postgres.repositories import summarise_job_repository
 from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
 from ...outbound.postgres.entities.translated_article_entity import TranslatedArticleEntity
 from ...outbound.gemini.gemini_client import GeminiClient
-from ...storage import upload_audio
+from ...outbound.storage_client import get_storage_client
 from ...config import settings
 from ... import worker
@ -92,7 +92,7 @@ async def _run_regenerate_audio(job_id: uuid.UUID) -> None:
            voice = gemini_client.get_voice_by_language(article_entity.target_language)
            wav_bytes = await gemini_client.generate_audio(article_entity.target_body, voice)
            audio_key = f"audio/{job_id}.wav"
-            upload_audio(audio_key, wav_bytes)
+            get_storage_client().upload(audio_key, wav_bytes)
            await article_repo.update_audio(
                article_entity.id,
--- a/api/app/routers/bff/adventure.py
+++ b/api/app/routers/bff/adventure.py
@ -5,8 +5,8 @@ from pydantic import BaseModel
 from sqlalchemy.ext.asyncio import AsyncSession
 from ...auth import verify_token
 from ...config import settings
 from ...outbound.postgres.database import get_db
 from ...outbound.storage_client import get_storage_client
 from ...outbound.postgres.repositories.adventure_repository import (
    PostgresAdventureEntryAudioRepository,
    PostgresAdventureEntryChoiceRepository,
@ -61,7 +61,7 @@ class AdventureDetailResponse(BaseModel):
 def _audio_url(key: str | None) -> str | None:
    if key is None:
        return None
-    return f"{settings.api_base_url}/media/{key}"
+    return get_storage_client().get_url(key)
@router.get("/{adventure_id}", response_model=AdventureDetailResponse, status_code=200)
--- a/api/app/routers/bff/articles.py
+++ b/api/app/routers/bff/articles.py
@ -6,8 +6,8 @@ from pydantic import BaseModel
 from sqlalchemy.ext.asyncio import AsyncSession
 from ...auth import verify_token
 from ...config import settings
 from ...outbound.postgres.database import get_db
 from ...outbound.storage_client import get_storage_client
 from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
 router = APIRouter(prefix="/articles", tags=["bff", "articles"])
@ -46,7 +46,7 @@ class ArticleDetail(BaseModel):
 def _audio_url(key: str | None) -> str | None:
    if key is None:
        return None
-    return f"{settings.api_base_url}/media/{key}"
+    return get_storage_client().get_url(key)
@router.get("", response_model=ArticleListResponse, status_code=200)
--- a/api/app/routers/media.py
+++ b/api/app/routers/media.py
@ -3,12 +3,11 @@ import uuid
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import Response
 from sqlalchemy.ext.asyncio import AsyncSession
 from botocore.exceptions import ClientError
 from ..outbound.postgres.database import get_db
 from ..outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
 from ..outbound.postgres.repositories.adventure_repository import PostgresAdventureEntryAudioRepository
-from ..storage import download_audio
+from ..outbound.storage_client import get_storage_client
 router = APIRouter(prefix="/media", tags=["media"])
@ -23,21 +22,23 @@ async def get_adventure_audio_file(
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid file ID")
    print(f"Looking for adventure audio with entry ID: {eid}")
    adventure_audio = await PostgresAdventureEntryAudioRepository(db).get_for_entry(entry_id=eid, component_type="story_text")
    if adventure_audio is None:
        raise HTTPException(status_code=404, detail="File not found")
    try:
-        audio_bytes, content_type = download_audio("adventure-audio/" + filename)
+        audio_bytes, content_type = get_storage_client().download("adventure-audio/" + filename)
-    except ClientError as e:
+    except FileNotFoundError:
        if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
        raise HTTPException(status_code=404, detail="File not found")
    except NotImplementedError:
        raise HTTPException(status_code=501, detail="Media proxy not available with current storage provider")
    except Exception:
        raise HTTPException(status_code=500, detail="Storage error")
    return Response(content=audio_bytes, media_type=content_type)
@router.get("/{filename:path}")
 async def get_media_file(
    filename: str,
@ -49,11 +50,12 @@ async def get_media_file(
        raise HTTPException(status_code=404, detail="File not found")
    try:
-        audio_bytes, content_type = download_audio(filename)
+        audio_bytes, content_type = get_storage_client().download(filename)
-    except ClientError as e:
+    except FileNotFoundError:
        if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
        raise HTTPException(status_code=404, detail="File not found")
    except NotImplementedError:
        raise HTTPException(status_code=501, detail="Media proxy not available with current storage provider")
    except Exception:
        raise HTTPException(status_code=500, detail="Storage error")
    return Response(content=audio_bytes, media_type=content_type)
--- a/api/app/storage.py
+++ b/api/app/storage.py
@ -1,56 +0,0 @@
 import io
 import wave
 import boto3
 from botocore.exceptions import ClientError
 from .config import settings
 def get_s3_client():
    """Create a boto3 S3 client from the storage endpoint and credentials in settings."""
    connection_options = {
        "endpoint_url": settings.storage_endpoint_url,
        "aws_access_key_id": settings.storage_access_key,
        "aws_secret_access_key": settings.storage_secret_key,
    }
    return boto3.client("s3", **connection_options)
 def ensure_bucket_exists() -> None:
    """Create the configured bucket when ``head_bucket`` reports it missing.

    Any other ``ClientError`` from ``head_bucket`` is re-raised unchanged.
    """
    s3 = get_s3_client()
    try:
        s3.head_bucket(Bucket=settings.storage_bucket)
    except ClientError as err:
        error_code = err.response["Error"]["Code"]
        if error_code not in ("404", "NoSuchBucket"):
            raise
        s3.create_bucket(Bucket=settings.storage_bucket)
 def pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:
    """Return ``pcm_data`` (raw 16-bit mono PCM) wrapped in a WAV container."""
    container = io.BytesIO()
    with wave.open(container, "wb") as writer:
        # (channels, sample width in bytes, frame rate, frame count,
        # compression type, compression name); a frame count of 0 is the
        # writer's default.
        writer.setparams((1, 2, sample_rate, 0, "NONE", "not compressed"))
        writer.writeframes(pcm_data)
    return container.getvalue()
 def upload_audio(object_key: str, audio_bytes: bytes, content_type: str = "audio/wav") -> None:
    """Write ``audio_bytes`` to the configured bucket under ``object_key``."""
    s3 = get_s3_client()
    put_arguments = {
        "Bucket": settings.storage_bucket,
        "Key": object_key,
        "Body": audio_bytes,
        "ContentType": content_type,
    }
    s3.put_object(**put_arguments)
 def download_audio(object_key: str) -> tuple[bytes, str]:
    """Fetch ``object_key`` from the configured bucket.

    Returns ``(file_bytes, content_type)``; the content type falls back to
    ``"audio/wav"`` when the response carries none.
    """
    s3_object = get_s3_client().get_object(
        Bucket=settings.storage_bucket,
        Key=object_key,
    )
    mime_type = s3_object.get("ContentType", "audio/wav")
    payload = s3_object["Body"].read()
    return payload, mime_type
--- a/api/docs/design-doc-object-storage.md
+++ b/api/docs/design-doc-object-storage.md
@ -0,0 +1,82 @@
 # Design Document: Object Storage with Bunny CDN
 This is a technical design document for implementing object (e.g. audio file) storage with Bunny CDN. This directory (`api/docs`) contains other similar files, notably `architecture.md` and `domain.md`. When you have worked through the change described here, please update `architecture.md`.
 ## The problem
 Language Learning App has audio as a core component, which requires files to be delivered to the end user. When developing locally, these files have been stored in a MinIO service, mimicking an S3-like storage bucket.
 Using this approach on a deployed instance (e.g. on a VPS using Docker) would result in high bandwidth usage and therefore a high cost. Using a dedicated, EU-based service like Bunny allows us to offload the delivery of content to a third party at reduced cost (great!)
 ## The current implementation
 Object storage was one of the first features built into this software in its MVP state; as such, it does not fit within the current architecture.
 Right now `api/app/storage.py` contains some helper functions, notably the `upload_audio` and `download_audio` functions.
 Users (through the web client) retrieve the media through two URLs (detailed in `api/app/routers/media.py`):
 - `GET /media/adventure-audio/{filename:path}` for the choose-your-own-adventure file names
 - `GET /media/{filename:path}`, used for the summary transcriptions
 ## The solution
 We are going to use Bunny (bunny.net) as the CDN for all objects in deployed environments (right now, just production — in the future preprod or staging may exist).
 Locally, for development purposes, we retain the use of MinIO. To decide which backend to use, we introduce an environment variable `STORAGE_PROVIDER` with a default value of `local` and an accepted alternative of `bunny`.
 In situations where we use `local`, the existing `/media/..` proxy endpoints are returned when constructing audio URLs (e.g. in `api/app/routers/bff/articles.py` and `api/app/routers/bff/adventure.py`). When we use `bunny`, the Bunny CDN URL is returned directly so the request is never proxied through our service.
 ### Client interface
 We will create a `BunnyClient` in `api/app/outbound/bunny/bunny_client.py` and extract the current MinIO logic into a `MinioClient` in `api/app/outbound/minio/minio_client.py`. Both implement a shared `StorageClient` protocol.
 The interface is **generic** — the clients are storage adapters and must not encode domain concepts. Path construction (which directory, which filename) is the responsibility of the caller (the service layer), not the client.
 ```python
 class StorageClient(Protocol):
    def upload(self, path: str, data: bytes) -> bool: ...
    def get_url(self, path: str) -> str: ...
    def delete(self, path: str) -> bool: ...
 ```
 Services construct paths using hardcoded directory prefixes (e.g. `"adventure-audio/"`, `"audio/"`). These are constants, not environment variables — they are not environment-specific and do not belong in config.
 ### Factory and instantiation
 A factory function reads `STORAGE_PROVIDER` and returns the appropriate `StorageClient` implementation. The client is instantiated **once at app startup** (e.g. in `main.py`) as a module-level singleton — not per-request. This is consistent with how other outbound clients (`AnthropicClient`, `GeminiClient`, etc.) are handled.
 ### Bunny configuration
 Bunny requires the following environment variables:
 - `BUNNY_ZONE` — the storage zone name (the zone `languagelearningapp` has been created in the Bunny UI). No "DEFAULT" suffix; there is one zone.
 - `BUNNY_API_KEY` — the Bunny API key for upload/delete operations.
 - `BUNNY_CDN_BASE_URL` — the public CDN hostname used to construct delivery URLs.
 ### Signed vs. public URLs
 Because audio files are user-specific (i.e. one user should not be able to use another user's audio URL), Bunny signed URLs are required. Public CDN URLs are shareable by anyone who has the link.
 Bunny's own documentation recommends the `token.py` package:
 ```py
 from token import sign_url
 url = sign_url(
    "https://myzone.b-cdn.net/videos/stream1/playlist.m3u8",
    "your-security-key",
    expiration_time=3600,
    is_directory=True,
    path_allowed="/videos/stream1/",
    countries_allowed="GB",
 )
 ```
 `get_url(path)` on the `BunnyClient` must generate a time-limited (pick a sensible default for audio content here) signed URL using the Bunny Token Authentication feature. The MinIO implementation would use pre-signed S3 URLs for consistency.
 Create a sibling method that explicitly creates public URLs for any future public content; call this `get_public_url`.
 ### Misc
 `pcm_to_wav()` currently lives in `api/app/storage.py` but is a Gemini output concern. Move it to the Gemini client module (`api/app/outbound/gemini/`) when carrying out this refactor.
--- a/api/docs/technical-doc-choose-your-own-adventure.md
+++ b/api/docs/technical-doc-choose-your-own-adventure.md
@ -146,7 +146,6 @@ def parse_llm_response(text: str) -> tuple[str, list[tuple[str, str]], str]:
 app/domain/models/adventure.py
 app/domain/services/adventure_service.py
 app/routers/api/adventures.py
 app/routers/bff/adventures.py
 app/outbound/postgres/entities/adventure_entities.py
 app/outbound/postgres/repositories/adventure_repository.py
 alembic/versions/20260503_0016_add_choose_your_own_adventure.py
@ -158,7 +157,6 @@ Modified files:
 ```
 app/outbound/anthropic/anthropic_client.py  (add 2 methods)
 app/routers/api/main.py                     (register router)
 app/routers/bff/main.py                     (register router)
 ```
 ---
--- a/content/choose-your-own-adventure/README.md
+++ b/content/choose-your-own-adventure/README.md
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@ -0,0 +1,80 @@
 services:
  db:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-langlearn}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB:-langlearn}
    volumes:
      - pgdata:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-langlearn}"]
      interval: 5s
      timeout: 5s
      retries: 10
  storage:
    image: minio/minio:latest
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: ${STORAGE_ACCESS_KEY:-langlearn}
      MINIO_ROOT_PASSWORD: ${STORAGE_SECRET_KEY}
    ports:
      - "9000:9000"
      - "9001:9001"
    volumes:
      - storagedata:/data
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:9000/minio/health/live || exit 1"]
      interval: 5s
      timeout: 5s
      retries: 10
  api:
    build: ./api
    volumes: 
      - ./api:/app:z
    ports:
      - "${API_PORT:-8000}:8000"
    command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn}
      ADMIN_USER_EMAILS: ${ADMIN_USER_EMAILS:-wilson@thomaswilson.xyz}
      API_BASE_URL: ${API_BASE_URL:-http://localhost:8000}
      JWT_SECRET: ${JWT_SECRET}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      DEEPL_API_KEY: ${DEEPL_API_KEY}
      DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY}
      GEMINI_API_KEY: ${GEMINI_API_KEY}
      PYTHONPATH: /app
      STORAGE_PROVIDER: local
      STORAGE_ENDPOINT_URL: http://storage:9000
      STORAGE_ACCESS_KEY: ${STORAGE_ACCESS_KEY:-langlearn}
      STORAGE_SECRET_KEY: ${STORAGE_SECRET_KEY}
      STORAGE_BUCKET: ${STORAGE_BUCKET:-langlearn}
      TRANSACTIONAL_EMAIL_PROVIDER: ${TRANSACTIONAL_EMAIL_PROVIDER:-stub}
    depends_on:
      db:
        condition: service_healthy
      storage:
        condition: service_healthy
    restart: unless-stopped
  frontend:
    build:
      context: ./frontend
      args:
        PUBLIC_API_BASE_URL: ${PUBLIC_API_BASE_URL:-http://localhost:8000}
    ports:
      - "${FRONTEND_PORT:-3000}:3000"
    environment:
      ORIGIN: ${ORIGIN:-http://localhost:3000}
    depends_on:
      - api
    restart: unless-stopped
 volumes:
  pgdata:
  storagedata:
--- a/docker-compose-prod.yml
+++ b/docker-compose-prod.yml
@ -0,0 +1,83 @@
 services:
  db:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-langlearn}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB:-langlearn}
    volumes:
      - pgdata:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-langlearn}"]
      interval: 10s
      timeout: 5s
      retries: 10
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G
  api:
    build: ./api
    ports:
      - "${API_PORT:-8000}:8000"
    command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 2
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn}
      ADMIN_USER_EMAILS: ${ADMIN_USER_EMAILS}
      API_BASE_URL: ${API_BASE_URL}
      JWT_SECRET: ${JWT_SECRET}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      DEEPL_API_KEY: ${DEEPL_API_KEY}
      DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY}
      GEMINI_API_KEY: ${GEMINI_API_KEY}
      PYTHONPATH: /app
      STORAGE_PROVIDER: bunny
      BUNNY_ZONE: ${BUNNY_ZONE}
      BUNNY_API_KEY: ${BUNNY_API_KEY}
      BUNNY_CDN_BASE_URL: ${BUNNY_CDN_BASE_URL}
      BUNNY_TOKEN_AUTH_KEY: ${BUNNY_TOKEN_AUTH_KEY}
      TRANSACTIONAL_EMAIL_PROVIDER: ${TRANSACTIONAL_EMAIL_PROVIDER}
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:8000/health || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 20s
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G
  frontend:
    build:
      context: ./frontend
      args:
        PUBLIC_API_BASE_URL: ${PUBLIC_API_BASE_URL}
    ports:
      - "${FRONTEND_PORT:-3000}:3000"
    environment:
      ORIGIN: ${ORIGIN}
    depends_on:
      api:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M
 volumes:
  pgdata:
 networks:
  default:
    name: langlearn
--- a/frontend/src/routes/app/adventures/[id]/+page.svelte
+++ b/frontend/src/routes/app/adventures/[id]/+page.svelte
@ -299,6 +299,7 @@
 		<LatestEntry
 			sourceText={latestEntry?.story_text}
 			translationText={latestEntry?.translation}
 			storyTextLinguisticData={latestEntry?.story_text_linguistic_data}
 			audioUrl={latestEntry?.audio_url}
 			onSelectNextStep={handleNextStepSelect}
 			isWaitingForGeneration={$adventureState.ui.isWaitingForGeneration}
--- a/frontend/src/routes/app/adventures/[id]/LatestEntry.svelte
+++ b/frontend/src/routes/app/adventures/[id]/LatestEntry.svelte
@ -6,6 +6,7 @@
 	type Props = {
 		sourceText: string | null | undefined;
 		translationText: string | null | undefined;
 		storyTextLinguisticData: Record<string, unknown> | null | undefined;
 		audioUrl: string | null | undefined;
 		onSelectNextStep: (optionId: string) => Promise<void>;
@ -24,6 +25,7 @@
 	const {
 		sourceText,
 		translationText,
 		storyTextLinguisticData,
 		audioUrl,
 		onSelectNextStep,
@ -33,10 +35,222 @@
 		errorMessage
 	}: Props = $props();
-	const sourceParagraphs = $derived.by(() => toParagraphs(sourceText));
+	type LinguisticToken = {
-	const translationParagraphs = $derived.by(() => toParagraphs(translationText));
+		text: string;
 		lemma: string | null;
 		pos: string | null;
 	};
-	let lastClickedParagraphIndex: number | null = $state(null);
+	type TextSegment = {
 		kind: 'text';
 		text: string;
 	};
 	/**
 	 * A slice of sentence text that corresponds to one linguistic token.
 	 * `lemma` and `pos` are copied from the token and are null when the token
 	 * did not provide them as strings (`pos` is presumably a part-of-speech
 	 * tag — confirm against the API payload).
 	 */
 	type WordSegment = {
 		kind: 'word';
 		text: string;
 		lemma: string | null;
 		pos: string | null;
 	};
 	/**
 	 * One sentence in both languages: its raw source/target strings plus the
 	 * same strings split into text and word segments. `key` is used as the
 	 * keyed-each identifier and as the sentence id stored in `selectedWord`.
 	 */
 	type SentenceSegments = {
 		key: string;
 		sourceText: string;
 		targetText: string;
 		sourceSegments: Array<TextSegment | WordSegment>;
 		targetSegments: Array<TextSegment | WordSegment>;
 	};
 	/**
 	 * One paragraph of the entry: paragraph-level source/target strings and
 	 * the sentences it contains. `key` is used as the keyed-each identifier.
 	 */
 	type LinguisticParagraph = {
 		key: string;
 		sourceText: string;
 		targetText: string;
 		sentences: SentenceSegments[];
 	};
 	/**
 	 * Type guard for loosely-typed payloads: true for any non-null value whose
 	 * `typeof` is 'object' (arrays included), narrowing it to a string-keyed
 	 * record.
 	 */
 	function isRecord(value: unknown): value is Record<string, unknown> {
 		if (value === null) {
 			return false;
 		}
 		return typeof value === 'object';
 	}
 	/** Returns `value` unchanged when it is a string; otherwise returns null. */
 	function asString(value: unknown): string | null {
 		if (typeof value === 'string') {
 			return value;
 		}
 		return null;
 	}
 	/**
 	 * Converts an untyped token list into `LinguisticToken`s.
 	 *
 	 * Returns an empty array when `value` is not an array. Entries that are
 	 * not objects, or whose `text` is missing, non-string or empty, are
 	 * dropped; `lemma` and `pos` become null when they are not strings.
 	 */
 	function parseTokens(value: unknown): LinguisticToken[] {
 		const parsed: LinguisticToken[] = [];
 		if (!Array.isArray(value)) {
 			return parsed;
 		}
 		for (const candidate of value) {
 			if (!isRecord(candidate)) {
 				continue;
 			}
 			const tokenText = asString(candidate.text);
 			// Empty strings are rejected as well as non-strings.
 			if (!tokenText) {
 				continue;
 			}
 			parsed.push({
 				text: tokenText,
 				lemma: asString(candidate.lemma),
 				pos: asString(candidate.pos)
 			});
 		}
 		return parsed;
 	}
 	/**
 	 * Splits `text` into alternating plain-text and word segments by locating
 	 * each token, in order, inside the text.
 	 *
 	 * - No tokens: the whole text becomes one text segment (or nothing when
 	 *   the text is empty).
 	 * - No text: the tokens become word segments joined by single spaces.
 	 * - Otherwise each token is searched for from the end of the previous
 	 *   match; tokens that cannot be found are skipped, and any text between
 	 *   or after matches is emitted as text segments.
 	 */
 	function buildSegments(
 		text: string,
 		tokens: LinguisticToken[]
 	): Array<TextSegment | WordSegment> {
 		const asText = (content: string): TextSegment => ({ kind: 'text', text: content });
 		const asWord = (token: LinguisticToken): WordSegment => ({
 			kind: 'word',
 			text: token.text,
 			lemma: token.lemma,
 			pos: token.pos
 		});

 		if (tokens.length === 0) {
 			return text ? [asText(text)] : [];
 		}

 		const result: Array<TextSegment | WordSegment> = [];

 		if (!text) {
 			// No sentence text to align against: rebuild it from the tokens.
 			tokens.forEach((token, position) => {
 				if (position > 0) {
 					result.push(asText(' '));
 				}
 				result.push(asWord(token));
 			});
 			return result;
 		}

 		let consumed = 0;
 		for (const token of tokens) {
 			const start = text.indexOf(token.text, consumed);
 			if (start < 0) {
 				continue;
 			}
 			if (start > consumed) {
 				result.push(asText(text.slice(consumed, start)));
 			}
 			result.push(asWord(token));
 			consumed = start + token.text.length;
 		}

 		if (consumed < text.length) {
 			result.push(asText(text.slice(consumed)));
 		}

 		return result.length > 0 ? result : [asText(text)];
 	}
 	/**
 	 * Turns the untyped `story_text_linguistic_data` payload into a list of
 	 * paragraphs with segmented sentences.
 	 *
 	 * Returns an empty array when the payload is missing or has no
 	 * `paragraphs` array. Non-object paragraphs and sentences are skipped.
 	 * Paragraph text falls back to the space-joined sentence texts when the
 	 * paragraph has no string `source_text` / `target_text`. A paragraph with
 	 * text but no sentences gets one synthetic sentence; a paragraph with
 	 * neither text nor sentences is dropped.
 	 */
 	function parseLinguisticParagraphs(
 		value: Record<string, unknown> | null | undefined
 	): LinguisticParagraph[] {
 		const rawParagraphs = value ? value.paragraphs : undefined;
 		if (!Array.isArray(rawParagraphs)) {
 			return [];
 		}

 		const result: LinguisticParagraph[] = [];

 		rawParagraphs.forEach((rawParagraph, paragraphIndex) => {
 			if (!isRecord(rawParagraph)) {
 				return;
 			}

 			const sentences: SentenceSegments[] = [];
 			const rawSentences = rawParagraph.sentences;
 			if (Array.isArray(rawSentences)) {
 				rawSentences.forEach((rawSentence, sentenceIndex) => {
 					if (!isRecord(rawSentence)) {
 						return;
 					}
 					const sourceSentence = asString(rawSentence.source_text) ?? '';
 					const targetSentence = asString(rawSentence.target_text) ?? '';
 					const sourceTokens = parseTokens(rawSentence.source_tokens);
 					const targetTokens = parseTokens(rawSentence.target_tokens);
 					sentences.push({
 						// Indices refer to positions in the raw arrays, so skipped
 						// entries leave gaps rather than renumbering the keys.
 						key: `${paragraphIndex}-${sentenceIndex}`,
 						sourceText: sourceSentence,
 						targetText: targetSentence,
 						sourceSegments: buildSegments(sourceSentence, sourceTokens),
 						targetSegments: buildSegments(targetSentence, targetTokens)
 					});
 				});
 			}

 			const joinNonEmpty = (pick: (sentence: SentenceSegments) => string): string =>
 				sentences.map(pick).filter(Boolean).join(' ');

 			const sourceText =
 				asString(rawParagraph.source_text) ?? joinNonEmpty((sentence) => sentence.sourceText);
 			const targetText =
 				asString(rawParagraph.target_text) ?? joinNonEmpty((sentence) => sentence.targetText);

 			if (sentences.length === 0) {
 				if (!sourceText && !targetText) {
 					// Nothing to show for this paragraph.
 					return;
 				}
 				// Text without sentence data: expose it as one untokenised sentence.
 				sentences.push({
 					key: `${paragraphIndex}-0`,
 					sourceText,
 					targetText,
 					sourceSegments: sourceText ? [{ kind: 'text', text: sourceText }] : [],
 					targetSegments: targetText ? [{ kind: 'text', text: targetText }] : []
 				});
 			}

 			result.push({
 				key: `p-${paragraphIndex}`,
 				sourceText,
 				targetText,
 				sentences
 			});
 		});

 		return result;
 	}
 	/**
 	 * True when `text` contains at least one Unicode letter (`\p{L}`) or
 	 * number (`\p{N}`) character, i.e. it is not purely punctuation or
 	 * whitespace.
 	 */
 	function isWordLike(text: string): boolean {
 		const letterOrNumber = /[\p{L}\p{N}]/u;
 		return text.search(letterOrNumber) >= 0;
 	}
 	// Structured paragraphs parsed from the entry's linguistic data; empty when
 	// the data is missing or has no `paragraphs` array.
 	const linguisticParagraphs = $derived.by(() =>
 		parseLinguisticParagraphs(storyTextLinguisticData)
 	);
 	// Paragraph strings for the source pane. When linguistic data is present the
 	// pane uses each paragraph's `targetText`; otherwise it falls back to
 	// splitting the `sourceText` prop.
 	// NOTE(review): "source pane" shows the linguistic data's *target* text and
 	// the translation pane its *source* text — presumably the payload names the
 	// languages from the translator's point of view; confirm against the API.
 	const sourceParagraphs = $derived.by(() =>
 		linguisticParagraphs.length > 0
 			? linguisticParagraphs.map((paragraph) => paragraph.targetText).filter(Boolean)
 			: toParagraphs(sourceText)
 	);
 	// Paragraph strings for the translation pane: each paragraph's `sourceText`
 	// when linguistic data is present, otherwise the split `translationText` prop.
 	const translationParagraphs = $derived.by(() =>
 		linguisticParagraphs.length > 0
 			? linguisticParagraphs.map((paragraph) => paragraph.sourceText).filter(Boolean)
 			: toParagraphs(translationText)
 	);
 	let selectedWord: { sentenceKey: string; text: string } | null = $state(null);
 	let sourcePane = $state<HTMLDivElement | undefined>();
 	let translationPane = $state<HTMLDivElement | undefined>();
 	let suppressSourceScroll = $state(false);
@ -101,8 +315,9 @@
 		}, 20000);
 	}
-	function handleParagraphClicked(paragraphIndex: number) {
+	function handleWordClicked(sentenceKey: string, text: string) {
-		lastClickedParagraphIndex = paragraphIndex;
+		selectedWord = { sentenceKey, text };
 		showTranslation();
 	}
 	async function handleNextStepSelect(optionId: string) {
@ -146,18 +361,45 @@
 			<div class="pane source-pane">
 				<div class="latest-entry__pane-body" bind:this={sourcePane} onscroll={handleSourceScroll}>
 					{#if sourceParagraphs.length > 0}
-						{#each sourceParagraphs as paragraph, index (index)}
+						{#if linguisticParagraphs.length > 0}
 							{#each linguisticParagraphs as paragraph (paragraph.key)}
 								<p class="paragraph paragraph--text" data-language="source">
 									{#each paragraph.sentences as sentence (sentence.key)}
 										<span
 											class="sentence-chunk"
 											class:active-sentence={selectedWord?.sentenceKey === sentence.key}
 										>
 											{#each sentence.targetSegments as segment, segmentIndex (`${sentence.key}-target-${segmentIndex}`)}
 												{#if segment.kind === 'word' && isWordLike(segment.text)}
 													<button
 														type="button"
-								class="paragraph"
+														class="word-token"
-								class:active={lastClickedParagraphIndex === index}
+														class:active={selectedWord?.sentenceKey === sentence.key &&
 															selectedWord?.text === segment.text}
 														title={segment.lemma ? `Lemma: ${segment.lemma}` : undefined}
 														onclick={() => handleWordClicked(sentence.key, segment.text)}
 													>
 														{segment.text}
 													</button>
 												{:else}
 													<span>{segment.text}</span>
 												{/if}
 											{/each}
 										</span>
 									{/each}
 								</p>
 							{/each}
 						{:else}
 							{#each sourceParagraphs as paragraph, index (index)}
 								<p
 									class="paragraph paragraph--text"
 									data-paragraph-index={index}
 									data-language="source"
 								onclick={() => handleParagraphClicked(index)}
 								>
 									{paragraph}
-							</button>
+								</p>
 							{/each}
 						{/if}
 					{:else}
 						<div class="loading-block" role="status" aria-live="polite">
 							<p class="loading-block__label">{statusMessage || 'Writing your next entry...'}</p>
@ -177,6 +419,12 @@
 					</button>
 				</header>
 				{#if selectedWord}
 					<p class="translation-selected-word" role="status" aria-live="polite">
 						Selected word: <strong>{selectedWord.text}</strong>
 					</p>
 				{/if}
 				{#if translationVisible}
 					<div
 						class="latest-entry__pane-body"
@ -184,18 +432,42 @@
 						onscroll={handleTranslationScroll}
 					>
 						{#if translationParagraphs.length > 0}
 							{#if linguisticParagraphs.length > 0}
 								{#each linguisticParagraphs as paragraph (paragraph.key)}
 									<p class="paragraph paragraph--text" data-language="translation">
 										{#each paragraph.sentences as sentence (sentence.key)}
 											<span
 												class="sentence-chunk"
 												class:active-sentence={selectedWord?.sentenceKey === sentence.key}
 											>
 												{#each sentence.sourceSegments as segment, segmentIndex (`${sentence.key}-source-${segmentIndex}`)}
 													{#if segment.kind === 'word'}
 														<span
 															class="word-token word-token--passive"
 															class:active={selectedWord?.sentenceKey === sentence.key &&
 																selectedWord?.text === segment.text}
 														>
 															{segment.text}
 														</span>
 													{:else}
 														<span>{segment.text}</span>
 													{/if}
 												{/each}
 											</span>
 										{/each}
 									</p>
 								{/each}
 							{:else}
 								{#each translationParagraphs as paragraph, index (index)}
-								<button
+									<p
-									type="button"
+										class="paragraph paragraph--text"
 									class="paragraph"
 									class:active={lastClickedParagraphIndex === index}
 										data-paragraph-index={index}
 										data-language="translation"
 									onclick={() => handleParagraphClicked(index)}
 									>
 										{paragraph}
-								</button>
+									</p>
 								{/each}
 							{/if}
 						{:else}
 							<div class="loading-block" role="status" aria-live="polite">
 								<p class="loading-block__label">
@ -511,6 +783,18 @@
 		color: color-mix(in srgb, var(--color-on-surface) 72%, transparent);
 	}
 	/* "Selected word: …" status line shown in the translation pane. */
 	.translation-selected-word {
 		margin: 0;
 		padding: 0 var(--latest-entry-pane-padding) var(--space-2);
 		font-size: var(--text-label-md);
 		color: color-mix(in srgb, var(--color-on-surface) 84%, transparent);
 	}
 	/* The selected word itself is emphasised in the primary colour. */
 	.translation-selected-word strong {
 		font-weight: var(--weight-semibold);
 		color: var(--color-primary);
 	}
 	.latest-entry__pane-body::-webkit-scrollbar {
 		width: 0.75rem;
 	}
@ -542,9 +826,58 @@
 		transition: background-color var(--duration-fast) var(--ease-standard);
 	}
-	.paragraph.active {
+	.paragraph--text {
-		background-color: color-mix(in srgb, var(--color-primary-container) 56%, transparent);
+		margin: 0;
-		border-radius: var(--radius-md);
+		padding: 0;
 		border: none;
 		background: transparent;
 		cursor: default;
 		white-space: pre-wrap;
 	}
 	/* Inline wrapper around one sentence's segments. */
 	.sentence-chunk {
 		display: inline;
 		border-radius: var(--radius-sm);
 		transition: background-color var(--duration-fast) var(--ease-standard);
 		/* Empty 0.85ch-wide inline-block appended after every sentence;
 		   presumably the visual gap between adjacent sentences — confirm. */
 		&::after {
 			content: '';
 			display: inline-block;
 			width: 0.85ch;
 		}
 	}
 	/* Tinted background for the sentence containing the selected word. */
 	.sentence-chunk.active-sentence {
 		background-color: color-mix(in srgb, var(--color-primary-container) 32%, transparent);
 	}
 	/* Word tokens: used on <button> in the source pane and <span> in the
 	   translation pane. The reset below removes padding, margin, border and
 	   background and inherits font, line-height and colour so the element
 	   flows as ordinary inline text. */
 	.word-token {
 		display: inline;
 		padding: 0;
 		margin: 0;
 		border: none;
 		background: transparent;
 		color: inherit;
 		font: inherit;
 		line-height: inherit;
 		cursor: pointer;
 	}
 	.word-token:hover {
 		text-decoration: underline;
 	}
 	/* Selected word: primary colour plus underline. */
 	.word-token.active {
 		color: var(--color-primary);
 		text-decoration: underline;
 	}
 	/* Passive variant: default cursor and no hover underline. */
 	.word-token--passive {
 		cursor: default;
 	}
 	.word-token--passive:hover {
 		text-decoration: none;
 	}
 	.paragraph:focus-visible {
--- a/frontend/src/routes/app/adventures/[id]/adventureState.ts
+++ b/frontend/src/routes/app/adventures/[id]/adventureState.ts
@ -4,6 +4,7 @@ export type AdventureEntry = {
 	id: string;
 	story_text: string | null;
 	translation: string | null;
 	story_text_linguistic_data: Record<string, unknown> | null;
 	audio_url: string | null;
 	generated_from_choice_id: string | null;
 	possible_choices: { id: string; text: string }[] | null;
Author	SHA1	Message	Date
wilson	9b9bdc3a39	feat: Change out storage from local to Bunny (via env param) Some checks failed / test (push) Has been cancelled Details	2026-05-18 21:18:19 +01:00
wilson	293a8ab3f9	fix: Commit various features relating to CYOA	2026-05-17 13:36:21 +01:00