diff --git a/api/app/config.py b/api/app/config.py index 6303790..15f872e 100644 --- a/api/app/config.py +++ b/api/app/config.py @@ -15,10 +15,16 @@ class Settings(BaseSettings): scaleway_tem_project_id: str = "" scaleway_tem_from_address: str = "" scaleway_tem_region: str = "fr-par" - storage_endpoint_url: str - storage_access_key: str - storage_secret_key: str + storage_provider: str = "local" # or 'bunny' + storage_endpoint_url: str = "" + storage_access_key: str = "" + storage_secret_key: str = "" storage_bucket: str = "langlearn" + bunny_zone: str = "languagelearningapp" + bunny_api_key: str = "" + bunny_cdn_base_url: str = "" + bunny_token_auth_key: str = "" + bunny_storage_endpoint: str = "https://storage.bunnycdn.com" stub_generation: bool = False model_config = {"env_file": ".env"} diff --git a/api/app/domain/services/adventure_service.py b/api/app/domain/services/adventure_service.py index e2b38db..f78b9fa 100644 --- a/api/app/domain/services/adventure_service.py +++ b/api/app/domain/services/adventure_service.py @@ -24,7 +24,7 @@ from ...outbound.postgres.repositories.adventure_repository import ( PostgresAdventureRepository, ) from ...outbound.spacy.spacy_client import SpacyClient -from ...storage import upload_audio +from ...outbound.storage_client import get_storage_client from ..models.adventure import ( Adventure, AdventureEntry, @@ -315,7 +315,7 @@ class AdventureService: # ── File upload ─────────────────────────────────────────────────── t0 = time.monotonic() audio_key = f"adventure-audio/{entry_id}.wav" - upload_audio(audio_key, wav_bytes) + get_storage_client().upload(audio_key, wav_bytes) timing_file_uploading = time.monotonic() - t0 await self.audio_repo.create( diff --git a/api/app/domain/services/summarise_service.py b/api/app/domain/services/summarise_service.py index b866c51..52f8579 100644 --- a/api/app/domain/services/summarise_service.py +++ b/api/app/domain/services/summarise_service.py @@ -14,7 +14,7 @@ from 
...outbound.deepgram.deepgram_client import LocalDeepgramClient from ...outbound.deepl.deepl_client import DeepLClient from ...outbound.gemini.gemini_client import GeminiClient from ...outbound.spacy.spacy_client import SpacyClient -from ...storage import upload_audio +from ...outbound.storage_client import get_storage_client from ...languages import SUPPORTED_LANGUAGES @@ -137,7 +137,7 @@ class SummariseService: voice = self.gemini_client.get_voice_by_language(target_language) wav_bytes = await self.gemini_client.generate_audio(generated_text, voice) audio_key = f"audio/{job_id}.wav" - upload_audio(audio_key, wav_bytes) + get_storage_client().upload(audio_key, wav_bytes) transcript = await self.deepgram_client.transcribe_bytes(wav_bytes, target_language) diff --git a/api/app/main.py b/api/app/main.py index 0f58828..1028c4e 100644 --- a/api/app/main.py +++ b/api/app/main.py @@ -8,13 +8,13 @@ from .routers.api import jobs from .routers import media as media_router from .routers.api.main import api_router from .routers.bff.main import bff_router -from .storage import ensure_bucket_exists +from .outbound.storage_factory import init_storage from . 
import worker @asynccontextmanager async def lifespan(app: FastAPI): - ensure_bucket_exists() + init_storage() worker_task = asyncio.create_task(worker.worker_loop()) yield worker_task.cancel() diff --git a/api/app/outbound/bunny/__init__.py b/api/app/outbound/bunny/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/app/outbound/bunny/bunny_client.py b/api/app/outbound/bunny/bunny_client.py new file mode 100644 index 0000000..6fa8d58 --- /dev/null +++ b/api/app/outbound/bunny/bunny_client.py @@ -0,0 +1,77 @@ +import base64 +import hashlib +import time +import urllib.error +import urllib.request + +_SIGNED_URL_EXPIRY_SECONDS = 3600 + + +class BunnyClient: + def __init__( + self, + zone: str, + api_key: str, + cdn_base_url: str, + token_auth_key: str, + storage_endpoint: str = "https://storage.bunnycdn.com", + ) -> None: + self._zone = zone + self._api_key = api_key + self._cdn_base_url = cdn_base_url.rstrip("/") + self._token_auth_key = token_auth_key + self._storage_endpoint = storage_endpoint.rstrip("/") + + def _storage_url(self, path: str) -> str: + return f"{self._storage_endpoint}/{self._zone}/{path.lstrip('/')}" + + def upload(self, path: str, data: bytes) -> bool: + req = urllib.request.Request( + self._storage_url(path), + data=data, + method="PUT", + headers={ + "AccessKey": self._api_key, + "Content-Type": "audio/wav", + }, + ) + try: + with urllib.request.urlopen(req) as resp: + return resp.status == 201 + except urllib.error.HTTPError: + return False + + def get_url(self, path: str) -> str: + url_path = f"/{path.lstrip('/')}" + expiration = int(time.time()) + _SIGNED_URL_EXPIRY_SECONDS + digest = hashlib.sha256( + (self._token_auth_key + url_path + str(expiration)).encode() + ).digest() + token = ( + base64.b64encode(digest) + .decode() + .replace("+", "-") + .replace("/", "_") + .replace("=", "") + ) + return f"{self._cdn_base_url}{url_path}?token={token}&expires={expiration}" + + def get_public_url(self, path: str) -> str: + 
return f"{self._cdn_base_url}/{path.lstrip('/')}" + + def delete(self, path: str) -> bool: + req = urllib.request.Request( + self._storage_url(path), + method="DELETE", + headers={"AccessKey": self._api_key}, + ) + try: + with urllib.request.urlopen(req) as resp: + return resp.status == 200 + except urllib.error.HTTPError: + return False + + def download(self, path: str) -> tuple[bytes, str]: + raise NotImplementedError( + "Direct download not available with Bunny — use get_url() to obtain a signed CDN URL" + ) diff --git a/api/app/outbound/gemini/gemini_client.py b/api/app/outbound/gemini/gemini_client.py index a00665e..f25e112 100644 --- a/api/app/outbound/gemini/gemini_client.py +++ b/api/app/outbound/gemini/gemini_client.py @@ -1,9 +1,19 @@ import asyncio +import io +import wave from google import genai from google.genai import types as genai_types -from ...storage import pcm_to_wav + +def _pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes: + buf = io.BytesIO() + with wave.open(buf, "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(sample_rate) + wf.writeframes(pcm_data) + return buf.getvalue() VOICE_BY_LANGUAGE: dict[str, str] = { "fr": "Kore", @@ -47,6 +57,6 @@ class GeminiClient(): ), ) pcm_data = response.candidates[0].content.parts[0].inline_data.data - return pcm_to_wav(pcm_data) + return _pcm_to_wav(pcm_data) return await asyncio.to_thread(_call) diff --git a/api/app/outbound/minio/__init__.py b/api/app/outbound/minio/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/app/outbound/minio/minio_client.py b/api/app/outbound/minio/minio_client.py new file mode 100644 index 0000000..63a2cb9 --- /dev/null +++ b/api/app/outbound/minio/minio_client.py @@ -0,0 +1,70 @@ +import boto3 +from botocore.exceptions import ClientError + + +class MinioClient: + def __init__( + self, + endpoint_url: str, + access_key: str, + secret_key: str, + bucket: str, + api_base_url: str, + ) -> None: + self._endpoint_url = 
endpoint_url + self._access_key = access_key + self._secret_key = secret_key + self._bucket = bucket + self._api_base_url = api_base_url.rstrip("/") + + def _s3(self): + return boto3.client( + "s3", + endpoint_url=self._endpoint_url, + aws_access_key_id=self._access_key, + aws_secret_access_key=self._secret_key, + ) + + def ensure_bucket_exists(self) -> None: + client = self._s3() + try: + client.head_bucket(Bucket=self._bucket) + except ClientError as e: + if e.response["Error"]["Code"] in ("404", "NoSuchBucket"): + client.create_bucket(Bucket=self._bucket) + else: + raise + + def upload(self, path: str, data: bytes) -> bool: + try: + self._s3().put_object( + Bucket=self._bucket, + Key=path, + Body=data, + ContentType="audio/wav", + ) + return True + except ClientError: + return False + + def get_url(self, path: str) -> str: + return f"{self._api_base_url}/media/{path}" + + def get_public_url(self, path: str) -> str: + return f"{self._api_base_url}/media/{path}" + + def delete(self, path: str) -> bool: + try: + self._s3().delete_object(Bucket=self._bucket, Key=path) + return True + except ClientError: + return False + + def download(self, path: str) -> tuple[bytes, str]: + try: + response = self._s3().get_object(Bucket=self._bucket, Key=path) + return response["Body"].read(), response.get("ContentType", "audio/wav") + except ClientError as e: + if e.response["Error"]["Code"] in ("NoSuchKey", "404"): + raise FileNotFoundError(path) + raise diff --git a/api/app/outbound/storage_client.py b/api/app/outbound/storage_client.py new file mode 100644 index 0000000..1f1af8e --- /dev/null +++ b/api/app/outbound/storage_client.py @@ -0,0 +1,21 @@ +from typing import Protocol + +_client: "StorageClient | None" = None + + +class StorageClient(Protocol): + def upload(self, path: str, data: bytes) -> bool: ... + def get_url(self, path: str) -> str: ... + def get_public_url(self, path: str) -> str: ... + def delete(self, path: str) -> bool: ... 
+ def download(self, path: str) -> tuple[bytes, str]: ... + + +def get_storage_client() -> "StorageClient": + assert _client is not None, "Storage client not initialised — call init_storage() at startup" + return _client + + +def _set_storage_client(c: "StorageClient") -> None: + global _client + _client = c diff --git a/api/app/outbound/storage_factory.py b/api/app/outbound/storage_factory.py new file mode 100644 index 0000000..ecc6130 --- /dev/null +++ b/api/app/outbound/storage_factory.py @@ -0,0 +1,27 @@ +from ..config import settings +from .storage_client import StorageClient, _set_storage_client +from .minio.minio_client import MinioClient +from .bunny.bunny_client import BunnyClient + + +def init_storage() -> None: + client: StorageClient + if settings.storage_provider == "bunny": + client = BunnyClient( + zone=settings.bunny_zone, + api_key=settings.bunny_api_key, + cdn_base_url=settings.bunny_cdn_base_url, + token_auth_key=settings.bunny_token_auth_key, + storage_endpoint=settings.bunny_storage_endpoint, + ) + else: + minio = MinioClient( + endpoint_url=settings.storage_endpoint_url, + access_key=settings.storage_access_key, + secret_key=settings.storage_secret_key, + bucket=settings.storage_bucket, + api_base_url=settings.api_base_url, + ) + minio.ensure_bucket_exists() + client = minio + _set_storage_client(client) diff --git a/api/app/routers/api/jobs.py b/api/app/routers/api/jobs.py index da3f099..9e89d74 100644 --- a/api/app/routers/api/jobs.py +++ b/api/app/routers/api/jobs.py @@ -12,7 +12,7 @@ from ...outbound.postgres.repositories import summarise_job_repository from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository from ...outbound.postgres.entities.translated_article_entity import TranslatedArticleEntity from ...outbound.gemini.gemini_client import GeminiClient -from ...storage import upload_audio +from ...outbound.storage_client import get_storage_client from ...config import settings from ... 
import worker @@ -92,7 +92,7 @@ async def _run_regenerate_audio(job_id: uuid.UUID) -> None: voice = gemini_client.get_voice_by_language(article_entity.target_language) wav_bytes = await gemini_client.generate_audio(article_entity.target_body, voice) audio_key = f"audio/{job_id}.wav" - upload_audio(audio_key, wav_bytes) + get_storage_client().upload(audio_key, wav_bytes) await article_repo.update_audio( article_entity.id, diff --git a/api/app/routers/bff/adventure.py b/api/app/routers/bff/adventure.py index c9996eb..50d77fe 100644 --- a/api/app/routers/bff/adventure.py +++ b/api/app/routers/bff/adventure.py @@ -5,8 +5,8 @@ from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncSession from ...auth import verify_token -from ...config import settings from ...outbound.postgres.database import get_db +from ...outbound.storage_client import get_storage_client from ...outbound.postgres.repositories.adventure_repository import ( PostgresAdventureEntryAudioRepository, PostgresAdventureEntryChoiceRepository, @@ -61,7 +61,7 @@ class AdventureDetailResponse(BaseModel): def _audio_url(key: str | None) -> str | None: if key is None: return None - return f"{settings.api_base_url}/media/{key}" + return get_storage_client().get_url(key) @router.get("/{adventure_id}", response_model=AdventureDetailResponse, status_code=200) diff --git a/api/app/routers/bff/articles.py b/api/app/routers/bff/articles.py index e829b85..c877775 100644 --- a/api/app/routers/bff/articles.py +++ b/api/app/routers/bff/articles.py @@ -6,8 +6,8 @@ from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncSession from ...auth import verify_token -from ...config import settings from ...outbound.postgres.database import get_db +from ...outbound.storage_client import get_storage_client from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository router = APIRouter(prefix="/articles", tags=["bff", "articles"]) @@ -46,7 +46,7 @@ class 
ArticleDetail(BaseModel): def _audio_url(key: str | None) -> str | None: if key is None: return None - return f"{settings.api_base_url}/media/{key}" + return get_storage_client().get_url(key) @router.get("", response_model=ArticleListResponse, status_code=200) diff --git a/api/app/routers/media.py b/api/app/routers/media.py index 47e7986..c804006 100644 --- a/api/app/routers/media.py +++ b/api/app/routers/media.py @@ -3,12 +3,11 @@ import uuid from fastapi import APIRouter, Depends, HTTPException from fastapi.responses import Response from sqlalchemy.ext.asyncio import AsyncSession -from botocore.exceptions import ClientError from ..outbound.postgres.database import get_db from ..outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository from ..outbound.postgres.repositories.adventure_repository import PostgresAdventureEntryAudioRepository -from ..storage import download_audio +from ..outbound.storage_client import get_storage_client router = APIRouter(prefix="/media", tags=["media"]) @@ -22,22 +21,24 @@ async def get_adventure_audio_file( eid = uuid.UUID(filename.rsplit(".", 1)[0]) except ValueError: raise HTTPException(status_code=400, detail="Invalid file ID") - - print(f"Looking for adventure audio with entry ID: {eid}") - + adventure_audio = await PostgresAdventureEntryAudioRepository(db).get_for_entry(entry_id=eid, component_type="story_text") - + if adventure_audio is None: raise HTTPException(status_code=404, detail="File not found") + try: - audio_bytes, content_type = download_audio("adventure-audio/" + filename) - except ClientError as e: - if e.response["Error"]["Code"] in ("NoSuchKey", "404"): - raise HTTPException(status_code=404, detail="File not found") + audio_bytes, content_type = get_storage_client().download("adventure-audio/" + filename) + except FileNotFoundError: + raise HTTPException(status_code=404, detail="File not found") + except NotImplementedError: + raise HTTPException(status_code=501, 
detail="Media proxy not available with current storage provider") + except Exception: raise HTTPException(status_code=500, detail="Storage error") return Response(content=audio_bytes, media_type=content_type) + @router.get("/{filename:path}") async def get_media_file( filename: str, @@ -49,11 +50,12 @@ async def get_media_file( raise HTTPException(status_code=404, detail="File not found") try: - audio_bytes, content_type = download_audio(filename) - except ClientError as e: - if e.response["Error"]["Code"] in ("NoSuchKey", "404"): - raise HTTPException(status_code=404, detail="File not found") + audio_bytes, content_type = get_storage_client().download(filename) + except FileNotFoundError: + raise HTTPException(status_code=404, detail="File not found") + except NotImplementedError: + raise HTTPException(status_code=501, detail="Media proxy not available with current storage provider") + except Exception: raise HTTPException(status_code=500, detail="Storage error") return Response(content=audio_bytes, media_type=content_type) - diff --git a/api/app/storage.py b/api/app/storage.py deleted file mode 100644 index 854e07c..0000000 --- a/api/app/storage.py +++ /dev/null @@ -1,56 +0,0 @@ -import io -import wave - -import boto3 -from botocore.exceptions import ClientError - -from .config import settings - - -def get_s3_client(): - return boto3.client( - "s3", - endpoint_url=settings.storage_endpoint_url, - aws_access_key_id=settings.storage_access_key, - aws_secret_access_key=settings.storage_secret_key, - ) - - -def ensure_bucket_exists() -> None: - client = get_s3_client() - try: - client.head_bucket(Bucket=settings.storage_bucket) - except ClientError as e: - if e.response["Error"]["Code"] in ("404", "NoSuchBucket"): - client.create_bucket(Bucket=settings.storage_bucket) - else: - raise - - -def pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes: - """Wrap raw 16-bit mono PCM data in a WAV container.""" - buf = io.BytesIO() - with wave.open(buf, "wb") as wf: 
- wf.setnchannels(1) - wf.setsampwidth(2) # 16-bit - wf.setframerate(sample_rate) - wf.writeframes(pcm_data) - return buf.getvalue() - - -def upload_audio(object_key: str, audio_bytes: bytes, content_type: str = "audio/wav") -> None: - client = get_s3_client() - client.put_object( - Bucket=settings.storage_bucket, - Key=object_key, - Body=audio_bytes, - ContentType=content_type, - ) - - -def download_audio(object_key: str) -> tuple[bytes, str]: - """Return (file_bytes, content_type).""" - client = get_s3_client() - response = client.get_object(Bucket=settings.storage_bucket, Key=object_key) - content_type = response.get("ContentType", "audio/wav") - return response["Body"].read(), content_type diff --git a/api/docs/design-doc-object-storage.md b/api/docs/design-doc-object-storage.md new file mode 100644 index 0000000..8dc2703 --- /dev/null +++ b/api/docs/design-doc-object-storage.md @@ -0,0 +1,82 @@ +# Design Document: Object Storage with Bunny CDN + +This is a technical design document for implementing object (e.g. audio file) storage with Bunny CDN. This directory (`api/docs`) contains other similar files, notably `architecture.md` and `domain.md`. When you have worked through the change described here, please update `architecture.md` + +## The problem + +Language Learning App has audio as a core component, which requires files to be delivered to the end user. When developing locally, these files have been stored in a min.io service, mimicking an S3-like storage bucket. + +Using this approach on a deployed instance (e.g. on a VPS using Docker), would result in high bandwidth and therefore a high cost. Using a dedicated, EU-based service like Bunny allows us to offload the delivery of content to a third-party at reduced cost (great!) + +## The current implementation + +Object storage was one of the first features built into this software in MVP state, as such it does not fit within the current architecture. 
+ +Right now `api/app/storage.py` contains some helper functions, notably the `upload_audio` and `download_audio` functions. + +Users (through the web client) retrieve the media through two URLs (detailed in `api/app/routers/media.py`): + +- `GET /media/adventure-audio/{filename:path}` for the choose-your-own-adventure file names +- `GET /media/{filename:path}`, used for the summary transcriptions + +## The solution + +We are going to use Bunny (bunny.net) as the CDN for all objects in deployed environments (right now, just production — in the future preprod or staging may exist). + +Locally, for development purposes, we retain the use of MinIO. To decide which backend to use, we introduce an environment variable `STORAGE_PROVIDER` with a default value of `local` and an accepted alternative of `bunny`. + +In situations where we use `local`, the existing `/media/..` proxy endpoints are returned when constructing audio URLs (e.g. in `api/app/routers/bff/articles.py` and `api/app/routers/bff/adventure.py`). When we use `bunny`, the Bunny CDN URL is returned directly so the request is never proxied through our service. + +### Client interface + +We will create a `BunnyClient` in `api/app/outbound/bunny/bunny_client.py` and extract the current MinIO logic into a `MinioClient` in `api/app/outbound/minio/minio_client.py`. Both implement a shared `StorageClient` protocol. + +The interface is **generic** — the clients are storage adapters and must not encode domain concepts. Path construction (which directory, which filename) is the responsibility of the caller (the service layer), not the client. + +```python +class StorageClient(Protocol): + def upload(self, path: str, data: bytes) -> bool: ... + def get_url(self, path: str) -> str: ... + def delete(self, path: str) -> bool: ... +``` + +Services construct paths using hardcoded directory prefixes (e.g. `"adventure-audio/"`, `"audio/"`). 
These are constants, not environment variables — they are not environment-specific and do not belong in config. + +### Factory and instantiation + +A factory function reads `STORAGE_PROVIDER` and returns the appropriate `StorageClient` implementation. The client is instantiated **once at app startup** (e.g. in `main.py`) as a module-level singleton — not per-request. This is consistent with how other outbound clients (`AnthropicClient`, `GeminiClient`, etc.) are handled. + +### Bunny configuration + +Bunny requires the following environment variables: + +- `BUNNY_ZONE` — the storage zone name (the zone `languagelearningapp` has been created in the Bunny UI). No "DEFAULT" suffix; there is one zone. +- `BUNNY_API_KEY` — the Bunny API key for upload/delete operations. +- `BUNNY_CDN_BASE_URL` — the public CDN hostname used to construct delivery URLs. + +### Signed vs. public URLs + +Because audio files are user-specific (i.e. one user should not be able to use another user's audio URL), Bunny signed URLs are required. Public CDN URLs are shareable by anyone who has the link. + +Bunny's own documentation recommends the token.py package: + +```py +from token import sign_url + +url = sign_url( + "https://myzone.b-cdn.net/videos/stream1/playlist.m3u8", + "your-security-key", + expiration_time=3600, + is_directory=True, + path_allowed="/videos/stream1/", + countries_allowed="GB", +) +``` + +`get_url(path)` on the `BunnyClient` must generate a time-limited (pick a sensible default for audio content here) signed URL using the Bunny Token Authentication feature. The MinIO implementation would use pre-signed S3 URLs for consistency. + +Create a sibling method that explicitly creates public URLs for any future public content; call this `get_public_url`. + +### Misc + +`pcm_to_wav()` currently lives in `api/app/storage.py` but is a Gemini output concern. Move it to the Gemini client module (`api/app/outbound/gemini/`) when carrying out this refactor. 
diff --git a/content/choose-your-own-adventure/README.md b/content/choose-your-own-adventure/README.md new file mode 100644 index 0000000..e69de29 diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml new file mode 100644 index 0000000..f1682c3 --- /dev/null +++ b/docker-compose-dev.yml @@ -0,0 +1,80 @@ +services: + db: + image: postgres:16-alpine + environment: + POSTGRES_USER: ${POSTGRES_USER:-langlearn} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: ${POSTGRES_DB:-langlearn} + volumes: + - pgdata:/var/lib/postgresql/data + ports: + - "5432:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-langlearn}"] + interval: 5s + timeout: 5s + retries: 10 + + storage: + image: minio/minio:latest + command: server /data --console-address ":9001" + environment: + MINIO_ROOT_USER: ${STORAGE_ACCESS_KEY:-langlearn} + MINIO_ROOT_PASSWORD: ${STORAGE_SECRET_KEY} + ports: + - "9000:9000" + - "9001:9001" + volumes: + - storagedata:/data + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:9000/minio/health/live || exit 1"] + interval: 5s + timeout: 5s + retries: 10 + + api: + build: ./api + volumes: + - ./api:/app:z + ports: + - "${API_PORT:-8000}:8000" + command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload + environment: + DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn} + ADMIN_USER_EMAILS: ${ADMIN_USER_EMAILS:-wilson@thomaswilson.xyz} + API_BASE_URL: ${API_BASE_URL:-http://localhost:8000} + JWT_SECRET: ${JWT_SECRET} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + DEEPL_API_KEY: ${DEEPL_API_KEY} + DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY} + GEMINI_API_KEY: ${GEMINI_API_KEY} + PYTHONPATH: /app + STORAGE_PROVIDER: local + STORAGE_ENDPOINT_URL: http://storage:9000 + STORAGE_ACCESS_KEY: ${STORAGE_ACCESS_KEY:-langlearn} + STORAGE_SECRET_KEY: ${STORAGE_SECRET_KEY} + STORAGE_BUCKET: ${STORAGE_BUCKET:-langlearn} + TRANSACTIONAL_EMAIL_PROVIDER: 
${TRANSACTIONAL_EMAIL_PROVIDER:-stub} + depends_on: + db: + condition: service_healthy + storage: + condition: service_healthy + restart: unless-stopped + + frontend: + build: + context: ./frontend + args: + PUBLIC_API_BASE_URL: ${PUBLIC_API_BASE_URL:-http://localhost:8000} + ports: + - "${FRONTEND_PORT:-3000}:3000" + environment: + ORIGIN: ${ORIGIN:-http://localhost:3000} + depends_on: + - api + restart: unless-stopped + +volumes: + pgdata: + storagedata: diff --git a/docker-compose-prod.yml b/docker-compose-prod.yml new file mode 100644 index 0000000..23d900e --- /dev/null +++ b/docker-compose-prod.yml @@ -0,0 +1,83 @@ +services: + db: + image: postgres:16-alpine + environment: + POSTGRES_USER: ${POSTGRES_USER:-langlearn} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: ${POSTGRES_DB:-langlearn} + volumes: + - pgdata:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-langlearn}"] + interval: 10s + timeout: 5s + retries: 10 + restart: unless-stopped + deploy: + resources: + limits: + cpus: '1' + memory: 1G + + api: + build: ./api + ports: + - "${API_PORT:-8000}:8000" + command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 2 + environment: + DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn} + ADMIN_USER_EMAILS: ${ADMIN_USER_EMAILS} + API_BASE_URL: ${API_BASE_URL} + JWT_SECRET: ${JWT_SECRET} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + DEEPL_API_KEY: ${DEEPL_API_KEY} + DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY} + GEMINI_API_KEY: ${GEMINI_API_KEY} + PYTHONPATH: /app + STORAGE_PROVIDER: bunny + BUNNY_ZONE: ${BUNNY_ZONE} + BUNNY_API_KEY: ${BUNNY_API_KEY} + BUNNY_CDN_BASE_URL: ${BUNNY_CDN_BASE_URL} + BUNNY_TOKEN_AUTH_KEY: ${BUNNY_TOKEN_AUTH_KEY} + TRANSACTIONAL_EMAIL_PROVIDER: ${TRANSACTIONAL_EMAIL_PROVIDER} + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:8000/health || exit 1"] + interval: 10s + timeout: 5s + retries: 10 + 
start_period: 20s + depends_on: + db: + condition: service_healthy + restart: unless-stopped + deploy: + resources: + limits: + cpus: '1' + memory: 1G + + frontend: + build: + context: ./frontend + args: + PUBLIC_API_BASE_URL: ${PUBLIC_API_BASE_URL} + ports: + - "${FRONTEND_PORT:-3000}:3000" + environment: + ORIGIN: ${ORIGIN} + depends_on: + api: + condition: service_healthy + restart: unless-stopped + deploy: + resources: + limits: + cpus: '0.5' + memory: 256M + +volumes: + pgdata: + +networks: + default: + name: langlearn