feat: Change out storage from local to Bunny (via env param)
Some checks failed
/ test (push) Has been cancelled
Some checks failed
/ test (push) Has been cancelled
This commit is contained in:
parent
293a8ab3f9
commit
9b9bdc3a39
20 changed files with 490 additions and 88 deletions
|
|
@ -15,10 +15,16 @@ class Settings(BaseSettings):
|
|||
scaleway_tem_project_id: str = ""
|
||||
scaleway_tem_from_address: str = ""
|
||||
scaleway_tem_region: str = "fr-par"
|
||||
storage_endpoint_url: str
|
||||
storage_access_key: str
|
||||
storage_secret_key: str
|
||||
storage_provider: str = "local" # or 'bunny'
|
||||
storage_endpoint_url: str = ""
|
||||
storage_access_key: str = ""
|
||||
storage_secret_key: str = ""
|
||||
storage_bucket: str = "langlearn"
|
||||
bunny_zone: str = "languagelearningapp"
|
||||
bunny_api_key: str = ""
|
||||
bunny_cdn_base_url: str = ""
|
||||
bunny_token_auth_key: str = ""
|
||||
bunny_storage_endpoint: str = "https://storage.bunnycdn.com"
|
||||
stub_generation: bool = False
|
||||
|
||||
model_config = {"env_file": ".env"}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ from ...outbound.postgres.repositories.adventure_repository import (
|
|||
PostgresAdventureRepository,
|
||||
)
|
||||
from ...outbound.spacy.spacy_client import SpacyClient
|
||||
from ...storage import upload_audio
|
||||
from ...outbound.storage_client import get_storage_client
|
||||
from ..models.adventure import (
|
||||
Adventure,
|
||||
AdventureEntry,
|
||||
|
|
@ -315,7 +315,7 @@ class AdventureService:
|
|||
# ── File upload ───────────────────────────────────────────────────
|
||||
t0 = time.monotonic()
|
||||
audio_key = f"adventure-audio/{entry_id}.wav"
|
||||
upload_audio(audio_key, wav_bytes)
|
||||
get_storage_client().upload(audio_key, wav_bytes)
|
||||
timing_file_uploading = time.monotonic() - t0
|
||||
|
||||
await self.audio_repo.create(
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
|
|||
from ...outbound.deepl.deepl_client import DeepLClient
|
||||
from ...outbound.gemini.gemini_client import GeminiClient
|
||||
from ...outbound.spacy.spacy_client import SpacyClient
|
||||
from ...storage import upload_audio
|
||||
from ...outbound.storage_client import get_storage_client
|
||||
from ...languages import SUPPORTED_LANGUAGES
|
||||
|
||||
|
||||
|
|
@ -137,7 +137,7 @@ class SummariseService:
|
|||
voice = self.gemini_client.get_voice_by_language(target_language)
|
||||
wav_bytes = await self.gemini_client.generate_audio(generated_text, voice)
|
||||
audio_key = f"audio/{job_id}.wav"
|
||||
upload_audio(audio_key, wav_bytes)
|
||||
get_storage_client().upload(audio_key, wav_bytes)
|
||||
|
||||
transcript = await self.deepgram_client.transcribe_bytes(wav_bytes, target_language)
|
||||
|
||||
|
|
|
|||
|
|
@ -8,13 +8,13 @@ from .routers.api import jobs
|
|||
from .routers import media as media_router
|
||||
from .routers.api.main import api_router
|
||||
from .routers.bff.main import bff_router
|
||||
from .storage import ensure_bucket_exists
|
||||
from .outbound.storage_factory import init_storage
|
||||
from . import worker
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
ensure_bucket_exists()
|
||||
init_storage()
|
||||
worker_task = asyncio.create_task(worker.worker_loop())
|
||||
yield
|
||||
worker_task.cancel()
|
||||
|
|
|
|||
0
api/app/outbound/bunny/__init__.py
Normal file
0
api/app/outbound/bunny/__init__.py
Normal file
77
api/app/outbound/bunny/bunny_client.py
Normal file
77
api/app/outbound/bunny/bunny_client.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import base64
import hashlib
import logging
import time
import urllib.error
import urllib.request
|
||||
|
||||
_SIGNED_URL_EXPIRY_SECONDS = 3600
# Default upper bound, in seconds, for a single storage API round-trip.
_REQUEST_TIMEOUT_SECONDS = 30.0

_logger = logging.getLogger(__name__)


class BunnyClient:
    """Storage adapter for a Bunny storage zone and its CDN hostname.

    Uploads and deletes are sent to ``{storage_endpoint}/{zone}/{path}`` with
    the ``AccessKey`` header. Delivery URLs are built on ``cdn_base_url``.
    """

    def __init__(
        self,
        zone: str,
        api_key: str,
        cdn_base_url: str,
        token_auth_key: str,
        storage_endpoint: str = "https://storage.bunnycdn.com",
        timeout: float = _REQUEST_TIMEOUT_SECONDS,
    ) -> None:
        """Store the connection settings.

        Args:
            zone: Storage zone name, used as the first URL path segment.
            api_key: Value sent in the ``AccessKey`` request header.
            cdn_base_url: Base URL for delivery links; a trailing ``/`` is removed.
            token_auth_key: Secret mixed into the signed-URL hash.
            storage_endpoint: Base URL of the storage API; a trailing ``/`` is removed.
            timeout: Seconds to wait on a storage request before giving up.
        """
        self._zone = zone
        self._api_key = api_key
        self._cdn_base_url = cdn_base_url.rstrip("/")
        self._token_auth_key = token_auth_key
        self._storage_endpoint = storage_endpoint.rstrip("/")
        self._timeout = timeout

    def _storage_url(self, path: str) -> str:
        """Return the storage API URL for ``path`` (leading ``/`` ignored)."""
        return f"{self._storage_endpoint}/{self._zone}/{path.lstrip('/')}"

    def _send(self, req: urllib.request.Request, expected_status: int) -> bool:
        """Send ``req`` and report whether it answered with ``expected_status``.

        Every failure is logged and reported as ``False`` so that the ``bool``
        return value is the single failure signal for callers.
        """
        try:
            with urllib.request.urlopen(req, timeout=self._timeout) as resp:
                if resp.status == expected_status:
                    return True
                _logger.warning(
                    "Bunny %s %s returned unexpected status %s",
                    req.get_method(),
                    req.full_url,
                    resp.status,
                )
                return False
        # HTTPError is a URLError subclass; TimeoutError is listed separately
        # because a timeout while waiting for the response is not wrapped.
        except (urllib.error.URLError, TimeoutError) as exc:
            _logger.warning(
                "Bunny %s %s failed: %s", req.get_method(), req.full_url, exc
            )
            return False

    def upload(self, path: str, data: bytes) -> bool:
        """PUT ``data`` at ``path``; return True only on HTTP 201."""
        req = urllib.request.Request(
            self._storage_url(path),
            data=data,
            method="PUT",
            headers={
                "AccessKey": self._api_key,
                "Content-Type": "audio/wav",
            },
        )
        return self._send(req, 201)

    def get_url(self, path: str) -> str:
        """Return a signed CDN URL for ``path`` that carries an expiry timestamp.

        The token is the URL-safe, unpadded base64 of
        ``sha256(token_auth_key + url_path + expires)``.
        """
        url_path = f"/{path.lstrip('/')}"
        expiration = int(time.time()) + _SIGNED_URL_EXPIRY_SECONDS
        digest = hashlib.sha256(
            (self._token_auth_key + url_path + str(expiration)).encode()
        ).digest()
        # urlsafe_b64encode already maps "+" -> "-" and "/" -> "_"; only the
        # "=" padding has to be dropped.
        token = base64.urlsafe_b64encode(digest).decode().rstrip("=")
        return f"{self._cdn_base_url}{url_path}?token={token}&expires={expiration}"

    def get_public_url(self, path: str) -> str:
        """Return the unsigned CDN URL for ``path``."""
        return f"{self._cdn_base_url}/{path.lstrip('/')}"

    def delete(self, path: str) -> bool:
        """DELETE the object at ``path``; return True only on HTTP 200."""
        req = urllib.request.Request(
            self._storage_url(path),
            method="DELETE",
            headers={"AccessKey": self._api_key},
        )
        return self._send(req, 200)

    def download(self, path: str) -> tuple[bytes, str]:
        """Always raise: this adapter does not proxy file contents.

        Raises:
            NotImplementedError: on every call.
        """
        raise NotImplementedError(
            "Direct download not available with Bunny — use get_url() to obtain a signed CDN URL"
        )
|
||||
|
|
@ -1,9 +1,19 @@
|
|||
import asyncio
|
||||
import io
|
||||
import wave
|
||||
|
||||
from google import genai
|
||||
from google.genai import types as genai_types
|
||||
|
||||
from ...storage import pcm_to_wav
|
||||
|
||||
def _pcm_to_wav(
    pcm_data: bytes,
    sample_rate: int = 24000,
    channels: int = 1,
    sample_width: int = 2,
) -> bytes:
    """Wrap raw PCM frames in a WAV container and return the file bytes.

    Args:
        pcm_data: Raw PCM frames, written to the container unchanged.
        sample_rate: Frames per second recorded in the WAV header.
        channels: Number of channels recorded in the header (default mono).
        sample_width: Bytes per sample recorded in the header (default 2).
    """
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(pcm_data)
    return buf.getvalue()
|
||||
|
||||
VOICE_BY_LANGUAGE: dict[str, str] = {
|
||||
"fr": "Kore",
|
||||
|
|
@ -47,6 +57,6 @@ class GeminiClient():
|
|||
),
|
||||
)
|
||||
pcm_data = response.candidates[0].content.parts[0].inline_data.data
|
||||
return pcm_to_wav(pcm_data)
|
||||
return _pcm_to_wav(pcm_data)
|
||||
|
||||
return await asyncio.to_thread(_call)
|
||||
|
|
|
|||
0
api/app/outbound/minio/__init__.py
Normal file
0
api/app/outbound/minio/__init__.py
Normal file
70
api/app/outbound/minio/minio_client.py
Normal file
70
api/app/outbound/minio/minio_client.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
|
||||
class MinioClient:
    """Storage adapter for an S3-compatible endpoint, accessed through boto3.

    URLs returned by this adapter point at ``{api_base_url}/media/{path}``.
    """

    def __init__(
        self,
        endpoint_url: str,
        access_key: str,
        secret_key: str,
        bucket: str,
        api_base_url: str,
    ) -> None:
        """Store the connection settings; no connection is opened here.

        Args:
            endpoint_url: URL of the S3-compatible service.
            access_key: Access key id passed to boto3.
            secret_key: Secret access key passed to boto3.
            bucket: Bucket every operation is scoped to.
            api_base_url: Base of the ``/media/`` URLs; a trailing ``/`` is removed.
        """
        self._endpoint_url = endpoint_url
        self._access_key = access_key
        self._secret_key = secret_key
        self._bucket = bucket
        self._api_base_url = api_base_url.rstrip("/")
        # Built on first use and then reused, instead of once per operation.
        self._client = None

    def _s3(self):
        """Return the boto3 S3 client, creating it on first call."""
        if self._client is None:
            self._client = boto3.client(
                "s3",
                endpoint_url=self._endpoint_url,
                aws_access_key_id=self._access_key,
                aws_secret_access_key=self._secret_key,
            )
        return self._client

    def _media_url(self, path: str) -> str:
        """Return the ``/media/`` URL for ``path`` (leading ``/`` ignored)."""
        return f"{self._api_base_url}/media/{path.lstrip('/')}"

    def ensure_bucket_exists(self) -> None:
        """Create the bucket when ``head_bucket`` reports it missing.

        Raises:
            ClientError: for any error code other than "404"/"NoSuchBucket".
        """
        client = self._s3()
        try:
            client.head_bucket(Bucket=self._bucket)
        except ClientError as e:
            if e.response["Error"]["Code"] in ("404", "NoSuchBucket"):
                client.create_bucket(Bucket=self._bucket)
            else:
                raise

    def upload(self, path: str, data: bytes) -> bool:
        """Store ``data`` under key ``path``; return False on ``ClientError``."""
        try:
            self._s3().put_object(
                Bucket=self._bucket,
                Key=path,
                Body=data,
                ContentType="audio/wav",
            )
            return True
        except ClientError:
            return False

    def get_url(self, path: str) -> str:
        """Return the ``/media/`` URL for ``path``."""
        return self._media_url(path)

    def get_public_url(self, path: str) -> str:
        """Return the ``/media/`` URL for ``path`` (same URL as ``get_url``)."""
        return self._media_url(path)

    def delete(self, path: str) -> bool:
        """Delete key ``path``; return False on ``ClientError``."""
        try:
            self._s3().delete_object(Bucket=self._bucket, Key=path)
            return True
        except ClientError:
            return False

    def download(self, path: str) -> tuple[bytes, str]:
        """Return ``(file_bytes, content_type)`` for key ``path``.

        Raises:
            FileNotFoundError: when the error code is "NoSuchKey" or "404".
            ClientError: for any other error code.
        """
        try:
            response = self._s3().get_object(Bucket=self._bucket, Key=path)
        except ClientError as e:
            if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
                raise FileNotFoundError(path) from e
            raise
        body = response["Body"]
        try:
            payload = body.read()
        finally:
            body.close()
        return payload, response.get("ContentType", "audio/wav")
|
||||
21
api/app/outbound/storage_client.py
Normal file
21
api/app/outbound/storage_client.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
from typing import Protocol
|
||||
|
||||
# Process-wide storage adapter; stays None until _set_storage_client() runs.
_client: "StorageClient | None" = None


class StorageClient(Protocol):
    """Structural interface shared by the storage adapters."""

    def upload(self, path: str, data: bytes) -> bool: ...
    def get_url(self, path: str) -> str: ...
    def get_public_url(self, path: str) -> str: ...
    def delete(self, path: str) -> bool: ...
    def download(self, path: str) -> tuple[bytes, str]: ...


def get_storage_client() -> "StorageClient":
    """Return the registered storage client.

    Raises:
        RuntimeError: if no client has been registered yet. An explicit raise
            is used because ``assert`` statements are removed under ``-O``.
    """
    if _client is None:
        raise RuntimeError(
            "Storage client not initialised — call init_storage() at startup"
        )
    return _client


def _set_storage_client(c: "StorageClient") -> None:
    """Register ``c`` as the client returned by ``get_storage_client()``."""
    global _client
    _client = c
|
||||
27
api/app/outbound/storage_factory.py
Normal file
27
api/app/outbound/storage_factory.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
from ..config import settings
|
||||
from .storage_client import StorageClient, _set_storage_client
|
||||
from .minio.minio_client import MinioClient
|
||||
from .bunny.bunny_client import BunnyClient
|
||||
|
||||
|
||||
def init_storage() -> None:
    """Build the storage client selected by ``settings.storage_provider``.

    ``"bunny"`` builds a ``BunnyClient``; ``"local"`` builds a ``MinioClient``
    and ensures its bucket exists. The result is registered through
    ``_set_storage_client``.

    Raises:
        ValueError: if the provider is neither ``"bunny"`` nor ``"local"``, or
            if ``"bunny"`` is selected while a required Bunny setting is empty.
    """
    provider = settings.storage_provider
    client: StorageClient
    if provider == "bunny":
        # These settings default to "", so a forgotten variable would otherwise
        # only show up later as failed uploads or unusable URLs.
        required = (
            "bunny_zone",
            "bunny_api_key",
            "bunny_cdn_base_url",
            "bunny_token_auth_key",
        )
        missing = [name for name in required if not getattr(settings, name)]
        if missing:
            raise ValueError(
                "storage_provider is 'bunny' but these settings are empty: "
                + ", ".join(missing)
            )
        client = BunnyClient(
            zone=settings.bunny_zone,
            api_key=settings.bunny_api_key,
            cdn_base_url=settings.bunny_cdn_base_url,
            token_auth_key=settings.bunny_token_auth_key,
            storage_endpoint=settings.bunny_storage_endpoint,
        )
    elif provider == "local":
        minio = MinioClient(
            endpoint_url=settings.storage_endpoint_url,
            access_key=settings.storage_access_key,
            secret_key=settings.storage_secret_key,
            bucket=settings.storage_bucket,
            api_base_url=settings.api_base_url,
        )
        minio.ensure_bucket_exists()
        client = minio
    else:
        # A misspelt provider must not silently select the local backend.
        raise ValueError(
            f"Unknown storage_provider {provider!r}; expected 'local' or 'bunny'"
        )
    _set_storage_client(client)
|
||||
|
|
@ -12,7 +12,7 @@ from ...outbound.postgres.repositories import summarise_job_repository
|
|||
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
||||
from ...outbound.postgres.entities.translated_article_entity import TranslatedArticleEntity
|
||||
from ...outbound.gemini.gemini_client import GeminiClient
|
||||
from ...storage import upload_audio
|
||||
from ...outbound.storage_client import get_storage_client
|
||||
from ...config import settings
|
||||
from ... import worker
|
||||
|
||||
|
|
@ -92,7 +92,7 @@ async def _run_regenerate_audio(job_id: uuid.UUID) -> None:
|
|||
voice = gemini_client.get_voice_by_language(article_entity.target_language)
|
||||
wav_bytes = await gemini_client.generate_audio(article_entity.target_body, voice)
|
||||
audio_key = f"audio/{job_id}.wav"
|
||||
upload_audio(audio_key, wav_bytes)
|
||||
get_storage_client().upload(audio_key, wav_bytes)
|
||||
|
||||
await article_repo.update_audio(
|
||||
article_entity.id,
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@ from pydantic import BaseModel
|
|||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from ...auth import verify_token
|
||||
from ...config import settings
|
||||
from ...outbound.postgres.database import get_db
|
||||
from ...outbound.storage_client import get_storage_client
|
||||
from ...outbound.postgres.repositories.adventure_repository import (
|
||||
PostgresAdventureEntryAudioRepository,
|
||||
PostgresAdventureEntryChoiceRepository,
|
||||
|
|
@ -61,7 +61,7 @@ class AdventureDetailResponse(BaseModel):
|
|||
def _audio_url(key: str | None) -> str | None:
|
||||
if key is None:
|
||||
return None
|
||||
return f"{settings.api_base_url}/media/{key}"
|
||||
return get_storage_client().get_url(key)
|
||||
|
||||
|
||||
@router.get("/{adventure_id}", response_model=AdventureDetailResponse, status_code=200)
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ from pydantic import BaseModel
|
|||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from ...auth import verify_token
|
||||
from ...config import settings
|
||||
from ...outbound.postgres.database import get_db
|
||||
from ...outbound.storage_client import get_storage_client
|
||||
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
||||
|
||||
router = APIRouter(prefix="/articles", tags=["bff", "articles"])
|
||||
|
|
@ -46,7 +46,7 @@ class ArticleDetail(BaseModel):
|
|||
def _audio_url(key: str | None) -> str | None:
|
||||
if key is None:
|
||||
return None
|
||||
return f"{settings.api_base_url}/media/{key}"
|
||||
return get_storage_client().get_url(key)
|
||||
|
||||
|
||||
@router.get("", response_model=ArticleListResponse, status_code=200)
|
||||
|
|
|
|||
|
|
@ -3,12 +3,11 @@ import uuid
|
|||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import Response
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from ..outbound.postgres.database import get_db
|
||||
from ..outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
||||
from ..outbound.postgres.repositories.adventure_repository import PostgresAdventureEntryAudioRepository
|
||||
from ..storage import download_audio
|
||||
from ..outbound.storage_client import get_storage_client
|
||||
|
||||
router = APIRouter(prefix="/media", tags=["media"])
|
||||
|
||||
|
|
@ -23,21 +22,23 @@ async def get_adventure_audio_file(
|
|||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid file ID")
|
||||
|
||||
print(f"Looking for adventure audio with entry ID: {eid}")
|
||||
|
||||
adventure_audio = await PostgresAdventureEntryAudioRepository(db).get_for_entry(entry_id=eid, component_type="story_text")
|
||||
|
||||
if adventure_audio is None:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
try:
|
||||
audio_bytes, content_type = download_audio("adventure-audio/" + filename)
|
||||
except ClientError as e:
|
||||
if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
audio_bytes, content_type = get_storage_client().download("adventure-audio/" + filename)
|
||||
except FileNotFoundError:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
except NotImplementedError:
|
||||
raise HTTPException(status_code=501, detail="Media proxy not available with current storage provider")
|
||||
except Exception:
|
||||
raise HTTPException(status_code=500, detail="Storage error")
|
||||
|
||||
return Response(content=audio_bytes, media_type=content_type)
|
||||
|
||||
|
||||
@router.get("/{filename:path}")
|
||||
async def get_media_file(
|
||||
filename: str,
|
||||
|
|
@ -49,11 +50,12 @@ async def get_media_file(
|
|||
raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
try:
|
||||
audio_bytes, content_type = download_audio(filename)
|
||||
except ClientError as e:
|
||||
if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
audio_bytes, content_type = get_storage_client().download(filename)
|
||||
except FileNotFoundError:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
except NotImplementedError:
|
||||
raise HTTPException(status_code=501, detail="Media proxy not available with current storage provider")
|
||||
except Exception:
|
||||
raise HTTPException(status_code=500, detail="Storage error")
|
||||
|
||||
return Response(content=audio_bytes, media_type=content_type)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,56 +0,0 @@
|
|||
import io
|
||||
import wave
|
||||
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from .config import settings
|
||||
|
||||
|
||||
def get_s3_client():
    """Return a new boto3 S3 client configured from the storage settings."""
    connection = {
        "endpoint_url": settings.storage_endpoint_url,
        "aws_access_key_id": settings.storage_access_key,
        "aws_secret_access_key": settings.storage_secret_key,
    }
    return boto3.client("s3", **connection)
|
||||
|
||||
|
||||
def ensure_bucket_exists() -> None:
    """Create the configured bucket when ``head_bucket`` reports it missing.

    Any ``ClientError`` whose code is not "404" or "NoSuchBucket" is re-raised.
    """
    s3 = get_s3_client()
    try:
        s3.head_bucket(Bucket=settings.storage_bucket)
    except ClientError as err:
        error_code = err.response["Error"]["Code"]
        if error_code not in ("404", "NoSuchBucket"):
            raise
        s3.create_bucket(Bucket=settings.storage_bucket)
|
||||
|
||||
|
||||
def pcm_to_wav(
    pcm_data: bytes,
    sample_rate: int = 24000,
    channels: int = 1,
    sample_width: int = 2,
) -> bytes:
    """Wrap raw PCM data in a WAV container.

    The defaults describe 16-bit mono audio.

    Args:
        pcm_data: Raw PCM frames, written to the container unchanged.
        sample_rate: Frames per second recorded in the WAV header.
        channels: Number of channels recorded in the header.
        sample_width: Bytes per sample recorded in the header (2 = 16-bit).
    """
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(pcm_data)
    return buf.getvalue()
|
||||
|
||||
|
||||
def upload_audio(object_key: str, audio_bytes: bytes, content_type: str = "audio/wav") -> None:
    """Store ``audio_bytes`` under ``object_key`` in the configured bucket."""
    put_kwargs = {
        "Bucket": settings.storage_bucket,
        "Key": object_key,
        "Body": audio_bytes,
        "ContentType": content_type,
    }
    get_s3_client().put_object(**put_kwargs)
|
||||
|
||||
|
||||
def download_audio(object_key: str) -> tuple[bytes, str]:
    """Fetch ``object_key`` from the configured bucket.

    Returns:
        ``(file_bytes, content_type)``; the content type falls back to
        ``"audio/wav"`` when the response does not carry one.
    """
    s3 = get_s3_client()
    obj = s3.get_object(Bucket=settings.storage_bucket, Key=object_key)
    mime = obj.get("ContentType", "audio/wav")
    payload = obj["Body"].read()
    return payload, mime
|
||||
82
api/docs/design-doc-object-storage.md
Normal file
82
api/docs/design-doc-object-storage.md
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
# Design Document: Object Storage with Bunny CDN
|
||||
|
||||
This is a technical design document for implementing object (e.g. audio file) storage with Bunny CDN. This directory (`api/docs`) contains other similar files, notably `architecture.md` and `domain.md`. When you have worked through the change described here, please update `architecture.md`
|
||||
|
||||
## The problem
|
||||
|
||||
Language Learning App has audio as a core component, which requires files to be delivered to the end user. When developing locally, these files have been stored in a MinIO service, mimicking an S3-like storage bucket.
|
||||
|
||||
Using this approach on a deployed instance (e.g. on a VPS using Docker) would result in high bandwidth usage and therefore a high cost. Using a dedicated, EU-based service like Bunny allows us to offload the delivery of content to a third party at reduced cost (great!).
|
||||
|
||||
## The current implementation
|
||||
|
||||
Object storage was one of the first features built into this software, back when it was in an MVP state; as such, it does not fit within the current architecture.
|
||||
|
||||
Right now `api/app/storage.py` contains some helper functions, notably the `upload_audio` and `download_audio` functions.
|
||||
|
||||
Users (through the web client) retrieve the media through two URLs (detailed in `api/app/routers/media.py`):
|
||||
|
||||
- `GET /media/adventure-audio/{filename:path}` for the choose-your-own-adventure file names
|
||||
- `GET /media/{filename:path}`, used for the summary transcriptions
|
||||
|
||||
## The solution
|
||||
|
||||
We are going to use Bunny (bunny.net) as the CDN for all objects in deployed environments (right now, just production — in the future preprod or staging may exist).
|
||||
|
||||
Locally, for development purposes, we retain the use of MinIO. To decide which backend to use, we introduce an environment variable `STORAGE_PROVIDER` with a default value of `local` and an accepted alternative of `bunny`.
|
||||
|
||||
In situations where we use `local`, the existing `/media/..` proxy endpoints are returned when constructing audio URLs (e.g. in `api/app/routers/bff/articles.py` and `api/app/routers/bff/adventure.py`). When we use `bunny`, the Bunny CDN URL is returned directly so the request is never proxied through our service.
|
||||
|
||||
### Client interface
|
||||
|
||||
We will create a `BunnyClient` in `api/app/outbound/bunny/bunny_client.py` and extract the current MinIO logic into a `MinioClient` in `api/app/outbound/minio/minio_client.py`. Both implement a shared `StorageClient` protocol.
|
||||
|
||||
The interface is **generic** — the clients are storage adapters and must not encode domain concepts. Path construction (which directory, which filename) is the responsibility of the caller (the service layer), not the client.
|
||||
|
||||
```python
|
||||
class StorageClient(Protocol):
|
||||
def upload(self, path: str, data: bytes) -> bool: ...
|
||||
def get_url(self, path: str) -> str: ...
|
||||
def delete(self, path: str) -> bool: ...
|
||||
```
|
||||
|
||||
Services construct paths using hardcoded directory prefixes (e.g. `"adventure-audio/"`, `"audio/"`). These are constants, not environment variables — they are not environment-specific and do not belong in config.
|
||||
|
||||
### Factory and instantiation
|
||||
|
||||
A factory function reads `STORAGE_PROVIDER` and returns the appropriate `StorageClient` implementation. The client is instantiated **once at app startup** (e.g. in `main.py`) as a module-level singleton — not per-request. This is consistent with how other outbound clients (`AnthropicClient`, `GeminiClient`, etc.) are handled.
|
||||
|
||||
### Bunny configuration
|
||||
|
||||
Bunny requires the following environment variables:
|
||||
|
||||
- `BUNNY_ZONE` — the storage zone name (the zone `languagelearningapp` has been created in the Bunny UI). No "DEFAULT" suffix; there is one zone.
|
||||
- `BUNNY_API_KEY` — the Bunny API key for upload/delete operations.
|
||||
- `BUNNY_CDN_BASE_URL` — the public CDN hostname used to construct delivery URLs.
|
||||
|
||||
### Signed vs. public URLs
|
||||
|
||||
Because audio files are user-specific (i.e. one user should not be able to use another user's audio URL), Bunny signed URLs are required. Public CDN URLs, by contrast, can be used by anyone who has the link.
|
||||
|
||||
Bunny's own documentation recommends the `token.py` package:
|
||||
|
||||
```py
|
||||
from token import sign_url
|
||||
|
||||
url = sign_url(
|
||||
"https://myzone.b-cdn.net/videos/stream1/playlist.m3u8",
|
||||
"your-security-key",
|
||||
expiration_time=3600,
|
||||
is_directory=True,
|
||||
path_allowed="/videos/stream1/",
|
||||
countries_allowed="GB",
|
||||
)
|
||||
```
|
||||
|
||||
`get_url(path)` on the `BunnyClient` must generate a time-limited (pick a sensible default for audio content here) signed URL using the Bunny Token Authentication feature. The MinIO implementation would use pre-signed S3 URLs for consistency.
|
||||
|
||||
Create a sibling method, `get_public_url`, that explicitly creates public URLs for any future public content.
|
||||
|
||||
### Misc
|
||||
|
||||
`pcm_to_wav()` currently lives in `api/app/storage.py` but is a Gemini output concern. Move it to the Gemini client module (`api/app/outbound/gemini/`) when carrying out this refactor.
|
||||
0
content/choose-your-own-adventure/README.md
Normal file
0
content/choose-your-own-adventure/README.md
Normal file
80
docker-compose-dev.yml
Normal file
80
docker-compose-dev.yml
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
services:
|
||||
db:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER:-langlearn}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB:-langlearn}
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
ports:
|
||||
- "5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-langlearn}"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
storage:
|
||||
image: minio/minio:latest
|
||||
command: server /data --console-address ":9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: ${STORAGE_ACCESS_KEY:-langlearn}
|
||||
MINIO_ROOT_PASSWORD: ${STORAGE_SECRET_KEY}
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- storagedata:/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -sf http://localhost:9000/minio/health/live || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
api:
|
||||
build: ./api
|
||||
volumes:
|
||||
- ./api:/app:z
|
||||
ports:
|
||||
- "${API_PORT:-8000}:8000"
|
||||
command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||
environment:
|
||||
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn}
|
||||
ADMIN_USER_EMAILS: ${ADMIN_USER_EMAILS:-wilson@thomaswilson.xyz}
|
||||
API_BASE_URL: ${API_BASE_URL:-http://localhost:8000}
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
|
||||
DEEPL_API_KEY: ${DEEPL_API_KEY}
|
||||
DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY}
|
||||
GEMINI_API_KEY: ${GEMINI_API_KEY}
|
||||
PYTHONPATH: /app
|
||||
STORAGE_PROVIDER: local
|
||||
STORAGE_ENDPOINT_URL: http://storage:9000
|
||||
STORAGE_ACCESS_KEY: ${STORAGE_ACCESS_KEY:-langlearn}
|
||||
STORAGE_SECRET_KEY: ${STORAGE_SECRET_KEY}
|
||||
STORAGE_BUCKET: ${STORAGE_BUCKET:-langlearn}
|
||||
TRANSACTIONAL_EMAIL_PROVIDER: ${TRANSACTIONAL_EMAIL_PROVIDER:-stub}
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
storage:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
|
||||
frontend:
|
||||
build:
|
||||
context: ./frontend
|
||||
args:
|
||||
PUBLIC_API_BASE_URL: ${PUBLIC_API_BASE_URL:-http://localhost:8000}
|
||||
ports:
|
||||
- "${FRONTEND_PORT:-3000}:3000"
|
||||
environment:
|
||||
ORIGIN: ${ORIGIN:-http://localhost:3000}
|
||||
depends_on:
|
||||
- api
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
pgdata:
|
||||
storagedata:
|
||||
83
docker-compose-prod.yml
Normal file
83
docker-compose-prod.yml
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
services:
|
||||
db:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER:-langlearn}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB:-langlearn}
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-langlearn}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
|
||||
api:
|
||||
build: ./api
|
||||
ports:
|
||||
- "${API_PORT:-8000}:8000"
|
||||
command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 2
|
||||
environment:
|
||||
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn}
|
||||
ADMIN_USER_EMAILS: ${ADMIN_USER_EMAILS}
|
||||
API_BASE_URL: ${API_BASE_URL}
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
|
||||
DEEPL_API_KEY: ${DEEPL_API_KEY}
|
||||
DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY}
|
||||
GEMINI_API_KEY: ${GEMINI_API_KEY}
|
||||
PYTHONPATH: /app
|
||||
STORAGE_PROVIDER: bunny
|
||||
BUNNY_ZONE: ${BUNNY_ZONE}
|
||||
BUNNY_API_KEY: ${BUNNY_API_KEY}
|
||||
BUNNY_CDN_BASE_URL: ${BUNNY_CDN_BASE_URL}
|
||||
BUNNY_TOKEN_AUTH_KEY: ${BUNNY_TOKEN_AUTH_KEY}
|
||||
TRANSACTIONAL_EMAIL_PROVIDER: ${TRANSACTIONAL_EMAIL_PROVIDER}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -sf http://localhost:8000/health || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
start_period: 20s
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
|
||||
frontend:
|
||||
build:
|
||||
context: ./frontend
|
||||
args:
|
||||
PUBLIC_API_BASE_URL: ${PUBLIC_API_BASE_URL}
|
||||
ports:
|
||||
- "${FRONTEND_PORT:-3000}:3000"
|
||||
environment:
|
||||
ORIGIN: ${ORIGIN}
|
||||
depends_on:
|
||||
api:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '0.5'
|
||||
memory: 256M
|
||||
|
||||
volumes:
|
||||
pgdata:
|
||||
|
||||
networks:
|
||||
default:
|
||||
name: langlearn
|
||||
Loading…
Reference in a new issue