feat: Change out storage from local to Bunny (via env param)
Some checks failed
/ test (push) Has been cancelled
Some checks failed
/ test (push) Has been cancelled
This commit is contained in:
parent
293a8ab3f9
commit
9b9bdc3a39
20 changed files with 490 additions and 88 deletions
|
|
@ -15,10 +15,16 @@ class Settings(BaseSettings):
|
||||||
scaleway_tem_project_id: str = ""
|
scaleway_tem_project_id: str = ""
|
||||||
scaleway_tem_from_address: str = ""
|
scaleway_tem_from_address: str = ""
|
||||||
scaleway_tem_region: str = "fr-par"
|
scaleway_tem_region: str = "fr-par"
|
||||||
storage_endpoint_url: str
|
storage_provider: str = "local" # or 'bunny'
|
||||||
storage_access_key: str
|
storage_endpoint_url: str = ""
|
||||||
storage_secret_key: str
|
storage_access_key: str = ""
|
||||||
|
storage_secret_key: str = ""
|
||||||
storage_bucket: str = "langlearn"
|
storage_bucket: str = "langlearn"
|
||||||
|
bunny_zone: str = "languagelearningapp"
|
||||||
|
bunny_api_key: str = ""
|
||||||
|
bunny_cdn_base_url: str = ""
|
||||||
|
bunny_token_auth_key: str = ""
|
||||||
|
bunny_storage_endpoint: str = "https://storage.bunnycdn.com"
|
||||||
stub_generation: bool = False
|
stub_generation: bool = False
|
||||||
|
|
||||||
model_config = {"env_file": ".env"}
|
model_config = {"env_file": ".env"}
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ from ...outbound.postgres.repositories.adventure_repository import (
|
||||||
PostgresAdventureRepository,
|
PostgresAdventureRepository,
|
||||||
)
|
)
|
||||||
from ...outbound.spacy.spacy_client import SpacyClient
|
from ...outbound.spacy.spacy_client import SpacyClient
|
||||||
from ...storage import upload_audio
|
from ...outbound.storage_client import get_storage_client
|
||||||
from ..models.adventure import (
|
from ..models.adventure import (
|
||||||
Adventure,
|
Adventure,
|
||||||
AdventureEntry,
|
AdventureEntry,
|
||||||
|
|
@ -315,7 +315,7 @@ class AdventureService:
|
||||||
# ── File upload ───────────────────────────────────────────────────
|
# ── File upload ───────────────────────────────────────────────────
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
audio_key = f"adventure-audio/{entry_id}.wav"
|
audio_key = f"adventure-audio/{entry_id}.wav"
|
||||||
upload_audio(audio_key, wav_bytes)
|
get_storage_client().upload(audio_key, wav_bytes)
|
||||||
timing_file_uploading = time.monotonic() - t0
|
timing_file_uploading = time.monotonic() - t0
|
||||||
|
|
||||||
await self.audio_repo.create(
|
await self.audio_repo.create(
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
|
||||||
from ...outbound.deepl.deepl_client import DeepLClient
|
from ...outbound.deepl.deepl_client import DeepLClient
|
||||||
from ...outbound.gemini.gemini_client import GeminiClient
|
from ...outbound.gemini.gemini_client import GeminiClient
|
||||||
from ...outbound.spacy.spacy_client import SpacyClient
|
from ...outbound.spacy.spacy_client import SpacyClient
|
||||||
from ...storage import upload_audio
|
from ...outbound.storage_client import get_storage_client
|
||||||
from ...languages import SUPPORTED_LANGUAGES
|
from ...languages import SUPPORTED_LANGUAGES
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -137,7 +137,7 @@ class SummariseService:
|
||||||
voice = self.gemini_client.get_voice_by_language(target_language)
|
voice = self.gemini_client.get_voice_by_language(target_language)
|
||||||
wav_bytes = await self.gemini_client.generate_audio(generated_text, voice)
|
wav_bytes = await self.gemini_client.generate_audio(generated_text, voice)
|
||||||
audio_key = f"audio/{job_id}.wav"
|
audio_key = f"audio/{job_id}.wav"
|
||||||
upload_audio(audio_key, wav_bytes)
|
get_storage_client().upload(audio_key, wav_bytes)
|
||||||
|
|
||||||
transcript = await self.deepgram_client.transcribe_bytes(wav_bytes, target_language)
|
transcript = await self.deepgram_client.transcribe_bytes(wav_bytes, target_language)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,13 +8,13 @@ from .routers.api import jobs
|
||||||
from .routers import media as media_router
|
from .routers import media as media_router
|
||||||
from .routers.api.main import api_router
|
from .routers.api.main import api_router
|
||||||
from .routers.bff.main import bff_router
|
from .routers.bff.main import bff_router
|
||||||
from .storage import ensure_bucket_exists
|
from .outbound.storage_factory import init_storage
|
||||||
from . import worker
|
from . import worker
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
ensure_bucket_exists()
|
init_storage()
|
||||||
worker_task = asyncio.create_task(worker.worker_loop())
|
worker_task = asyncio.create_task(worker.worker_loop())
|
||||||
yield
|
yield
|
||||||
worker_task.cancel()
|
worker_task.cancel()
|
||||||
|
|
|
||||||
0
api/app/outbound/bunny/__init__.py
Normal file
0
api/app/outbound/bunny/__init__.py
Normal file
77
api/app/outbound/bunny/bunny_client.py
Normal file
77
api/app/outbound/bunny/bunny_client.py
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
# Lifetime, in seconds, of the signed CDN URLs produced by BunnyClient.get_url().
_SIGNED_URL_EXPIRY_SECONDS = 3600

# Default upper bound, in seconds, for one HTTP call to the storage endpoint.
# Without a timeout a stalled connection would block the caller forever.
_DEFAULT_TIMEOUT_SECONDS = 30.0


class BunnyClient:
    """Storage adapter that writes objects to a Bunny storage zone over HTTP
    and builds CDN URLs for reading them back.

    Paths are opaque keys chosen by the caller; a leading ``/`` is ignored.
    """

    def __init__(
        self,
        zone: str,
        api_key: str,
        cdn_base_url: str,
        token_auth_key: str,
        storage_endpoint: str = "https://storage.bunnycdn.com",
        timeout: float = _DEFAULT_TIMEOUT_SECONDS,
    ) -> None:
        """Store the connection settings; no network traffic happens here.

        Args:
            zone: Storage zone name, used as the first path segment of
                every storage URL.
            api_key: Value sent in the ``AccessKey`` header on upload/delete.
            cdn_base_url: Base URL prepended to object paths when building
                delivery URLs (a trailing ``/`` is dropped).
            token_auth_key: Secret mixed into the signed-URL token.
            storage_endpoint: Base URL of the storage HTTP API (a trailing
                ``/`` is dropped).
            timeout: Seconds to wait on each storage HTTP call.
        """
        self._zone = zone
        self._api_key = api_key
        self._cdn_base_url = cdn_base_url.rstrip("/")
        self._token_auth_key = token_auth_key
        self._storage_endpoint = storage_endpoint.rstrip("/")
        self._timeout = timeout

    def _storage_url(self, path: str) -> str:
        """Return the storage API URL for *path* inside this zone."""
        return f"{self._storage_endpoint}/{self._zone}/{path.lstrip('/')}"

    def _send(self, req: urllib.request.Request, expected_status: int) -> bool:
        """Perform *req* and report whether the response status matched.

        HTTP error responses are turned into ``False``. Connection failures
        and timeouts are not caught and propagate to the caller.
        """
        try:
            with urllib.request.urlopen(req, timeout=self._timeout) as resp:
                return resp.status == expected_status
        except urllib.error.HTTPError as exc:
            # HTTPError wraps the error response; close it so the underlying
            # connection is released instead of waiting for garbage collection.
            exc.close()
            return False

    def upload(self, path: str, data: bytes, content_type: str = "audio/wav") -> bool:
        """PUT *data* at *path*.

        Args:
            path: Object key inside the zone.
            data: Raw bytes to store.
            content_type: Value of the ``Content-Type`` request header.

        Returns:
            ``True`` only when the endpoint answers 201; ``False`` for any
            other status or an HTTP error response. Callers must check the
            result — a failed upload does not raise.
        """
        req = urllib.request.Request(
            self._storage_url(path),
            data=data,
            method="PUT",
            headers={
                "AccessKey": self._api_key,
                "Content-Type": content_type,
            },
        )
        return self._send(req, 201)

    def get_url(self, path: str) -> str:
        """Return a signed, time-limited CDN URL for *path*.

        The token is the URL-safe, unpadded base64 encoding of
        ``SHA256(token_auth_key + url_path + expires)`` where ``expires`` is
        the Unix time ``_SIGNED_URL_EXPIRY_SECONDS`` from now.
        """
        url_path = f"/{path.lstrip('/')}"
        expiration = int(time.time()) + _SIGNED_URL_EXPIRY_SECONDS
        digest = hashlib.sha256(
            (self._token_auth_key + url_path + str(expiration)).encode()
        ).digest()
        # urlsafe_b64encode performs the "+" -> "-" and "/" -> "_" mapping;
        # "=" can only occur as trailing padding, so rstrip removes all of it.
        token = base64.urlsafe_b64encode(digest).decode().rstrip("=")
        return f"{self._cdn_base_url}{url_path}?token={token}&expires={expiration}"

    def get_public_url(self, path: str) -> str:
        """Return the unsigned CDN URL for *path*."""
        return f"{self._cdn_base_url}/{path.lstrip('/')}"

    def delete(self, path: str) -> bool:
        """DELETE the object at *path*.

        Returns:
            ``True`` only when the endpoint answers 200; ``False`` for any
            other status or an HTTP error response.
        """
        req = urllib.request.Request(
            self._storage_url(path),
            method="DELETE",
            headers={"AccessKey": self._api_key},
        )
        return self._send(req, 200)

    def download(self, path: str) -> tuple[bytes, str]:
        """Not supported by this adapter.

        Raises:
            NotImplementedError: always; use get_url() instead.
        """
        raise NotImplementedError(
            "Direct download not available with Bunny — use get_url() to obtain a signed CDN URL"
        )
|
||||||
|
|
@ -1,9 +1,19 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import io
|
||||||
|
import wave
|
||||||
|
|
||||||
from google import genai
|
from google import genai
|
||||||
from google.genai import types as genai_types
|
from google.genai import types as genai_types
|
||||||
|
|
||||||
from ...storage import pcm_to_wav
|
|
||||||
|
def _pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:
    """Wrap raw PCM samples in a WAV container and return the file bytes.

    The header is written for one channel and a 2-byte sample width at
    *sample_rate* frames per second.
    """
    container = io.BytesIO()
    writer = wave.open(container, "wb")
    try:
        writer.setnchannels(1)
        writer.setsampwidth(2)
        writer.setframerate(sample_rate)
        writer.writeframes(pcm_data)
    finally:
        # Closing finalises the header sizes; must happen before getvalue().
        writer.close()
    return container.getvalue()
|
||||||
|
|
||||||
VOICE_BY_LANGUAGE: dict[str, str] = {
|
VOICE_BY_LANGUAGE: dict[str, str] = {
|
||||||
"fr": "Kore",
|
"fr": "Kore",
|
||||||
|
|
@ -47,6 +57,6 @@ class GeminiClient():
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
pcm_data = response.candidates[0].content.parts[0].inline_data.data
|
pcm_data = response.candidates[0].content.parts[0].inline_data.data
|
||||||
return pcm_to_wav(pcm_data)
|
return _pcm_to_wav(pcm_data)
|
||||||
|
|
||||||
return await asyncio.to_thread(_call)
|
return await asyncio.to_thread(_call)
|
||||||
|
|
|
||||||
0
api/app/outbound/minio/__init__.py
Normal file
0
api/app/outbound/minio/__init__.py
Normal file
70
api/app/outbound/minio/minio_client.py
Normal file
70
api/app/outbound/minio/minio_client.py
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
import boto3
|
||||||
|
from botocore.exceptions import ClientError
|
||||||
|
|
||||||
|
|
||||||
|
class MinioClient:
    """Storage adapter for an S3-compatible endpoint, accessed through boto3.

    Objects are read back through this service's own ``/media/...`` routes,
    so the URLs it hands out are built from ``api_base_url`` rather than from
    the storage endpoint.
    """

    def __init__(
        self,
        endpoint_url: str,
        access_key: str,
        secret_key: str,
        bucket: str,
        api_base_url: str,
    ) -> None:
        """Store the connection settings; the boto3 client is created lazily.

        Args:
            endpoint_url: URL of the S3-compatible endpoint.
            access_key: Access key id passed to boto3.
            secret_key: Secret access key passed to boto3.
            bucket: Bucket every operation targets.
            api_base_url: Base URL of this API, used to build media URLs
                (a trailing ``/`` is dropped).
        """
        self._endpoint_url = endpoint_url
        self._access_key = access_key
        self._secret_key = secret_key
        self._bucket = bucket
        self._api_base_url = api_base_url.rstrip("/")
        # Built on first use and then reused: constructing a boto3 client on
        # every upload/download is needless repeated work.
        self._client = None

    def _s3(self):
        """Return the shared boto3 S3 client, creating it on first call."""
        if self._client is None:
            self._client = boto3.client(
                "s3",
                endpoint_url=self._endpoint_url,
                aws_access_key_id=self._access_key,
                aws_secret_access_key=self._secret_key,
            )
        return self._client

    def _media_url(self, path: str) -> str:
        """Return the ``/media/`` URL on this API for *path*."""
        return f"{self._api_base_url}/media/{path}"

    def ensure_bucket_exists(self) -> None:
        """Create the configured bucket when a HEAD request reports it missing.

        Raises:
            ClientError: for any HEAD failure other than "404"/"NoSuchBucket".
        """
        client = self._s3()
        try:
            client.head_bucket(Bucket=self._bucket)
        except ClientError as e:
            if e.response["Error"]["Code"] in ("404", "NoSuchBucket"):
                client.create_bucket(Bucket=self._bucket)
            else:
                raise

    def upload(self, path: str, data: bytes, content_type: str = "audio/wav") -> bool:
        """Store *data* under key *path*.

        Args:
            path: Object key inside the bucket.
            data: Raw bytes to store.
            content_type: Content type recorded on the object.

        Returns:
            ``True`` on success, ``False`` when boto3 raises ``ClientError``.
            Callers must check the result — a failed upload does not raise.
        """
        try:
            self._s3().put_object(
                Bucket=self._bucket,
                Key=path,
                Body=data,
                ContentType=content_type,
            )
            return True
        except ClientError:
            return False

    def get_url(self, path: str) -> str:
        """Return the URL clients should use to fetch *path*."""
        return self._media_url(path)

    def get_public_url(self, path: str) -> str:
        """Return the public URL for *path* (currently the same as get_url)."""
        return self._media_url(path)

    def delete(self, path: str) -> bool:
        """Delete key *path*; ``False`` when boto3 raises ``ClientError``."""
        try:
            self._s3().delete_object(Bucket=self._bucket, Key=path)
            return True
        except ClientError:
            return False

    def download(self, path: str) -> tuple[bytes, str]:
        """Fetch the object stored under *path*.

        Returns:
            ``(body_bytes, content_type)``; the content type falls back to
            ``"audio/wav"`` when the response carries none.

        Raises:
            FileNotFoundError: the key does not exist ("NoSuchKey"/"404").
            ClientError: any other storage failure.
        """
        try:
            response = self._s3().get_object(Bucket=self._bucket, Key=path)
            return response["Body"].read(), response.get("ContentType", "audio/wav")
        except ClientError as e:
            if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
                # Keep the storage error as __cause__ for debugging.
                raise FileNotFoundError(path) from e
            raise
|
||||||
21
api/app/outbound/storage_client.py
Normal file
21
api/app/outbound/storage_client.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
from typing import Protocol
|
||||||
|
|
||||||
|
# Process-wide storage client. Stays None until _set_storage_client() is
# called (init_storage() does this at application startup).
_client: "StorageClient | None" = None


class StorageClient(Protocol):
    """Structural interface every storage backend implements.

    Paths are opaque object keys chosen by the caller.
    """

    def upload(self, path: str, data: bytes) -> bool:
        """Store *data* under *path*; return whether the write succeeded."""
        ...

    def get_url(self, path: str) -> str:
        """Return the URL handed to clients for fetching *path*."""
        ...

    def get_public_url(self, path: str) -> str:
        """Return an unsigned URL for *path*."""
        ...

    def delete(self, path: str) -> bool:
        """Remove *path*; return whether the delete succeeded."""
        ...

    def download(self, path: str) -> tuple[bytes, str]:
        """Return ``(content_bytes, content_type)`` for *path*.

        Implementations raise ``FileNotFoundError`` for a missing key, or
        ``NotImplementedError`` when the backend has no direct download.
        """
        ...


def get_storage_client() -> "StorageClient":
    """Return the storage client installed at startup.

    Raises:
        RuntimeError: no client has been installed yet. An explicit raise is
            used instead of ``assert`` because asserts are removed under
            ``python -O``, which would let ``None`` escape to callers.
    """
    if _client is None:
        raise RuntimeError("Storage client not initialised — call init_storage() at startup")
    return _client


def _set_storage_client(c: "StorageClient") -> None:
    """Install *c* as the process-wide storage client."""
    global _client
    _client = c
|
||||||
27
api/app/outbound/storage_factory.py
Normal file
27
api/app/outbound/storage_factory.py
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
from ..config import settings
|
||||||
|
from .storage_client import StorageClient, _set_storage_client
|
||||||
|
from .minio.minio_client import MinioClient
|
||||||
|
from .bunny.bunny_client import BunnyClient
|
||||||
|
|
||||||
|
|
||||||
|
def init_storage() -> None:
    """Build the storage client selected by ``settings.storage_provider`` and
    install it as the process-wide client.

    ``"bunny"`` selects BunnyClient; any other value selects MinioClient,
    whose bucket is created if missing before the client is installed.

    Raises:
        RuntimeError: ``storage_provider`` is ``"bunny"`` but one of the
            Bunny settings is empty. These settings default to ``""``, so
            without this check the app would start and then hand out
            relative or trivially forgeable URLs.
    """
    client: StorageClient
    if settings.storage_provider == "bunny":
        required = {
            "BUNNY_ZONE": settings.bunny_zone,
            "BUNNY_API_KEY": settings.bunny_api_key,
            "BUNNY_CDN_BASE_URL": settings.bunny_cdn_base_url,
            "BUNNY_TOKEN_AUTH_KEY": settings.bunny_token_auth_key,
        }
        missing = [name for name, value in required.items() if not value]
        if missing:
            raise RuntimeError(
                "STORAGE_PROVIDER=bunny requires non-empty settings: " + ", ".join(missing)
            )
        client = BunnyClient(
            zone=settings.bunny_zone,
            api_key=settings.bunny_api_key,
            cdn_base_url=settings.bunny_cdn_base_url,
            token_auth_key=settings.bunny_token_auth_key,
            storage_endpoint=settings.bunny_storage_endpoint,
        )
    else:
        minio = MinioClient(
            endpoint_url=settings.storage_endpoint_url,
            access_key=settings.storage_access_key,
            secret_key=settings.storage_secret_key,
            bucket=settings.storage_bucket,
            api_base_url=settings.api_base_url,
        )
        # Done before installing the client so a misconfigured local store
        # fails at startup rather than on the first upload.
        minio.ensure_bucket_exists()
        client = minio
    _set_storage_client(client)
|
||||||
|
|
@ -12,7 +12,7 @@ from ...outbound.postgres.repositories import summarise_job_repository
|
||||||
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
||||||
from ...outbound.postgres.entities.translated_article_entity import TranslatedArticleEntity
|
from ...outbound.postgres.entities.translated_article_entity import TranslatedArticleEntity
|
||||||
from ...outbound.gemini.gemini_client import GeminiClient
|
from ...outbound.gemini.gemini_client import GeminiClient
|
||||||
from ...storage import upload_audio
|
from ...outbound.storage_client import get_storage_client
|
||||||
from ...config import settings
|
from ...config import settings
|
||||||
from ... import worker
|
from ... import worker
|
||||||
|
|
||||||
|
|
@ -92,7 +92,7 @@ async def _run_regenerate_audio(job_id: uuid.UUID) -> None:
|
||||||
voice = gemini_client.get_voice_by_language(article_entity.target_language)
|
voice = gemini_client.get_voice_by_language(article_entity.target_language)
|
||||||
wav_bytes = await gemini_client.generate_audio(article_entity.target_body, voice)
|
wav_bytes = await gemini_client.generate_audio(article_entity.target_body, voice)
|
||||||
audio_key = f"audio/{job_id}.wav"
|
audio_key = f"audio/{job_id}.wav"
|
||||||
upload_audio(audio_key, wav_bytes)
|
get_storage_client().upload(audio_key, wav_bytes)
|
||||||
|
|
||||||
await article_repo.update_audio(
|
await article_repo.update_audio(
|
||||||
article_entity.id,
|
article_entity.id,
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,8 @@ from pydantic import BaseModel
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from ...auth import verify_token
|
from ...auth import verify_token
|
||||||
from ...config import settings
|
|
||||||
from ...outbound.postgres.database import get_db
|
from ...outbound.postgres.database import get_db
|
||||||
|
from ...outbound.storage_client import get_storage_client
|
||||||
from ...outbound.postgres.repositories.adventure_repository import (
|
from ...outbound.postgres.repositories.adventure_repository import (
|
||||||
PostgresAdventureEntryAudioRepository,
|
PostgresAdventureEntryAudioRepository,
|
||||||
PostgresAdventureEntryChoiceRepository,
|
PostgresAdventureEntryChoiceRepository,
|
||||||
|
|
@ -61,7 +61,7 @@ class AdventureDetailResponse(BaseModel):
|
||||||
def _audio_url(key: str | None) -> str | None:
|
def _audio_url(key: str | None) -> str | None:
|
||||||
if key is None:
|
if key is None:
|
||||||
return None
|
return None
|
||||||
return f"{settings.api_base_url}/media/{key}"
|
return get_storage_client().get_url(key)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{adventure_id}", response_model=AdventureDetailResponse, status_code=200)
|
@router.get("/{adventure_id}", response_model=AdventureDetailResponse, status_code=200)
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,8 @@ from pydantic import BaseModel
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from ...auth import verify_token
|
from ...auth import verify_token
|
||||||
from ...config import settings
|
|
||||||
from ...outbound.postgres.database import get_db
|
from ...outbound.postgres.database import get_db
|
||||||
|
from ...outbound.storage_client import get_storage_client
|
||||||
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
from ...outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
||||||
|
|
||||||
router = APIRouter(prefix="/articles", tags=["bff", "articles"])
|
router = APIRouter(prefix="/articles", tags=["bff", "articles"])
|
||||||
|
|
@ -46,7 +46,7 @@ class ArticleDetail(BaseModel):
|
||||||
def _audio_url(key: str | None) -> str | None:
|
def _audio_url(key: str | None) -> str | None:
|
||||||
if key is None:
|
if key is None:
|
||||||
return None
|
return None
|
||||||
return f"{settings.api_base_url}/media/{key}"
|
return get_storage_client().get_url(key)
|
||||||
|
|
||||||
|
|
||||||
@router.get("", response_model=ArticleListResponse, status_code=200)
|
@router.get("", response_model=ArticleListResponse, status_code=200)
|
||||||
|
|
|
||||||
|
|
@ -3,12 +3,11 @@ import uuid
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
from fastapi.responses import Response
|
from fastapi.responses import Response
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from botocore.exceptions import ClientError
|
|
||||||
|
|
||||||
from ..outbound.postgres.database import get_db
|
from ..outbound.postgres.database import get_db
|
||||||
from ..outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
from ..outbound.postgres.repositories.translated_article_repository import TranslatedArticleRepository
|
||||||
from ..outbound.postgres.repositories.adventure_repository import PostgresAdventureEntryAudioRepository
|
from ..outbound.postgres.repositories.adventure_repository import PostgresAdventureEntryAudioRepository
|
||||||
from ..storage import download_audio
|
from ..outbound.storage_client import get_storage_client
|
||||||
|
|
||||||
router = APIRouter(prefix="/media", tags=["media"])
|
router = APIRouter(prefix="/media", tags=["media"])
|
||||||
|
|
||||||
|
|
@ -22,22 +21,24 @@ async def get_adventure_audio_file(
|
||||||
eid = uuid.UUID(filename.rsplit(".", 1)[0])
|
eid = uuid.UUID(filename.rsplit(".", 1)[0])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise HTTPException(status_code=400, detail="Invalid file ID")
|
raise HTTPException(status_code=400, detail="Invalid file ID")
|
||||||
|
|
||||||
print(f"Looking for adventure audio with entry ID: {eid}")
|
|
||||||
|
|
||||||
adventure_audio = await PostgresAdventureEntryAudioRepository(db).get_for_entry(entry_id=eid, component_type="story_text")
|
adventure_audio = await PostgresAdventureEntryAudioRepository(db).get_for_entry(entry_id=eid, component_type="story_text")
|
||||||
|
|
||||||
if adventure_audio is None:
|
if adventure_audio is None:
|
||||||
raise HTTPException(status_code=404, detail="File not found")
|
raise HTTPException(status_code=404, detail="File not found")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
audio_bytes, content_type = download_audio("adventure-audio/" + filename)
|
audio_bytes, content_type = get_storage_client().download("adventure-audio/" + filename)
|
||||||
except ClientError as e:
|
except FileNotFoundError:
|
||||||
if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
|
raise HTTPException(status_code=404, detail="File not found")
|
||||||
raise HTTPException(status_code=404, detail="File not found")
|
except NotImplementedError:
|
||||||
|
raise HTTPException(status_code=501, detail="Media proxy not available with current storage provider")
|
||||||
|
except Exception:
|
||||||
raise HTTPException(status_code=500, detail="Storage error")
|
raise HTTPException(status_code=500, detail="Storage error")
|
||||||
|
|
||||||
return Response(content=audio_bytes, media_type=content_type)
|
return Response(content=audio_bytes, media_type=content_type)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{filename:path}")
|
@router.get("/{filename:path}")
|
||||||
async def get_media_file(
|
async def get_media_file(
|
||||||
filename: str,
|
filename: str,
|
||||||
|
|
@ -49,11 +50,12 @@ async def get_media_file(
|
||||||
raise HTTPException(status_code=404, detail="File not found")
|
raise HTTPException(status_code=404, detail="File not found")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
audio_bytes, content_type = download_audio(filename)
|
audio_bytes, content_type = get_storage_client().download(filename)
|
||||||
except ClientError as e:
|
except FileNotFoundError:
|
||||||
if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
|
raise HTTPException(status_code=404, detail="File not found")
|
||||||
raise HTTPException(status_code=404, detail="File not found")
|
except NotImplementedError:
|
||||||
|
raise HTTPException(status_code=501, detail="Media proxy not available with current storage provider")
|
||||||
|
except Exception:
|
||||||
raise HTTPException(status_code=500, detail="Storage error")
|
raise HTTPException(status_code=500, detail="Storage error")
|
||||||
|
|
||||||
return Response(content=audio_bytes, media_type=content_type)
|
return Response(content=audio_bytes, media_type=content_type)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,56 +0,0 @@
|
||||||
import io
|
|
||||||
import wave
|
|
||||||
|
|
||||||
import boto3
|
|
||||||
from botocore.exceptions import ClientError
|
|
||||||
|
|
||||||
from .config import settings
|
|
||||||
|
|
||||||
|
|
||||||
def get_s3_client():
|
|
||||||
return boto3.client(
|
|
||||||
"s3",
|
|
||||||
endpoint_url=settings.storage_endpoint_url,
|
|
||||||
aws_access_key_id=settings.storage_access_key,
|
|
||||||
aws_secret_access_key=settings.storage_secret_key,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_bucket_exists() -> None:
|
|
||||||
client = get_s3_client()
|
|
||||||
try:
|
|
||||||
client.head_bucket(Bucket=settings.storage_bucket)
|
|
||||||
except ClientError as e:
|
|
||||||
if e.response["Error"]["Code"] in ("404", "NoSuchBucket"):
|
|
||||||
client.create_bucket(Bucket=settings.storage_bucket)
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
def pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:
|
|
||||||
"""Wrap raw 16-bit mono PCM data in a WAV container."""
|
|
||||||
buf = io.BytesIO()
|
|
||||||
with wave.open(buf, "wb") as wf:
|
|
||||||
wf.setnchannels(1)
|
|
||||||
wf.setsampwidth(2) # 16-bit
|
|
||||||
wf.setframerate(sample_rate)
|
|
||||||
wf.writeframes(pcm_data)
|
|
||||||
return buf.getvalue()
|
|
||||||
|
|
||||||
|
|
||||||
def upload_audio(object_key: str, audio_bytes: bytes, content_type: str = "audio/wav") -> None:
|
|
||||||
client = get_s3_client()
|
|
||||||
client.put_object(
|
|
||||||
Bucket=settings.storage_bucket,
|
|
||||||
Key=object_key,
|
|
||||||
Body=audio_bytes,
|
|
||||||
ContentType=content_type,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def download_audio(object_key: str) -> tuple[bytes, str]:
|
|
||||||
"""Return (file_bytes, content_type)."""
|
|
||||||
client = get_s3_client()
|
|
||||||
response = client.get_object(Bucket=settings.storage_bucket, Key=object_key)
|
|
||||||
content_type = response.get("ContentType", "audio/wav")
|
|
||||||
return response["Body"].read(), content_type
|
|
||||||
82
api/docs/design-doc-object-storage.md
Normal file
82
api/docs/design-doc-object-storage.md
Normal file
|
|
@ -0,0 +1,82 @@
|
||||||
|
# Design Document: Object Storage with Bunny CDN
|
||||||
|
|
||||||
|
This is a technical design document for implementing object (e.g. audio file) storage with Bunny CDN. This directory (`api/docs`) contains other similar files, notably `architecture.md` and `domain.md`. When you have worked through the change described here, please update `architecture.md`.
|
||||||
|
|
||||||
|
## The problem
|
||||||
|
|
||||||
|
Language Learning App has audio as a core component, which requires files to be delivered to the end user. When developing locally, these files have been stored in a MinIO service, mimicking an S3-like storage bucket.
|
||||||
|
|
||||||
|
Using this approach on a deployed instance (e.g. on a VPS using Docker) would result in high bandwidth and therefore a high cost. Using a dedicated, EU-based service like Bunny allows us to offload the delivery of content to a third party at reduced cost (great!)
|
||||||
|
|
||||||
|
## The current implementation
|
||||||
|
|
||||||
|
Object storage was one of the first features built into this software in its MVP state; as such, it does not fit within the current architecture.
|
||||||
|
|
||||||
|
Right now `api/app/storage.py` contains some helper functions, notably the `upload_audio` and `download_audio` functions.
|
||||||
|
|
||||||
|
Users (through the web client) retrieve the media through two URLs (detailed in `api/app/routers/media.py`):
|
||||||
|
|
||||||
|
- `GET /media/adventure-audio/{filename:path}` for the choose-your-own-adventure file names
|
||||||
|
- `GET /media/{filename:path}`, used for the summary transcriptions
|
||||||
|
|
||||||
|
## The solution
|
||||||
|
|
||||||
|
We are going to use Bunny (bunny.net) as the CDN for all objects in deployed environments (right now, just production — in the future preprod or staging may exist).
|
||||||
|
|
||||||
|
Locally, for development purposes, we retain the use of MinIO. To decide which backend to use, we introduce an environment variable `STORAGE_PROVIDER` with a default value of `local` and an accepted alternative of `bunny`.
|
||||||
|
|
||||||
|
In situations where we use `local`, the existing `/media/..` proxy endpoints are returned when constructing audio URLs (e.g. in `api/app/routers/bff/articles.py` and `api/app/routers/bff/adventure.py`). When we use `bunny`, the Bunny CDN URL is returned directly so the request is never proxied through our service.
|
||||||
|
|
||||||
|
### Client interface
|
||||||
|
|
||||||
|
We will create a `BunnyClient` in `api/app/outbound/bunny/bunny_client.py` and extract the current MinIO logic into a `MinioClient` in `api/app/outbound/minio/minio_client.py`. Both implement a shared `StorageClient` protocol.
|
||||||
|
|
||||||
|
The interface is **generic** — the clients are storage adapters and must not encode domain concepts. Path construction (which directory, which filename) is the responsibility of the caller (the service layer), not the client.
|
||||||
|
|
||||||
|
```python
|
||||||
|
class StorageClient(Protocol):
|
||||||
|
def upload(self, path: str, data: bytes) -> bool: ...
|
||||||
|
def get_url(self, path: str) -> str: ...
|
||||||
|
def delete(self, path: str) -> bool: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
Services construct paths using hardcoded directory prefixes (e.g. `"adventure-audio/"`, `"audio/"`). These are constants, not environment variables — they are not environment-specific and do not belong in config.
|
||||||
|
|
||||||
|
### Factory and instantiation
|
||||||
|
|
||||||
|
A factory function reads `STORAGE_PROVIDER` and returns the appropriate `StorageClient` implementation. The client is instantiated **once at app startup** (e.g. in `main.py`) as a module-level singleton — not per-request. This is consistent with how other outbound clients (`AnthropicClient`, `GeminiClient`, etc.) are handled.
|
||||||
|
|
||||||
|
### Bunny configuration
|
||||||
|
|
||||||
|
Bunny requires the following environment variables:
|
||||||
|
|
||||||
|
- `BUNNY_ZONE` — the storage zone name (the zone `languagelearningapp` has been created in the Bunny UI). No "DEFAULT" suffix; there is one zone.
|
||||||
|
- `BUNNY_API_KEY` — the Bunny API key for upload/delete operations.
|
||||||
|
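- `BUNNY_CDN_BASE_URL` — the public CDN hostname used to construct delivery URLs.
- `BUNNY_TOKEN_AUTH_KEY` — the token authentication key used to sign delivery URLs (see "Signed vs. public URLs" below).
- `BUNNY_STORAGE_ENDPOINT` — optional; the storage API base URL, defaulting to `https://storage.bunnycdn.com`.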
|
||||||
|
|
||||||
|
### Signed vs. public URLs
|
||||||
|
|
||||||
|
Because audio files are user-specific (i.e. one user should not be able to use another user's audio URL), Bunny signed URLs are required. Public CDN URLs are shareable by anyone who has the link.
|
||||||
|
|
||||||
|
Bunny's own documentation recommends the `token.py` package:
|
||||||
|
|
||||||
|
```py
|
||||||
|
from token import sign_url
|
||||||
|
|
||||||
|
url = sign_url(
|
||||||
|
"https://myzone.b-cdn.net/videos/stream1/playlist.m3u8",
|
||||||
|
"your-security-key",
|
||||||
|
expiration_time=3600,
|
||||||
|
is_directory=True,
|
||||||
|
path_allowed="/videos/stream1/",
|
||||||
|
countries_allowed="GB",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
`get_url(path)` on the `BunnyClient` must generate a time-limited (pick a sensible default for audio content here) signed URL using the Bunny Token Authentication feature. The MinIO implementation would use pre-signed S3 URLs for consistency.
|
||||||
|
|
||||||
|
Create a sibling method that explicitly creates public URLs for any future public content; call this `get_public_url`.
|
||||||
|
|
||||||
|
### Misc
|
||||||
|
|
||||||
|
`pcm_to_wav()` currently lives in `api/app/storage.py` but is a Gemini output concern. Move it to the Gemini client module (`api/app/outbound/gemini/`) when carrying out this refactor.
|
||||||
0
content/choose-your-own-adventure/README.md
Normal file
0
content/choose-your-own-adventure/README.md
Normal file
80
docker-compose-dev.yml
Normal file
80
docker-compose-dev.yml
Normal file
|
|
@ -0,0 +1,80 @@
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: ${POSTGRES_USER:-langlearn}
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||||
|
POSTGRES_DB: ${POSTGRES_DB:-langlearn}
|
||||||
|
volumes:
|
||||||
|
- pgdata:/var/lib/postgresql/data
|
||||||
|
ports:
|
||||||
|
- "5432:5432"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-langlearn}"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 10
|
||||||
|
|
||||||
|
storage:
|
||||||
|
image: minio/minio:latest
|
||||||
|
command: server /data --console-address ":9001"
|
||||||
|
environment:
|
||||||
|
MINIO_ROOT_USER: ${STORAGE_ACCESS_KEY:-langlearn}
|
||||||
|
MINIO_ROOT_PASSWORD: ${STORAGE_SECRET_KEY}
|
||||||
|
ports:
|
||||||
|
- "9000:9000"
|
||||||
|
- "9001:9001"
|
||||||
|
volumes:
|
||||||
|
- storagedata:/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "curl -sf http://localhost:9000/minio/health/live || exit 1"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 10
|
||||||
|
|
||||||
|
api:
|
||||||
|
build: ./api
|
||||||
|
volumes:
|
||||||
|
- ./api:/app:z
|
||||||
|
ports:
|
||||||
|
- "${API_PORT:-8000}:8000"
|
||||||
|
command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn}
|
||||||
|
ADMIN_USER_EMAILS: ${ADMIN_USER_EMAILS:-wilson@thomaswilson.xyz}
|
||||||
|
API_BASE_URL: ${API_BASE_URL:-http://localhost:8000}
|
||||||
|
JWT_SECRET: ${JWT_SECRET}
|
||||||
|
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
|
||||||
|
DEEPL_API_KEY: ${DEEPL_API_KEY}
|
||||||
|
DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY}
|
||||||
|
GEMINI_API_KEY: ${GEMINI_API_KEY}
|
||||||
|
PYTHONPATH: /app
|
||||||
|
STORAGE_PROVIDER: local
|
||||||
|
STORAGE_ENDPOINT_URL: http://storage:9000
|
||||||
|
STORAGE_ACCESS_KEY: ${STORAGE_ACCESS_KEY:-langlearn}
|
||||||
|
STORAGE_SECRET_KEY: ${STORAGE_SECRET_KEY}
|
||||||
|
STORAGE_BUCKET: ${STORAGE_BUCKET:-langlearn}
|
||||||
|
TRANSACTIONAL_EMAIL_PROVIDER: ${TRANSACTIONAL_EMAIL_PROVIDER:-stub}
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
storage:
|
||||||
|
condition: service_healthy
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
frontend:
|
||||||
|
build:
|
||||||
|
context: ./frontend
|
||||||
|
args:
|
||||||
|
PUBLIC_API_BASE_URL: ${PUBLIC_API_BASE_URL:-http://localhost:8000}
|
||||||
|
ports:
|
||||||
|
- "${FRONTEND_PORT:-3000}:3000"
|
||||||
|
environment:
|
||||||
|
ORIGIN: ${ORIGIN:-http://localhost:3000}
|
||||||
|
depends_on:
|
||||||
|
- api
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
pgdata:
|
||||||
|
storagedata:
|
||||||
83
docker-compose-prod.yml
Normal file
83
docker-compose-prod.yml
Normal file
|
|
@ -0,0 +1,83 @@
|
||||||
|
# Production stack: Postgres, the API (Bunny storage backend) and the frontend.
# Variables written as ${VAR:?message} are mandatory: `docker compose` aborts
# with that message when they are unset or empty, instead of silently
# starting containers with blank secrets.
services:
  db:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-langlearn}
      # Required; also interpolated into the api service's DATABASE_URL below.
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}
      POSTGRES_DB: ${POSTGRES_DB:-langlearn}
    volumes:
      - pgdata:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-langlearn}"]
      interval: 10s
      timeout: 5s
      retries: 10
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G

  api:
    build: ./api
    ports:
      - "${API_PORT:-8000}:8000"
    # No --reload and no source bind mount in production; two worker processes.
    command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 2
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn}
      ADMIN_USER_EMAILS: ${ADMIN_USER_EMAILS}
      API_BASE_URL: ${API_BASE_URL}
      # Required: an empty signing secret must never reach production.
      JWT_SECRET: ${JWT_SECRET:?JWT_SECRET must be set}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      DEEPL_API_KEY: ${DEEPL_API_KEY}
      DEEPGRAM_API_KEY: ${DEEPGRAM_API_KEY}
      GEMINI_API_KEY: ${GEMINI_API_KEY}
      PYTHONPATH: /app
      STORAGE_PROVIDER: bunny
      # Fall back to the zone name the application itself defaults to; a bare
      # ${BUNNY_ZONE} would pass an empty string when unset and override it.
      BUNNY_ZONE: ${BUNNY_ZONE:-languagelearningapp}
      # Required because STORAGE_PROVIDER is hard-wired to bunny here.
      BUNNY_API_KEY: ${BUNNY_API_KEY:?BUNNY_API_KEY must be set when STORAGE_PROVIDER is bunny}
      BUNNY_CDN_BASE_URL: ${BUNNY_CDN_BASE_URL}
      BUNNY_TOKEN_AUTH_KEY: ${BUNNY_TOKEN_AUTH_KEY}
      TRANSACTIONAL_EMAIL_PROVIDER: ${TRANSACTIONAL_EMAIL_PROVIDER}
    healthcheck:
      # NOTE(review): assumes the image built from ./api ships `curl` and that
      # the app serves GET /health - confirm both, otherwise this check never
      # passes and the frontend (which waits on it) will not start.
      test: ["CMD-SHELL", "curl -sf http://localhost:8000/health || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 20s
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G

  frontend:
    build:
      context: ./frontend
      args:
        # Build-time argument: changing it requires rebuilding the image.
        PUBLIC_API_BASE_URL: ${PUBLIC_API_BASE_URL}
    ports:
      - "${FRONTEND_PORT:-3000}:3000"
    environment:
      ORIGIN: ${ORIGIN}
    depends_on:
      # Start only once the API healthcheck above reports healthy.
      api:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M
|
||||||
|
|
||||||
|
# Named volume for the Postgres data directory (default driver).
volumes:
  pgdata:
|
||||||
|
|
||||||
|
# Give the default network a fixed name instead of the project-derived one.
networks:
  default:
    name: langlearn
|
||||||
Loading…
Reference in a new issue