language-learning-app/api/app/storage.py

56 lines
1.6 KiB
Python

import io
import wave
import boto3
from botocore.exceptions import ClientError
from .config import settings
def get_s3_client():
    """Build a boto3 S3 client from the storage settings.

    The endpoint URL and the access/secret key pair are read from
    ``settings``. A new client object is constructed on every call.
    """
    connection_options = {
        "endpoint_url": settings.storage_endpoint_url,
        "aws_access_key_id": settings.storage_access_key,
        "aws_secret_access_key": settings.storage_secret_key,
    }
    return boto3.client("s3", **connection_options)
def ensure_bucket_exists() -> None:
    """Create the configured storage bucket when it does not exist yet.

    The bucket named by ``settings.storage_bucket`` is probed with
    ``head_bucket``. If that fails with error code ``"404"`` or
    ``"NoSuchBucket"`` the bucket is created.

    Raises:
        ClientError: when the probe fails with any other error code; the
            original exception is re-raised unchanged.
    """
    s3 = get_s3_client()
    bucket_name = settings.storage_bucket
    try:
        s3.head_bucket(Bucket=bucket_name)
    except ClientError as err:
        error_code = err.response["Error"]["Code"]
        # Anything other than "bucket is missing" is not ours to handle.
        if error_code not in ("404", "NoSuchBucket"):
            raise
        s3.create_bucket(Bucket=bucket_name)
def pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:
    """Return *pcm_data* wrapped in a WAV container.

    The input is treated as raw 16-bit mono PCM. Only the frame rate is
    configurable; channel count (1) and sample width (2 bytes) are fixed.

    Args:
        pcm_data: Raw PCM sample bytes to embed as the WAV data chunk.
        sample_rate: Frame rate written to the WAV header, in Hz.

    Returns:
        The complete WAV file contents (header followed by the samples).
    """
    container = io.BytesIO()
    with wave.open(container, "wb") as writer:
        # (nchannels, sampwidth, framerate, nframes, comptype, compname):
        # mono, 2 bytes per sample (16-bit), uncompressed.
        writer.setparams((1, 2, sample_rate, 0, "NONE", "not compressed"))
        writer.writeframes(pcm_data)
    # Read the buffer only after the writer is closed so the header is final.
    return container.getvalue()
def upload_audio(object_key: str, audio_bytes: bytes, content_type: str = "audio/wav") -> None:
    """Store *audio_bytes* in the configured bucket under *object_key*.

    Args:
        object_key: Key to store the object under.
        audio_bytes: Object body to upload.
        content_type: Value sent as the object's ``ContentType``.
    """
    s3 = get_s3_client()
    put_request = {
        "Bucket": settings.storage_bucket,
        "Key": object_key,
        "Body": audio_bytes,
        "ContentType": content_type,
    }
    s3.put_object(**put_request)
def download_audio(object_key: str) -> tuple[bytes, str]:
    """Fetch the object stored under *object_key* from the configured bucket.

    Returns:
        A ``(file_bytes, content_type)`` tuple. ``content_type`` falls back
        to ``"audio/wav"`` when the response has no ``ContentType`` entry.
    """
    s3 = get_s3_client()
    obj = s3.get_object(Bucket=settings.storage_bucket, Key=object_key)
    mime_type = obj["ContentType"] if "ContentType" in obj else "audio/wav"
    payload = obj["Body"].read()
    return payload, mime_type