Generate the audio; move to background task
This commit is contained in:
parent
6bc1efd333
commit
afe3b63fa5
31 changed files with 813 additions and 78 deletions
|
|
@ -14,3 +14,11 @@ ANTHROPIC_API_KEY=sk-ant-...
|
||||||
|
|
||||||
# DeepL (https://www.deepl.com/pro-api)
|
# DeepL (https://www.deepl.com/pro-api)
|
||||||
DEEPL_API_KEY=your-deepl-api-key-here
|
DEEPL_API_KEY=your-deepl-api-key-here
|
||||||
|
|
||||||
|
# Google Gemini (for text-to-speech)
|
||||||
|
GEMINI_API_KEY=your-gemini-api-key-here
|
||||||
|
|
||||||
|
# Object storage (MinIO)
|
||||||
|
STORAGE_ACCESS_KEY=langlearn
|
||||||
|
STORAGE_SECRET_KEY=changeme-use-a-long-random-string
|
||||||
|
STORAGE_BUCKET=langlearn
|
||||||
|
|
|
||||||
19
Language Learning API/audio/Get audio.yml
Normal file
19
Language Learning API/audio/Get audio.yml
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
info:
|
||||||
|
name: Get audio
|
||||||
|
type: http
|
||||||
|
seq: 1
|
||||||
|
|
||||||
|
http:
|
||||||
|
method: GET
|
||||||
|
url: audio/:file_name
|
||||||
|
params:
|
||||||
|
- name: file_name
|
||||||
|
value: ""
|
||||||
|
type: path
|
||||||
|
auth: inherit
|
||||||
|
|
||||||
|
settings:
|
||||||
|
encodeUrl: true
|
||||||
|
timeout: 0
|
||||||
|
followRedirects: true
|
||||||
|
maxRedirects: 5
|
||||||
7
Language Learning API/audio/folder.yml
Normal file
7
Language Learning API/audio/folder.yml
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
info:
|
||||||
|
name: audio
|
||||||
|
type: folder
|
||||||
|
seq: 6
|
||||||
|
|
||||||
|
request:
|
||||||
|
auth: inherit
|
||||||
22
Language Learning API/auth/Login.yml
Normal file
22
Language Learning API/auth/Login.yml
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
info:
|
||||||
|
name: Login
|
||||||
|
type: http
|
||||||
|
seq: 2
|
||||||
|
|
||||||
|
http:
|
||||||
|
method: POST
|
||||||
|
url: "{{baseUrl}}/auth/register"
|
||||||
|
body:
|
||||||
|
type: json
|
||||||
|
data: |-
|
||||||
|
{
|
||||||
|
"email": "wilson@thomaswilson.xyz",
|
||||||
|
"password": "wilson@thomaswilson.xyz"
|
||||||
|
}
|
||||||
|
auth: inherit
|
||||||
|
|
||||||
|
settings:
|
||||||
|
encodeUrl: true
|
||||||
|
timeout: 0
|
||||||
|
followRedirects: true
|
||||||
|
maxRedirects: 5
|
||||||
15
Language Learning API/auth/Register.yml
Normal file
15
Language Learning API/auth/Register.yml
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
info:
|
||||||
|
name: Register
|
||||||
|
type: http
|
||||||
|
seq: 1
|
||||||
|
|
||||||
|
http:
|
||||||
|
method: POST
|
||||||
|
url: ""
|
||||||
|
auth: inherit
|
||||||
|
|
||||||
|
settings:
|
||||||
|
encodeUrl: true
|
||||||
|
timeout: 0
|
||||||
|
followRedirects: true
|
||||||
|
maxRedirects: 5
|
||||||
7
Language Learning API/auth/folder.yml
Normal file
7
Language Learning API/auth/folder.yml
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
info:
|
||||||
|
name: auth
|
||||||
|
type: folder
|
||||||
|
seq: 5
|
||||||
|
|
||||||
|
request:
|
||||||
|
auth: inherit
|
||||||
|
|
@ -17,12 +17,12 @@ config:
|
||||||
request:
|
request:
|
||||||
auth:
|
auth:
|
||||||
type: bearer
|
type: bearer
|
||||||
token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.sW8dZVeROpNxCHL2HEXym6aDzaobFW17mLPaYbtlyYs
|
token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkNDIzODc1NC05ZDljLTRkOWMtYTM1OS1lMGQ4MDZhOGQ1Y2QiLCJlbWFpbCI6IndpbHNvbkB0aG9tYXN3aWxzb24ueHl6IiwiZXhwIjoxNzczOTkzNDcxfQ.J5SH93js-YrJThxGliOKIOV8kbdfP1qUu0rx3iBNX0A
|
||||||
variables:
|
variables:
|
||||||
- name: baseUrl
|
- name: baseUrl
|
||||||
value: http://localhost:8000
|
value: http://localhost:8000
|
||||||
- name: token
|
- name: token
|
||||||
value: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.sW8dZVeROpNxCHL2HEXym6aDzaobFW17mLPaYbtlyYs
|
value: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkNDIzODc1NC05ZDljLTRkOWMtYTM1OS1lMGQ4MDZhOGQ1Y2QiLCJlbWFpbCI6IndpbHNvbkB0aG9tYXN3aWxzb24ueHl6IiwiZXhwIjoxNzczOTkzNDcxfQ.J5SH93js-YrJThxGliOKIOV8kbdfP1qUu0rx3iBNX0A
|
||||||
bundled: false
|
bundled: false
|
||||||
extensions:
|
extensions:
|
||||||
bruno:
|
bruno:
|
||||||
|
|
|
||||||
10
Makefile
10
Makefile
|
|
@ -1,4 +1,4 @@
|
||||||
.PHONY: build up down logs shell lock
|
.PHONY: down build up logs shell lock migrate migration
|
||||||
|
|
||||||
build:
|
build:
|
||||||
docker compose build
|
docker compose build
|
||||||
|
|
@ -15,6 +15,14 @@ logs:
|
||||||
shell:
|
shell:
|
||||||
docker compose exec api bash
|
docker compose exec api bash
|
||||||
|
|
||||||
|
# Run pending migrations against the running db container
|
||||||
|
migrate:
|
||||||
|
docker compose exec api alembic upgrade head
|
||||||
|
|
||||||
|
# Generate a new migration: make migration NAME="add foo table"
|
||||||
|
migration:
|
||||||
|
docker compose exec api alembic revision --autogenerate -m "$(NAME)"
|
||||||
|
|
||||||
# Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally)
|
# Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally)
|
||||||
lock:
|
lock:
|
||||||
cd api && uv pip compile pyproject.toml -o requirements.txt
|
cd api && uv pip compile pyproject.toml -o requirements.txt
|
||||||
|
|
|
||||||
27
README.md
Normal file
27
README.md
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
# Language learning app
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
This is an app designed to help people learn a second(+) language. Initially from English. The app will start with French, Spanish, Italian, and German as the target languages. With English as the only source language.
|
||||||
|
|
||||||
|
The thesis of the app is that spaced repetition and recall remain effective mechanisms for language acquisition, and that exposure to appropriate, realistic text, can make that process less repetitive or dull. And that mixing text and audio allows the user to know how words "sound".
|
||||||
|
|
||||||
|
At present, the app doesn't have a solution to recognising speech, another important part of language learning.
|
||||||
|
|
||||||
|
It improve learner proficiency by building a mechanism for generating realistic, level-appropriate text from user-specified inputs, generated by LLMs. There will also be audio medium, similar to short podcasts, generated from the text.
|
||||||
|
|
||||||
|
The application has a back-end written in python (fastapi), because of the Python ecosystem around data and machine learning.
|
||||||
|
|
||||||
|
The application will have a web front end written in Svelte Kit. It will adopt progressive web app standards, to allow offline use.
|
||||||
|
|
||||||
|
Communication between the two is through HTTP, authenticated with JWT tokens.
|
||||||
|
|
||||||
|
## Technical Design
|
||||||
|
|
||||||
|
This application must remain self-hostable. It should not rely on proprietary infrastructure (e.g. AWS Lambda functions) to run. It should use Docker Compose and Makefiles to build projects and deploy them onto a local server or a VPS.
|
||||||
|
|
||||||
|
The main components so far are:
|
||||||
|
|
||||||
|
- Backend server (fastapi)
|
||||||
|
- Front end (SvelteKit), yet to be built
|
||||||
|
- Object storage (Ceph), yet to be built
|
||||||
|
|
@ -3,7 +3,7 @@ FROM python:3.11-slim
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install uv for fast, reproducible installs
|
# Install uv for fast, reproducible installs
|
||||||
RUN pip install --no-cache-dir uv
|
RUN pip install --no-cache-dir uv alembic
|
||||||
|
|
||||||
# Install Python dependencies from pyproject.toml
|
# Install Python dependencies from pyproject.toml
|
||||||
COPY pyproject.toml .
|
COPY pyproject.toml .
|
||||||
|
|
@ -16,8 +16,10 @@ RUN python -m spacy download en_core_web_sm && \
|
||||||
python -m spacy download it_core_news_sm && \
|
python -m spacy download it_core_news_sm && \
|
||||||
python -m spacy download de_core_news_sm
|
python -m spacy download de_core_news_sm
|
||||||
|
|
||||||
# Copy application source
|
# Copy application source and migrations
|
||||||
COPY app/ ./app/
|
COPY app/ ./app/
|
||||||
|
COPY alembic/ ./alembic/
|
||||||
|
COPY alembic.ini .
|
||||||
|
|
||||||
EXPOSE 8000
|
EXPOSE 8000
|
||||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
CMD ["sh", "-c", "alembic upgrade head && uvicorn app.main:app --host 0.0.0.0 --port 8000"]
|
||||||
|
|
|
||||||
39
api/alembic.ini
Normal file
39
api/alembic.ini
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
[alembic]
|
||||||
|
script_location = alembic
|
||||||
|
file_template = %%(year)d%%(month).2d%%(day).2d_%%(rev)s_%%(slug)s
|
||||||
|
truncate_slug_length = 40
|
||||||
|
prepend_sys_path = .
|
||||||
|
|
||||||
|
[loggers]
|
||||||
|
keys = root,sqlalchemy,alembic
|
||||||
|
|
||||||
|
[handlers]
|
||||||
|
keys = console
|
||||||
|
|
||||||
|
[formatters]
|
||||||
|
keys = generic
|
||||||
|
|
||||||
|
[logger_root]
|
||||||
|
level = WARN
|
||||||
|
handlers = console
|
||||||
|
qualname =
|
||||||
|
|
||||||
|
[logger_sqlalchemy]
|
||||||
|
level = WARN
|
||||||
|
handlers =
|
||||||
|
qualname = sqlalchemy.engine
|
||||||
|
|
||||||
|
[logger_alembic]
|
||||||
|
level = INFO
|
||||||
|
handlers =
|
||||||
|
qualname = alembic
|
||||||
|
|
||||||
|
[handler_console]
|
||||||
|
class = StreamHandler
|
||||||
|
args = (sys.stderr,)
|
||||||
|
level = NOTSET
|
||||||
|
formatter = generic
|
||||||
|
|
||||||
|
[formatter_generic]
|
||||||
|
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||||
|
datefmt = %H:%M:%S
|
||||||
38
api/alembic/env.py
Normal file
38
api/alembic/env.py
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
import asyncio
|
||||||
|
from logging.config import fileConfig
|
||||||
|
|
||||||
|
from alembic import context
|
||||||
|
from sqlalchemy import pool
|
||||||
|
from sqlalchemy.ext.asyncio import async_engine_from_config
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.database import Base
|
||||||
|
import app.models # noqa: F401 — register all models with Base.metadata
|
||||||
|
|
||||||
|
config = context.config
|
||||||
|
config.set_main_option("sqlalchemy.url", settings.database_url)
|
||||||
|
|
||||||
|
if config.config_file_name is not None:
|
||||||
|
fileConfig(config.config_file_name)
|
||||||
|
|
||||||
|
target_metadata = Base.metadata
|
||||||
|
|
||||||
|
|
||||||
|
def do_run_migrations(connection):
|
||||||
|
context.configure(connection=connection, target_metadata=target_metadata)
|
||||||
|
with context.begin_transaction():
|
||||||
|
context.run_migrations()
|
||||||
|
|
||||||
|
|
||||||
|
async def run_async_migrations():
|
||||||
|
connectable = async_engine_from_config(
|
||||||
|
config.get_section(config.config_ini_section, {}),
|
||||||
|
prefix="sqlalchemy.",
|
||||||
|
poolclass=pool.NullPool,
|
||||||
|
)
|
||||||
|
async with connectable.connect() as connection:
|
||||||
|
await connection.run_sync(do_run_migrations)
|
||||||
|
await connectable.dispose()
|
||||||
|
|
||||||
|
|
||||||
|
asyncio.run(run_async_migrations())
|
||||||
26
api/alembic/script.py.mako
Normal file
26
api/alembic/script.py.mako
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
"""${message}
|
||||||
|
|
||||||
|
Revision ID: ${up_revision}
|
||||||
|
Revises: ${down_revision | comma,n}
|
||||||
|
Create Date: ${create_date}
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
${imports if imports else ""}
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = ${repr(up_revision)}
|
||||||
|
down_revision: Union[str, None] = ${repr(down_revision)}
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
||||||
|
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
${upgrades if upgrades else "pass"}
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
${downgrades if downgrades else "pass"}
|
||||||
55
api/alembic/versions/20260318_0001_initial_schema.py
Normal file
55
api/alembic/versions/20260318_0001_initial_schema.py
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
"""initial schema
|
||||||
|
|
||||||
|
Revision ID: 0001
|
||||||
|
Revises:
|
||||||
|
Create Date: 2026-03-18
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
revision: str = "0001"
|
||||||
|
down_revision: Union[str, None] = None
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
op.create_table(
|
||||||
|
"users",
|
||||||
|
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
|
||||||
|
sa.Column("email", sa.String(255), nullable=False),
|
||||||
|
sa.Column("hashed_password", sa.String(255), nullable=False),
|
||||||
|
sa.Column("is_active", sa.Boolean(), nullable=False),
|
||||||
|
sa.Column("is_email_verified", sa.Boolean(), nullable=False),
|
||||||
|
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
|
||||||
|
sa.PrimaryKeyConstraint("id"),
|
||||||
|
)
|
||||||
|
op.create_index(op.f("ix_users_email"), "users", ["email"], unique=True)
|
||||||
|
|
||||||
|
op.create_table(
|
||||||
|
"jobs",
|
||||||
|
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
|
||||||
|
sa.Column("status", sa.String(20), nullable=False),
|
||||||
|
sa.Column("source_language", sa.String(10), nullable=False),
|
||||||
|
sa.Column("target_language", sa.String(10), nullable=False),
|
||||||
|
sa.Column("complexity_level", sa.String(5), nullable=False),
|
||||||
|
sa.Column("input_summary", sa.Text(), nullable=True),
|
||||||
|
sa.Column("generated_text", sa.Text(), nullable=True),
|
||||||
|
sa.Column("translated_text", sa.Text(), nullable=True),
|
||||||
|
sa.Column("error_message", sa.Text(), nullable=True),
|
||||||
|
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
|
||||||
|
sa.Column("started_at", sa.DateTime(timezone=True), nullable=True),
|
||||||
|
sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
|
||||||
|
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
|
||||||
|
sa.PrimaryKeyConstraint("id"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.drop_table("jobs")
|
||||||
|
op.drop_index(op.f("ix_users_email"), table_name="users")
|
||||||
|
op.drop_table("users")
|
||||||
|
|
@ -0,0 +1,43 @@
|
||||||
|
"""add audio_url and user_id to jobs
|
||||||
|
|
||||||
|
Revision ID: 0002
|
||||||
|
Revises: 0001
|
||||||
|
Create Date: 2026-03-19
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
revision: str = "0002"
|
||||||
|
down_revision: Union[str, None] = "0001"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
op.add_column(
|
||||||
|
"jobs",
|
||||||
|
sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
|
||||||
|
)
|
||||||
|
op.create_index(op.f("ix_jobs_user_id"), "jobs", ["user_id"], unique=False)
|
||||||
|
op.create_foreign_key(
|
||||||
|
"fk_jobs_user_id_users",
|
||||||
|
"jobs",
|
||||||
|
"users",
|
||||||
|
["user_id"],
|
||||||
|
["id"],
|
||||||
|
)
|
||||||
|
op.add_column(
|
||||||
|
"jobs",
|
||||||
|
sa.Column("audio_url", sa.Text(), nullable=True),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.drop_column("jobs", "audio_url")
|
||||||
|
op.drop_constraint("fk_jobs_user_id_users", "jobs", type_="foreignkey")
|
||||||
|
op.drop_index(op.f("ix_jobs_user_id"), table_name="jobs")
|
||||||
|
op.drop_column("jobs", "user_id")
|
||||||
|
|
@ -1,10 +1,36 @@
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
from fastapi import Depends, HTTPException, status
|
from fastapi import Depends, HTTPException, status
|
||||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||||
import jwt
|
import jwt
|
||||||
|
from passlib.context import CryptContext
|
||||||
|
|
||||||
from .config import settings
|
from .config import settings
|
||||||
|
|
||||||
security = HTTPBearer()
|
security = HTTPBearer()
|
||||||
|
pwd_context = CryptContext(
|
||||||
|
schemes=["pbkdf2_sha256"],
|
||||||
|
deprecated="auto",
|
||||||
|
)
|
||||||
|
|
||||||
|
TOKEN_EXPIRY_HOURS = 24
|
||||||
|
|
||||||
|
|
||||||
|
def hash_password(password: str) -> str:
|
||||||
|
return pwd_context.hash(password)
|
||||||
|
|
||||||
|
|
||||||
|
def verify_password(plain: str, hashed: str) -> bool:
|
||||||
|
return pwd_context.verify(plain, hashed)
|
||||||
|
|
||||||
|
|
||||||
|
def create_access_token(user_id: str, email: str) -> str:
|
||||||
|
payload = {
|
||||||
|
"sub": user_id,
|
||||||
|
"email": email,
|
||||||
|
"exp": datetime.now(timezone.utc) + timedelta(hours=TOKEN_EXPIRY_HOURS),
|
||||||
|
}
|
||||||
|
return jwt.encode(payload, settings.jwt_secret, algorithm="HS256")
|
||||||
|
|
||||||
|
|
||||||
def verify_token(
|
def verify_token(
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,11 @@ class Settings(BaseSettings):
|
||||||
jwt_secret: str
|
jwt_secret: str
|
||||||
anthropic_api_key: str
|
anthropic_api_key: str
|
||||||
deepl_api_key: str
|
deepl_api_key: str
|
||||||
|
gemini_api_key: str
|
||||||
|
storage_endpoint_url: str
|
||||||
|
storage_access_key: str
|
||||||
|
storage_secret_key: str
|
||||||
|
storage_bucket: str = "langlearn"
|
||||||
|
|
||||||
model_config = {"env_file": ".env"}
|
model_config = {"env_file": ".env"}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,23 +1,34 @@
|
||||||
|
import asyncio
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
|
|
||||||
from .database import engine, Base
|
|
||||||
from .routers import pos, generation, jobs
|
from .routers import pos, generation, jobs
|
||||||
|
from .routers import auth as auth_router
|
||||||
|
from .routers import media as media_router
|
||||||
|
from .storage import ensure_bucket_exists
|
||||||
|
from . import worker
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
async with engine.begin() as conn:
|
ensure_bucket_exists()
|
||||||
await conn.run_sync(Base.metadata.create_all)
|
worker_task = asyncio.create_task(worker.worker_loop())
|
||||||
yield
|
yield
|
||||||
|
worker_task.cancel()
|
||||||
|
try:
|
||||||
|
await worker_task
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI(title="Language Learning API", lifespan=lifespan)
|
app = FastAPI(title="Language Learning API", lifespan=lifespan)
|
||||||
|
|
||||||
|
app.include_router(auth_router.router)
|
||||||
app.include_router(pos.router)
|
app.include_router(pos.router)
|
||||||
app.include_router(generation.router)
|
app.include_router(generation.router)
|
||||||
app.include_router(jobs.router)
|
app.include_router(jobs.router)
|
||||||
|
app.include_router(media_router.router)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,39 @@
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from sqlalchemy import String, Text, DateTime
|
from sqlalchemy import String, Text, DateTime, Boolean, ForeignKey
|
||||||
from sqlalchemy.orm import Mapped, mapped_column
|
from sqlalchemy.orm import Mapped, mapped_column
|
||||||
from sqlalchemy.dialects.postgresql import UUID
|
from sqlalchemy.dialects.postgresql import UUID
|
||||||
|
|
||||||
from .database import Base
|
from .database import Base
|
||||||
|
|
||||||
|
|
||||||
|
class User(Base):
|
||||||
|
__tablename__ = "users"
|
||||||
|
|
||||||
|
id: Mapped[uuid.UUID] = mapped_column(
|
||||||
|
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
|
||||||
|
)
|
||||||
|
email: Mapped[str] = mapped_column(String(255), nullable=False, unique=True, index=True)
|
||||||
|
hashed_password: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||||
|
is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
|
||||||
|
# TODO(email-verification): set to False and require verification once transactional email is implemented
|
||||||
|
is_email_verified: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
|
||||||
|
created_at: Mapped[datetime] = mapped_column(
|
||||||
|
DateTime(timezone=True),
|
||||||
|
default=lambda: datetime.now(timezone.utc),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Job(Base):
|
class Job(Base):
|
||||||
__tablename__ = "jobs"
|
__tablename__ = "jobs"
|
||||||
|
|
||||||
id: Mapped[uuid.UUID] = mapped_column(
|
id: Mapped[uuid.UUID] = mapped_column(
|
||||||
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
|
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
|
||||||
)
|
)
|
||||||
|
user_id: Mapped[uuid.UUID | None] = mapped_column(
|
||||||
|
UUID(as_uuid=True), ForeignKey("users.id"), nullable=True, index=True
|
||||||
|
)
|
||||||
status: Mapped[str] = mapped_column(String(20), nullable=False, default="pending")
|
status: Mapped[str] = mapped_column(String(20), nullable=False, default="pending")
|
||||||
source_language: Mapped[str] = mapped_column(String(10), nullable=False, default="en")
|
source_language: Mapped[str] = mapped_column(String(10), nullable=False, default="en")
|
||||||
target_language: Mapped[str] = mapped_column(String(10), nullable=False)
|
target_language: Mapped[str] = mapped_column(String(10), nullable=False)
|
||||||
|
|
@ -22,6 +42,7 @@ class Job(Base):
|
||||||
generated_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
generated_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
translated_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
translated_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
audio_url: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
created_at: Mapped[datetime] = mapped_column(
|
created_at: Mapped[datetime] = mapped_column(
|
||||||
DateTime(timezone=True),
|
DateTime(timezone=True),
|
||||||
default=lambda: datetime.now(timezone.utc),
|
default=lambda: datetime.now(timezone.utc),
|
||||||
|
|
|
||||||
78
api/app/routers/auth.py
Normal file
78
api/app/routers/auth.py
Normal file
|
|
@ -0,0 +1,78 @@
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, status
|
||||||
|
from pydantic import BaseModel, EmailStr
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.exc import IntegrityError
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from ..auth import create_access_token, hash_password, verify_password
|
||||||
|
from ..database import get_db
|
||||||
|
from ..models import User
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/auth", tags=["auth"])
|
||||||
|
|
||||||
|
|
||||||
|
class RegisterRequest(BaseModel):
|
||||||
|
email: EmailStr
|
||||||
|
password: str
|
||||||
|
|
||||||
|
|
||||||
|
class LoginRequest(BaseModel):
|
||||||
|
email: EmailStr
|
||||||
|
password: str
|
||||||
|
|
||||||
|
|
||||||
|
class TokenResponse(BaseModel):
|
||||||
|
access_token: str
|
||||||
|
token_type: str = "bearer"
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/register", status_code=status.HTTP_201_CREATED)
|
||||||
|
async def register(body: RegisterRequest, db: AsyncSession = Depends(get_db)):
|
||||||
|
user = User(
|
||||||
|
email=body.email,
|
||||||
|
hashed_password=hash_password(body.password),
|
||||||
|
)
|
||||||
|
db.add(user)
|
||||||
|
try:
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(user)
|
||||||
|
except IntegrityError:
|
||||||
|
await db.rollback()
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_409_CONFLICT,
|
||||||
|
detail="Email already registered",
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO(email-verification): send verification email here once transactional
|
||||||
|
# email is implemented. Set is_email_verified=False on the User model and
|
||||||
|
# require verification before allowing login.
|
||||||
|
|
||||||
|
return {"id": str(user.id), "email": user.email}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/login", response_model=TokenResponse)
|
||||||
|
async def login(body: LoginRequest, db: AsyncSession = Depends(get_db)):
|
||||||
|
result = await db.execute(select(User).where(User.email == body.email))
|
||||||
|
user = result.scalar_one_or_none()
|
||||||
|
|
||||||
|
if user is None or not verify_password(body.password, user.hashed_password):
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||||
|
detail="Invalid email or password",
|
||||||
|
)
|
||||||
|
|
||||||
|
if not user.is_active:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_403_FORBIDDEN,
|
||||||
|
detail="Account disabled",
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO(email-verification): uncomment once email verification is in place
|
||||||
|
# if not user.is_email_verified:
|
||||||
|
# raise HTTPException(
|
||||||
|
# status_code=status.HTTP_403_FORBIDDEN,
|
||||||
|
# detail="Email address not verified",
|
||||||
|
# )
|
||||||
|
|
||||||
|
token = create_access_token(str(user.id), user.email)
|
||||||
|
return TokenResponse(access_token=token)
|
||||||
|
|
@ -1,16 +1,17 @@
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from functools import partial
|
||||||
|
|
||||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
import anthropic
|
|
||||||
import deepl
|
|
||||||
|
|
||||||
from ..auth import verify_token
|
from ..auth import verify_token
|
||||||
from ..database import get_db, AsyncSessionLocal
|
from ..database import get_db, AsyncSessionLocal
|
||||||
from ..models import Job
|
from ..models import Job
|
||||||
from ..config import settings
|
from ..storage import upload_audio
|
||||||
|
from ..services import llm, tts, job_repo
|
||||||
|
from ..services.tts import VOICE_BY_LANGUAGE
|
||||||
|
from .. import worker
|
||||||
|
|
||||||
router = APIRouter(prefix="/generate", tags=["generation"])
|
router = APIRouter(prefix="/generate", tags=["generation"])
|
||||||
|
|
||||||
|
|
@ -23,23 +24,6 @@ SUPPORTED_LANGUAGES: dict[str, str] = {
|
||||||
}
|
}
|
||||||
SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"}
|
SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"}
|
||||||
|
|
||||||
# Maps our language codes to DeepL source/target language codes
|
|
||||||
DEEPL_SOURCE_LANG: dict[str, str] = {
|
|
||||||
"en": "EN",
|
|
||||||
"fr": "FR",
|
|
||||||
"es": "ES",
|
|
||||||
"it": "IT",
|
|
||||||
"de": "DE",
|
|
||||||
}
|
|
||||||
# DeepL target codes (English needs a regional variant)
|
|
||||||
DEEPL_TARGET_LANG: dict[str, str] = {
|
|
||||||
"en": "EN-US",
|
|
||||||
"fr": "FR",
|
|
||||||
"es": "ES",
|
|
||||||
"it": "IT",
|
|
||||||
"de": "DE",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class GenerationRequest(BaseModel):
|
class GenerationRequest(BaseModel):
|
||||||
target_language: str
|
target_language: str
|
||||||
|
|
@ -56,15 +40,12 @@ class GenerationResponse(BaseModel):
|
||||||
async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
|
async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
|
||||||
async with AsyncSessionLocal() as db:
|
async with AsyncSessionLocal() as db:
|
||||||
job = await db.get(Job, job_id)
|
job = await db.get(Job, job_id)
|
||||||
job.status = "processing"
|
await job_repo.mark_processing(db, job)
|
||||||
job.started_at = datetime.now(timezone.utc)
|
|
||||||
await db.commit()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from_language = SUPPORTED_LANGUAGES[request.source_language]
|
from_language = SUPPORTED_LANGUAGES[request.source_language]
|
||||||
language_name = SUPPORTED_LANGUAGES[request.target_language]
|
language_name = SUPPORTED_LANGUAGES[request.target_language]
|
||||||
|
|
||||||
# Build a short summary of the input to store (not the full text)
|
|
||||||
topic_part = f"Topic: {request.topic}. " if request.topic else ""
|
topic_part = f"Topic: {request.topic}. " if request.topic else ""
|
||||||
combined_preview = " ".join(request.input_texts)[:300]
|
combined_preview = " ".join(request.input_texts)[:300]
|
||||||
input_summary = (
|
input_summary = (
|
||||||
|
|
@ -94,23 +75,7 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
|
||||||
f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}."
|
f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}."
|
||||||
)
|
)
|
||||||
|
|
||||||
client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
|
generated_text = await llm.generate_text(prompt)
|
||||||
|
|
||||||
message = client.messages.create(
|
|
||||||
model="claude-sonnet-4-6",
|
|
||||||
max_tokens=1024,
|
|
||||||
messages=[{"role": "user", "content": prompt}],
|
|
||||||
)
|
|
||||||
generated_text = message.content[0].text
|
|
||||||
|
|
||||||
# TODO: Come back to this when DeepL unblock my account for being "high risk"
|
|
||||||
# Translate generated text back into the learner's source language via DeepL
|
|
||||||
# translator = deepl.Translator(settings.deepl_api_key)
|
|
||||||
# translation = translator.translate_text(
|
|
||||||
# generated_text,
|
|
||||||
# source_lang=DEEPL_SOURCE_LANG[request.target_language],
|
|
||||||
# target_lang=DEEPL_TARGET_LANG[request.source_language],
|
|
||||||
#)
|
|
||||||
|
|
||||||
translate_prompt = (
|
translate_prompt = (
|
||||||
f"You are a helpful assistant that translates text. Translate just the previous summary "
|
f"You are a helpful assistant that translates text. Translate just the previous summary "
|
||||||
|
|
@ -120,36 +85,29 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
|
||||||
f"- Send through only the translated text, no explanations or notes\n"
|
f"- Send through only the translated text, no explanations or notes\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
translate_message = client.messages.create(
|
translated_text = await llm.translate_text(prompt, generated_text, translate_prompt)
|
||||||
model="claude-sonnet-4-6",
|
|
||||||
max_tokens=1024,
|
# Save LLM results before attempting TTS so they're preserved on failure
|
||||||
messages=[
|
await job_repo.save_llm_results(
|
||||||
{ "role": "user", "content": prompt },
|
db, job, generated_text, translated_text, input_summary[:500]
|
||||||
{ "role": "assistant", "content": message.content },
|
|
||||||
{ "role": "user", "content": translate_prompt }
|
|
||||||
],
|
|
||||||
)
|
)
|
||||||
|
|
||||||
job.status = "succeeded"
|
voice = VOICE_BY_LANGUAGE.get(request.target_language, "Kore")
|
||||||
job.generated_text = generated_text
|
wav_bytes = await tts.generate_audio(generated_text, voice)
|
||||||
job.translated_text = translate_message.content[0].text
|
audio_key = f"audio/{job_id}.wav"
|
||||||
job.input_summary = input_summary[:500]
|
upload_audio(audio_key, wav_bytes)
|
||||||
job.completed_at = datetime.now(timezone.utc)
|
|
||||||
|
await job_repo.mark_succeeded(db, job, audio_key)
|
||||||
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
job.status = "failed"
|
await job_repo.mark_failed(db, job, str(exc))
|
||||||
job.error_message = str(exc)
|
|
||||||
job.completed_at = datetime.now(timezone.utc)
|
|
||||||
|
|
||||||
await db.commit()
|
|
||||||
|
|
||||||
|
|
||||||
@router.post("", response_model=GenerationResponse, status_code=202)
|
@router.post("", response_model=GenerationResponse, status_code=202)
|
||||||
async def create_generation_job(
|
async def create_generation_job(
|
||||||
request: GenerationRequest,
|
request: GenerationRequest,
|
||||||
background_tasks: BackgroundTasks,
|
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
_: dict = Depends(verify_token),
|
token_data: dict = Depends(verify_token),
|
||||||
) -> GenerationResponse:
|
) -> GenerationResponse:
|
||||||
if request.target_language not in SUPPORTED_LANGUAGES:
|
if request.target_language not in SUPPORTED_LANGUAGES:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -165,6 +123,7 @@ async def create_generation_job(
|
||||||
)
|
)
|
||||||
|
|
||||||
job = Job(
|
job = Job(
|
||||||
|
user_id=uuid.UUID(token_data["sub"]),
|
||||||
source_language=request.source_language,
|
source_language=request.source_language,
|
||||||
target_language=request.target_language,
|
target_language=request.target_language,
|
||||||
complexity_level=request.complexity_level,
|
complexity_level=request.complexity_level,
|
||||||
|
|
@ -173,6 +132,6 @@ async def create_generation_job(
|
||||||
await db.commit()
|
await db.commit()
|
||||||
await db.refresh(job)
|
await db.refresh(job)
|
||||||
|
|
||||||
background_tasks.add_task(_run_generation, job.id, request)
|
await worker.enqueue(partial(_run_generation, job.id, request))
|
||||||
|
|
||||||
return GenerationResponse(job_id=str(job.id))
|
return GenerationResponse(job_id=str(job.id))
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
@ -7,8 +8,12 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
|
|
||||||
from ..auth import verify_token
|
from ..auth import verify_token
|
||||||
from ..database import get_db
|
from ..database import get_db, AsyncSessionLocal
|
||||||
from ..models import Job
|
from ..models import Job
|
||||||
|
from ..storage import upload_audio
|
||||||
|
from ..services import tts, job_repo
|
||||||
|
from ..services.tts import VOICE_BY_LANGUAGE
|
||||||
|
from .. import worker
|
||||||
|
|
||||||
router = APIRouter(prefix="/jobs", tags=["jobs"])
|
router = APIRouter(prefix="/jobs", tags=["jobs"])
|
||||||
|
|
||||||
|
|
@ -26,6 +31,7 @@ class JobResponse(BaseModel):
|
||||||
generated_text: str | None = None
|
generated_text: str | None = None
|
||||||
translated_text: str | None = None
|
translated_text: str | None = None
|
||||||
input_summary: str | None = None
|
input_summary: str | None = None
|
||||||
|
audio_url: str | None = None
|
||||||
# only present on failure
|
# only present on failure
|
||||||
error_message: str | None = None
|
error_message: str | None = None
|
||||||
model_config = { "from_attributes": True }
|
model_config = { "from_attributes": True }
|
||||||
|
|
@ -83,7 +89,56 @@ async def get_job(
|
||||||
response.generated_text = job.generated_text
|
response.generated_text = job.generated_text
|
||||||
response.translated_text = job.translated_text
|
response.translated_text = job.translated_text
|
||||||
response.input_summary = job.input_summary
|
response.input_summary = job.input_summary
|
||||||
|
response.audio_url = job.audio_url
|
||||||
elif job.status == "failed":
|
elif job.status == "failed":
|
||||||
response.error_message = job.error_message
|
response.error_message = job.error_message
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
async def _run_regenerate_audio(job_id: uuid.UUID) -> None:
|
||||||
|
async with AsyncSessionLocal() as db:
|
||||||
|
job = await db.get(Job, job_id)
|
||||||
|
await job_repo.mark_processing(db, job)
|
||||||
|
|
||||||
|
try:
|
||||||
|
voice = VOICE_BY_LANGUAGE.get(job.target_language, "Kore")
|
||||||
|
wav_bytes = await tts.generate_audio(job.generated_text, voice)
|
||||||
|
audio_key = f"audio/{job_id}.wav"
|
||||||
|
upload_audio(audio_key, wav_bytes)
|
||||||
|
|
||||||
|
await job_repo.mark_succeeded(db, job, audio_key)
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
await job_repo.mark_failed(db, job, str(exc))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{job_id}/regenerate-audio", status_code=202)
|
||||||
|
async def regenerate_audio(
|
||||||
|
job_id: str,
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
token_data: dict = Depends(verify_token),
|
||||||
|
) -> dict:
|
||||||
|
try:
|
||||||
|
uid = uuid.UUID(job_id)
|
||||||
|
except ValueError:
|
||||||
|
raise HTTPException(status_code=400, detail="Invalid job ID format")
|
||||||
|
|
||||||
|
job: Job | None = await db.get(Job, uid)
|
||||||
|
if job is None:
|
||||||
|
raise HTTPException(status_code=404, detail="Job not found")
|
||||||
|
|
||||||
|
if str(job.user_id) != token_data["sub"]:
|
||||||
|
raise HTTPException(status_code=403, detail="Not authorized to modify this job")
|
||||||
|
|
||||||
|
if not job.generated_text:
|
||||||
|
raise HTTPException(status_code=400, detail="Job has no generated text to synthesize")
|
||||||
|
|
||||||
|
if job.audio_url:
|
||||||
|
raise HTTPException(status_code=409, detail="Job already has audio")
|
||||||
|
|
||||||
|
if job.status == "processing":
|
||||||
|
raise HTTPException(status_code=409, detail="Job is already processing")
|
||||||
|
|
||||||
|
await worker.enqueue(partial(_run_regenerate_audio, uid))
|
||||||
|
return {"job_id": job_id}
|
||||||
|
|
|
||||||
39
api/app/routers/media.py
Normal file
39
api/app/routers/media.py
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from fastapi.responses import Response
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
from botocore.exceptions import ClientError
|
||||||
|
|
||||||
|
from ..auth import verify_token
|
||||||
|
from ..database import get_db
|
||||||
|
from ..models import Job
|
||||||
|
from ..storage import download_audio
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/media", tags=["media"])
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{filename:path}")
|
||||||
|
async def get_media_file(
|
||||||
|
filename: str,
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
token_data: dict = Depends(verify_token),
|
||||||
|
) -> Response:
|
||||||
|
user_id = uuid.UUID(token_data["sub"])
|
||||||
|
|
||||||
|
result = await db.execute(
|
||||||
|
select(Job).where(Job.audio_url == filename, Job.user_id == user_id)
|
||||||
|
)
|
||||||
|
job = result.scalar_one_or_none()
|
||||||
|
if job is None:
|
||||||
|
raise HTTPException(status_code=404, detail="File not found")
|
||||||
|
|
||||||
|
try:
|
||||||
|
audio_bytes, content_type = download_audio(filename)
|
||||||
|
except ClientError as e:
|
||||||
|
if e.response["Error"]["Code"] in ("NoSuchKey", "404"):
|
||||||
|
raise HTTPException(status_code=404, detail="File not found")
|
||||||
|
raise HTTPException(status_code=500, detail="Storage error")
|
||||||
|
|
||||||
|
return Response(content=audio_bytes, media_type=content_type)
|
||||||
0
api/app/services/__init__.py
Normal file
0
api/app/services/__init__.py
Normal file
39
api/app/services/job_repo.py
Normal file
39
api/app/services/job_repo.py
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from ..models import Job
|
||||||
|
|
||||||
|
|
||||||
|
async def mark_processing(db: AsyncSession, job: Job) -> None:
|
||||||
|
job.status = "processing"
|
||||||
|
job.started_at = datetime.now(timezone.utc)
|
||||||
|
job.error_message = None
|
||||||
|
await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def save_llm_results(
|
||||||
|
db: AsyncSession,
|
||||||
|
job: Job,
|
||||||
|
generated_text: str,
|
||||||
|
translated_text: str,
|
||||||
|
input_summary: str,
|
||||||
|
) -> None:
|
||||||
|
job.generated_text = generated_text
|
||||||
|
job.translated_text = translated_text
|
||||||
|
job.input_summary = input_summary
|
||||||
|
await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def mark_succeeded(db: AsyncSession, job: Job, audio_url: str) -> None:
|
||||||
|
job.status = "succeeded"
|
||||||
|
job.audio_url = audio_url
|
||||||
|
job.completed_at = datetime.now(timezone.utc)
|
||||||
|
await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def mark_failed(db: AsyncSession, job: Job, error: str) -> None:
|
||||||
|
job.status = "failed"
|
||||||
|
job.error_message = error
|
||||||
|
job.completed_at = datetime.now(timezone.utc)
|
||||||
|
await db.commit()
|
||||||
39
api/app/services/llm.py
Normal file
39
api/app/services/llm.py
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
import anthropic
|
||||||
|
|
||||||
|
from ..config import settings
|
||||||
|
|
||||||
|
|
||||||
|
def _create_client() -> anthropic.Anthropic:
|
||||||
|
return anthropic.Anthropic(api_key=settings.anthropic_api_key)
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_text(prompt: str) -> str:
|
||||||
|
def _call() -> str:
|
||||||
|
client = _create_client()
|
||||||
|
message = client.messages.create(
|
||||||
|
model="claude-sonnet-4-6",
|
||||||
|
max_tokens=1024,
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
)
|
||||||
|
return message.content[0].text
|
||||||
|
|
||||||
|
return await asyncio.to_thread(_call)
|
||||||
|
|
||||||
|
|
||||||
|
async def translate_text(original_prompt: str, generated_text: str, translate_prompt: str) -> str:
|
||||||
|
def _call() -> str:
|
||||||
|
client = _create_client()
|
||||||
|
message = client.messages.create(
|
||||||
|
model="claude-sonnet-4-6",
|
||||||
|
max_tokens=1024,
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": original_prompt},
|
||||||
|
{"role": "assistant", "content": generated_text},
|
||||||
|
{"role": "user", "content": translate_prompt},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
return message.content[0].text
|
||||||
|
|
||||||
|
return await asyncio.to_thread(_call)
|
||||||
39
api/app/services/tts.py
Normal file
39
api/app/services/tts.py
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from google import genai
|
||||||
|
from google.genai import types as genai_types
|
||||||
|
|
||||||
|
from ..config import settings
|
||||||
|
from ..storage import pcm_to_wav
|
||||||
|
|
||||||
|
VOICE_BY_LANGUAGE: dict[str, str] = {
|
||||||
|
"fr": "Kore",
|
||||||
|
"es": "Charon",
|
||||||
|
"it": "Aoede",
|
||||||
|
"de": "Fenrir",
|
||||||
|
"en": "Kore",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_audio(text: str, voice: str) -> bytes:
|
||||||
|
"""Generate TTS audio and return WAV bytes."""
|
||||||
|
def _call() -> bytes:
|
||||||
|
client = genai.Client(api_key=settings.gemini_api_key)
|
||||||
|
response = client.models.generate_content(
|
||||||
|
model="gemini-2.5-flash-preview-tts",
|
||||||
|
contents=text,
|
||||||
|
config=genai_types.GenerateContentConfig(
|
||||||
|
response_modalities=["AUDIO"],
|
||||||
|
speech_config=genai_types.SpeechConfig(
|
||||||
|
voice_config=genai_types.VoiceConfig(
|
||||||
|
prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(
|
||||||
|
voice_name=voice,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
pcm_data = response.candidates[0].content.parts[0].inline_data.data
|
||||||
|
return pcm_to_wav(pcm_data)
|
||||||
|
|
||||||
|
return await asyncio.to_thread(_call)
|
||||||
56
api/app/storage.py
Normal file
56
api/app/storage.py
Normal file
|
|
@ -0,0 +1,56 @@
|
||||||
|
import io
|
||||||
|
import wave
|
||||||
|
|
||||||
|
import boto3
|
||||||
|
from botocore.exceptions import ClientError
|
||||||
|
|
||||||
|
from .config import settings
|
||||||
|
|
||||||
|
|
||||||
|
def get_s3_client():
|
||||||
|
return boto3.client(
|
||||||
|
"s3",
|
||||||
|
endpoint_url=settings.storage_endpoint_url,
|
||||||
|
aws_access_key_id=settings.storage_access_key,
|
||||||
|
aws_secret_access_key=settings.storage_secret_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_bucket_exists() -> None:
|
||||||
|
client = get_s3_client()
|
||||||
|
try:
|
||||||
|
client.head_bucket(Bucket=settings.storage_bucket)
|
||||||
|
except ClientError as e:
|
||||||
|
if e.response["Error"]["Code"] in ("404", "NoSuchBucket"):
|
||||||
|
client.create_bucket(Bucket=settings.storage_bucket)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def pcm_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:
|
||||||
|
"""Wrap raw 16-bit mono PCM data in a WAV container."""
|
||||||
|
buf = io.BytesIO()
|
||||||
|
with wave.open(buf, "wb") as wf:
|
||||||
|
wf.setnchannels(1)
|
||||||
|
wf.setsampwidth(2) # 16-bit
|
||||||
|
wf.setframerate(sample_rate)
|
||||||
|
wf.writeframes(pcm_data)
|
||||||
|
return buf.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def upload_audio(object_key: str, audio_bytes: bytes, content_type: str = "audio/wav") -> None:
|
||||||
|
client = get_s3_client()
|
||||||
|
client.put_object(
|
||||||
|
Bucket=settings.storage_bucket,
|
||||||
|
Key=object_key,
|
||||||
|
Body=audio_bytes,
|
||||||
|
ContentType=content_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def download_audio(object_key: str) -> tuple[bytes, str]:
|
||||||
|
"""Return (file_bytes, content_type)."""
|
||||||
|
client = get_s3_client()
|
||||||
|
response = client.get_object(Bucket=settings.storage_bucket, Key=object_key)
|
||||||
|
content_type = response.get("ContentType", "audio/wav")
|
||||||
|
return response["Body"].read(), content_type
|
||||||
22
api/app/worker.py
Normal file
22
api/app/worker.py
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from typing import Awaitable, Callable
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_queue: asyncio.Queue[Callable[[], Awaitable[None]]] = asyncio.Queue()
|
||||||
|
|
||||||
|
|
||||||
|
async def enqueue(task: Callable[[], Awaitable[None]]) -> None:
|
||||||
|
await _queue.put(task)
|
||||||
|
|
||||||
|
|
||||||
|
async def worker_loop() -> None:
|
||||||
|
while True:
|
||||||
|
task = await _queue.get()
|
||||||
|
try:
|
||||||
|
await task()
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Unhandled error in worker task")
|
||||||
|
finally:
|
||||||
|
_queue.task_done()
|
||||||
|
|
@ -10,8 +10,13 @@ dependencies = [
|
||||||
"spacy>=3.8.0",
|
"spacy>=3.8.0",
|
||||||
"anthropic>=0.40.0",
|
"anthropic>=0.40.0",
|
||||||
"pyjwt>=2.10.0",
|
"pyjwt>=2.10.0",
|
||||||
|
"passlib>=1.7.4",
|
||||||
|
"email-validator>=2.0.0",
|
||||||
|
"alembic>=1.13.0",
|
||||||
"pydantic-settings>=2.0.0",
|
"pydantic-settings>=2.0.0",
|
||||||
"deepl>=1.18.0",
|
"deepl>=1.18.0",
|
||||||
|
"google-genai>=1.0.0",
|
||||||
|
"boto3>=1.35.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,23 @@ services:
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 10
|
retries: 10
|
||||||
|
|
||||||
|
storage:
|
||||||
|
image: minio/minio:latest
|
||||||
|
command: server /data --console-address ":9001"
|
||||||
|
environment:
|
||||||
|
MINIO_ROOT_USER: ${STORAGE_ACCESS_KEY:-langlearn}
|
||||||
|
MINIO_ROOT_PASSWORD: ${STORAGE_SECRET_KEY}
|
||||||
|
ports:
|
||||||
|
- "9000:9000"
|
||||||
|
- "9001:9001"
|
||||||
|
volumes:
|
||||||
|
- storagedata:/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "curl -sf http://localhost:9000/minio/health/live || exit 1"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 10
|
||||||
|
|
||||||
api:
|
api:
|
||||||
build: ./api
|
build: ./api
|
||||||
ports:
|
ports:
|
||||||
|
|
@ -22,10 +39,18 @@ services:
|
||||||
JWT_SECRET: ${JWT_SECRET}
|
JWT_SECRET: ${JWT_SECRET}
|
||||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
|
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
|
||||||
DEEPL_API_KEY: ${DEEPL_API_KEY}
|
DEEPL_API_KEY: ${DEEPL_API_KEY}
|
||||||
|
GEMINI_API_KEY: ${GEMINI_API_KEY}
|
||||||
|
STORAGE_ENDPOINT_URL: http://storage:9000
|
||||||
|
STORAGE_ACCESS_KEY: ${STORAGE_ACCESS_KEY:-langlearn}
|
||||||
|
STORAGE_SECRET_KEY: ${STORAGE_SECRET_KEY}
|
||||||
|
STORAGE_BUCKET: ${STORAGE_BUCKET:-langlearn}
|
||||||
depends_on:
|
depends_on:
|
||||||
db:
|
db:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
storage:
|
||||||
|
condition: service_healthy
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
pgdata:
|
pgdata:
|
||||||
|
storagedata:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue