Compare commits
No commits in common. "746aa5f382a8e670d20b6035701cddfda6315e8c" and "407d423a4c13cbb4005f87c1a1accb88da2bbb14" have entirely different histories.
746aa5f382
...
407d423a4c
14 changed files with 48 additions and 206 deletions
|
|
@ -1,29 +0,0 @@
|
|||
"""add source_pos_data, target_pos_data, audio_transcript to jobs

Revision ID: 0003
Revises: 0002
Create Date: 2026-03-27

"""
from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

revision: str = "0003"
down_revision: Union[str, None] = "0002"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Columns introduced by this revision; downgrade() drops them newest-first.
_JSONB_COLUMNS = ("source_pos_data", "target_pos_data", "audio_transcript")


def upgrade() -> None:
    """Add three nullable JSONB columns to the jobs table."""
    for column_name in _JSONB_COLUMNS:
        op.add_column("jobs", sa.Column(column_name, postgresql.JSONB(), nullable=True))


def downgrade() -> None:
    """Remove the columns added by upgrade(), in reverse order."""
    for column_name in reversed(_JSONB_COLUMNS):
        op.drop_column("jobs", column_name)
|
||||
|
|
@ -6,7 +6,6 @@ class Settings(BaseSettings):
|
|||
jwt_secret: str
|
||||
anthropic_api_key: str
|
||||
deepl_api_key: str
|
||||
deepgram_api_key: str
|
||||
gemini_api_key: str
|
||||
storage_endpoint_url: str
|
||||
storage_access_key: str
|
||||
|
|
|
|||
|
|
@ -15,9 +15,6 @@ class SummariseJob:
|
|||
translated_text: str
|
||||
error_message: str
|
||||
audio_url: str
|
||||
source_pos_data: dict | None
|
||||
target_pos_data: dict | None
|
||||
audio_transcript: dict | None
|
||||
created_at: datetime
|
||||
started_at: datetime | None = None
|
||||
completed_at: datetime | None = None
|
||||
|
|
|
|||
|
|
@ -19,17 +19,18 @@ class AnthropicClient():
|
|||
) -> str:
|
||||
return (
|
||||
f"You are a language learning content creator.\n"
|
||||
f"You generate markdown summaries of user-provided content.\n"
|
||||
f"You will provide content in {to_language} at {complexity_level} proficiency level on the CEFR scale.\n"
|
||||
f"The user will provide input, you will generate an engaging realistic summary text in {to_language} "
|
||||
f"at {complexity_level} proficiency level (CEFR scale).\n\n"
|
||||
f"The text you generate will:\n"
|
||||
f"- Contain ONLY the generated summary text in {to_language}.\n"
|
||||
f"- Never contain inappropriate (hateful, sexual, violent) content. It is preferable to return no text than to generate such content.\n"
|
||||
f"- Contain ONLY the generated text in {to_language}.\n"
|
||||
f"- Be appropriate for a {complexity_level} {to_language} speaker.\n"
|
||||
f"- Never generate inappropriate (hateful, sexual, violent) content. It is preferable to return no text than to generate such content.\n"
|
||||
f"- Speak directly to the reader/listener, adopting the tone and style of a semi-formal news reporter or podcaster.\n"
|
||||
f"- Where appropriate (fluency level, content), use a small number of idiomatic expressions.\n"
|
||||
f"- Where appropriate use at least one additional tense, beyond the default of the content.\n"
|
||||
f"- Be formatted in markdown. Contain a single level 1 header (#) at the top, followed by paragraphs and line breaks.\n"
|
||||
f"- Be around {length_preference} long.\n"
|
||||
f"- Be inspired by the content, but not the tone, of the source material."
|
||||
f"- Be formatted in markdown with paragraphs and line breaks.\n"
|
||||
f"- Be {length_preference} long.\n"
|
||||
f"- Be inspired by the following source material "
|
||||
f"(but written originally in {from_language}):\n\n"
|
||||
)
|
||||
|
||||
def _create_prompt_summarise_text(
|
||||
|
|
|
|||
|
|
@ -1,24 +0,0 @@
|
|||
from deepgram import (
    AsyncDeepgramClient,
)


class LocalDeepgramClient:
    """Thin async wrapper around the Deepgram SDK for speech-to-text."""

    def __init__(self, api_key: str):
        self.deepgram_client = AsyncDeepgramClient(api_key=api_key)

    async def transcribe_bytes(self, audio_bytes: bytes, language_code: str) -> dict:
        """Transcribe raw audio bytes with the nova-3 model.

        Returns the `results` section of the Deepgram response.
        NOTE(review): `.json()` on SDK models often returns a JSON *string*,
        not a dict — confirm against the installed deepgram-sdk version.
        """
        response = await self.deepgram_client.listen.v1.media.transcribe_file(
            request=audio_bytes,
            model="nova-3",
            language=language_code,
            utterances=True,
            smart_format=True,
        )
        return response.results.json()

    async def transcribe_local_file(self, local_file_path: str, language_code: str) -> dict:
        """Transcribe an audio file on local disk."""
        with open(local_file_path, "rb") as audio_file:
            # Bug fix: pass the file *contents*, not the file object —
            # transcribe_bytes expects `bytes`.
            return await self.transcribe_bytes(audio_file.read(), language_code)
|
||||
|
||||
|
||||
|
|
@ -3,7 +3,7 @@ from datetime import datetime, timezone
|
|||
|
||||
from sqlalchemy import String, Text, DateTime, ForeignKey
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
|
||||
from ..database import Base
|
||||
|
||||
|
|
@ -25,9 +25,6 @@ class SummariseJobEntity(Base):
|
|||
translated_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
audio_url: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
source_pos_data: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
|
||||
target_pos_data: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
|
||||
audio_transcript: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
|
|
|
|||
|
|
@ -46,9 +46,6 @@ class PostgresSummariseJobRepository:
|
|||
translated_text=entity.translated_text,
|
||||
error_message=entity.error_message,
|
||||
audio_url=entity.audio_url,
|
||||
source_pos_data=entity.source_pos_data,
|
||||
target_pos_data=entity.target_pos_data,
|
||||
audio_transcript=entity.audio_transcript,
|
||||
created_at=entity.created_at,
|
||||
started_at=entity.started_at,
|
||||
completed_at=entity.completed_at,
|
||||
|
|
@ -115,26 +112,6 @@ async def save_translated_text(
|
|||
await update(db, job)
|
||||
|
||||
|
||||
async def save_pos_data(
    db: AsyncSession,
    job: SummariseJobEntity,
    source_pos_data: dict,
    target_pos_data: dict,
) -> None:
    """Persist the POS analyses for both source and target texts on *job*."""
    job.source_pos_data, job.target_pos_data = source_pos_data, target_pos_data
    await update(db, job)
|
||||
|
||||
|
||||
async def save_audio_transcript(
    db: AsyncSession,
    job: SummariseJobEntity,
    audio_transcript: dict,
) -> None:
    """Persist the audio transcript payload on *job* and flush via update()."""
    job.audio_transcript = audio_transcript
    await update(db, job)
|
||||
|
||||
|
||||
async def mark_succeeded(db: AsyncSession, job: SummariseJobEntity, audio_url: str) -> None:
|
||||
job.status = "succeeded"
|
||||
job.audio_url = audio_url
|
||||
|
|
|
|||
|
|
@ -1,46 +0,0 @@
|
|||
import spacy
|
||||
|
||||
# spaCy pipeline name for each supported ISO 639-1 language code.
LANGUAGE_MODELS: dict[str, str] = {
    "en": "en_core_web_sm",
    "fr": "fr_core_news_sm",
    "es": "es_core_news_sm",
    "it": "it_core_news_sm",
    "de": "de_core_news_sm",
}


class UnsupportedLanguageError(ValueError):
    """Raised for a language code with no registered spaCy model."""

    def __init__(self, language: str):
        self.language = language
        message = (
            f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}"
        )
        super().__init__(message)
|
||||
|
||||
|
||||
class SpacyClient:
    """Lazily loads spaCy pipelines and exposes part-of-speech tagging."""

    def __init__(self):
        # language code -> loaded pipeline, populated on first use
        self._cache: dict[str, spacy.Language] = {}

    def _get_nlp(self, language: str) -> spacy.Language:
        """Return the pipeline for *language*, loading it on first request.

        Raises UnsupportedLanguageError for codes absent from LANGUAGE_MODELS.
        """
        try:
            model_name = LANGUAGE_MODELS[language]
        except KeyError:
            raise UnsupportedLanguageError(language) from None
        nlp = self._cache.get(language)
        if nlp is None:
            # spacy.load is expensive, so each model is loaded at most once.
            nlp = spacy.load(model_name)
            self._cache[language] = nlp
        return nlp

    def get_parts_of_speech(self, text: str, language: str) -> dict:
        """Tokenise *text* and return per-token POS data.

        Whitespace-only tokens are excluded. The result has the shape
        {"language": <code>, "tokens": [{"text", "lemma", "pos", "tag",
        "dep", "is_stop"}, ...]}.
        """
        nlp = self._get_nlp(language)
        tokens = []
        for token in nlp(text):
            if token.is_space:
                continue
            tokens.append(
                {
                    "text": token.text,
                    "lemma": token.lemma_,
                    "pos": token.pos_,
                    "tag": token.tag_,
                    "dep": token.dep_,
                    "is_stop": token.is_stop,
                }
            )
        return {"language": language, "tokens": tokens}
|
||||
|
|
@ -1,10 +1,6 @@
|
|||
import asyncio
|
||||
import random
|
||||
import uuid
|
||||
from functools import partial
|
||||
from typing import Any, Callable, Coroutine
|
||||
|
||||
import anthropic
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
|
@ -15,48 +11,13 @@ from ...storage import upload_audio
|
|||
from ...outbound.postgres.database import get_db, AsyncSessionLocal
|
||||
from ...outbound.postgres.repositories import summarise_job_repository
|
||||
from ...outbound.anthropic.anthropic_client import AnthropicClient
|
||||
from ...outbound.deepgram.deepgram_client import LocalDeepgramClient
|
||||
from ...outbound.deepl.deepl_client import DeepLClient
|
||||
from ...outbound.gemini.gemini_client import GeminiClient
|
||||
from ...outbound.spacy.spacy_client import SpacyClient
|
||||
from ...config import settings
|
||||
from ... import worker
|
||||
|
||||
router = APIRouter(prefix="/generate", tags=["api"])
|
||||
|
||||
_ANTHROPIC_RETRYABLE = (
|
||||
anthropic.RateLimitError,
|
||||
anthropic.InternalServerError,
|
||||
anthropic.APITimeoutError,
|
||||
anthropic.APIConnectionError,
|
||||
)
|
||||
_MAX_RETRIES = 4
|
||||
_BASE_DELAY = 1.0 # seconds
|
||||
_MAX_DELAY = 60.0 # seconds
|
||||
|
||||
|
||||
async def _anthropic_with_backoff(
    coro_fn: Callable[..., Coroutine[Any, Any, Any]],
    *args: Any,
    **kwargs: Any,
) -> Any:
    """Await *coro_fn* with retries and backoff on transient Anthropic errors.

    Retries up to _MAX_RETRIES times on the exception types in
    _ANTHROPIC_RETRYABLE. A numeric Retry-After header is honoured when the
    server sends one; otherwise (or when the header is an HTTP-date) capped
    exponential backoff is used. Re-raises the last exception once retries
    are exhausted.
    """
    for attempt in range(_MAX_RETRIES + 1):
        try:
            return await coro_fn(*args, **kwargs)
        except _ANTHROPIC_RETRYABLE as exc:
            if attempt == _MAX_RETRIES:
                raise
            retry_after: float | None = None
            if isinstance(exc, anthropic.RateLimitError):
                raw = exc.response.headers.get("retry-after")
                if raw is not None:
                    try:
                        retry_after = float(raw)
                    except ValueError:
                        # Bug fix: RFC 9110 allows Retry-After to be an
                        # HTTP-date, which float() cannot parse; fall back
                        # to exponential backoff instead of crashing.
                        retry_after = None
            if retry_after is None:
                retry_after = min(_BASE_DELAY * (2 ** attempt), _MAX_DELAY)
            # ±20 % jitter to spread out concurrent retries
            jittered = retry_after * (0.8 + random.random() * 0.4)
            await asyncio.sleep(jittered)
|
||||
|
||||
|
||||
class GenerationRequest(BaseModel):
|
||||
target_language: str
|
||||
|
|
@ -71,10 +32,8 @@ class GenerationResponse(BaseModel):
|
|||
|
||||
async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
|
||||
anthropic_client = AnthropicClient.new(settings.anthropic_api_key)
|
||||
deepgram_client = LocalDeepgramClient(settings.deepgram_api_key)
|
||||
deepl_client = DeepLClient(settings.deepl_api_key)
|
||||
gemini_client = GeminiClient(settings.gemini_api_key)
|
||||
spacy_client = SpacyClient()
|
||||
|
||||
async with AsyncSessionLocal() as db:
|
||||
job = await summarise_job_repository.get_by_id(db, job_id)
|
||||
|
|
@ -85,8 +44,7 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
|
|||
|
||||
source_material = "\n\n".join(request.input_texts[:3])
|
||||
|
||||
generated_text = await _anthropic_with_backoff(
|
||||
anthropic_client.generate_summary_text,
|
||||
generated_text = await anthropic_client.generate_summary_text(
|
||||
content_to_summarise=source_material,
|
||||
complexity_level=request.complexity_level,
|
||||
from_language=language_name,
|
||||
|
|
@ -102,19 +60,11 @@ async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None
|
|||
|
||||
await summarise_job_repository.save_translated_text(db, job, translated_text)
|
||||
|
||||
target_pos_data = spacy_client.get_parts_of_speech(generated_text, request.target_language)
|
||||
source_pos_data = spacy_client.get_parts_of_speech(translated_text, request.source_language)
|
||||
|
||||
await summarise_job_repository.save_pos_data(db, job, source_pos_data, target_pos_data)
|
||||
|
||||
voice = gemini_client.get_voice_by_language(request.target_language)
|
||||
wav_bytes = await gemini_client.generate_audio(generated_text, voice)
|
||||
audio_key = f"audio/{job_id}.wav"
|
||||
upload_audio(audio_key, wav_bytes)
|
||||
|
||||
transcript = await deepgram_client.transcribe_bytes(wav_bytes, request.target_language)
|
||||
await summarise_job_repository.save_audio_transcript(db, job, transcript)
|
||||
|
||||
await summarise_job_repository.mark_succeeded(db, job, audio_key)
|
||||
|
||||
except Exception as exc:
|
||||
|
|
|
|||
|
|
@ -28,12 +28,9 @@ class JobResponse(BaseModel):
|
|||
completed_at: datetime | None = None
|
||||
# only present on success
|
||||
generated_text: str | None = None
|
||||
generated_text_pos: dict | None = None
|
||||
translated_text: str | None = None
|
||||
translated_text_pos: dict | None = None
|
||||
input_summary: str | None = None
|
||||
audio_url: str | None = None
|
||||
audio_transcript: dict | None = None
|
||||
# only present on failure
|
||||
error_message: str | None = None
|
||||
model_config = {"from_attributes": True}
|
||||
|
|
@ -43,8 +40,6 @@ class JobSummary(BaseModel):
|
|||
id: uuid.UUID
|
||||
status: str
|
||||
created_at: datetime
|
||||
completed_at: datetime | None = None
|
||||
error_message: str | None = None
|
||||
|
||||
|
||||
class JobListResponse(BaseModel):
|
||||
|
|
@ -90,12 +85,9 @@ async def get_job(
|
|||
|
||||
if job.status == "succeeded":
|
||||
response.generated_text = job.generated_text
|
||||
response.generated_text_pos = job.target_pos_data
|
||||
response.translated_text = job.translated_text
|
||||
response.translated_text_pos = job.source_pos_data
|
||||
response.input_summary = job.input_summary
|
||||
response.audio_url = job.audio_url
|
||||
response.audio_transcript = job.audio_transcript
|
||||
elif job.status == "failed":
|
||||
response.error_message = job.error_message
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,31 @@
|
|||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
import spacy
|
||||
|
||||
from ...auth import verify_token
|
||||
from ...outbound.spacy.spacy_client import SpacyClient, UnsupportedLanguageError
|
||||
|
||||
router = APIRouter(prefix="/pos", tags=["api", "pos"])
|
||||
|
||||
_spacy_client = SpacyClient()
|
||||
LANGUAGE_MODELS: dict[str, str] = {
|
||||
"en": "en_core_web_sm",
|
||||
"fr": "fr_core_news_sm",
|
||||
"es": "es_core_news_sm",
|
||||
"it": "it_core_news_sm",
|
||||
"de": "de_core_news_sm",
|
||||
}
|
||||
|
||||
_nlp_cache: dict[str, spacy.Language] = {}
|
||||
|
||||
|
||||
def _get_nlp(language: str) -> spacy.Language:
|
||||
if language not in LANGUAGE_MODELS:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}",
|
||||
)
|
||||
if language not in _nlp_cache:
|
||||
_nlp_cache[language] = spacy.load(LANGUAGE_MODELS[language])
|
||||
return _nlp_cache[language]
|
||||
|
||||
|
||||
class POSRequest(BaseModel):
|
||||
|
|
@ -30,8 +49,18 @@ class POSResponse(BaseModel):
|
|||
|
||||
@router.post("/", response_model=POSResponse)
|
||||
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
|
||||
try:
|
||||
result = _spacy_client.get_parts_of_speech(request.text, request.language)
|
||||
except UnsupportedLanguageError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
return POSResponse(**result)
|
||||
nlp = _get_nlp(request.language)
|
||||
doc = nlp(request.text)
|
||||
tokens = [
|
||||
TokenInfo(
|
||||
text=token.text,
|
||||
lemma=token.lemma_,
|
||||
pos=token.pos_,
|
||||
tag=token.tag_,
|
||||
dep=token.dep_,
|
||||
is_stop=token.is_stop,
|
||||
)
|
||||
for token in doc
|
||||
if not token.is_space
|
||||
]
|
||||
return POSResponse(language=request.language, tokens=tokens)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ dependencies = [
|
|||
"google-genai>=1.0.0",
|
||||
"boto3>=1.35.0",
|
||||
"httpx>=0.28.1",
|
||||
"deepgram-sdk>=6.1.0"
|
||||
]
|
||||
|
||||
[build-system]
|
||||
|
|
|
|||
Loading…
Reference in a new issue