api: Add Deepgram and spaCy clients
This commit is contained in:
parent
407d423a4c
commit
3d5551c3d9
6 changed files with 75 additions and 36 deletions
0
api/app/outbound/deepgram/__init__.py
Normal file
0
api/app/outbound/deepgram/__init__.py
Normal file
21
api/app/outbound/deepgram/deepgram_client.py
Normal file
21
api/app/outbound/deepgram/deepgram_client.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
from deepgram import (
    AsyncDeepgramClient,
)


class LocalDeepgramClient:
    """Thin async wrapper around the Deepgram SDK for transcribing local audio files."""

    def __init__(self, api_key: str):
        # The SDK client is async; callers must await transcribe_local_file.
        self.deepgram_client = AsyncDeepgramClient(api_key=api_key)

    async def transcribe_local_file(self, local_file_path: str, language_code: str):
        """Transcribe the audio file at *local_file_path* with Deepgram's nova-3 model.

        Args:
            local_file_path: Path to an audio file readable by this process.
            language_code: Language code passed through to Deepgram unchanged.

        Returns:
            The ``results`` section of the Deepgram transcription response.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        # Read the file up front so the handle is not held open across the
        # awaited network call (the original kept the `with` around the await).
        # NOTE(review): open()/read() block the event loop for large files —
        # consider asyncio.to_thread for the read if inputs can be big.
        with open(local_file_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        response = await self.deepgram_client.listen.v1.media.transcribe_file(
            request=audio_bytes,
            model="nova-3",
            language=language_code,
            utterances=True,
            smart_format=True,
        )
        return response.results
0
api/app/outbound/spacy/__init__.py
Normal file
0
api/app/outbound/spacy/__init__.py
Normal file
46
api/app/outbound/spacy/spacy_client.py
Normal file
46
api/app/outbound/spacy/spacy_client.py
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
import spacy
|
||||||
|
|
||||||
|
# Maps ISO 639-1 language codes to the small spaCy pipeline trained for that
# language. The keys double as the set of supported languages, and insertion
# order is surfaced in UnsupportedLanguageError's message.
LANGUAGE_MODELS: dict[str, str] = {
    "en": "en_core_web_sm",
    "fr": "fr_core_news_sm",
    "es": "es_core_news_sm",
    "it": "it_core_news_sm",
    "de": "de_core_news_sm",
}
|
|
||||||
|
|
||||||
|
class UnsupportedLanguageError(ValueError):
    """Raised when a caller requests a language with no configured spaCy model."""

    def __init__(self, language: str):
        # Keep the offending code available to handlers (e.g. for HTTP error bodies).
        self.language = language
        supported = list(LANGUAGE_MODELS)
        super().__init__(f"Unsupported language '{language}'. Supported: {supported}")
|
|
||||||
|
class SpacyClient:
    """Lazily loads and caches spaCy pipelines and exposes part-of-speech tagging."""

    def __init__(self):
        # language code -> loaded spaCy pipeline; populated on first use.
        self._cache: dict[str, spacy.Language] = {}

    def _get_nlp(self, language: str) -> spacy.Language:
        """Return the cached pipeline for *language*, loading it on first request.

        Raises:
            UnsupportedLanguageError: If *language* has no configured model.
        """
        if language not in LANGUAGE_MODELS:
            raise UnsupportedLanguageError(language)
        pipeline = self._cache.get(language)
        if pipeline is None:
            pipeline = spacy.load(LANGUAGE_MODELS[language])
            self._cache[language] = pipeline
        return pipeline

    def get_parts_of_speech(self, text: str, language: str) -> dict:
        """Tokenize *text* and return per-token POS/lemma/dependency annotations.

        Whitespace-only tokens are dropped. Returns a dict of the form
        ``{"language": <code>, "tokens": [<token dict>, ...]}``.

        Raises:
            UnsupportedLanguageError: If *language* has no configured model.
        """
        nlp = self._get_nlp(language)
        annotated = nlp(text)
        tokens = []
        for token in annotated:
            if token.is_space:
                continue
            tokens.append(
                {
                    "text": token.text,
                    "lemma": token.lemma_,
                    "pos": token.pos_,
                    "tag": token.tag_,
                    "dep": token.dep_,
                    "is_stop": token.is_stop,
                }
            )
        return {"language": language, "tokens": tokens}
|
@ -1,31 +1,12 @@
|
||||||
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel

from ...auth import verify_token
from ...outbound.spacy.spacy_client import SpacyClient, UnsupportedLanguageError

router = APIRouter(prefix="/pos", tags=["api", "pos"])

# Module-level singleton so loaded spaCy pipelines are shared across requests.
_spacy_client = SpacyClient()
||||||
class POSRequest(BaseModel):
|
class POSRequest(BaseModel):
|
||||||
|
|
@ -49,18 +30,8 @@ class POSResponse(BaseModel):
|
||||||
|
|
||||||
@router.post("/", response_model=POSResponse)
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
    """Tokenize the request text and return part-of-speech annotations.

    Raises:
        HTTPException: 400 when the requested language is not supported.
    """
    try:
        result = _spacy_client.get_parts_of_speech(request.text, request.language)
    except UnsupportedLanguageError as e:
        # Chain the cause so the original error survives in logs/tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
    return POSResponse(**result)
|
|
||||||
|
|
@ -17,6 +17,7 @@ dependencies = [
|
||||||
    "google-genai>=1.0.0",
    "boto3>=1.35.0",
    "httpx>=0.28.1",
    # NOTE(review): spacy (and the *_core_*_sm models) are used by
    # api/app/outbound/spacy/spacy_client.py but are not added in this hunk —
    # confirm they are declared elsewhere in this dependency list.
    "deepgram-sdk>=6.1.0",
]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue