api: Add Deepgram and Spacy clients
This commit is contained in:
parent
407d423a4c
commit
3d5551c3d9
6 changed files with 75 additions and 36 deletions
0
api/app/outbound/deepgram/__init__.py
Normal file
0
api/app/outbound/deepgram/__init__.py
Normal file
21
api/app/outbound/deepgram/deepgram_client.py
Normal file
21
api/app/outbound/deepgram/deepgram_client.py
Normal file
|
|
@@ -0,0 +1,21 @@
|
|||
import asyncio
|
||||
from deepgram import (
|
||||
AsyncDeepgramClient,
|
||||
)
|
||||
|
||||
class LocalDeepgramClient:
    """Thin async wrapper around the Deepgram SDK for transcribing local audio files."""

    def __init__(self, api_key: str):
        # One async SDK client per wrapper instance, reused across calls.
        self.deepgram_client = AsyncDeepgramClient(api_key=api_key)

    async def transcribe_local_file(self, local_file_path: str, language_code: str):
        """Transcribe an audio file on disk and return Deepgram's results.

        Args:
            local_file_path: Path to the audio file to transcribe.
            language_code: Language code forwarded to Deepgram (e.g. "en").

        Returns:
            The ``results`` attribute of the Deepgram transcription response.
        """
        # Read the file in a worker thread: the original synchronous
        # open()/read() inside this async method blocks the event loop for
        # the duration of the disk read. This also puts the previously
        # unused `asyncio` import to work.
        def _read_audio() -> bytes:
            with open(local_file_path, "rb") as audio_file:
                return audio_file.read()

        audio_bytes = await asyncio.to_thread(_read_audio)
        response = await self.deepgram_client.listen.v1.media.transcribe_file(
            request=audio_bytes,
            model="nova-3",
            language=language_code,
            utterances=True,
            smart_format=True,
        )
        return response.results
|
||||
|
||||
0
api/app/outbound/spacy/__init__.py
Normal file
0
api/app/outbound/spacy/__init__.py
Normal file
46
api/app/outbound/spacy/spacy_client.py
Normal file
46
api/app/outbound/spacy/spacy_client.py
Normal file
|
|
@@ -0,0 +1,46 @@
|
|||
import spacy
|
||||
|
||||
# ISO 639-1 language code -> spaCy pipeline package name. The small ("_sm")
# pipelines are used; each must be installed separately for spacy.load to
# succeed. Keys double as the set of languages the service supports.
LANGUAGE_MODELS: dict[str, str] = {
    "en": "en_core_web_sm",
    "fr": "fr_core_news_sm",
    "es": "es_core_news_sm",
    "it": "it_core_news_sm",
    "de": "de_core_news_sm",
}
|
||||
|
||||
|
||||
class UnsupportedLanguageError(ValueError):
    """Raised when a caller requests a language with no configured spaCy model."""

    def __init__(self, language: str):
        # Keep the offending code on the exception so API layers can report it.
        self.language = language
        message = (
            f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}"
        )
        super().__init__(message)
|
||||
|
||||
|
||||
class SpacyClient:
    """Lazily loads and caches spaCy pipelines, one per supported language."""

    def __init__(self):
        # language code -> loaded pipeline; populated on first use.
        self._cache: dict[str, spacy.Language] = {}

    def _get_nlp(self, language: str) -> spacy.Language:
        """Return the pipeline for ``language``, loading and caching it on first use.

        Raises:
            UnsupportedLanguageError: if ``language`` has no configured model.
        """
        if language not in LANGUAGE_MODELS:
            raise UnsupportedLanguageError(language)
        pipeline = self._cache.get(language)
        if pipeline is None:
            # spacy.load is expensive, so each model is loaded at most once.
            pipeline = spacy.load(LANGUAGE_MODELS[language])
            self._cache[language] = pipeline
        return pipeline

    def get_parts_of_speech(self, text: str, language: str) -> dict:
        """Tokenize ``text`` and return per-token POS/lemma/dependency details.

        Returns:
            ``{"language": <code>, "tokens": [<token dict>, ...]}`` where
            whitespace-only tokens are excluded.
        """
        analyzed = self._get_nlp(language)(text)
        token_details: list[dict] = []
        for token in analyzed:
            if token.is_space:
                continue
            token_details.append(
                {
                    "text": token.text,
                    "lemma": token.lemma_,
                    "pos": token.pos_,
                    "tag": token.tag_,
                    "dep": token.dep_,
                    "is_stop": token.is_stop,
                }
            )
        return {"language": language, "tokens": token_details}
|
||||
|
|
@@ -1,31 +1,12 @@
|
|||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
import spacy
|
||||
|
||||
from ...auth import verify_token
|
||||
from ...outbound.spacy.spacy_client import SpacyClient, UnsupportedLanguageError
|
||||
|
||||
router = APIRouter(prefix="/pos", tags=["api", "pos"])
|
||||
|
||||
LANGUAGE_MODELS: dict[str, str] = {
|
||||
"en": "en_core_web_sm",
|
||||
"fr": "fr_core_news_sm",
|
||||
"es": "es_core_news_sm",
|
||||
"it": "it_core_news_sm",
|
||||
"de": "de_core_news_sm",
|
||||
}
|
||||
|
||||
_nlp_cache: dict[str, spacy.Language] = {}
|
||||
|
||||
|
||||
def _get_nlp(language: str) -> spacy.Language:
|
||||
if language not in LANGUAGE_MODELS:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}",
|
||||
)
|
||||
if language not in _nlp_cache:
|
||||
_nlp_cache[language] = spacy.load(LANGUAGE_MODELS[language])
|
||||
return _nlp_cache[language]
|
||||
_spacy_client = SpacyClient()
|
||||
|
||||
|
||||
class POSRequest(BaseModel):
|
||||
|
|
@@ -49,18 +30,8 @@ class POSResponse(BaseModel):
|
|||
|
||||
@router.post("/", response_model=POSResponse)
|
||||
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
|
||||
nlp = _get_nlp(request.language)
|
||||
doc = nlp(request.text)
|
||||
tokens = [
|
||||
TokenInfo(
|
||||
text=token.text,
|
||||
lemma=token.lemma_,
|
||||
pos=token.pos_,
|
||||
tag=token.tag_,
|
||||
dep=token.dep_,
|
||||
is_stop=token.is_stop,
|
||||
)
|
||||
for token in doc
|
||||
if not token.is_space
|
||||
]
|
||||
return POSResponse(language=request.language, tokens=tokens)
|
||||
try:
|
||||
result = _spacy_client.get_parts_of_speech(request.text, request.language)
|
||||
except UnsupportedLanguageError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
return POSResponse(**result)
|
||||
|
|
|
|||
|
|
@@ -17,6 +17,7 @@ dependencies = [
|
|||
"google-genai>=1.0.0",
|
||||
"boto3>=1.35.0",
|
||||
"httpx>=0.28.1",
|
||||
"deepgram-sdk>=6.1.0"
|
||||
]
|
||||
|
||||
[build-system]
|
||||
|
|
|
|||
Loading…
Reference in a new issue