diff --git a/api/app/outbound/deepgram/__init__.py b/api/app/outbound/deepgram/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/api/app/outbound/deepgram/deepgram_client.py b/api/app/outbound/deepgram/deepgram_client.py
new file mode 100644
index 0000000..5ffc266
--- /dev/null
+++ b/api/app/outbound/deepgram/deepgram_client.py
@@ -0,0 +1,40 @@
+"""Async wrapper around the Deepgram SDK for local audio files."""
+
+import asyncio
+
+from deepgram import (
+    AsyncDeepgramClient,
+)
+
+
+def _read_audio_bytes(local_file_path: str) -> bytes:
+    """Return the raw bytes of the file at ``local_file_path``."""
+    with open(local_file_path, "rb") as audio_file:
+        return audio_file.read()
+
+
+class LocalDeepgramClient:
+    """Transcribes audio files from local disk with Deepgram."""
+
+    def __init__(self, api_key: str):
+        self.deepgram_client = AsyncDeepgramClient(api_key=api_key)
+
+    async def transcribe_local_file(self, local_file_path: str, language_code: str):
+        """Transcribe a local audio file and return the response's ``results``.
+
+        Args:
+            local_file_path: Path of the audio file to upload.
+            language_code: Language passed to Deepgram as ``language``.
+        """
+        # Read on a worker thread so the blocking disk read does not stall
+        # the event loop; the file is closed before the network call starts.
+        audio_bytes = await asyncio.to_thread(_read_audio_bytes, local_file_path)
+        response = await self.deepgram_client.listen.v1.media.transcribe_file(
+            request=audio_bytes,
+            model="nova-3",
+            language=language_code,
+            utterances=True,
+            smart_format=True,
+        )
+
+        return response.results
diff --git a/api/app/outbound/spacy/__init__.py b/api/app/outbound/spacy/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/api/app/outbound/spacy/spacy_client.py b/api/app/outbound/spacy/spacy_client.py
new file mode 100644
index 0000000..93200ed
--- /dev/null
+++ b/api/app/outbound/spacy/spacy_client.py
@@ -0,0 +1,67 @@
+"""Part-of-speech tagging backed by cached spaCy pipelines."""
+
+import spacy
+
+# Maps a supported language code to the spaCy pipeline package loaded for it.
+LANGUAGE_MODELS: dict[str, str] = {
+    "en": "en_core_web_sm",
+    "fr": "fr_core_news_sm",
+    "es": "es_core_news_sm",
+    "it": "it_core_news_sm",
+    "de": "de_core_news_sm",
+}
+
+
+class UnsupportedLanguageError(ValueError):
+    """Raised when a language code has no entry in ``LANGUAGE_MODELS``."""
+
+    def __init__(self, language: str):
+        self.language = language
+        super().__init__(
+            f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}"
+        )
+
+
+class SpacyClient:
+    """Tags text with spaCy, caching each loaded pipeline on the instance."""
+
+    def __init__(self):
+        self._cache: dict[str, spacy.Language] = {}
+
+    def _get_nlp(self, language: str) -> spacy.Language:
+        """Return the pipeline for ``language``, loading it on first use.
+
+        Raises:
+            UnsupportedLanguageError: If ``language`` is not in
+                ``LANGUAGE_MODELS``.
+        """
+        if language not in LANGUAGE_MODELS:
+            raise UnsupportedLanguageError(language)
+        if language not in self._cache:
+            self._cache[language] = spacy.load(LANGUAGE_MODELS[language])
+        return self._cache[language]
+
+    def get_parts_of_speech(self, text: str, language: str) -> dict:
+        """Tag ``text`` and return ``{"language": ..., "tokens": [...]}``.
+
+        Whitespace tokens are skipped. Each token dict has the keys
+        ``text``, ``lemma``, ``pos``, ``tag``, ``dep`` and ``is_stop``.
+
+        Raises:
+            UnsupportedLanguageError: If ``language`` is not supported.
+        """
+        nlp = self._get_nlp(language)
+        doc = nlp(text)
+        tokens = [
+            {
+                "text": token.text,
+                "lemma": token.lemma_,
+                "pos": token.pos_,
+                "tag": token.tag_,
+                "dep": token.dep_,
+                "is_stop": token.is_stop,
+            }
+            for token in doc
+            if not token.is_space
+        ]
+        return {"language": language, "tokens": tokens}
diff --git a/api/app/routers/api/pos.py b/api/app/routers/api/pos.py
index 4d99f4c..73ab6bc 100644
--- a/api/app/routers/api/pos.py
+++ b/api/app/routers/api/pos.py
@@ -1,31 +1,12 @@
 from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel
-import spacy
 
 from ...auth import verify_token
+from ...outbound.spacy.spacy_client import SpacyClient, UnsupportedLanguageError
 
 router = APIRouter(prefix="/pos", tags=["api", "pos"])
 
-LANGUAGE_MODELS: dict[str, str] = {
-    "en": "en_core_web_sm",
-    "fr": "fr_core_news_sm",
-    "es": "es_core_news_sm",
-    "it": "it_core_news_sm",
-    "de": "de_core_news_sm",
-}
-
-_nlp_cache: dict[str, spacy.Language] = {}
-
-
-def _get_nlp(language: str) -> spacy.Language:
-    if language not in LANGUAGE_MODELS:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}",
-        )
-    if language not in _nlp_cache:
-        _nlp_cache[language] = spacy.load(LANGUAGE_MODELS[language])
-    return _nlp_cache[language]
+_spacy_client = SpacyClient()
 
 
 class POSRequest(BaseModel):
@@ -49,18 +30,9 @@ class POSResponse(BaseModel):
 
 @router.post("/", response_model=POSResponse)
 def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
-    nlp = _get_nlp(request.language)
-    doc = nlp(request.text)
-    tokens = [
-        TokenInfo(
-            text=token.text,
-            lemma=token.lemma_,
-            pos=token.pos_,
-            tag=token.tag_,
-            dep=token.dep_,
-            is_stop=token.is_stop,
-        )
-        for token in doc
-        if not token.is_space
-    ]
-    return POSResponse(language=request.language, tokens=tokens)
+    # Translate the client's domain error into a 400, keeping the cause chained.
+    try:
+        result = _spacy_client.get_parts_of_speech(request.text, request.language)
+    except UnsupportedLanguageError as e:
+        raise HTTPException(status_code=400, detail=str(e)) from e
+    return POSResponse(**result)
diff --git a/api/pyproject.toml b/api/pyproject.toml
index e76ffae..3891854 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
     "google-genai>=1.0.0",
     "boto3>=1.35.0",
     "httpx>=0.28.1",
+    "deepgram-sdk>=6.1.0",
 ]
 
 [build-system]