language-learning-app/api/app/routers/pos.py
2026-03-18 20:55:02 +00:00

66 lines
1.6 KiB
Python

from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
import spacy
from ..auth import verify_token
# Router exposing text-analysis endpoints under the /analyze prefix.
router = APIRouter(prefix="/analyze", tags=["analysis"])

# Maps two-letter language codes to the spaCy pipeline package to load for them.
LANGUAGE_MODELS: dict[str, str] = {
    "en": "en_core_web_sm",
    "fr": "fr_core_news_sm",
    "es": "es_core_news_sm",
    "it": "it_core_news_sm",
    "de": "de_core_news_sm",
}

# Process-wide cache of loaded spaCy pipelines, keyed by language code.
# Loading a pipeline is expensive, so each model is loaded at most once
# (see _get_nlp below).
_nlp_cache: dict[str, spacy.Language] = {}
def _get_nlp(language: str) -> spacy.Language:
    """Return the (cached) spaCy pipeline for *language*.

    The pipeline is loaded on first use and memoized in ``_nlp_cache`` so
    subsequent requests reuse it.

    Args:
        language: Two-letter language code; must be a key of LANGUAGE_MODELS.

    Raises:
        HTTPException: 400 if the language code is unsupported; 503 if the
            corresponding spaCy model package is not installed on the server.
    """
    if language not in LANGUAGE_MODELS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}",
        )
    if language not in _nlp_cache:
        try:
            _nlp_cache[language] = spacy.load(LANGUAGE_MODELS[language])
        except OSError as err:
            # spacy.load raises OSError when the model package is missing;
            # surface a clean 503 instead of an unhandled 500 traceback.
            raise HTTPException(
                status_code=503,
                detail=f"Language model for '{language}' is not installed on this server.",
            ) from err
    return _nlp_cache[language]
class POSRequest(BaseModel):
    """Request body for POS analysis: the raw text and its language code."""

    text: str      # text to analyze
    language: str  # two-letter code; must be a key of LANGUAGE_MODELS
class TokenInfo(BaseModel):
    """Per-token analysis result, mirroring spaCy token attributes."""

    text: str      # surface form (token.text)
    lemma: str     # base/dictionary form (token.lemma_)
    pos: str       # coarse-grained part-of-speech tag (token.pos_)
    tag: str       # fine-grained tag (token.tag_)
    dep: str       # dependency relation label (token.dep_)
    is_stop: bool  # True if spaCy marks the token as a stop word
class POSResponse(BaseModel):
    """Response body: the analyzed language plus one TokenInfo per token."""

    language: str           # echoes the request's language code
    tokens: list[TokenInfo]  # whitespace-only tokens are excluded
@router.post("/pos", response_model=POSResponse)
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
    """Tokenize the request text and return per-token POS/dependency info.

    Requires a valid auth token (verify_token dependency). Whitespace-only
    tokens are omitted from the response.
    """
    pipeline = _get_nlp(request.language)
    parsed = pipeline(request.text)

    token_infos: list[TokenInfo] = []
    for tok in parsed:
        if tok.is_space:
            # Skip tokens that are pure whitespace.
            continue
        token_infos.append(
            TokenInfo(
                text=tok.text,
                lemma=tok.lemma_,
                pos=tok.pos_,
                tag=tok.tag_,
                dep=tok.dep_,
                is_stop=tok.is_stop,
            )
        )

    return POSResponse(language=request.language, tokens=token_infos)