"""POS-tagging analysis endpoints backed by per-language spaCy pipelines."""

from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
import spacy

from ..auth import verify_token

router = APIRouter(prefix="/analyze", tags=["analysis"])

# Supported language codes mapped to the spaCy model package that serves them.
LANGUAGE_MODELS: dict[str, str] = {
    "en": "en_core_web_sm",
    "fr": "fr_core_news_sm",
    "es": "es_core_news_sm",
    "it": "it_core_news_sm",
    "de": "de_core_news_sm",
}

# Loaded pipelines keyed by language code. spacy.load is expensive, so each
# model is loaded at most once per process and reused across requests.
_nlp_cache: dict[str, spacy.Language] = {}


def _get_nlp(language: str) -> spacy.Language:
    """Return the cached spaCy pipeline for *language*, loading it on first use.

    Raises:
        HTTPException: 400 if *language* is not a supported code;
            500 if the corresponding model package is not installed.
    """
    if language not in LANGUAGE_MODELS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}",
        )
    if language not in _nlp_cache:
        try:
            _nlp_cache[language] = spacy.load(LANGUAGE_MODELS[language])
        except OSError as err:
            # spacy.load raises OSError when the model package is missing;
            # surface a clean 500 instead of an unhandled traceback.
            raise HTTPException(
                status_code=500,
                detail=f"Language model '{LANGUAGE_MODELS[language]}' is not installed.",
            ) from err
    return _nlp_cache[language]


class POSRequest(BaseModel):
    """Request body for POST /analyze/pos."""

    text: str
    language: str


class TokenInfo(BaseModel):
    """Per-token analysis attributes extracted from a spaCy token."""

    text: str
    lemma: str
    pos: str  # coarse-grained universal POS tag (token.pos_)
    tag: str  # fine-grained, language-specific tag (token.tag_)
    dep: str  # syntactic dependency relation (token.dep_)
    is_stop: bool


class POSResponse(BaseModel):
    """Response body for POST /analyze/pos."""

    language: str
    tokens: list[TokenInfo]


@router.post("/pos", response_model=POSResponse)
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
    """Tokenize and POS-tag ``request.text`` in the requested language.

    Whitespace-only tokens are omitted from the result. Requires a valid
    auth token (enforced by the ``verify_token`` dependency).
    """
    nlp = _get_nlp(request.language)
    doc = nlp(request.text)
    tokens = [
        TokenInfo(
            text=token.text,
            lemma=token.lemma_,
            pos=token.pos_,
            tag=token.tag_,
            dep=token.dep_,
            is_stop=token.is_stop,
        )
        for token in doc
        if not token.is_space
    ]
    return POSResponse(language=request.language, tokens=tokens)