"""Part-of-speech analysis endpoints backed by spaCy language models."""
import logging

from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
import spacy

from ..auth import verify_token
|
|
|
|
# Every route registered on this router is served under the /analyze prefix
# and carries the "analysis" tag.
router = APIRouter(prefix="/analyze", tags=["analysis"])

# Supported language codes mapped to the name of the spaCy model loaded for
# each one. Insertion order is what clients see in the "Supported: [...]"
# part of the unsupported-language error message.
LANGUAGE_MODELS: dict[str, str] = {
    "en": "en_core_web_sm",
    "fr": "fr_core_news_sm",
    "es": "es_core_news_sm",
    "it": "it_core_news_sm",
    "de": "de_core_news_sm",
}

# Pipelines that have already been loaded, keyed by language code. Starts
# empty and is filled on demand by _get_nlp.
_nlp_cache: dict[str, spacy.Language] = {}
|
|
|
|
|
|
def _get_nlp(language: str) -> spacy.Language:
    """Return the spaCy pipeline for ``language``, loading it on first use.

    A pipeline that has been loaded once is stored in the module-level
    ``_nlp_cache`` and returned directly on later calls.

    Args:
        language: Language code; must be a key of ``LANGUAGE_MODELS``.

    Returns:
        The ``spacy.Language`` pipeline registered for that code.

    Raises:
        HTTPException: 400 if ``language`` is not a key of
            ``LANGUAGE_MODELS``; 500 if the configured model cannot be
            loaded on this server.
    """
    model_name = LANGUAGE_MODELS.get(language)
    if model_name is None:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}",
        )

    nlp = _nlp_cache.get(language)
    if nlp is None:
        try:
            nlp = spacy.load(model_name)
        except OSError as exc:
            # spacy.load raises OSError when the model package is missing or
            # unreadable. Log the traceback for operators and give the client
            # an explicit message instead of an opaque unhandled error.
            logging.getLogger(__name__).exception(
                "Failed to load spaCy model %r for language %r", model_name, language
            )
            raise HTTPException(
                status_code=500,
                detail=f"Language model for '{language}' is not available on this server.",
            ) from exc
        # Only cache after a successful load so a failed attempt is retried.
        _nlp_cache[language] = nlp
    return nlp
|
|
|
|
|
|
class POSRequest(BaseModel):
    """Request body for the POS analysis endpoint."""

    # Raw text to run through the spaCy pipeline.
    text: str
    # Language code used to pick the pipeline (looked up in LANGUAGE_MODELS).
    language: str
|
|
|
|
|
|
class TokenInfo(BaseModel):
    """Per-token annotations returned by the POS analysis endpoint.

    Each field mirrors one attribute of a spaCy token, as filled in by
    ``analyze_pos``.
    """

    # Verbatim token text (token.text).
    text: str
    # Lemma string (token.lemma_).
    lemma: str
    # Part-of-speech label string (token.pos_).
    pos: str
    # Tag label string (token.tag_).
    tag: str
    # Dependency relation label string (token.dep_).
    dep: str
    # Stop-word flag (token.is_stop).
    is_stop: bool
|
|
|
|
|
|
class POSResponse(BaseModel):
    """Response body of the POS analysis endpoint."""

    # Language code echoed back from the request.
    language: str
    # One entry per non-whitespace token, in document order.
    tokens: list[TokenInfo]
|
|
|
|
|
|
@router.post("/pos", response_model=POSResponse)
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
    """Tokenize ``request.text`` and return per-token annotations.

    The pipeline for ``request.language`` is obtained from ``_get_nlp``; every
    token that is not pure whitespace is reported as a ``TokenInfo``.

    Args:
        request: Text to analyze and the language code selecting the pipeline.
        _: Result of the ``verify_token`` dependency; unused here, declared
            only so the dependency runs for this route.

    Returns:
        A ``POSResponse`` echoing the requested language and listing the
        tokens in document order.

    Raises:
        HTTPException: Propagated from ``_get_nlp`` when no pipeline can be
            provided for the language; 413 when the text is longer than the
            pipeline's ``max_length``.
    """
    nlp = _get_nlp(request.language)

    # spaCy refuses texts longer than nlp.max_length with a ValueError, which
    # would otherwise surface as an unhandled server error. Reject it up
    # front as a client error instead.
    text_length = len(request.text)
    if text_length > nlp.max_length:
        raise HTTPException(
            status_code=413,
            detail=(
                f"Text is too long ({text_length} characters). "
                f"Maximum supported length is {nlp.max_length} characters."
            ),
        )

    doc = nlp(request.text)
    tokens = [
        TokenInfo(
            text=token.text,
            lemma=token.lemma_,
            pos=token.pos_,
            tag=token.tag_,
            dep=token.dep_,
            is_stop=token.is_stop,
        )
        for token in doc
        if not token.is_space  # whitespace-only tokens are left out
    ]
    return POSResponse(language=request.language, tokens=tokens)
|