200 lines
5.5 KiB
Python
200 lines
5.5 KiB
Python
import uuid
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
from pydantic import BaseModel
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.domain.models.dictionary import Lemma, Sense
|
|
|
|
from ...auth import verify_token
|
|
from ...outbound.postgres.database import get_db
|
|
from ...outbound.postgres.repositories.dictionary_repository import (
|
|
PostgresDictionaryRepository,
|
|
)
|
|
|
|
# All routes below are mounted under /dictionary and grouped under the
# "dictionary" tag in the generated OpenAPI docs.
router = APIRouter(prefix="/dictionary", tags=["dictionary"])
|
|
|
|
|
|
# ── Response models ───────────────────────────────────────────────────────────
|
|
|
|
|
|
class SenseResponse(BaseModel):
    """API representation of a single dictionary sense.

    Mirrors the fields of the domain ``Sense`` model that are exposed to
    clients (see ``_sense_to_response``).
    """

    id: str
    sense_index: int
    gloss: str
    topics: list[str]
    tags: list[str]
|
|
|
|
|
class LemmaResponse(BaseModel):
    """API representation of a dictionary lemma (headword entry).

    Mirrors the fields of the domain ``Lemma`` model that are exposed to
    clients (see ``_lemma_to_response``).
    """

    id: str
    headword: str
    language: str
    pos_raw: str
    pos_normalised: str | None
    gender: str | None
    tags: list[str]
|
|
|
|
|
def _sense_to_response(s: Sense) -> SenseResponse:
    """Map a domain ``Sense`` onto its API response model."""
    payload = {
        "id": s.id,
        "sense_index": s.sense_index,
        "gloss": s.gloss,
        "topics": s.topics,
        "tags": s.tags,
    }
    return SenseResponse(**payload)
|
|
|
|
|
def _lemma_to_response(lemma: Lemma) -> LemmaResponse:
    """Map a domain ``Lemma`` onto its API response model."""
    payload = {
        "id": lemma.id,
        "headword": lemma.headword,
        "language": lemma.language,
        "pos_raw": lemma.pos_raw,
        "pos_normalised": lemma.pos_normalised,
        "gender": lemma.gender,
        "tags": lemma.tags,
    }
    return LemmaResponse(**payload)
|
|
|
|
|
|
class WordformMatch(BaseModel):
    """One wordform-search hit: a lemma together with all of its senses."""

    lemma: LemmaResponse
    senses: list[SenseResponse]
|
|
|
|
|
|
class SenseMatch(BaseModel):
    """One definition-search hit: a sense together with its owning lemma."""

    sense: SenseResponse
    lemma: LemmaResponse
|
|
|
|
|
|
# ── Endpoint ──────────────────────────────────────────────────────────────────
|
|
|
|
|
|
@router.get("/search", response_model=list[WordformMatch])
async def search_wordforms_prefix(
    lang_code: str,
    text: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> list[WordformMatch]:
    """
    Search for wordforms whose surface text starts with the given prefix.

    Uses accent-insensitive, case-insensitive prefix matching so that e.g.
    "chatea" returns both "château" and "châteaux", and "lent" returns all
    four forms of the adjective. Returns one entry per matching lemma.
    """
    repo = PostgresDictionaryRepository(db)
    wordforms = await repo.search_wordforms_by_prefix(text, lang_code)

    if not wordforms:
        return []

    # Deduplicate lemma IDs — multiple wordform rows may point to the same lemma.
    seen_lemma_ids: set[str] = set()
    results: list[WordformMatch] = []

    for wf in wordforms:
        if wf.lemma_id in seen_lemma_ids:
            continue
        seen_lemma_ids.add(wf.lemma_id)

        # Parse the UUID once and reuse it for both repository lookups.
        lemma_uuid = uuid.UUID(wf.lemma_id)
        lemma = await repo.get_lemma(lemma_uuid)
        if lemma is None:
            # Orphaned wordform row; skip it rather than failing the search.
            continue

        senses = await repo.get_senses_for_lemma(lemma_uuid)

        # Use the shared converters so the payload shape stays consistent
        # with the other endpoints in this router.
        results.append(
            WordformMatch(
                lemma=_lemma_to_response(lemma),
                senses=[_sense_to_response(s) for s in senses],
            )
        )

    return results
|
|
|
|
|
|
@router.get("/senses", response_model=list[SenseMatch])
async def search_senses(
    lang_code: str,
    text: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> list[SenseMatch]:
    """
    Search for senses by (English) definition within a language.

    Returns one entry per matching sense, each paired with its owning lemma.
    """
    repo = PostgresDictionaryRepository(db)
    senses = await repo.search_senses_by_prefix(text, lang_code)

    # An empty result set falls through naturally: the comprehension yields [].
    return [
        SenseMatch(lemma=_lemma_to_response(lemma), sense=_sense_to_response(sense))
        for (sense, lemma) in senses
    ]
|
|
|
|
|
|
@router.get("/wordforms", response_model=list[WordformMatch])
async def search_wordforms(
    lang_code: str,
    text: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> list[WordformMatch]:
    """
    Search for a wordform by surface text within a language.

    Returns one entry per matching lemma, each with the lemma's senses. A single
    form (e.g. "allons") may resolve to more than one lemma when homographs exist.
    """
    repo = PostgresDictionaryRepository(db)
    wordforms = await repo.get_wordforms_by_form(text, lang_code)

    if not wordforms:
        return []

    # Several wordform rows can share a lemma; collapse duplicates while
    # keeping first-seen order (dict.fromkeys preserves insertion order).
    unique_lemma_ids = dict.fromkeys(wf.lemma_id for wf in wordforms)

    matches: list[WordformMatch] = []
    for lemma_id in unique_lemma_ids:
        key = uuid.UUID(lemma_id)
        lemma = await repo.get_lemma(key)
        if lemma is None:
            continue

        lemma_senses = await repo.get_senses_for_lemma(key)
        matches.append(
            WordformMatch(
                lemma=_lemma_to_response(lemma),
                senses=[_sense_to_response(s) for s in lemma_senses],
            )
        )

    return matches
|