# language-learning-app/api/app/routers/api/dictionary.py
import uuid
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from app.domain.models.dictionary import Lemma, Sense
from ...auth import verify_token
from ...outbound.postgres.database import get_db
from ...outbound.postgres.repositories.dictionary_repository import (
PostgresDictionaryRepository,
)
# Single router for all dictionary lookup endpoints, mounted under /dictionary.
router = APIRouter(prefix="/dictionary", tags=["dictionary"])
# ── Response models ───────────────────────────────────────────────────────────
class SenseResponse(BaseModel):
    """API representation of one sense (meaning) of a dictionary lemma."""

    id: str
    sense_index: int  # position of this sense within its lemma's sense list
    gloss: str  # the definition text
    topics: list[str]
    tags: list[str]
class LemmaResponse(BaseModel):
    """API representation of a dictionary lemma (headword entry)."""

    id: str
    headword: str
    language: str
    pos_raw: str  # part of speech as stored in the source data
    pos_normalised: str | None  # canonical POS, when a normalisation exists
    gender: str | None  # presumably grammatical gender; None when not applicable — confirm with domain model
    tags: list[str]
def _sense_to_response(s: Sense) -> SenseResponse:
    """Convert a domain-layer Sense into its API response model."""
    field_names = ("id", "sense_index", "gloss", "topics", "tags")
    return SenseResponse(**{name: getattr(s, name) for name in field_names})
def _lemma_to_response(lemma: Lemma) -> LemmaResponse:
    """Convert a domain-layer Lemma into its API response model."""
    field_names = (
        "id",
        "headword",
        "language",
        "pos_raw",
        "pos_normalised",
        "gender",
        "tags",
    )
    return LemmaResponse(**{name: getattr(lemma, name) for name in field_names})
class WordformMatch(BaseModel):
    """A matching lemma together with all of its senses."""

    lemma: LemmaResponse
    senses: list[SenseResponse]
class SenseMatch(BaseModel):
    """A single matching sense paired with the lemma it belongs to."""

    sense: SenseResponse
    lemma: LemmaResponse
# ── Endpoint ──────────────────────────────────────────────────────────────────
@router.get("/search", response_model=list[WordformMatch])
async def search_wordforms_prefix(
    lang_code: str,
    text: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> list[WordformMatch]:
    """
    Search for wordforms whose surface text starts with the given prefix.

    Uses accent-insensitive, case-insensitive prefix matching so that e.g.
    "chatea" returns both "château" and "châteaux", and "lent" returns all
    four forms of the adjective. Returns one entry per matching lemma.
    """
    repo = PostgresDictionaryRepository(db)
    wordforms = await repo.search_wordforms_by_prefix(text, lang_code)
    if not wordforms:
        return []
    # Deduplicate lemma IDs — multiple wordform rows may point to the same lemma.
    seen_lemma_ids: set[str] = set()
    results: list[WordformMatch] = []
    for wf in wordforms:
        if wf.lemma_id in seen_lemma_ids:
            continue
        seen_lemma_ids.add(wf.lemma_id)
        lemma = await repo.get_lemma(uuid.UUID(wf.lemma_id))
        if lemma is None:
            # Dangling wordform → lemma reference; skip rather than error out.
            continue
        senses = await repo.get_senses_for_lemma(uuid.UUID(wf.lemma_id))
        # Use the shared converters (as /wordforms and /senses do) instead of
        # duplicating the field-by-field model construction inline.
        results.append(
            WordformMatch(
                lemma=_lemma_to_response(lemma),
                senses=[_sense_to_response(s) for s in senses],
            )
        )
    return results
@router.get("/senses", response_model=list[SenseMatch])
async def search_senses(
    lang_code: str,
    text: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> list[SenseMatch]:
    """
    Search for senses by (English) definition prefix.

    Returns one entry per matching sense, each pairing the sense with its
    owning lemma.
    """
    repo = PostgresDictionaryRepository(db)
    matches = await repo.search_senses_by_prefix(text, lang_code)
    results: list[SenseMatch] = []
    for sense, lemma in matches:
        results.append(
            SenseMatch(
                sense=_sense_to_response(sense),
                lemma=_lemma_to_response(lemma),
            )
        )
    return results
@router.get("/wordforms", response_model=list[WordformMatch])
async def search_wordforms(
    lang_code: str,
    text: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> list[WordformMatch]:
    """
    Search for a wordform by surface text within a language.

    Returns one entry per matching lemma, each with the lemma's senses. A single
    form (e.g. "allons") may resolve to more than one lemma when homographs exist.
    """
    repo = PostgresDictionaryRepository(db)
    rows = await repo.get_wordforms_by_form(text, lang_code)
    if not rows:
        return []
    # Several wordform rows can share one lemma — emit each lemma only once.
    handled: set[str] = set()
    matches: list[WordformMatch] = []
    for row in rows:
        lemma_id = row.lemma_id
        if lemma_id in handled:
            continue
        handled.add(lemma_id)
        lemma = await repo.get_lemma(uuid.UUID(lemma_id))
        if lemma is None:
            continue  # orphaned wordform row; nothing to return for it
        senses = await repo.get_senses_for_lemma(uuid.UUID(lemma_id))
        matches.append(
            WordformMatch(
                lemma=_lemma_to_response(lemma),
                senses=[_sense_to_response(s) for s in senses],
            )
        )
    return matches