200 lines
5.5 KiB
Python
200 lines
5.5 KiB
Python
import uuid
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
from pydantic import BaseModel
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.domain.models.dictionary import Lemma, Sense
|
|
|
|
from ...auth import verify_token
|
|
from ...outbound.postgres.database import get_db
|
|
from ...outbound.postgres.repositories.dictionary_repository import (
|
|
PostgresDictionaryRepository,
|
|
)
|
|
|
|
# All routes below are mounted under /dictionary and grouped under the
# "dictionary" tag in the generated OpenAPI docs.
router = APIRouter(prefix="/dictionary", tags=["dictionary"])
|
|
|
|
|
|
# ── Response models ───────────────────────────────────────────────────────────
|
|
|
|
|
|
class SenseResponse(BaseModel):
    """API representation of a single dictionary sense.

    Mirrors the fields of the domain ``Sense`` model that are exposed to
    clients (see ``_sense_to_response``).
    """

    id: str
    sense_index: int
    gloss: str
    topics: list[str]
    tags: list[str]
|
|
|
|
|
class LemmaResponse(BaseModel):
    """API representation of a dictionary lemma (headword entry).

    Mirrors the fields of the domain ``Lemma`` model that are exposed to
    clients (see ``_lemma_to_response``).
    """

    id: str
    headword: str
    language: str
    pos_raw: str
    pos_normalised: str | None
    gender: str | None
    tags: list[str]
|
|
|
|
|
def _sense_to_response(s: Sense) -> SenseResponse:
    """Map a domain ``Sense`` onto its API response model."""
    payload = {
        "id": s.id,
        "sense_index": s.sense_index,
        "gloss": s.gloss,
        "topics": s.topics,
        "tags": s.tags,
    }
    return SenseResponse(**payload)
|
|
|
|
|
def _lemma_to_response(lemma: Lemma) -> LemmaResponse:
    """Map a domain ``Lemma`` onto its API response model."""
    payload = {
        "id": lemma.id,
        "headword": lemma.headword,
        "language": lemma.language,
        "pos_raw": lemma.pos_raw,
        "pos_normalised": lemma.pos_normalised,
        "gender": lemma.gender,
        "tags": lemma.tags,
    }
    return LemmaResponse(**payload)
|
|
|
|
|
|
class WordformMatch(BaseModel):
    """One wordform-search hit: a lemma together with all of its senses."""

    lemma: LemmaResponse
    senses: list[SenseResponse]
|
|
|
|
|
|
class SenseMatch(BaseModel):
    """One definition-search hit: a sense together with its owning lemma."""

    sense: SenseResponse
    lemma: LemmaResponse
|
|
|
|
|
|
# ── Endpoint ──────────────────────────────────────────────────────────────────
|
|
|
|
|
|
@router.get("/search", response_model=list[WordformMatch])
async def search_wordforms_prefix(
    lang_code: str,
    text: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> list[WordformMatch]:
    """
    Search for wordforms whose surface text starts with the given prefix.

    Uses accent-insensitive, case-insensitive prefix matching so that e.g.
    "chatea" returns both "château" and "châteaux", and "lent" returns all
    four forms of the adjective. Returns one entry per matching lemma.
    """
    repo = PostgresDictionaryRepository(db)
    wordforms = await repo.search_wordforms_by_prefix(text, lang_code)

    if not wordforms:
        return []

    # Deduplicate lemma IDs — multiple wordform rows may point to the same lemma.
    seen_lemma_ids: set[str] = set()
    results: list[WordformMatch] = []

    for wf in wordforms:
        if wf.lemma_id in seen_lemma_ids:
            continue
        seen_lemma_ids.add(wf.lemma_id)

        # Parse the UUID once and reuse it for both repository lookups.
        lemma_uuid = uuid.UUID(wf.lemma_id)
        lemma = await repo.get_lemma(lemma_uuid)
        if lemma is None:
            # Orphaned wordform row; skip it rather than failing the search.
            continue

        senses = await repo.get_senses_for_lemma(lemma_uuid)

        # Use the shared converters so the payload shape stays consistent
        # with the other endpoints in this router.
        results.append(
            WordformMatch(
                lemma=_lemma_to_response(lemma),
                senses=[_sense_to_response(s) for s in senses],
            )
        )

    return results
|
|
|
|
|
|
@router.get("/senses", response_model=list[SenseMatch])
async def search_senses(
    lang_code: str,
    text: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> list[SenseMatch]:
    """
    Search for senses by (English) definition within a language.

    Returns one entry per matching sense, each paired with its owning lemma.
    """
    repo = PostgresDictionaryRepository(db)
    senses = await repo.search_senses_by_prefix(text, lang_code)

    # An empty result set falls through naturally: the comprehension yields [].
    return [
        SenseMatch(lemma=_lemma_to_response(lemma), sense=_sense_to_response(sense))
        for (sense, lemma) in senses
    ]
|
|
|
|
|
|
@router.get("/wordforms", response_model=list[WordformMatch])
async def search_wordforms(
    lang_code: str,
    text: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> list[WordformMatch]:
    """
    Search for a wordform by surface text within a language.

    Returns one entry per matching lemma, each with the lemma's senses. A single
    form (e.g. "allons") may resolve to more than one lemma when homographs exist.
    """
    repo = PostgresDictionaryRepository(db)
    wordforms = await repo.get_wordforms_by_form(text, lang_code)

    if not wordforms:
        return []

    # Several wordform rows can share a lemma; collapse duplicates while
    # keeping first-seen order (dict.fromkeys preserves insertion order).
    unique_lemma_ids = dict.fromkeys(wf.lemma_id for wf in wordforms)

    matches: list[WordformMatch] = []
    for lemma_id in unique_lemma_ids:
        key = uuid.UUID(lemma_id)
        lemma = await repo.get_lemma(key)
        if lemma is None:
            continue

        lemma_senses = await repo.get_senses_for_lemma(key)
        matches.append(
            WordformMatch(
                lemma=_lemma_to_response(lemma),
                senses=[_sense_to_response(s) for s in lemma_senses],
            )
        )

    return matches
|