# language-learning-app/api/app/outbound/postgres/repositories/dictionary_repository.py

import uuid
from typing import Protocol

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from ..entities.dictionary_entities import (
    DictionaryLemmaEntity,
    DictionarySenseEntity,
    DictionaryWordformEntity,
)
from ....domain.models.dictionary import Lemma, Sense, Wordform


class DictionaryRepository(Protocol):
    """Structural interface for dictionary lookups.

    Any object that provides these three coroutine methods with compatible
    signatures satisfies the protocol; no inheritance is required.
    """

    async def get_senses_for_headword(
        self,
        headword: str,
        language: str,
    ) -> list[Sense]:
        """Return the senses recorded for ``headword`` in ``language``."""

    async def find_senses_by_english_gloss(
        self,
        text: str,
        target_lang: str,
    ) -> list[Sense]:
        """Return ``target_lang`` senses whose gloss matches the English ``text``."""

    async def get_wordforms_for_lemma(
        self,
        lemma_id: uuid.UUID,
    ) -> list[Wordform]:
        """Return the wordforms attached to the lemma identified by ``lemma_id``."""


def _sense_to_model(entity: DictionarySenseEntity) -> Sense:
    """Convert a sense ORM row into a domain ``Sense``.

    The row's ``id`` and ``lemma_id`` are stringified, and falsy ``topics`` /
    ``tags`` values (for example ``None``) are replaced with empty lists.
    """
    # Collect the constructor arguments first; attribute access order matches
    # the field order of the resulting model.
    sense_fields = {
        "id": str(entity.id),
        "lemma_id": str(entity.lemma_id),
        "sense_index": entity.sense_index,
        "gloss": entity.gloss,
        "topics": entity.topics or [],
        "tags": entity.tags or [],
    }
    return Sense(**sense_fields)


def _wordform_to_model(entity: DictionaryWordformEntity) -> Wordform:
    """Convert a wordform ORM row into a domain ``Wordform``.

    The row's ``id`` and ``lemma_id`` are stringified, and a falsy ``tags``
    value (for example ``None``) is replaced with an empty list.
    """
    wordform_fields = {
        "id": str(entity.id),
        "lemma_id": str(entity.lemma_id),
        "form": entity.form,
        "tags": entity.tags or [],
    }
    return Wordform(**wordform_fields)


class PostgresDictionaryRepository:
    """Dictionary lookups executed through an async SQLAlchemy session.

    Every query result is converted to a domain model before being returned.
    """

    # Escape character used when building LIKE/ILIKE patterns. A forward slash
    # is used instead of a backslash so the rendered ESCAPE clause does not
    # depend on the database's backslash-quoting rules.
    _LIKE_ESCAPE = "/"

    def __init__(self, db: AsyncSession) -> None:
        """Store the session used to execute all queries."""
        self.db = db

    @classmethod
    def _escape_like(cls, text: str) -> str:
        """Return ``text`` with LIKE metacharacters neutralised.

        ``%`` and ``_`` are prefixed with the escape character so they match
        themselves rather than acting as wildcards.
        """
        esc = cls._LIKE_ESCAPE
        # Double the escape character first; otherwise the escapes inserted
        # for "%" and "_" below would themselves be doubled.
        escaped = text.replace(esc, esc + esc)
        return escaped.replace("%", esc + "%").replace("_", esc + "_")

    async def get_senses_for_headword(self, headword: str, language: str) -> list[Sense]:
        """Return the senses of lemmas matching ``headword`` and ``language``.

        Both comparisons are plain equality checks on the joined lemma row.
        Results are ordered by ``sense_index``.
        """
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .join(DictionaryLemmaEntity, DictionarySenseEntity.lemma_id == DictionaryLemmaEntity.id)
            .where(
                DictionaryLemmaEntity.headword == headword,
                DictionaryLemmaEntity.language == language,
            )
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]

    async def find_senses_by_english_gloss(self, text: str, target_lang: str) -> list[Sense]:
        """EN→target direction: find senses whose gloss matches the given English text.

        Uses a case-insensitive exact match on the gloss column, filtered to the
        target language via the joined lemma row. Results are ordered by
        ``sense_index``.

        ``text`` is treated as a literal string: any ``%`` or ``_`` it contains
        is escaped so it cannot act as a pattern wildcard.
        """
        # ILIKE provides the case-insensitivity; escaping keeps the comparison
        # exact even when the caller's text contains LIKE metacharacters.
        literal_pattern = self._escape_like(text)
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .join(DictionaryLemmaEntity, DictionarySenseEntity.lemma_id == DictionaryLemmaEntity.id)
            .where(
                DictionarySenseEntity.gloss.ilike(literal_pattern, escape=self._LIKE_ESCAPE),
                DictionaryLemmaEntity.language == target_lang,
            )
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]

    async def get_wordforms_for_lemma(self, lemma_id: uuid.UUID) -> list[Wordform]:
        """Return every wordform whose ``lemma_id`` equals the given id.

        No ordering is applied to the query, so the order of the returned list
        is whatever the database yields.
        """
        result = await self.db.execute(
            select(DictionaryWordformEntity).where(
                DictionaryWordformEntity.lemma_id == lemma_id
            )
        )
        return [_wordform_to_model(e) for e in result.scalars().all()]