2026-04-08 19:26:26 +00:00
|
|
|
import uuid
|
2026-04-18 16:26:09 +00:00
|
|
|
from dataclasses import dataclass
|
2026-04-08 19:26:26 +00:00
|
|
|
from typing import Protocol
|
|
|
|
|
|
2026-04-18 16:26:09 +00:00
|
|
|
from sqlalchemy import func, select
|
2026-04-08 19:26:26 +00:00
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
2026-04-18 16:26:09 +00:00
|
|
|
from ....domain.models.dictionary import Lemma, Sense, Wordform
|
2026-04-08 19:26:26 +00:00
|
|
|
from ..entities.dictionary_entities import (
|
|
|
|
|
DictionaryLemmaEntity,
|
|
|
|
|
DictionarySenseEntity,
|
|
|
|
|
DictionaryWordformEntity,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DictionaryRepository(Protocol):
    """Structural interface for looking up dictionary lemmas, senses and wordforms.

    Every method is a coroutine. List-returning methods return a ``list`` of
    domain models; single-item lookups return ``None`` when nothing is found.
    """

    async def get_senses_for_headword(
        self, headword: str, language: str
    ) -> list[Sense]:
        """Return the senses of ``headword`` in ``language``."""

    async def get_senses_for_headword_and_pos(
        self, headword: str, language: str, pos_normalised: str
    ) -> list[Sense]:
        """Return the senses of ``headword`` in ``language`` for one part of speech."""

    async def get_senses_for_lemma(self, lemma_id: uuid.UUID) -> list[Sense]:
        """Return the senses that belong to the lemma ``lemma_id``."""

    async def find_senses_by_english_gloss(
        self, text: str, target_lang: str
    ) -> list[Sense]:
        """Return ``target_lang`` senses whose gloss matches the English ``text``."""

    async def get_sense(self, sense_id: uuid.UUID) -> Sense | None:
        """Return the sense identified by ``sense_id``, or ``None``."""

    async def get_lemma(self, lemma_id: uuid.UUID) -> Lemma | None:
        """Return the lemma identified by ``lemma_id``, or ``None``."""

    async def get_wordforms_by_form(
        self, form: str, language: str
    ) -> list[Wordform]:
        """Return the wordforms in ``language`` whose surface form is ``form``."""

    async def search_wordforms_by_prefix(
        self, prefix: str, language: str
    ) -> list[Wordform]:
        """Return the wordforms in ``language`` whose form starts with ``prefix``."""

    async def get_wordforms_for_lemma(self, lemma_id: uuid.UUID) -> list[Wordform]:
        """Return the wordforms that belong to the lemma ``lemma_id``."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _sense_to_model(entity: DictionarySenseEntity) -> Sense:
    """Convert a sense ORM row into the domain ``Sense`` model.

    Both identifiers are passed through ``str()``. A falsy ``topics`` or
    ``tags`` value on the row (e.g. ``None``) becomes an empty list.
    """
    sense_id = str(entity.id)
    owning_lemma_id = str(entity.lemma_id)
    index = entity.sense_index
    gloss_text = entity.gloss
    topic_list = entity.topics or []
    tag_list = entity.tags or []
    return Sense(
        id=sense_id,
        lemma_id=owning_lemma_id,
        sense_index=index,
        gloss=gloss_text,
        topics=topic_list,
        tags=tag_list,
    )
|
|
|
|
|
|
|
|
|
|
|
2026-04-09 19:40:11 +00:00
|
|
|
def _lemma_to_model(entity: DictionaryLemmaEntity) -> Lemma:
    """Convert a lemma ORM row into the domain ``Lemma`` model.

    The identifier is passed through ``str()``. A falsy ``tags`` value on the
    row becomes an empty list; every other field is copied unchanged.
    """
    lemma_id = str(entity.id)
    headword = entity.headword
    language = entity.language
    raw_pos = entity.pos_raw
    normalised_pos = entity.pos_normalised
    gender = entity.gender
    tag_list = entity.tags or []
    return Lemma(
        id=lemma_id,
        headword=headword,
        language=language,
        pos_raw=raw_pos,
        pos_normalised=normalised_pos,
        gender=gender,
        tags=tag_list,
    )
|
|
|
|
|
|
|
|
|
|
|
2026-04-08 19:26:26 +00:00
|
|
|
def _wordform_to_model(entity: DictionaryWordformEntity) -> Wordform:
    """Convert a wordform ORM row into the domain ``Wordform`` model.

    Both identifiers are passed through ``str()``. A falsy ``tags`` value on
    the row becomes an empty list.
    """
    wordform_id = str(entity.id)
    owning_lemma_id = str(entity.lemma_id)
    surface_form = entity.form
    tag_list = entity.tags or []
    return Wordform(
        id=wordform_id,
        lemma_id=owning_lemma_id,
        form=surface_form,
        tags=tag_list,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Escape character used for ILIKE patterns built from caller-supplied text.
_LIKE_ESCAPE_CHAR = "/"


def _escape_like_pattern(value: str, escape: str = _LIKE_ESCAPE_CHAR) -> str:
    """Escape ``LIKE``/``ILIKE`` metacharacters so ``value`` matches literally.

    The escape character itself is doubled first, so the escapes added for
    ``%`` and ``_`` afterwards are not escaped a second time.
    """
    return (
        value.replace(escape, escape * 2)
        .replace("%", f"{escape}%")
        .replace("_", f"{escape}_")
    )


class PostgresDictionaryRepository:
    """Dictionary lookups executed through a SQLAlchemy ``AsyncSession``.

    Provides the methods declared by ``DictionaryRepository`` plus
    ``search_senses_by_prefix``. ORM rows are converted to domain models
    before they are returned.
    """

    def __init__(self, db: AsyncSession) -> None:
        """Store the session used by every query of this repository."""
        self.db = db

    async def get_senses_for_headword(
        self, headword: str, language: str
    ) -> list[Sense]:
        """Return the senses of ``headword`` in ``language``, by ``sense_index``."""
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .join(
                DictionaryLemmaEntity,
                DictionarySenseEntity.lemma_id == DictionaryLemmaEntity.id,
            )
            .where(
                DictionaryLemmaEntity.headword == headword,
                DictionaryLemmaEntity.language == language,
            )
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]

    async def find_senses_by_english_gloss(
        self, text: str, target_lang: str
    ) -> list[Sense]:
        """EN→target direction: find senses whose gloss matches the given English text.

        Uses a case-insensitive exact match on the gloss column, filtered to the
        target language via the joined lemma row. Results are ordered by
        ``sense_index``.

        ``%`` and ``_`` in ``text`` are escaped before being handed to
        ``ILIKE``; without that they would act as wildcards and the match
        would no longer be exact.
        """
        literal_pattern = _escape_like_pattern(text)
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .join(
                DictionaryLemmaEntity,
                DictionarySenseEntity.lemma_id == DictionaryLemmaEntity.id,
            )
            .where(
                DictionarySenseEntity.gloss.ilike(
                    literal_pattern, escape=_LIKE_ESCAPE_CHAR
                ),
                DictionaryLemmaEntity.language == target_lang,
            )
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]

    async def get_sense(self, sense_id: uuid.UUID) -> Sense | None:
        """Return the sense with primary key ``sense_id``, or ``None`` if absent."""
        result = await self.db.execute(
            select(DictionarySenseEntity).where(DictionarySenseEntity.id == sense_id)
        )
        entity = result.scalar_one_or_none()
        return _sense_to_model(entity) if entity else None

    async def get_lemma(self, lemma_id: uuid.UUID) -> Lemma | None:
        """Return the lemma with primary key ``lemma_id``, or ``None`` if absent."""
        result = await self.db.execute(
            select(DictionaryLemmaEntity).where(DictionaryLemmaEntity.id == lemma_id)
        )
        entity = result.scalar_one_or_none()
        return _lemma_to_model(entity) if entity else None

    async def get_senses_for_headword_and_pos(
        self, headword: str, language: str, pos_normalised: str
    ) -> list[Sense]:
        """Return the senses of ``headword`` in ``language`` for one part of speech.

        Same query as ``get_senses_for_headword`` with an additional equality
        filter on the lemma's ``pos_normalised``; ordered by ``sense_index``.
        """
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .join(
                DictionaryLemmaEntity,
                DictionarySenseEntity.lemma_id == DictionaryLemmaEntity.id,
            )
            .where(
                DictionaryLemmaEntity.headword == headword,
                DictionaryLemmaEntity.language == language,
                DictionaryLemmaEntity.pos_normalised == pos_normalised,
            )
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]

    async def get_senses_for_lemma(self, lemma_id: uuid.UUID) -> list[Sense]:
        """Return the senses attached to lemma ``lemma_id``, by ``sense_index``."""
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .where(DictionarySenseEntity.lemma_id == lemma_id)
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]

    async def get_wordforms_by_form(self, form: str, language: str) -> list[Wordform]:
        """Return wordforms whose ``form`` equals ``form`` for lemmas in ``language``.

        No ``ORDER BY`` is applied, so the row order is whatever the database
        returns.
        """
        result = await self.db.execute(
            select(DictionaryWordformEntity)
            .join(
                DictionaryLemmaEntity,
                DictionaryWordformEntity.lemma_id == DictionaryLemmaEntity.id,
            )
            .where(
                DictionaryWordformEntity.form == form,
                DictionaryLemmaEntity.language == language,
            )
        )
        return [_wordform_to_model(e) for e in result.scalars().all()]

    async def search_wordforms_by_prefix(
        self, prefix: str, language: str
    ) -> list[Wordform]:
        """Return wordforms in ``language`` whose form starts with ``prefix``.

        Both sides are passed through the database's ``unaccent`` function and
        compared with ``ILIKE``, so the match ignores case and whatever
        ``unaccent`` strips. NOTE(review): ``unaccent`` is presumably
        PostgreSQL's extension of that name; confirm it is installed.
        Wildcards in ``prefix`` are not escaped. No ``ORDER BY`` is applied.
        """
        result = await self.db.execute(
            select(DictionaryWordformEntity)
            .join(
                DictionaryLemmaEntity,
                DictionaryWordformEntity.lemma_id == DictionaryLemmaEntity.id,
            )
            .where(
                func.unaccent(DictionaryWordformEntity.form).ilike(
                    func.unaccent(prefix) + "%"
                ),
                DictionaryLemmaEntity.language == language,
            )
        )
        return [_wordform_to_model(e) for e in result.scalars().all()]

    async def search_senses_by_prefix(
        self, prefix: str, lang: str
    ) -> list[tuple[Sense, Lemma]]:
        """Return ``(sense, lemma)`` pairs in ``lang`` whose gloss starts with ``prefix``.

        The comparison is a case-insensitive ``ILIKE``. As in
        ``search_wordforms_by_prefix``, wildcards already present in
        ``prefix`` are passed through unchanged. No ``ORDER BY`` is applied.
        """
        result = await self.db.execute(
            select(DictionarySenseEntity, DictionaryLemmaEntity)
            .join(
                DictionaryLemmaEntity,
                DictionarySenseEntity.lemma_id == DictionaryLemmaEntity.id,
            )
            .where(
                # The trailing "%" is what makes this a prefix search; a bare
                # ILIKE on ``prefix`` only matches glosses equal to it.
                DictionarySenseEntity.gloss.ilike(prefix + "%"),
                DictionaryLemmaEntity.language == lang,
            )
        )

        entity_pairs = (row.tuple() for row in result.all())
        return [
            (_sense_to_model(sense_entity), _lemma_to_model(lemma_entity))
            for sense_entity, lemma_entity in entity_pairs
        ]

    async def get_wordforms_for_lemma(self, lemma_id: uuid.UUID) -> list[Wordform]:
        """Return the wordforms attached to lemma ``lemma_id`` (no ordering applied)."""
        result = await self.db.execute(
            select(DictionaryWordformEntity).where(
                DictionaryWordformEntity.lemma_id == lemma_id
            )
        )
        return [_wordform_to_model(e) for e in result.scalars().all()]
|