2026-04-08 19:37:00 +00:00
|
|
|
import uuid
|
|
|
|
|
|
2026-04-10 06:11:57 +00:00
|
|
|
from ..models.dictionary import Sense
|
2026-04-08 19:37:00 +00:00
|
|
|
from ..models.vocab import LearnableWordBankEntry
|
|
|
|
|
from ...outbound.postgres.repositories.vocab_repository import VocabRepository
|
|
|
|
|
from ...outbound.postgres.repositories.dictionary_repository import DictionaryRepository
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class VocabService:
    """Manages a user's learnable word bank — adding words from various sources and
    resolving which dictionary sense a word belongs to.

    Usage::

        service = VocabService(
            vocab_repo=PostgresVocabRepository(db),
            dict_repo=PostgresDictionaryRepository(db),
        )
        entry = await service.add_word_to_bank(
            user_id=user.id,
            surface_text="banque",
            language_pair_id=pair.id,
            pathway="highlight",
        )
        # entry.disambiguation_status is "auto_resolved" if "banque" has exactly one
        # dictionary sense, or "pending" otherwise (zero senses, or several senses the
        # user has to pick from).
    """

    def __init__(self, vocab_repo: VocabRepository, dict_repo: DictionaryRepository) -> None:
        """Store the repositories used for bank persistence and dictionary lookup."""
        self.vocab_repo = vocab_repo
        self.dict_repo = dict_repo

    @staticmethod
    def _resolve_sense(senses: list[Sense]) -> tuple[uuid.UUID | None, str]:
        """Return the ``(sense_id, disambiguation_status)`` pair for a sense list.

        Exactly one sense is unambiguous, so it is linked and the status is
        ``"auto_resolved"``. Zero or several senses cannot be linked automatically,
        so the result is ``(None, "pending")``.
        """
        if len(senses) == 1:
            # The sense id is converted with uuid.UUID() before being stored.
            return uuid.UUID(senses[0].id), "auto_resolved"
        return None, "pending"

    async def add_word_to_bank(
        self,
        user_id: uuid.UUID,
        surface_text: str,
        language_pair_id: uuid.UUID,
        pathway: str,
        is_phrase: bool = False,
        wordform_id: uuid.UUID | None = None,
        source_article_id: uuid.UUID | None = None,
    ) -> LearnableWordBankEntry:
        """Add a word or phrase to the user's vocab bank, automatically linking it to a
        dictionary sense when exactly one match exists, or flagging it as pending
        disambiguation when zero or multiple senses are found.

        Phrases (``is_phrase=True``) bypass dictionary lookup entirely and are always
        created with ``disambiguation_status="pending"`` since they cannot be resolved
        to a single headword. ``wordform_id`` is not forwarded for phrases.

        For single words the senses are looked up by ``surface_text`` in the language
        pair's ``target_lang``.

        Raises:
            ValueError: If no language pair exists for ``language_pair_id``.

        Usage::

            # Word with a single sense — auto-resolved immediately
            entry = await service.add_word_to_bank(
                user_id=user_id,
                surface_text="bisque",
                language_pair_id=fr_en_pair_id,
                pathway="highlight",
            )
            assert entry.disambiguation_status == "auto_resolved"

            # Common word with many senses — user must pick one
            entry = await service.add_word_to_bank(
                user_id=user_id,
                surface_text="avoir",
                language_pair_id=fr_en_pair_id,
                pathway="manual",
            )
            assert entry.disambiguation_status == "pending"

            # Multi-word expression — skips lookup, always pending
            entry = await service.add_word_to_bank(
                user_id=user_id,
                surface_text="avoir l'air",
                language_pair_id=fr_en_pair_id,
                pathway="manual",
                is_phrase=True,
            )
        """
        pair = await self.vocab_repo.get_language_pair(language_pair_id)
        if pair is None:
            raise ValueError(f"Language pair {language_pair_id} not found")

        if is_phrase:
            # No dictionary query for phrases; sense_id and wordform_id are left to
            # the repository's defaults.
            return await self.vocab_repo.add_entry(
                user_id=user_id,
                language_pair_id=language_pair_id,
                surface_text=surface_text,
                entry_pathway=pathway,
                is_phrase=True,
                source_article_id=source_article_id,
                disambiguation_status="pending",
            )

        senses = await self.dict_repo.get_senses_for_headword(surface_text, pair.target_lang)
        sense_id, status = self._resolve_sense(senses)

        return await self.vocab_repo.add_entry(
            user_id=user_id,
            language_pair_id=language_pair_id,
            surface_text=surface_text,
            entry_pathway=pathway,
            is_phrase=False,
            sense_id=sense_id,
            wordform_id=wordform_id,
            source_article_id=source_article_id,
            disambiguation_status=status,
        )

    async def add_token_to_bank(
        self,
        user_id: uuid.UUID,
        surface_text: str,
        language_pair_id: uuid.UUID,
        senses: list[Sense],
        wordform_id: uuid.UUID | None,
        source_article_id: uuid.UUID | None = None,
    ) -> LearnableWordBankEntry:
        """Add a token from the NLP pipeline to the vocab bank using pre-resolved lookup
        results, skipping the redundant dictionary query that ``add_word_to_bank`` would
        otherwise perform.

        ``senses`` and ``wordform_id`` come from :class:`DictionaryLookupService` and
        are stored directly on the bank entry. Auto-resolution still applies: exactly
        one sense means ``auto_resolved``; anything else means ``pending``. The entry
        is always recorded with the ``"nlp_extraction"`` pathway.

        Raises:
            ValueError: If no language pair exists for ``language_pair_id``.

        Usage::

            result = await lookup_service.lookup_token("allons", "aller", "VERB", "fr")
            wf_id = uuid.UUID(result.wordform_id) if result.wordform_id else None
            entry = await vocab_service.add_token_to_bank(
                user_id=user_id,
                surface_text="allons",
                language_pair_id=pair_id,
                senses=result.senses,
                wordform_id=wf_id,
            )
            # entry.wordform_id == result.wordform_id (pre-linked to "allons" wordform)
        """
        # The pair is fetched only to validate that it exists; the senses are already
        # resolved, so its languages are not needed here.
        pair = await self.vocab_repo.get_language_pair(language_pair_id)
        if pair is None:
            raise ValueError(f"Language pair {language_pair_id} not found")

        sense_id, status = self._resolve_sense(senses)

        return await self.vocab_repo.add_entry(
            user_id=user_id,
            language_pair_id=language_pair_id,
            surface_text=surface_text,
            entry_pathway="nlp_extraction",
            wordform_id=wordform_id,
            sense_id=sense_id,
            source_article_id=source_article_id,
            disambiguation_status=status,
        )

    async def resolve_disambiguation(
        self, entry_id: uuid.UUID, sense_id: uuid.UUID
    ) -> LearnableWordBankEntry:
        """Attach a specific dictionary sense to a pending vocab bank entry, marking it
        as ``resolved`` so it can be used for flashcard generation.

        This is called after the user selects the correct sense from the list presented
        during disambiguation — for example, choosing "bank (finance)" over
        "bank (river)" for the French word "banque". The update itself is delegated to
        ``vocab_repo.set_sense``.

        Usage::

            # User has been shown the sense list and picked sense_id for "bank (finance)"
            resolved_entry = await service.resolve_disambiguation(
                entry_id=pending_entry.id,
                sense_id=finance_sense_id,
            )
            assert resolved_entry.disambiguation_status == "resolved"
            assert resolved_entry.sense_id == str(finance_sense_id)
        """
        return await self.vocab_repo.set_sense(entry_id, sense_id)