import uuid

from ..models.dictionary import Sense
from ..models.vocab import LearnableWordBankEntry
from ...outbound.postgres.repositories.vocab_repository import VocabRepository
from ...outbound.postgres.repositories.dictionary_repository import DictionaryRepository


class VocabService:
    """Manages a user's learnable word bank.

    Adds words from various sources and resolves which dictionary sense a
    word belongs to.

    Usage::

        service = VocabService(
            vocab_repo=PostgresVocabRepository(db),
            dict_repo=PostgresDictionaryRepository(db),
        )
        entry = await service.add_word_to_bank(
            user_id=user.id,
            surface_text="banque",
            language_pair_id=pair.id,
            pathway="highlight",
        )
        # entry.disambiguation_status is "auto_resolved" if "banque" has
        # exactly one dictionary sense, or "pending" if the user needs to
        # pick from multiple senses.
    """

    def __init__(self, vocab_repo: VocabRepository, dict_repo: DictionaryRepository) -> None:
        """Store the repositories the service delegates to.

        Args:
            vocab_repo: Repository used for language pairs and bank entries.
            dict_repo: Repository used to look up senses for a headword.
        """
        self.vocab_repo = vocab_repo
        self.dict_repo = dict_repo

    async def _require_language_pair(self, language_pair_id: uuid.UUID):
        """Return the language pair for ``language_pair_id`` or raise.

        Raises:
            ValueError: If the repository returns ``None`` for the id.
        """
        pair = await self.vocab_repo.get_language_pair(language_pair_id)
        if pair is None:
            raise ValueError(f"Language pair {language_pair_id} not found")
        return pair

    @staticmethod
    def _auto_resolve(senses: list[Sense]) -> tuple[uuid.UUID | None, str]:
        """Map a list of candidate senses to ``(sense_id, disambiguation_status)``.

        Exactly one candidate is linked automatically (``"auto_resolved"``);
        zero or several candidates leave the entry unlinked (``"pending"``).
        """
        if len(senses) == 1:
            # Coerce through str(): uuid.UUID() only accepts a hex string, so
            # this also tolerates an id that is already a uuid.UUID instance.
            return uuid.UUID(str(senses[0].id)), "auto_resolved"
        return None, "pending"

    async def add_word_to_bank(
        self,
        user_id: uuid.UUID,
        surface_text: str,
        language_pair_id: uuid.UUID,
        pathway: str,
        is_phrase: bool = False,
        wordform_id: uuid.UUID | None = None,
        source_article_id: uuid.UUID | None = None,
    ) -> LearnableWordBankEntry:
        """Add a word or phrase to the user's vocab bank.

        The entry is automatically linked to a dictionary sense when exactly
        one match exists, or flagged as pending disambiguation when zero or
        multiple senses are found.

        Phrases (``is_phrase=True``) bypass dictionary lookup entirely and are
        always created with ``disambiguation_status="pending"`` since they
        cannot be resolved to a single headword. ``wordform_id`` is not
        forwarded for phrases.

        Args:
            user_id: Owner of the bank entry.
            surface_text: The word or phrase as written.
            language_pair_id: Language pair the entry belongs to; its
                ``target_lang`` is used for the dictionary lookup.
            pathway: Stored on the entry as ``entry_pathway``.
            is_phrase: Skip sense lookup and store the entry as a phrase.
            wordform_id: Optional wordform to link (single words only).
            source_article_id: Optional article the word came from.

        Returns:
            The entry returned by ``vocab_repo.add_entry``.

        Raises:
            ValueError: If ``language_pair_id`` does not match a language pair.

        Usage::

            # Word with a single sense — auto-resolved immediately
            entry = await service.add_word_to_bank(
                user_id=user_id,
                surface_text="bisque",
                language_pair_id=fr_en_pair_id,
                pathway="highlight",
            )
            assert entry.disambiguation_status == "auto_resolved"

            # Common word with many senses — user must pick one
            entry = await service.add_word_to_bank(
                user_id=user_id,
                surface_text="avoir",
                language_pair_id=fr_en_pair_id,
                pathway="manual",
            )
            assert entry.disambiguation_status == "pending"

            # Multi-word expression — skips lookup, always pending
            entry = await service.add_word_to_bank(
                user_id=user_id,
                surface_text="avoir l'air",
                language_pair_id=fr_en_pair_id,
                pathway="manual",
                is_phrase=True,
            )
        """
        pair = await self._require_language_pair(language_pair_id)

        if is_phrase:
            return await self.vocab_repo.add_entry(
                user_id=user_id,
                language_pair_id=language_pair_id,
                surface_text=surface_text,
                entry_pathway=pathway,
                is_phrase=True,
                source_article_id=source_article_id,
                disambiguation_status="pending",
            )

        senses = await self.dict_repo.get_senses_for_headword(surface_text, pair.target_lang)
        sense_id, status = self._auto_resolve(senses)

        return await self.vocab_repo.add_entry(
            user_id=user_id,
            language_pair_id=language_pair_id,
            surface_text=surface_text,
            entry_pathway=pathway,
            is_phrase=False,
            sense_id=sense_id,
            wordform_id=wordform_id,
            source_article_id=source_article_id,
            disambiguation_status=status,
        )

    async def add_token_to_bank(
        self,
        user_id: uuid.UUID,
        surface_text: str,
        language_pair_id: uuid.UUID,
        senses: list[Sense],
        wordform_id: uuid.UUID | None,
        source_article_id: uuid.UUID | None = None,
    ) -> LearnableWordBankEntry:
        """Add a token from the NLP pipeline using pre-resolved lookup results.

        Skips the dictionary query that ``add_word_to_bank`` would otherwise
        perform. ``senses`` and ``wordform_id`` come from
        :class:`DictionaryLookupService`; ``wordform_id`` is stored directly on
        the bank entry. Auto-resolution still applies: exactly one sense means
        ``auto_resolved``; anything else means ``pending``. The entry is always
        stored with ``entry_pathway="nlp_extraction"``.

        Args:
            user_id: Owner of the bank entry.
            surface_text: The token as it appeared in the text.
            language_pair_id: Language pair the entry belongs to.
            senses: Candidate senses already looked up for the token.
            wordform_id: Wordform to link, or ``None``.
            source_article_id: Optional article the token came from.

        Returns:
            The entry returned by ``vocab_repo.add_entry``.

        Raises:
            ValueError: If ``language_pair_id`` does not match a language pair.

        Usage::

            result = await lookup_service.lookup_token("allons", "aller", "VERB", "fr")
            wf_id = uuid.UUID(result.wordform_id) if result.wordform_id else None
            entry = await vocab_service.add_token_to_bank(
                user_id=user_id,
                surface_text="allons",
                language_pair_id=pair_id,
                senses=result.senses,
                wordform_id=wf_id,
            )
            # entry.wordform_id == result.wordform_id (pre-linked to "allons" wordform)
        """
        # Only the existence of the pair matters here; no lookup is performed.
        await self._require_language_pair(language_pair_id)

        sense_id, status = self._auto_resolve(senses)

        return await self.vocab_repo.add_entry(
            user_id=user_id,
            language_pair_id=language_pair_id,
            surface_text=surface_text,
            entry_pathway="nlp_extraction",
            wordform_id=wordform_id,
            sense_id=sense_id,
            source_article_id=source_article_id,
            disambiguation_status=status,
        )

    async def resolve_disambiguation(
        self, entry_id: uuid.UUID, sense_id: uuid.UUID
    ) -> LearnableWordBankEntry:
        """Attach a specific dictionary sense to a pending vocab bank entry.

        Delegates to ``vocab_repo.set_sense``, which is expected to mark the
        entry as ``resolved`` so it can be used for flashcard generation.

        This is called after the user selects the correct sense from the list
        presented during disambiguation — for example, choosing
        "bank (finance)" over "bank (river)" for the French word "banque".

        Args:
            entry_id: The bank entry to update.
            sense_id: The sense the user selected.

        Returns:
            The entry returned by ``vocab_repo.set_sense``.

        Usage::

            # User has been shown the sense list and picked sense_id for "bank (finance)"
            resolved_entry = await service.resolve_disambiguation(
                entry_id=pending_entry.id,
                sense_id=finance_sense_id,
            )
            assert resolved_entry.disambiguation_status == "resolved"
            assert resolved_entry.sense_id == str(finance_sense_id)
        """
        return await self.vocab_repo.set_sense(entry_id, sense_id)