Compare commits

...

6 commits

Author SHA1 Message Date
aa4987981d feat: Create the Dictionary Lookup Service; methods for finding
Some checks are pending
/ test (push) Waiting to run
vocabulary and words
2026-04-10 07:11:57 +01:00
27f7a7c3f3 feat: Build the flashcards model, routes, etc. 2026-04-09 20:40:11 +01:00
0281caef7c feat: Endpoints to manage your account. 2026-04-08 20:50:26 +01:00
689e10d1bc feat: vocab endpoints 2026-04-08 20:37:00 +01:00
486e0bf3d5 docs: Update the pyproject.toml to make it compatible with spaCy; update
architecture.md
2026-04-08 20:26:57 +01:00
873ebacd4d feat: Build the bilingual dictionary data 2026-04-08 20:26:26 +01:00
30 changed files with 2454 additions and 18 deletions

View file

@ -1,4 +1,4 @@
.PHONY: down build up logs shell lock migrate migration
.PHONY: down build up logs shell lock migrate migration import-dictionary
build:
docker compose build
@ -28,3 +28,10 @@ lock:
cd api && uv pip compile pyproject.toml -o requirements.txt
rebuild: down build up
# Import a kaikki dictionary JSONL into Postgres.
# Requires the DB to be running with its port exposed on localhost (docker compose up).
# DATABASE_URL defaults to the docker-compose dev credentials.
# Usage: make import-dictionary lang=fr
import-dictionary:
cd api && python scripts/import_dictionary.py --lang $(lang)

View file

@ -10,6 +10,7 @@ from app.outbound.postgres.database import Base
import app.outbound.postgres.entities.summarise_job_entity
import app.outbound.postgres.entities.user_entity
import app.outbound.postgres.entities.dictionary_entities
config = context.config
config.set_main_option("sqlalchemy.url", settings.database_url)

View file

@ -0,0 +1,89 @@
"""add dictionary tables
Revision ID: 0007
Revises: 0006
Create Date: 2026-04-07
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0007"
down_revision: Union[str, None] = "0006"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the dictionary tables: lemma, sense, wordform, and raw source rows."""
    # One row per imported dictionary entry: headword + part of speech + language.
    op.create_table(
        "dictionary_lemma",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("headword", sa.Text(), nullable=False),
        sa.Column("language", sa.String(2), nullable=False),
        sa.Column("pos_raw", sa.Text(), nullable=False),
        sa.Column("pos_normalised", sa.Text(), nullable=True),
        sa.Column("gender", sa.Text(), nullable=True),
        sa.Column("tags", postgresql.ARRAY(sa.Text()), nullable=False, server_default="{}"),
    )
    # Composite index supports headword lookups scoped to a single language.
    op.create_index("ix_dictionary_lemma_headword_language", "dictionary_lemma", ["headword", "language"])
    op.create_index("ix_dictionary_lemma_language", "dictionary_lemma", ["language"])
    # One row per meaning of a lemma; sense_index presumably preserves the
    # source ordering of senses within a lemma — confirm against the importer.
    op.create_table(
        "dictionary_sense",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "lemma_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("dictionary_lemma.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("sense_index", sa.Integer(), nullable=False),
        sa.Column("gloss", sa.Text(), nullable=False, server_default=""),
        sa.Column("topics", postgresql.ARRAY(sa.Text()), nullable=False, server_default="{}"),
        sa.Column("tags", postgresql.ARRAY(sa.Text()), nullable=False, server_default="{}"),
    )
    op.create_index("ix_dictionary_sense_lemma_id", "dictionary_sense", ["lemma_id"])
    # Inflected surface forms, each linked back to its lemma.
    op.create_table(
        "dictionary_wordform",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "lemma_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("dictionary_lemma.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("form", sa.Text(), nullable=False),
        sa.Column("tags", postgresql.ARRAY(sa.Text()), nullable=False, server_default="{}"),
    )
    op.create_index("ix_dictionary_wordform_lemma_id", "dictionary_wordform", ["lemma_id"])
    # Index on the surface form itself: the primary lookup path for tokens.
    op.create_index("ix_dictionary_wordform_form", "dictionary_wordform", ["form"])
    # Original source record stored as JSONB; unique FK gives at most one raw
    # row per lemma.
    op.create_table(
        "dictionary_lemma_raw",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "lemma_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("dictionary_lemma.id", ondelete="CASCADE"),
            nullable=False,
            unique=True,
        ),
        sa.Column("language", sa.String(2), nullable=False),
        sa.Column("raw", postgresql.JSONB(), nullable=False),
    )
def downgrade() -> None:
    """Drop the dictionary tables and their indexes in reverse dependency order."""
    op.drop_table("dictionary_lemma_raw")
    op.drop_index("ix_dictionary_wordform_form", table_name="dictionary_wordform")
    op.drop_index("ix_dictionary_wordform_lemma_id", table_name="dictionary_wordform")
    op.drop_table("dictionary_wordform")
    op.drop_index("ix_dictionary_sense_lemma_id", table_name="dictionary_sense")
    op.drop_table("dictionary_sense")
    op.drop_index("ix_dictionary_lemma_language", table_name="dictionary_lemma")
    op.drop_index("ix_dictionary_lemma_headword_language", table_name="dictionary_lemma")
    op.drop_table("dictionary_lemma")

View file

@ -0,0 +1,96 @@
"""add vocab bank tables
Revision ID: 0008
Revises: 0007
Create Date: 2026-04-08
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0008"
down_revision: Union[str, None] = "0007"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the vocab bank tables: user_language_pair and learnable_word_bank_entry."""
    # A (source, target) language combination a user studies; the unique
    # constraint prevents duplicate pairs for the same user.
    op.create_table(
        "user_language_pair",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("users.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("source_lang", sa.String(2), nullable=False),
        sa.Column("target_lang", sa.String(2), nullable=False),
        sa.UniqueConstraint("user_id", "source_lang", "target_lang", name="uq_user_language_pair"),
    )
    op.create_index("ix_user_language_pair_user_id", "user_language_pair", ["user_id"])
    # A word/phrase the user wants to learn. Dictionary links use SET NULL so
    # re-importing or pruning dictionary data never destroys user entries.
    op.create_table(
        "learnable_word_bank_entry",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("users.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "language_pair_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("user_language_pair.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "sense_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("dictionary_sense.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column(
            "wordform_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("dictionary_wordform.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column("surface_text", sa.Text(), nullable=False),
        sa.Column("is_phrase", sa.Boolean(), nullable=False, server_default="false"),
        sa.Column("entry_pathway", sa.Text(), nullable=False),
        sa.Column("source_article_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("disambiguation_status", sa.Text(), nullable=False, server_default="pending"),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    op.create_index(
        "ix_learnable_word_bank_entry_user_id", "learnable_word_bank_entry", ["user_id"]
    )
    op.create_index(
        "ix_learnable_word_bank_entry_language_pair_id",
        "learnable_word_bank_entry",
        ["language_pair_id"],
    )
    op.create_index(
        "ix_learnable_word_bank_entry_sense_id", "learnable_word_bank_entry", ["sense_id"]
    )
def downgrade() -> None:
    """Drop the vocab bank tables and their indexes in reverse creation order."""
    op.drop_index("ix_learnable_word_bank_entry_sense_id", table_name="learnable_word_bank_entry")
    op.drop_index(
        "ix_learnable_word_bank_entry_language_pair_id", table_name="learnable_word_bank_entry"
    )
    op.drop_index("ix_learnable_word_bank_entry_user_id", table_name="learnable_word_bank_entry")
    op.drop_table("learnable_word_bank_entry")
    op.drop_index("ix_user_language_pair_user_id", table_name="user_language_pair")
    op.drop_table("user_language_pair")

View file

@ -0,0 +1,88 @@
"""add flashcard tables
Revision ID: 0009
Revises: 0008
Create Date: 2026-04-08
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0009"
down_revision: Union[str, None] = "0008"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the flashcard tables: flashcard and flashcard_event."""
    # A generated study card; CASCADE from the bank entry so cards disappear
    # when the underlying vocab entry is deleted.
    op.create_table(
        "flashcard",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("users.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "bank_entry_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("learnable_word_bank_entry.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("source_lang", sa.Text(), nullable=False),
        sa.Column("target_lang", sa.Text(), nullable=False),
        sa.Column("prompt_text", sa.Text(), nullable=False),
        sa.Column("answer_text", sa.Text(), nullable=False),
        sa.Column("prompt_context_text", sa.Text(), nullable=True),
        sa.Column("answer_context_text", sa.Text(), nullable=True),
        sa.Column("card_direction", sa.Text(), nullable=False),
        sa.Column("prompt_modality", sa.Text(), nullable=False, server_default="text"),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    op.create_index("ix_flashcard_user_id", "flashcard", ["user_id"])
    op.create_index("ix_flashcard_bank_entry_id", "flashcard", ["bank_entry_id"])
    # Append-only study event log (shown / answered / skipped).
    op.create_table(
        "flashcard_event",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "flashcard_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("flashcard.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("users.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("event_type", sa.Text(), nullable=False),
        sa.Column("user_response", sa.Text(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    op.create_index("ix_flashcard_event_flashcard_id", "flashcard_event", ["flashcard_id"])
    op.create_index("ix_flashcard_event_user_id", "flashcard_event", ["user_id"])
def downgrade() -> None:
    """Drop the flashcard tables and their indexes in reverse creation order."""
    op.drop_index("ix_flashcard_event_user_id", table_name="flashcard_event")
    op.drop_index("ix_flashcard_event_flashcard_id", table_name="flashcard_event")
    op.drop_table("flashcard_event")
    op.drop_index("ix_flashcard_bank_entry_id", table_name="flashcard")
    op.drop_index("ix_flashcard_user_id", table_name="flashcard")
    op.drop_table("flashcard")

View file

@ -0,0 +1,14 @@
from dataclasses import dataclass, field
from datetime import datetime
from .learnable_language import LearnableLanguage
@dataclass
class Account:
    """A user's account profile as exposed by the service layer."""
    # User UUID serialised as a string.
    id: str
    email: str
    is_active: bool
    is_email_verified: bool
    created_at: datetime
    # Language pairs the user is studying; empty for a freshly created account.
    learnable_languages: list[LearnableLanguage] = field(default_factory=list)

View file

@ -0,0 +1,32 @@
from dataclasses import dataclass
@dataclass
class Wordform:
id: str
lemma_id: str
form: str
tags: list[str]
@dataclass
class Sense:
id: str
lemma_id: str
sense_index: int
gloss: str
topics: list[str]
tags: list[str]
@dataclass
class Lemma:
id: str
headword: str
language: str
pos_raw: str
pos_normalised: str | None
gender: str | None
tags: list[str]
senses: list[Sense] = field(default_factory=list)
wordforms: list[Wordform] = field(default_factory=list)

View file

@ -0,0 +1,28 @@
from dataclasses import dataclass
from datetime import datetime
@dataclass
class Flashcard:
    """A study card generated from a vocab bank entry."""
    id: str
    user_id: str
    # The learnable_word_bank_entry this card was generated from.
    bank_entry_id: str
    source_lang: str
    target_lang: str
    prompt_text: str
    answer_text: str
    # Presumably example-sentence context for prompt/answer — confirm with caller.
    prompt_context_text: str | None
    answer_context_text: str | None
    # "target_to_en" or "en_to_target" (see FlashcardService.VALID_DIRECTIONS).
    card_direction: str
    # Defaults to "text" at the persistence layer.
    prompt_modality: str
    created_at: datetime
@dataclass
class FlashcardEvent:
    """A recorded study interaction with a flashcard."""
    id: str
    flashcard_id: str
    user_id: str
    # One of "shown" | "answered" | "skipped" (see FlashcardService.VALID_EVENT_TYPES).
    event_type: str
    # Free-text answer; only meaningful for "answered" events, stored ungraded.
    user_response: str | None
    created_at: datetime

View file

@ -0,0 +1,25 @@
from dataclasses import dataclass
from datetime import datetime
@dataclass
class UserLanguagePair:
    """A (source, target) language combination a user is studying."""
    id: str
    user_id: str
    source_lang: str
    target_lang: str
@dataclass
class LearnableWordBankEntry:
    """A word or phrase a user has saved to their vocab bank."""
    id: str
    user_id: str
    language_pair_id: str
    # Resolved dictionary sense; None while disambiguation is pending.
    sense_id: str | None
    # Exact inflected form matched at capture time, when known.
    wordform_id: str | None
    # The text as the user encountered it (may differ from the lemma headword).
    surface_text: str
    is_phrase: bool
    # How the entry was added, e.g. "highlight", "manual", "nlp_extraction".
    entry_pathway: str
    source_article_id: str | None
    # "pending" | "auto_resolved" | "resolved" (set by VocabService).
    disambiguation_status: str
    created_at: datetime

View file

@ -0,0 +1,150 @@
import uuid
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from ..models.account import Account
from ..models.learnable_language import LearnableLanguage
from ...auth import hash_password
from ...outbound.postgres.entities.user_entity import User as UserEntity
from ...outbound.postgres.repositories import learnable_language_repository, user_repository
class AccountService:
    """Handles account-level operations: registration, profile retrieval, and managing
    the set of languages a user is learning.
    All methods operate on behalf of a single authenticated user (or, for
    ``create_account``, the user being created).
    Usage::
        service = AccountService(db)
        # Registration
        account = await service.create_account("alice@example.com", "s3cr3t")
        # Profile retrieval
        account = await service.get_account(user_id)
        print(account.learnable_languages)  # [LearnableLanguage(...), ...]
        # Add French (B1) to the account
        lang = await service.add_learnable_language(
            user_id, source_language="en", target_language="fr", proficiencies=["B1"]
        )
        # Remove it again
        await service.remove_learnable_language(user_id, lang.id)
    """
    def __init__(self, db: AsyncSession) -> None:
        self.db = db
    async def create_account(self, email: str, password: str) -> Account:
        """Create a new user account, hashing the plain-text password before storage.
        Raises ``ValueError`` if the email address is already registered, so the
        caller does not need to catch SQLAlchemy exceptions directly.
        Usage::
            try:
                account = await service.create_account("alice@example.com", "s3cr3t")
            except ValueError:
                # email already taken
                ...
        """
        try:
            user = await user_repository.create(
                self.db,
                email=email,
                hashed_password=hash_password(password),
            )
        except IntegrityError as err:
            await self.db.rollback()
            # Chain the IntegrityError so the database context survives in
            # tracebacks while callers still catch a plain ValueError.
            raise ValueError("Email already registered") from err
        return Account(
            id=str(user.id),
            email=user.email,
            is_active=user.is_active,
            is_email_verified=user.is_email_verified,
            created_at=user.created_at,
        )
    async def get_account(self, user_id: uuid.UUID) -> Account:
        """Retrieve a user's account profile including all their learnable languages.
        Raises ``ValueError`` if no user exists for the given ``user_id``.
        Usage::
            account = await service.get_account(user_id)
            for lang in account.learnable_languages:
                print(lang.target_language, lang.proficiencies)
        """
        # user_repository only exposes get_by_email; query by id directly
        result = await self.db.execute(
            select(UserEntity).where(UserEntity.id == user_id)
        )
        user = result.scalar_one_or_none()
        if user is None:
            raise ValueError(f"User {user_id} not found")
        languages = await learnable_language_repository.list_for_user(self.db, user_id)
        return Account(
            id=str(user.id),
            email=user.email,
            is_active=user.is_active,
            is_email_verified=user.is_email_verified,
            created_at=user.created_at,
            learnable_languages=languages,
        )
    async def add_learnable_language(
        self,
        user_id: uuid.UUID,
        source_language: str,
        target_language: str,
        proficiencies: list[str],
    ) -> LearnableLanguage:
        """Add a language pair to the user's account, or update proficiency levels if
        the pair already exists (upsert semantics).
        Usage::
            lang = await service.add_learnable_language(
                user_id,
                source_language="en",
                target_language="fr",
                proficiencies=["B1", "B2"],
            )
            print(lang.id)  # UUID of the learnable_language row
        """
        return await learnable_language_repository.upsert(
            self.db,
            user_id=user_id,
            source_language=source_language,
            target_language=target_language,
            proficiencies=proficiencies,
        )
    async def remove_learnable_language(
        self, user_id: uuid.UUID, language_id: uuid.UUID
    ) -> None:
        """Remove a learnable language from the user's account by its row ID.
        Raises ``ValueError`` if the language entry does not exist or does not belong
        to ``user_id``.
        Usage::
            await service.remove_learnable_language(user_id, lang.id)
        """
        deleted = await learnable_language_repository.delete(
            self.db, user_id=user_id, language_id=language_id
        )
        if not deleted:
            raise ValueError(f"Learnable language {language_id} not found for this user")

View file

@ -0,0 +1,108 @@
import uuid
from dataclasses import dataclass, field
from ..models.dictionary import Sense, Wordform
from ...outbound.postgres.repositories.dictionary_repository import DictionaryRepository
@dataclass
class TokenLookupResult:
    """The result of resolving a spaCy token against the dictionary.
    ``senses`` is the ranked list of candidate senses for disambiguation.
    ``wordform_id`` is set when the surface form was found in ``dictionary_wordform``,
    allowing the vocab bank entry to be pre-linked to the exact inflected form.
    ``matched_via`` describes which lookup strategy succeeded.
    """
    senses: list[Sense]
    # Only populated when all stage-1 matches point at a single lemma.
    wordform_id: str | None
    matched_via: str  # "wordform" | "lemma_pos" | "lemma" | "none"
    # Raw wordform rows matched in stage 1; empty for lemma-based matches.
    matched_wordforms: list[Wordform] = field(default_factory=list)
class DictionaryLookupService:
    """Resolve a spaCy token (surface form + UD POS + lemma) to candidate dictionary
    senses using a three-stage fallback strategy.

    Stage 1 — wordform lookup (most precise): exact match of the inflected surface
    form against ``dictionary_wordform`` in the target language (e.g. the "allons"
    wordform row pointing at the lemma "aller"). When every match belongs to one
    lemma, ``wordform_id`` is pre-populated on the result.

    Stage 2 — lemma + POS fallback: the spaCy-provided lemma string against
    ``dictionary_lemma.headword`` filtered by ``pos_normalised`` (UD tag), which
    reduces false matches for homographs with different parts of speech.

    Stage 3 — lemma-only fallback: drops the POS filter as a last resort and
    returns all senses for the headword regardless of POS.

    Usage::

        service = DictionaryLookupService(PostgresDictionaryRepository(db))
        result = await service.lookup_token(
            surface="allons",
            spacy_lemma="aller",
            pos_ud="VERB",
            language="fr",
        )
        # result.senses — candidate Sense rows for disambiguation
        # result.wordform_id — pre-resolved wordform UUID string, or None
        # result.matched_via — "wordform" | "lemma_pos" | "lemma" | "none"
    """

    def __init__(self, dict_repo: DictionaryRepository) -> None:
        self.dict_repo = dict_repo

    async def lookup_token(
        self,
        surface: str,
        spacy_lemma: str,
        pos_ud: str,
        language: str,
    ) -> TokenLookupResult:
        """Resolve a spaCy token to candidate senses via the three-stage fallback.

        ``surface`` is the raw token text (e.g. ``"allons"``); ``spacy_lemma`` is
        spaCy's lemmatisation of it (e.g. ``"aller"``); ``pos_ud`` is the Universal
        Dependencies POS tag (e.g. ``"VERB"``); ``language`` is the target language
        code (e.g. ``"fr"``). Returns a :class:`TokenLookupResult` carrying the
        candidate senses and, for stage-1 hits on a single lemma, a ``wordform_id``
        usable for precise inflection tracking on the vocab bank entry.
        """
        repo = self.dict_repo

        # Stage 1: exact inflected-form hit in the wordform table.
        matched_forms = await repo.get_wordforms_by_form(surface, language)
        if matched_forms:
            lemma_ids = list(dict.fromkeys(wf.lemma_id for wf in matched_forms))
            candidates: list[Sense] = []
            for lemma_id in lemma_ids:
                candidates.extend(await repo.get_senses_for_lemma(uuid.UUID(lemma_id)))
            # Pre-assign a wordform_id only when every match points at the same
            # lemma; ambiguity across lemmas must be resolved by the user, so we
            # cannot confidently pick one.
            resolved_wordform = matched_forms[0].id if len(lemma_ids) == 1 else None
            return TokenLookupResult(
                senses=candidates,
                wordform_id=resolved_wordform,
                matched_via="wordform",
                matched_wordforms=matched_forms,
            )

        # Stage 2: spaCy lemma constrained by the UD POS tag.
        pos_senses = await repo.get_senses_for_headword_and_pos(
            spacy_lemma, language, pos_ud
        )
        if pos_senses:
            return TokenLookupResult(senses=pos_senses, wordform_id=None, matched_via="lemma_pos")

        # Stage 3: spaCy lemma alone — no POS filter.
        bare_senses = await repo.get_senses_for_headword(spacy_lemma, language)
        if bare_senses:
            return TokenLookupResult(senses=bare_senses, wordform_id=None, matched_via="lemma")

        return TokenLookupResult(senses=[], wordform_id=None, matched_via="none")

View file

@ -0,0 +1,152 @@
import uuid
from ..models.flashcard import Flashcard, FlashcardEvent
from ...outbound.postgres.repositories.flashcard_repository import FlashcardRepository
from ...outbound.postgres.repositories.vocab_repository import VocabRepository
from ...outbound.postgres.repositories.dictionary_repository import DictionaryRepository
# Closed vocabularies validated by FlashcardService before touching the DB.
VALID_DIRECTIONS = {"target_to_en", "en_to_target"}
VALID_EVENT_TYPES = {"shown", "answered", "skipped"}
class FlashcardService:
    """Generates flashcards from resolved vocab bank entries and records study events.
    Flashcard text is derived directly from the dictionary: the lemma headword is the
    target-language side and the sense gloss is the English side. Both directions are
    created by default.
    Usage::
        service = FlashcardService(
            flashcard_repo=PostgresFlashcardRepository(db),
            vocab_repo=PostgresVocabRepository(db),
            dict_repo=PostgresDictionaryRepository(db),
        )
        # Generate both directions for a resolved bank entry
        cards = await service.generate_flashcard_from_entry(entry_id)
        # Record that the user answered correctly
        event = await service.record_flashcard_event(
            flashcard_id=cards[0].id,
            user_id=user_id,
            event_type="answered",
            response="banque",
        )
    """
    def __init__(
        self,
        flashcard_repo: FlashcardRepository,
        vocab_repo: VocabRepository,
        dict_repo: DictionaryRepository,
    ) -> None:
        self.flashcard_repo = flashcard_repo
        self.vocab_repo = vocab_repo
        self.dict_repo = dict_repo
    async def generate_flashcard_from_entry(
        self,
        entry_id: uuid.UUID,
        direction: str | None = None,
    ) -> list[Flashcard]:
        """Create flashcard(s) from a vocab bank entry that has a resolved sense.
        Looks up the sense gloss (English meaning) and lemma headword (target-language
        word) and creates one card per direction. Pass ``direction`` to generate only
        ``"target_to_en"`` or ``"en_to_target"``; omit it to create both.
        Raises ``ValueError`` if the entry does not exist, has no resolved sense, or
        if the underlying sense or lemma rows cannot be found in the dictionary.
        Usage::
            # Both directions — typical case
            cards = await service.generate_flashcard_from_entry(entry_id)
            assert len(cards) == 2
            # One direction only
            cards = await service.generate_flashcard_from_entry(
                entry_id, direction="target_to_en"
            )
        """
        if direction is not None and direction not in VALID_DIRECTIONS:
            raise ValueError(f"Invalid direction '{direction}'. Must be one of {VALID_DIRECTIONS}")
        entry = await self.vocab_repo.get_entry(entry_id)
        if entry is None:
            raise ValueError(f"Bank entry {entry_id} not found")
        if entry.sense_id is None:
            raise ValueError(
                "Entry has no resolved sense; disambiguate before generating flashcards"
            )
        # Card text comes straight from the dictionary rows the entry resolves to;
        # a missing sense or lemma means dangling references, surfaced as ValueError.
        sense = await self.dict_repo.get_sense(uuid.UUID(entry.sense_id))
        if sense is None:
            raise ValueError(f"Sense {entry.sense_id} not found in dictionary")
        lemma = await self.dict_repo.get_lemma(uuid.UUID(sense.lemma_id))
        if lemma is None:
            raise ValueError(f"Lemma for sense {entry.sense_id} not found in dictionary")
        pair = await self.vocab_repo.get_language_pair(uuid.UUID(entry.language_pair_id))
        if pair is None:
            raise ValueError(f"Language pair {entry.language_pair_id} not found")
        user_id = uuid.UUID(entry.user_id)
        directions = [direction] if direction else ["target_to_en", "en_to_target"]
        flashcards: list[Flashcard] = []
        for d in directions:
            # The headword is always the target-language side; the gloss is the
            # English side — direction only decides which one is the prompt.
            if d == "target_to_en":
                prompt, answer = lemma.headword, sense.gloss
            else:
                prompt, answer = sense.gloss, lemma.headword
            card = await self.flashcard_repo.create_flashcard(
                user_id=user_id,
                bank_entry_id=entry_id,
                source_lang=pair.source_lang,
                target_lang=pair.target_lang,
                prompt_text=prompt,
                answer_text=answer,
                card_direction=d,
            )
            flashcards.append(card)
        return flashcards
    async def record_flashcard_event(
        self,
        flashcard_id: uuid.UUID,
        user_id: uuid.UUID,
        event_type: str,
        response: str | None = None,
    ) -> FlashcardEvent:
        """Record a study event against a flashcard — shown, answered, or skipped.
        ``response`` is the user's free-text answer and is only meaningful for
        ``event_type="answered"``; it is stored as-is without grading.
        Raises ``ValueError`` for unrecognised event types.
        Usage::
            event = await service.record_flashcard_event(
                flashcard_id=card.id,
                user_id=user_id,
                event_type="answered",
                response="banque",
            )
        """
        if event_type not in VALID_EVENT_TYPES:
            raise ValueError(
                f"Invalid event_type '{event_type}'. Must be one of {VALID_EVENT_TYPES}"
            )
        # NOTE(review): no ownership check that flashcard_id belongs to user_id —
        # presumably enforced at the route layer; confirm.
        return await self.flashcard_repo.record_event(
            flashcard_id=flashcard_id,
            user_id=user_id,
            event_type=event_type,
            user_response=response,
        )

View file

@ -0,0 +1,190 @@
import uuid
from ..models.dictionary import Sense
from ..models.vocab import LearnableWordBankEntry
from ...outbound.postgres.repositories.vocab_repository import VocabRepository
from ...outbound.postgres.repositories.dictionary_repository import DictionaryRepository
class VocabService:
    """Manages a user's learnable word bank — adding words from various sources and
    resolving which dictionary sense a word belongs to.
    Usage::
        service = VocabService(
            vocab_repo=PostgresVocabRepository(db),
            dict_repo=PostgresDictionaryRepository(db),
        )
        entry = await service.add_word_to_bank(
            user_id=user.id,
            surface_text="banque",
            language_pair_id=pair.id,
            pathway="highlight",
        )
        # entry.disambiguation_status is "auto_resolved" if "banque" has exactly one
        # dictionary sense, or "pending" if the user needs to pick from multiple senses.
    """
    def __init__(self, vocab_repo: VocabRepository, dict_repo: DictionaryRepository) -> None:
        self.vocab_repo = vocab_repo
        self.dict_repo = dict_repo
    async def add_word_to_bank(
        self,
        user_id: uuid.UUID,
        surface_text: str,
        language_pair_id: uuid.UUID,
        pathway: str,
        is_phrase: bool = False,
        wordform_id: uuid.UUID | None = None,
        source_article_id: uuid.UUID | None = None,
    ) -> LearnableWordBankEntry:
        """Add a word or phrase to the user's vocab bank, automatically linking it to a
        dictionary sense when exactly one match exists, or flagging it as pending
        disambiguation when zero or multiple senses are found.
        Phrases (``is_phrase=True``) bypass dictionary lookup entirely and are always
        created with ``disambiguation_status="pending"`` since they cannot be resolved
        to a single headword.
        Raises ``ValueError`` if ``language_pair_id`` does not refer to an existing
        language pair.
        Usage::
            # Word with a single sense — auto-resolved immediately
            entry = await service.add_word_to_bank(
                user_id=user_id,
                surface_text="bisque",
                language_pair_id=fr_en_pair_id,
                pathway="highlight",
            )
            assert entry.disambiguation_status == "auto_resolved"
            # Common word with many senses — user must pick one
            entry = await service.add_word_to_bank(
                user_id=user_id,
                surface_text="avoir",
                language_pair_id=fr_en_pair_id,
                pathway="manual",
            )
            assert entry.disambiguation_status == "pending"
            # Multi-word expression — skips lookup, always pending
            entry = await service.add_word_to_bank(
                user_id=user_id,
                surface_text="avoir l'air",
                language_pair_id=fr_en_pair_id,
                pathway="manual",
                is_phrase=True,
            )
        """
        pair = await self.vocab_repo.get_language_pair(language_pair_id)
        if pair is None:
            raise ValueError(f"Language pair {language_pair_id} not found")
        if is_phrase:
            return await self.vocab_repo.add_entry(
                user_id=user_id,
                language_pair_id=language_pair_id,
                surface_text=surface_text,
                entry_pathway=pathway,
                is_phrase=True,
                source_article_id=source_article_id,
                disambiguation_status="pending",
            )
        senses = await self.dict_repo.get_senses_for_headword(surface_text, pair.target_lang)
        # Exactly one candidate sense can be linked automatically; zero and multiple
        # candidates are handled identically — the entry stays pending user input.
        # (Previously written as identical elif/else branches.)
        if len(senses) == 1:
            sense_id: uuid.UUID | None = uuid.UUID(senses[0].id)
            status = "auto_resolved"
        else:
            sense_id = None
            status = "pending"
        return await self.vocab_repo.add_entry(
            user_id=user_id,
            language_pair_id=language_pair_id,
            surface_text=surface_text,
            entry_pathway=pathway,
            is_phrase=False,
            sense_id=sense_id,
            wordform_id=wordform_id,
            source_article_id=source_article_id,
            disambiguation_status=status,
        )
    async def add_token_to_bank(
        self,
        user_id: uuid.UUID,
        surface_text: str,
        language_pair_id: uuid.UUID,
        senses: list[Sense],
        wordform_id: uuid.UUID | None,
        source_article_id: uuid.UUID | None = None,
    ) -> LearnableWordBankEntry:
        """Add a token from the NLP pipeline to the vocab bank using pre-resolved lookup
        results, skipping the redundant dictionary query that ``add_word_to_bank`` would
        otherwise perform.
        ``senses`` and ``wordform_id`` come from :class:`DictionaryLookupService` and
        are stored directly on the bank entry. Auto-resolution still applies: exactly
        one sense means ``auto_resolved``; anything else means ``pending``.
        Raises ``ValueError`` if ``language_pair_id`` does not refer to an existing
        language pair.
        Usage::
            result = await lookup_service.lookup_token("allons", "aller", "VERB", "fr")
            wf_id = uuid.UUID(result.wordform_id) if result.wordform_id else None
            entry = await vocab_service.add_token_to_bank(
                user_id=user_id,
                surface_text="allons",
                language_pair_id=pair_id,
                senses=result.senses,
                wordform_id=wf_id,
            )
            # entry.wordform_id == result.wordform_id (pre-linked to "allons" wordform)
        """
        pair = await self.vocab_repo.get_language_pair(language_pair_id)
        if pair is None:
            raise ValueError(f"Language pair {language_pair_id} not found")
        if len(senses) == 1:
            sense_id: uuid.UUID | None = uuid.UUID(senses[0].id)
            status = "auto_resolved"
        else:
            sense_id = None
            status = "pending"
        return await self.vocab_repo.add_entry(
            user_id=user_id,
            language_pair_id=language_pair_id,
            surface_text=surface_text,
            entry_pathway="nlp_extraction",
            wordform_id=wordform_id,
            sense_id=sense_id,
            source_article_id=source_article_id,
            disambiguation_status=status,
        )
    async def resolve_disambiguation(
        self, entry_id: uuid.UUID, sense_id: uuid.UUID
    ) -> LearnableWordBankEntry:
        """Attach a specific dictionary sense to a pending vocab bank entry, marking it
        as ``resolved`` so it can be used for flashcard generation.
        This is called after the user selects the correct sense from the list presented
        during disambiguation — for example, choosing "bank (finance)" over
        "bank (river)" for the French word "banque".
        Usage::
            # User has been shown the sense list and picked sense_id for "bank (finance)"
            resolved_entry = await service.resolve_disambiguation(
                entry_id=pending_entry.id,
                sense_id=finance_sense_id,
            )
            assert resolved_entry.disambiguation_status == "resolved"
            assert resolved_entry.sense_id == str(finance_sense_id)
        """
        return await self.vocab_repo.set_sense(entry_id, sense_id)

View file

@ -0,0 +1,63 @@
import uuid
from sqlalchemy import String, Text, ForeignKey, Integer
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID, ARRAY, JSONB
from ..database import Base
class DictionaryLemmaEntity(Base):
__tablename__ = "dictionary_lemma"
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
headword: Mapped[str] = mapped_column(Text, nullable=False)
language: Mapped[str] = mapped_column(String(2), nullable=False, index=True)
pos_raw: Mapped[str] = mapped_column(Text, nullable=False)
pos_normalised: Mapped[str | None] = mapped_column(Text, nullable=True)
gender: Mapped[str | None] = mapped_column(Text, nullable=True)
tags: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False, server_default="{}")
class DictionarySenseEntity(Base):
__tablename__ = "dictionary_sense"
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
lemma_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("dictionary_lemma.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
sense_index: Mapped[int] = mapped_column(Integer, nullable=False)
gloss: Mapped[str] = mapped_column(Text, nullable=False, server_default="")
topics: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False, server_default="{}")
tags: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False, server_default="{}")
class DictionaryWordformEntity(Base):
__tablename__ = "dictionary_wordform"
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
lemma_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("dictionary_lemma.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
form: Mapped[str] = mapped_column(Text, nullable=False, index=True)
tags: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False, server_default="{}")
class DictionaryLemmaRawEntity(Base):
    """The original imported JSON record for a lemma, kept for reprocessing."""

    __tablename__ = "dictionary_lemma_raw"

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # One raw record per lemma (unique), dropped when the lemma is deleted.
    lemma_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("dictionary_lemma.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    language: Mapped[str] = mapped_column(String(2), nullable=False)
    # The unmodified source dictionary entry as JSONB.
    raw: Mapped[dict] = mapped_column(JSONB, nullable=False)

View file

@ -0,0 +1,64 @@
import uuid
from datetime import datetime, timezone
from sqlalchemy import DateTime, ForeignKey, Text
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID
from ..database import Base
class FlashcardEntity(Base):
    """A generated flashcard tied to one of the user's word-bank entries."""

    __tablename__ = "flashcard"

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Owner; rows cascade away when the user is deleted.
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # The word-bank entry this card was generated from.
    bank_entry_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("learnable_word_bank_entry.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    source_lang: Mapped[str] = mapped_column(Text, nullable=False)
    target_lang: Mapped[str] = mapped_column(Text, nullable=False)
    # Front / back of the card.
    prompt_text: Mapped[str] = mapped_column(Text, nullable=False)
    answer_text: Mapped[str] = mapped_column(Text, nullable=False)
    # Optional surrounding context shown with the prompt / answer.
    prompt_context_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    answer_context_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    card_direction: Mapped[str] = mapped_column(Text, nullable=False)
    # How the prompt is presented; defaults to plain text.
    prompt_modality: Mapped[str] = mapped_column(Text, nullable=False, default="text")
    # Timezone-aware creation timestamp (UTC), set client-side.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )
class FlashcardEventEntity(Base):
    """A review/interaction event recorded against a flashcard."""

    __tablename__ = "flashcard_event"

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # The card the event belongs to; removed with the card.
    flashcard_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("flashcard.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Denormalised owner id so events can be queried per user directly.
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    event_type: Mapped[str] = mapped_column(Text, nullable=False)
    # Free-text answer the user gave, when the event carries one.
    user_response: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Timezone-aware creation timestamp (UTC), set client-side.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )

View file

@ -0,0 +1,64 @@
import uuid
from datetime import datetime, timezone
from sqlalchemy import Boolean, ForeignKey, String, Text, UniqueConstraint, DateTime
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID
from ..database import Base
class UserLanguagePairEntity(Base):
    """A (source → target) language pair a user is learning.

    At most one row per (user, source, target) combination.
    """

    __tablename__ = "user_language_pair"
    __table_args__ = (UniqueConstraint("user_id", "source_lang", "target_lang"),)

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Owner; rows cascade away when the user is deleted.
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Two-letter language codes.
    source_lang: Mapped[str] = mapped_column(String(2), nullable=False)
    target_lang: Mapped[str] = mapped_column(String(2), nullable=False)
class LearnableWordBankEntryEntity(Base):
    """A word or phrase a user has saved to learn for a language pair."""

    __tablename__ = "learnable_word_bank_entry"

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    language_pair_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("user_language_pair.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Resolved dictionary sense; kept (NULLed) if the sense row disappears.
    sense_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("dictionary_sense.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Matched inflected form, if the entry came from a token lookup.
    wordform_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("dictionary_wordform.id", ondelete="SET NULL"),
        nullable=True,
    )
    # The text exactly as the user saw/entered it.
    surface_text: Mapped[str] = mapped_column(Text, nullable=False)
    is_phrase: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    # How the entry was created (e.g. "manual" — see the vocab routes).
    entry_pathway: Mapped[str] = mapped_column(Text, nullable=False)
    # Plain UUID column (no FK declared) pointing at the originating article.
    source_article_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), nullable=True
    )
    # "pending" until a sense is chosen; set to "resolved" by set_sense.
    disambiguation_status: Mapped[str] = mapped_column(Text, nullable=False, default="pending")
    # Timezone-aware creation timestamp (UTC), set client-side.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )

View file

@ -0,0 +1,145 @@
import uuid
from typing import Protocol
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from ..entities.dictionary_entities import (
DictionaryLemmaEntity,
DictionarySenseEntity,
DictionaryWordformEntity,
)
from ....domain.models.dictionary import Lemma, Sense, Wordform
class DictionaryRepository(Protocol):
    """Read-side port for dictionary data (lemmas, senses, wordforms)."""

    # Target-language → EN: senses for an exact headword (optionally POS-filtered).
    async def get_senses_for_headword(self, headword: str, language: str) -> list[Sense]: ...
    async def get_senses_for_headword_and_pos(self, headword: str, language: str, pos_normalised: str) -> list[Sense]: ...
    async def get_senses_for_lemma(self, lemma_id: uuid.UUID) -> list[Sense]: ...
    # EN → target-language: match senses by their English gloss text.
    async def find_senses_by_english_gloss(self, text: str, target_lang: str) -> list[Sense]: ...
    async def get_sense(self, sense_id: uuid.UUID) -> Sense | None: ...
    async def get_lemma(self, lemma_id: uuid.UUID) -> Lemma | None: ...
    # Inflected-form lookups.
    async def get_wordforms_by_form(self, form: str, language: str) -> list[Wordform]: ...
    async def get_wordforms_for_lemma(self, lemma_id: uuid.UUID) -> list[Wordform]: ...
def _sense_to_model(entity: DictionarySenseEntity) -> Sense:
    """Convert a persisted sense row into the domain ``Sense`` model."""
    # Array columns may come back as None on some drivers; normalise to [].
    topics = entity.topics or []
    tags = entity.tags or []
    return Sense(
        id=str(entity.id),
        lemma_id=str(entity.lemma_id),
        sense_index=entity.sense_index,
        gloss=entity.gloss,
        topics=topics,
        tags=tags,
    )
def _lemma_to_model(entity: DictionaryLemmaEntity) -> Lemma:
    """Convert a persisted lemma row into the domain ``Lemma`` model."""
    fields = {
        "id": str(entity.id),
        "headword": entity.headword,
        "language": entity.language,
        "pos_raw": entity.pos_raw,
        "pos_normalised": entity.pos_normalised,
        "gender": entity.gender,
        # Normalise a possibly-NULL array column to an empty list.
        "tags": entity.tags or [],
    }
    return Lemma(**fields)
def _wordform_to_model(entity: DictionaryWordformEntity) -> Wordform:
    """Convert a persisted wordform row into the domain ``Wordform`` model."""
    fields = {
        "id": str(entity.id),
        "lemma_id": str(entity.lemma_id),
        "form": entity.form,
        # Normalise a possibly-NULL array column to an empty list.
        "tags": entity.tags or [],
    }
    return Wordform(**fields)
class PostgresDictionaryRepository:
    """SQLAlchemy (async) implementation of the DictionaryRepository port.

    Read-only: every method issues a SELECT and maps rows to domain models.
    """

    def __init__(self, db: AsyncSession) -> None:
        self.db = db
    async def get_senses_for_headword(self, headword: str, language: str) -> list[Sense]:
        """All senses whose lemma exactly matches ``headword`` in ``language``,
        ordered by sense_index."""
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .join(DictionaryLemmaEntity, DictionarySenseEntity.lemma_id == DictionaryLemmaEntity.id)
            .where(
                DictionaryLemmaEntity.headword == headword,
                DictionaryLemmaEntity.language == language,
            )
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]
    async def find_senses_by_english_gloss(self, text: str, target_lang: str) -> list[Sense]:
        """EN→target direction: find senses whose gloss matches the given English text.
        Uses a case-insensitive exact match on the gloss column, filtered to the
        target language via the joined lemma row.
        """
        # NOTE: ilike() without wildcards is a case-insensitive *equality* test
        # unless the caller supplies % / _ in ``text``.
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .join(DictionaryLemmaEntity, DictionarySenseEntity.lemma_id == DictionaryLemmaEntity.id)
            .where(
                DictionarySenseEntity.gloss.ilike(text),
                DictionaryLemmaEntity.language == target_lang,
            )
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]
    async def get_sense(self, sense_id: uuid.UUID) -> Sense | None:
        """Fetch a single sense by id, or None when it does not exist."""
        result = await self.db.execute(
            select(DictionarySenseEntity).where(DictionarySenseEntity.id == sense_id)
        )
        entity = result.scalar_one_or_none()
        return _sense_to_model(entity) if entity else None
    async def get_lemma(self, lemma_id: uuid.UUID) -> Lemma | None:
        """Fetch a single lemma by id, or None when it does not exist."""
        result = await self.db.execute(
            select(DictionaryLemmaEntity).where(DictionaryLemmaEntity.id == lemma_id)
        )
        entity = result.scalar_one_or_none()
        return _lemma_to_model(entity) if entity else None
    async def get_senses_for_headword_and_pos(
        self, headword: str, language: str, pos_normalised: str
    ) -> list[Sense]:
        """Like get_senses_for_headword, additionally filtered by normalised POS."""
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .join(DictionaryLemmaEntity, DictionarySenseEntity.lemma_id == DictionaryLemmaEntity.id)
            .where(
                DictionaryLemmaEntity.headword == headword,
                DictionaryLemmaEntity.language == language,
                DictionaryLemmaEntity.pos_normalised == pos_normalised,
            )
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]
    async def get_senses_for_lemma(self, lemma_id: uuid.UUID) -> list[Sense]:
        """All senses of one lemma, ordered by sense_index."""
        result = await self.db.execute(
            select(DictionarySenseEntity)
            .where(DictionarySenseEntity.lemma_id == lemma_id)
            .order_by(DictionarySenseEntity.sense_index)
        )
        return [_sense_to_model(e) for e in result.scalars().all()]
    async def get_wordforms_by_form(self, form: str, language: str) -> list[Wordform]:
        """All wordforms matching ``form`` exactly (case-sensitive) in ``language``."""
        result = await self.db.execute(
            select(DictionaryWordformEntity)
            .join(DictionaryLemmaEntity, DictionaryWordformEntity.lemma_id == DictionaryLemmaEntity.id)
            .where(
                DictionaryWordformEntity.form == form,
                DictionaryLemmaEntity.language == language,
            )
        )
        return [_wordform_to_model(e) for e in result.scalars().all()]
    async def get_wordforms_for_lemma(self, lemma_id: uuid.UUID) -> list[Wordform]:
        """All inflected forms recorded for one lemma (unordered)."""
        result = await self.db.execute(
            select(DictionaryWordformEntity).where(
                DictionaryWordformEntity.lemma_id == lemma_id
            )
        )
        return [_wordform_to_model(e) for e in result.scalars().all()]

View file

@ -0,0 +1,136 @@
import uuid
from datetime import datetime, timezone
from typing import Protocol
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from ..entities.flashcard_entities import FlashcardEntity, FlashcardEventEntity
from ....domain.models.flashcard import Flashcard, FlashcardEvent
class FlashcardRepository(Protocol):
    """Persistence port for flashcards and their review events."""

    # Persist a new card; prompt/answer context and modality are optional.
    async def create_flashcard(
        self,
        user_id: uuid.UUID,
        bank_entry_id: uuid.UUID,
        source_lang: str,
        target_lang: str,
        prompt_text: str,
        answer_text: str,
        card_direction: str,
        prompt_modality: str = "text",
        prompt_context_text: str | None = None,
        answer_context_text: str | None = None,
    ) -> Flashcard: ...
    async def get_flashcards_for_user(self, user_id: uuid.UUID) -> list[Flashcard]: ...
    async def get_flashcards_for_entry(self, bank_entry_id: uuid.UUID) -> list[Flashcard]: ...
    # Append an interaction event (e.g. a review result) for a card.
    async def record_event(
        self,
        flashcard_id: uuid.UUID,
        user_id: uuid.UUID,
        event_type: str,
        user_response: str | None = None,
    ) -> FlashcardEvent: ...
def _flashcard_to_model(entity: FlashcardEntity) -> Flashcard:
    """Translate a flashcard row into the domain ``Flashcard`` model."""
    fields = {
        "id": str(entity.id),
        "user_id": str(entity.user_id),
        "bank_entry_id": str(entity.bank_entry_id),
        "source_lang": entity.source_lang,
        "target_lang": entity.target_lang,
        "prompt_text": entity.prompt_text,
        "answer_text": entity.answer_text,
        "prompt_context_text": entity.prompt_context_text,
        "answer_context_text": entity.answer_context_text,
        "card_direction": entity.card_direction,
        "prompt_modality": entity.prompt_modality,
        "created_at": entity.created_at,
    }
    return Flashcard(**fields)
def _event_to_model(entity: FlashcardEventEntity) -> FlashcardEvent:
    """Translate a flashcard-event row into the domain ``FlashcardEvent`` model."""
    fields = {
        "id": str(entity.id),
        "flashcard_id": str(entity.flashcard_id),
        "user_id": str(entity.user_id),
        "event_type": entity.event_type,
        "user_response": entity.user_response,
        "created_at": entity.created_at,
    }
    return FlashcardEvent(**fields)
class PostgresFlashcardRepository:
    """SQLAlchemy (async) implementation of the FlashcardRepository port.

    Write methods commit immediately and refresh the entity so generated
    values (primary key) are populated before mapping to the domain model.
    """

    def __init__(self, db: AsyncSession) -> None:
        self.db = db
    async def create_flashcard(
        self,
        user_id: uuid.UUID,
        bank_entry_id: uuid.UUID,
        source_lang: str,
        target_lang: str,
        prompt_text: str,
        answer_text: str,
        card_direction: str,
        prompt_modality: str = "text",
        prompt_context_text: str | None = None,
        answer_context_text: str | None = None,
    ) -> Flashcard:
        """Insert and commit a new flashcard row; returns the domain model."""
        entity = FlashcardEntity(
            user_id=user_id,
            bank_entry_id=bank_entry_id,
            source_lang=source_lang,
            target_lang=target_lang,
            prompt_text=prompt_text,
            answer_text=answer_text,
            prompt_context_text=prompt_context_text,
            answer_context_text=answer_context_text,
            card_direction=card_direction,
            prompt_modality=prompt_modality,
            created_at=datetime.now(timezone.utc),
        )
        self.db.add(entity)
        await self.db.commit()
        await self.db.refresh(entity)
        return _flashcard_to_model(entity)
    async def get_flashcards_for_user(self, user_id: uuid.UUID) -> list[Flashcard]:
        """All of a user's flashcards, newest first."""
        result = await self.db.execute(
            select(FlashcardEntity)
            .where(FlashcardEntity.user_id == user_id)
            .order_by(FlashcardEntity.created_at.desc())
        )
        return [_flashcard_to_model(e) for e in result.scalars().all()]
    async def get_flashcards_for_entry(self, bank_entry_id: uuid.UUID) -> list[Flashcard]:
        """All flashcards generated from one word-bank entry, newest first."""
        result = await self.db.execute(
            select(FlashcardEntity)
            .where(FlashcardEntity.bank_entry_id == bank_entry_id)
            .order_by(FlashcardEntity.created_at.desc())
        )
        return [_flashcard_to_model(e) for e in result.scalars().all()]
    async def record_event(
        self,
        flashcard_id: uuid.UUID,
        user_id: uuid.UUID,
        event_type: str,
        user_response: str | None = None,
    ) -> FlashcardEvent:
        """Insert and commit an interaction event for a flashcard."""
        entity = FlashcardEventEntity(
            flashcard_id=flashcard_id,
            user_id=user_id,
            event_type=event_type,
            user_response=user_response,
            created_at=datetime.now(timezone.utc),
        )
        self.db.add(entity)
        await self.db.commit()
        await self.db.refresh(entity)
        return _event_to_model(entity)

View file

@ -7,6 +7,26 @@ from ..entities.learnable_language_entity import LearnableLanguageEntity
from ....domain.models.learnable_language import LearnableLanguage
async def delete(db: AsyncSession, user_id: uuid.UUID, language_id: uuid.UUID) -> bool:
    """Delete the learnable-language row ``language_id`` if owned by ``user_id``.

    Returns ``True`` when a row was removed, ``False`` when nothing matched.
    Filtering on ``user_id`` stops one user deleting another user's data.
    """
    lookup = select(LearnableLanguageEntity).where(
        LearnableLanguageEntity.id == language_id,
        LearnableLanguageEntity.user_id == user_id,
    )
    row = (await db.execute(lookup)).scalar_one_or_none()
    if row is None:
        return False
    await db.delete(row)
    await db.commit()
    return True
def _to_model(entity: LearnableLanguageEntity) -> LearnableLanguage:
return LearnableLanguage(
id=str(entity.id),

View file

@ -0,0 +1,177 @@
import uuid
from datetime import datetime, timezone
from typing import Protocol
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from ..entities.vocab_entities import LearnableWordBankEntryEntity, UserLanguagePairEntity
from ....domain.models.vocab import LearnableWordBankEntry, UserLanguagePair
class VocabRepository(Protocol):
    """Persistence port for language pairs and word-bank entries."""

    # Idempotent: returns the existing pair or creates it.
    async def get_or_create_language_pair(
        self, user_id: uuid.UUID, source_lang: str, target_lang: str
    ) -> UserLanguagePair: ...
    async def get_language_pair(self, language_pair_id: uuid.UUID) -> UserLanguagePair | None: ...
    # Insert a new word-bank entry; sense/wordform links are optional.
    async def add_entry(
        self,
        user_id: uuid.UUID,
        language_pair_id: uuid.UUID,
        surface_text: str,
        entry_pathway: str,
        is_phrase: bool = False,
        sense_id: uuid.UUID | None = None,
        wordform_id: uuid.UUID | None = None,
        source_article_id: uuid.UUID | None = None,
        disambiguation_status: str = "pending",
    ) -> LearnableWordBankEntry: ...
    async def get_entries_for_user(
        self, user_id: uuid.UUID, language_pair_id: uuid.UUID
    ) -> list[LearnableWordBankEntry]: ...
    # Attach a chosen sense to an entry, marking it resolved.
    async def set_sense(
        self, entry_id: uuid.UUID, sense_id: uuid.UUID
    ) -> LearnableWordBankEntry: ...
    async def get_entry(self, entry_id: uuid.UUID) -> LearnableWordBankEntry | None: ...
    async def get_pending_disambiguation(self, user_id: uuid.UUID) -> list[LearnableWordBankEntry]: ...
def _pair_to_model(entity: UserLanguagePairEntity) -> UserLanguagePair:
    """Translate a language-pair row into the domain ``UserLanguagePair`` model."""
    fields = {
        "id": str(entity.id),
        "user_id": str(entity.user_id),
        "source_lang": entity.source_lang,
        "target_lang": entity.target_lang,
    }
    return UserLanguagePair(**fields)
def _entry_to_model(entity: LearnableWordBankEntryEntity) -> LearnableWordBankEntry:
    """Translate a word-bank row into its domain model, stringifying UUIDs."""
    # Optional foreign keys become None or their string form.
    sense = str(entity.sense_id) if entity.sense_id else None
    wordform = str(entity.wordform_id) if entity.wordform_id else None
    article = str(entity.source_article_id) if entity.source_article_id else None
    return LearnableWordBankEntry(
        id=str(entity.id),
        user_id=str(entity.user_id),
        language_pair_id=str(entity.language_pair_id),
        sense_id=sense,
        wordform_id=wordform,
        surface_text=entity.surface_text,
        is_phrase=entity.is_phrase,
        entry_pathway=entity.entry_pathway,
        source_article_id=article,
        disambiguation_status=entity.disambiguation_status,
        created_at=entity.created_at,
    )
class PostgresVocabRepository:
    """SQLAlchemy (async) implementation of the VocabRepository port."""

    def __init__(self, db: AsyncSession) -> None:
        self.db = db
    async def get_or_create_language_pair(
        self, user_id: uuid.UUID, source_lang: str, target_lang: str
    ) -> UserLanguagePair:
        """Return the user's pair for (source, target), creating it if absent.

        NOTE(review): this flushes but does not commit, unlike the other write
        methods here — confirm the caller owns the transaction boundary. A
        concurrent create can also hit the unique constraint; verify handling.
        """
        result = await self.db.execute(
            select(UserLanguagePairEntity).where(
                UserLanguagePairEntity.user_id == user_id,
                UserLanguagePairEntity.source_lang == source_lang,
                UserLanguagePairEntity.target_lang == target_lang,
            )
        )
        entity = result.scalar_one_or_none()
        if entity is None:
            entity = UserLanguagePairEntity(
                user_id=user_id,
                source_lang=source_lang,
                target_lang=target_lang,
            )
            self.db.add(entity)
            # Flush so the generated id is available without committing.
            await self.db.flush()
        return _pair_to_model(entity)
    async def get_language_pair(self, language_pair_id: uuid.UUID) -> UserLanguagePair | None:
        """Fetch a language pair by id, or None when it does not exist."""
        result = await self.db.execute(
            select(UserLanguagePairEntity).where(UserLanguagePairEntity.id == language_pair_id)
        )
        entity = result.scalar_one_or_none()
        return _pair_to_model(entity) if entity else None
    async def add_entry(
        self,
        user_id: uuid.UUID,
        language_pair_id: uuid.UUID,
        surface_text: str,
        entry_pathway: str,
        is_phrase: bool = False,
        sense_id: uuid.UUID | None = None,
        wordform_id: uuid.UUID | None = None,
        source_article_id: uuid.UUID | None = None,
        disambiguation_status: str = "pending",
    ) -> LearnableWordBankEntry:
        """Insert and commit a new word-bank entry; returns the domain model."""
        entity = LearnableWordBankEntryEntity(
            user_id=user_id,
            language_pair_id=language_pair_id,
            surface_text=surface_text,
            entry_pathway=entry_pathway,
            is_phrase=is_phrase,
            sense_id=sense_id,
            wordform_id=wordform_id,
            source_article_id=source_article_id,
            disambiguation_status=disambiguation_status,
            created_at=datetime.now(timezone.utc),
        )
        self.db.add(entity)
        await self.db.commit()
        await self.db.refresh(entity)
        return _entry_to_model(entity)
    async def get_entries_for_user(
        self, user_id: uuid.UUID, language_pair_id: uuid.UUID
    ) -> list[LearnableWordBankEntry]:
        """A user's entries for one language pair, newest first."""
        result = await self.db.execute(
            select(LearnableWordBankEntryEntity)
            .where(
                LearnableWordBankEntryEntity.user_id == user_id,
                LearnableWordBankEntryEntity.language_pair_id == language_pair_id,
            )
            .order_by(LearnableWordBankEntryEntity.created_at.desc())
        )
        return [_entry_to_model(e) for e in result.scalars().all()]
    async def set_sense(
        self, entry_id: uuid.UUID, sense_id: uuid.UUID
    ) -> LearnableWordBankEntry:
        """Attach ``sense_id`` to an entry and mark it resolved.

        Raises sqlalchemy's NoResultFound (via scalar_one) when the entry
        does not exist.
        """
        result = await self.db.execute(
            select(LearnableWordBankEntryEntity).where(
                LearnableWordBankEntryEntity.id == entry_id
            )
        )
        entity = result.scalar_one()
        entity.sense_id = sense_id
        entity.disambiguation_status = "resolved"
        await self.db.commit()
        await self.db.refresh(entity)
        return _entry_to_model(entity)
    async def get_entry(self, entry_id: uuid.UUID) -> LearnableWordBankEntry | None:
        """Fetch a single word-bank entry by id, or None when absent."""
        result = await self.db.execute(
            select(LearnableWordBankEntryEntity).where(
                LearnableWordBankEntryEntity.id == entry_id
            )
        )
        entity = result.scalar_one_or_none()
        return _entry_to_model(entity) if entity else None
    async def get_pending_disambiguation(self, user_id: uuid.UUID) -> list[LearnableWordBankEntry]:
        """A user's entries still awaiting sense disambiguation, newest first."""
        result = await self.db.execute(
            select(LearnableWordBankEntryEntity)
            .where(
                LearnableWordBankEntryEntity.user_id == user_id,
                LearnableWordBankEntryEntity.disambiguation_status == "pending",
            )
            .order_by(LearnableWordBankEntryEntity.created_at.desc())
        )
        return [_entry_to_model(e) for e in result.scalars().all()]

View file

@ -0,0 +1,97 @@
import uuid
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, field_validator
from sqlalchemy.ext.asyncio import AsyncSession
from ...auth import verify_token
from ...domain.services.account_service import AccountService
from ...languages import SUPPORTED_LANGUAGES, SUPPORTED_LEVELS
from ...outbound.postgres.database import get_db
router = APIRouter(prefix="/account", tags=["account"])
class AddLearnableLanguageRequest(BaseModel):
    """Payload for registering a new learnable language pair."""

    source_language: str
    target_language: str
    proficiencies: list[str]

    @field_validator("proficiencies")
    @classmethod
    def validate_proficiencies(cls, v: list[str]) -> list[str]:
        """Require one or two levels, each drawn from SUPPORTED_LEVELS."""
        if len(v) < 1 or len(v) > 2:
            raise ValueError("proficiencies must contain 1 or 2 levels")
        invalid = [p for p in v if p not in SUPPORTED_LEVELS]
        if invalid:
            raise ValueError(f"Invalid proficiency levels: {invalid}. Supported: {sorted(SUPPORTED_LEVELS)}")
        return v
class LearnableLanguageResponse(BaseModel):
    """API representation of a stored learnable language pair."""

    id: str
    source_language: str
    target_language: str
    proficiencies: list[str]
def _require_supported_language(code: str, role: str) -> None:
    """Raise 400 when ``code`` is not a supported language. ``role`` is
    'source' or 'target' and only affects the error message."""
    if code not in SUPPORTED_LANGUAGES:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported {role} language '{code}'. Supported: {list(SUPPORTED_LANGUAGES)}",
        )


@router.post(
    "/learnable-languages",
    response_model=LearnableLanguageResponse,
    status_code=status.HTTP_201_CREATED,
)
async def add_learnable_language(
    body: AddLearnableLanguageRequest,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> LearnableLanguageResponse:
    """Register a new source→target learnable language for the caller.

    Returns 400 when either language is unsupported or they are identical.
    The user id is taken from the verified token's ``sub`` claim.
    """
    # Same validation for both fields — deduplicated into the helper above.
    _require_supported_language(body.source_language, "source")
    _require_supported_language(body.target_language, "target")
    if body.source_language == body.target_language:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="source_language and target_language must differ",
        )
    user_id = uuid.UUID(token_data["sub"])
    lang = await AccountService(db).add_learnable_language(
        user_id=user_id,
        source_language=body.source_language,
        target_language=body.target_language,
        proficiencies=body.proficiencies,
    )
    return LearnableLanguageResponse(
        id=lang.id,
        source_language=lang.source_language,
        target_language=lang.target_language,
        proficiencies=lang.proficiencies,
    )
@router.delete(
    "/learnable-languages/{language_id}",
    status_code=status.HTTP_204_NO_CONTENT,
)
async def remove_learnable_language(
    language_id: str,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> None:
    """Remove one of the caller's learnable languages.

    Returns 400 for a malformed id and 404 when the service reports the
    language does not exist (or is not owned by the caller).
    """
    try:
        lid = uuid.UUID(language_id)
    except ValueError:
        # Suppress the chain: the parse failure adds nothing for the client.
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid language_id") from None
    user_id = uuid.UUID(token_data["sub"])
    try:
        await AccountService(db).remove_learnable_language(user_id=user_id, language_id=lid)
    except ValueError as exc:
        # Chain the cause so server-side logs keep the original error.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)) from exc

View file

@ -0,0 +1,143 @@
import uuid
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from ...auth import verify_token
from ...domain.services.flashcard_service import FlashcardService
from ...outbound.postgres.database import get_db
from ...outbound.postgres.repositories.dictionary_repository import PostgresDictionaryRepository
from ...outbound.postgres.repositories.flashcard_repository import PostgresFlashcardRepository
from ...outbound.postgres.repositories.vocab_repository import PostgresVocabRepository
router = APIRouter(tags=["flashcards"])
class FlashcardResponse(BaseModel):
    """API representation of a flashcard; created_at is an ISO-8601 string."""

    id: str
    user_id: str
    bank_entry_id: str
    source_lang: str
    target_lang: str
    prompt_text: str
    answer_text: str
    prompt_context_text: str | None
    answer_context_text: str | None
    card_direction: str
    prompt_modality: str
    created_at: str
class FlashcardEventResponse(BaseModel):
    """API representation of a recorded flashcard event."""

    id: str
    flashcard_id: str
    user_id: str
    event_type: str
    user_response: str | None
    created_at: str
class GenerateFlashcardsRequest(BaseModel):
    """Optional card direction for generation; None lets the service decide."""

    direction: str | None = None
class RecordEventRequest(BaseModel):
    """A flashcard interaction to record; user_response is optional."""

    event_type: str
    user_response: str | None = None
def _service(db: AsyncSession) -> FlashcardService:
    """Wire a FlashcardService onto Postgres-backed repositories."""
    flashcards = PostgresFlashcardRepository(db)
    vocab = PostgresVocabRepository(db)
    dictionary = PostgresDictionaryRepository(db)
    return FlashcardService(
        flashcard_repo=flashcards,
        vocab_repo=vocab,
        dict_repo=dictionary,
    )
@router.post(
    "/vocab/{entry_id}/flashcards",
    response_model=list[FlashcardResponse],
    status_code=status.HTTP_201_CREATED,
)
async def generate_flashcards(
    entry_id: str,
    body: GenerateFlashcardsRequest,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> list[FlashcardResponse]:
    """Generate flashcards from a word-bank entry.

    Returns 400 for a malformed entry_id and 422 when the service
    rejects the entry with a ValueError.
    """
    try:
        eid = uuid.UUID(entry_id)
    except ValueError:
        # Suppress the chain: the parse failure adds nothing for the client.
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid entry_id") from None
    try:
        cards = await _service(db).generate_flashcard_from_entry(eid, direction=body.direction)
    except ValueError as exc:
        # Chain the cause so server-side logs keep the original error.
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(exc)) from exc
    return [_flashcard_response(c) for c in cards]
@router.get("/flashcards", response_model=list[FlashcardResponse])
async def list_flashcards(
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> list[FlashcardResponse]:
    """Return every flashcard owned by the authenticated user."""
    owner = uuid.UUID(token_data["sub"])
    repo = PostgresFlashcardRepository(db)
    cards = await repo.get_flashcards_for_user(owner)
    return [_flashcard_response(card) for card in cards]
@router.post(
    "/flashcards/{flashcard_id}/events",
    response_model=FlashcardEventResponse,
    status_code=status.HTTP_201_CREATED,
)
async def record_event(
    flashcard_id: str,
    body: RecordEventRequest,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> FlashcardEventResponse:
    """Record an interaction event against one of the caller's flashcards.

    Returns 400 for a malformed flashcard_id or when the service rejects
    the event with a ValueError.
    """
    try:
        fid = uuid.UUID(flashcard_id)
    except ValueError:
        # Suppress the chain: the parse failure adds nothing for the client.
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid flashcard_id") from None
    user_id = uuid.UUID(token_data["sub"])
    try:
        event = await _service(db).record_flashcard_event(
            flashcard_id=fid,
            user_id=user_id,
            event_type=body.event_type,
            response=body.user_response,
        )
    except ValueError as exc:
        # Chain the cause so server-side logs keep the original error.
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
    return FlashcardEventResponse(
        id=event.id,
        flashcard_id=event.flashcard_id,
        user_id=event.user_id,
        event_type=event.event_type,
        user_response=event.user_response,
        created_at=event.created_at.isoformat(),
    )
def _flashcard_response(card) -> FlashcardResponse:
    """Build the API response model for one domain flashcard."""
    payload = {
        "id": card.id,
        "user_id": card.user_id,
        "bank_entry_id": card.bank_entry_id,
        "source_lang": card.source_lang,
        "target_lang": card.target_lang,
        "prompt_text": card.prompt_text,
        "answer_text": card.answer_text,
        "prompt_context_text": card.prompt_context_text,
        "answer_context_text": card.answer_context_text,
        "card_direction": card.card_direction,
        "prompt_modality": card.prompt_modality,
        # Timestamps are serialised as ISO-8601 strings.
        "created_at": card.created_at.isoformat(),
    }
    return FlashcardResponse(**payload)

View file

@ -1,15 +1,21 @@
# Aggregate every feature router under a single /api prefix.
from .account import router as account_router
from .flashcards import router as flashcards_router
from .pos import router as pos_router
from .translate import router as translate_router
from .generation import router as generation_router
from .jobs import router as jobs_router
from .learnable_languages import router as learnable_languages_router
from .vocab import router as vocab_router
from fastapi import APIRouter

# Parent router mounted by the application; sub-routers keep their own tags.
api_router = APIRouter(prefix="/api", tags=["api"])
api_router.include_router(account_router)
api_router.include_router(flashcards_router)
api_router.include_router(pos_router)
api_router.include_router(translate_router)
api_router.include_router(generation_router)
api_router.include_router(jobs_router)
api_router.include_router(learnable_languages_router)
api_router.include_router(vocab_router)

View file

@ -0,0 +1,218 @@
import uuid

from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.exc import NoResultFound
from sqlalchemy.ext.asyncio import AsyncSession

from ...auth import verify_token
from ...domain.services.dictionary_lookup_service import DictionaryLookupService, TokenLookupResult
from ...domain.services.vocab_service import VocabService
from ...outbound.postgres.database import get_db
from ...outbound.postgres.repositories.dictionary_repository import PostgresDictionaryRepository
from ...outbound.postgres.repositories.vocab_repository import PostgresVocabRepository
router = APIRouter(prefix="/vocab", tags=["vocab"])
class AddWordRequest(BaseModel):
    """Payload for manually adding a word or phrase to the word bank."""

    language_pair_id: str
    surface_text: str
    entry_pathway: str = "manual"
    is_phrase: bool = False
    source_article_id: str | None = None
class AddFromTokenRequest(BaseModel):
    """Payload for adding a word from a spaCy-analysed token in an article."""

    language_pair_id: str
    surface: str
    spacy_lemma: str
    pos_ud: str
    language: str
    source_article_id: str | None = None
class SenseCandidateResponse(BaseModel):
    """One dictionary sense the user may pick during disambiguation."""

    id: str
    gloss: str
    topics: list[str]
    tags: list[str]
class FromTokenResponse(BaseModel):
    """Created entry plus candidate senses and how the token was matched."""

    entry: "WordBankEntryResponse"
    sense_candidates: list[SenseCandidateResponse]
    matched_via: str
class SetSenseRequest(BaseModel):
    """Payload resolving an entry's disambiguation to one sense."""

    sense_id: str
class WordBankEntryResponse(BaseModel):
    """API representation of a word-bank entry; created_at is ISO-8601."""

    id: str
    user_id: str
    language_pair_id: str
    sense_id: str | None
    wordform_id: str | None
    surface_text: str
    is_phrase: bool
    entry_pathway: str
    source_article_id: str | None
    disambiguation_status: str
    created_at: str
def _service(db: AsyncSession) -> VocabService:
    """Wire a VocabService onto Postgres-backed repositories."""
    vocab = PostgresVocabRepository(db)
    dictionary = PostgresDictionaryRepository(db)
    return VocabService(vocab_repo=vocab, dict_repo=dictionary)
@router.post("", response_model=WordBankEntryResponse, status_code=201)
async def add_word(
    request: AddWordRequest,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> WordBankEntryResponse:
    """Manually add a word/phrase to the caller's word bank.

    Returns 400 for malformed UUIDs and 404 when the service reports the
    language pair does not exist (ValueError).
    """
    user_id = uuid.UUID(token_data["sub"])
    try:
        language_pair_id = uuid.UUID(request.language_pair_id)
    except ValueError:
        # Suppress the chain: the parse failure adds nothing for the client.
        raise HTTPException(status_code=400, detail="Invalid language_pair_id") from None
    source_article_id = None
    if request.source_article_id:
        try:
            source_article_id = uuid.UUID(request.source_article_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid source_article_id") from None
    try:
        entry = await _service(db).add_word_to_bank(
            user_id=user_id,
            surface_text=request.surface_text.strip(),
            language_pair_id=language_pair_id,
            pathway=request.entry_pathway,
            is_phrase=request.is_phrase,
            source_article_id=source_article_id,
        )
    except ValueError as exc:
        # Chain the cause so server-side logs keep the original error.
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return _to_response(entry)
@router.post("/from-token", response_model=FromTokenResponse, status_code=201)
async def add_from_token(
    request: AddFromTokenRequest,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> FromTokenResponse:
    """Add a word-bank entry from an analysed token, returning sense candidates.

    The dictionary lookup runs first; its senses and matched wordform are
    stored with the entry so the user can disambiguate later. Returns 400
    for malformed UUIDs, 404 when the service raises ValueError.
    """
    user_id = uuid.UUID(token_data["sub"])
    try:
        language_pair_id = uuid.UUID(request.language_pair_id)
    except ValueError:
        # Suppress the chain: the parse failure adds nothing for the client.
        raise HTTPException(status_code=400, detail="Invalid language_pair_id") from None
    source_article_id = None
    if request.source_article_id:
        try:
            source_article_id = uuid.UUID(request.source_article_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid source_article_id") from None
    lookup_service = DictionaryLookupService(PostgresDictionaryRepository(db))
    result: TokenLookupResult = await lookup_service.lookup_token(
        surface=request.surface,
        spacy_lemma=request.spacy_lemma,
        pos_ud=request.pos_ud,
        language=request.language,
    )
    wordform_id = uuid.UUID(result.wordform_id) if result.wordform_id else None
    try:
        entry = await _service(db).add_token_to_bank(
            user_id=user_id,
            surface_text=request.surface,
            language_pair_id=language_pair_id,
            senses=result.senses,
            wordform_id=wordform_id,
            source_article_id=source_article_id,
        )
    except ValueError as exc:
        # Chain the cause so server-side logs keep the original error.
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    candidates = [
        SenseCandidateResponse(id=s.id, gloss=s.gloss, topics=s.topics, tags=s.tags)
        for s in result.senses
    ]
    return FromTokenResponse(
        entry=_to_response(entry),
        sense_candidates=candidates,
        matched_via=result.matched_via,
    )
@router.get("", response_model=list[WordBankEntryResponse])
async def list_entries(
    language_pair_id: str,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> list[WordBankEntryResponse]:
    """List the caller's word-bank entries for one language pair.

    Returns 400 when ``language_pair_id`` is not a valid UUID.
    """
    user_id = uuid.UUID(token_data["sub"])
    try:
        pair_id = uuid.UUID(language_pair_id)
    except ValueError:
        # Suppress the chain: the parse failure adds nothing for the client.
        raise HTTPException(status_code=400, detail="Invalid language_pair_id") from None
    entries = await PostgresVocabRepository(db).get_entries_for_user(user_id, pair_id)
    return [_to_response(e) for e in entries]
@router.get("/pending-disambiguation", response_model=list[WordBankEntryResponse])
async def pending_disambiguation(
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> list[WordBankEntryResponse]:
    """List the caller's entries still awaiting sense disambiguation."""
    owner = uuid.UUID(token_data["sub"])
    repo = PostgresVocabRepository(db)
    pending = await repo.get_pending_disambiguation(owner)
    return [_to_response(item) for item in pending]
@router.patch("/{entry_id}/sense", response_model=WordBankEntryResponse)
async def resolve_sense(
    entry_id: str,
    request: SetSenseRequest,
    db: AsyncSession = Depends(get_db),
    token_data: dict = Depends(verify_token),
) -> WordBankEntryResponse:
    """Attach the chosen dictionary sense to a word-bank entry.

    Raises:
        HTTPException 400: path or body id is not a valid UUID.
        HTTPException 404: the entry/sense could not be resolved.
    """
    try:
        eid = uuid.UUID(entry_id)
        sid = uuid.UUID(request.sense_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid UUID")
    # Catch only the domain error. The previous bare `except Exception`
    # turned *any* failure (DB outage, programming bug) into a 404,
    # hiding real faults from clients and logs.
    # NOTE(review): assumes the service signals not-found with ValueError,
    # matching add_token_to_bank above — confirm in the service layer.
    try:
        entry = await _service(db).resolve_disambiguation(eid, sid)
    except ValueError:
        raise HTTPException(status_code=404, detail="Entry not found")
    return _to_response(entry)
def _to_response(entry) -> WordBankEntryResponse:
    """Map a domain word-bank entry onto its API response schema."""
    payload = {
        "id": entry.id,
        "user_id": entry.user_id,
        "language_pair_id": entry.language_pair_id,
        "sense_id": entry.sense_id,
        "wordform_id": entry.wordform_id,
        "surface_text": entry.surface_text,
        "is_phrase": entry.is_phrase,
        "entry_pathway": entry.entry_pathway,
        "source_article_id": entry.source_article_id,
        "disambiguation_status": entry.disambiguation_status,
        # Serialise the timestamp so the schema stays JSON-friendly.
        "created_at": entry.created_at.isoformat(),
    }
    return WordBankEntryResponse(**payload)

View file

@ -1,9 +1,9 @@
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, EmailStr
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from ..auth import create_access_token, hash_password, verify_password
from ..auth import create_access_token, verify_password
from ..domain.services.account_service import AccountService
from ..outbound.postgres.database import get_db
from ..outbound.postgres.repositories import user_repository
@ -27,24 +27,15 @@ class TokenResponse(BaseModel):
@router.post("/register", status_code=status.HTTP_201_CREATED)
async def register(body: RegisterRequest, db: AsyncSession = Depends(get_db)):
try:
user = await user_repository.create(
db,
email=body.email,
hashed_password=hash_password(body.password),
)
except IntegrityError:
await db.rollback()
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="Email already registered",
)
# TODO(email-verification): send verification email here once transactional
# email is implemented. Set is_email_verified=False on the User model and
# require verification before allowing login.
try:
account = await AccountService(db).create_account(body.email, body.password)
except ValueError as exc:
raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(exc))
return {"id": str(user.id), "email": user.email}
return {"id": account.id, "email": account.email}
@router.post("/login", response_model=TokenResponse)

View file

@ -46,3 +46,9 @@ Example Api Clients in their own modules are:
- `AnthropicClient` to communicate with Anthropic's LLM, i.e. Claude, to generate text and synthesis.
- `GeminiClient` to communicate with Google's Gemini for text-to-speech generation
- `DeepgramClient` for timestamped speech-to-text transcription
## Deployment
The application has not been deployed yet, but local development should mimic the deployed environment as much as possible.
It will be deployed on a VPS using containerisation technologies (docker, podman). At the root of the project there is a `docker-compose.yaml` file which will describe each dependency (e.g. database, queue, storage).

View file

@ -1,7 +1,7 @@
[project]
name = "language-learning-api"
version = "0.1.0"
requires-python = ">=3.11"
requires-python = "==3.13.*"
dependencies = [
"fastapi>=0.115.0",
"uvicorn[standard]>=0.30.0",

View file

@ -0,0 +1,322 @@
#!/usr/bin/env python
"""
CLI import script for kaikki/wiktextract JSONL dictionary data.
Usage (from api/ directory):
uv run ./scripts/import_dictionary.py --lang fr
# or via Make from the repo root:
make import-dictionary lang=fr
DATABASE_URL defaults to postgresql+asyncpg://langlearn:langlearn@localhost:5432/langlearn
which matches the docker-compose dev credentials when the DB port is exposed on the host.
"""
import argparse
import asyncio
import json
import os
import sys
import uuid
from pathlib import Path
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ARRAY, JSONB
from sqlalchemy.dialects.postgresql import UUID as PG_UUID
from sqlalchemy.ext.asyncio import (
    AsyncConnection,
    AsyncSession,
    async_sessionmaker,
    create_async_engine,
)
# Resolve paths relative to this file so the script works from any CWD
# (the Makefile target runs it from api/).
_API_DIR = Path(__file__).parent.parent
_REPO_ROOT = _API_DIR.parent
_DICT_DIR = _REPO_ROOT / "dictionaries" / "kaikki"

# Language code -> JSONL filename under dictionaries/kaikki/.
_LANG_FILE_MAP: dict[str, str] = {
    "fr": "french.jsonl",
}

# kaikki part-of-speech label (lowercased) -> Universal Dependencies tag.
# Labels not listed here leave pos_normalised as None (NULL in the DB).
_POS_MAP: dict[str, str] = {
    "noun": "NOUN",
    "verb": "VERB",
    "adj": "ADJ",
    "adv": "ADV",
    "det": "DET",
    "article": "DET",
    "pron": "PRON",
    "prep": "ADP",
    "adp": "ADP",
    "conj": "CCONJ",
    "cconj": "CCONJ",
    "sconj": "SCONJ",
    "intj": "INTJ",
    "num": "NUM",
    "numeral": "NUM",
    "part": "PART",
    "particle": "PART",
    "name": "PROPN",
    "propn": "PROPN",
    "proper noun": "PROPN",
    "punct": "PUNCT",
    "sym": "SYM",
}

# Gender tag spellings seen in kaikki data -> canonical gender value.
_GENDER_MAP: dict[str, str] = {
    "masculine": "masculine",
    "masc": "masculine",
    "feminine": "feminine",
    "fem": "feminine",
    "neuter": "neuter",
    "common": "common",
}
# ---------------------------------------------------------------------------
# Standalone table definitions — no app imports, no Settings() call
# ---------------------------------------------------------------------------
_meta = sa.MetaData()

# One row per (headword, POS) lemma entry.
# NOTE(review): these column lists must stay in sync with the
# dictionary_* migration/entities in the app — confirm on schema changes.
_lemma_table = sa.Table(
    "dictionary_lemma",
    _meta,
    sa.Column("id", PG_UUID(as_uuid=True), primary_key=True),
    sa.Column("headword", sa.Text(), nullable=False),
    sa.Column("language", sa.String(2), nullable=False),
    sa.Column("pos_raw", sa.Text(), nullable=False),
    sa.Column("pos_normalised", sa.Text(), nullable=True),
    sa.Column("gender", sa.Text(), nullable=True),
    sa.Column("tags", ARRAY(sa.Text()), nullable=False),
)

# One row per gloss (sense) belonging to a lemma, ordered by sense_index.
_sense_table = sa.Table(
    "dictionary_sense",
    _meta,
    sa.Column("id", PG_UUID(as_uuid=True), primary_key=True),
    sa.Column("lemma_id", PG_UUID(as_uuid=True), nullable=False),
    sa.Column("sense_index", sa.Integer(), nullable=False),
    sa.Column("gloss", sa.Text(), nullable=False),
    sa.Column("topics", ARRAY(sa.Text()), nullable=False),
    sa.Column("tags", ARRAY(sa.Text()), nullable=False),
)

# Inflected forms of a lemma (excluding the headword itself).
_wordform_table = sa.Table(
    "dictionary_wordform",
    _meta,
    sa.Column("id", PG_UUID(as_uuid=True), primary_key=True),
    sa.Column("lemma_id", PG_UUID(as_uuid=True), nullable=False),
    sa.Column("form", sa.Text(), nullable=False),
    sa.Column("tags", ARRAY(sa.Text()), nullable=False),
)

# The untouched source JSON record, kept for re-processing/debugging.
_raw_table = sa.Table(
    "dictionary_lemma_raw",
    _meta,
    sa.Column("id", PG_UUID(as_uuid=True), primary_key=True),
    sa.Column("lemma_id", PG_UUID(as_uuid=True), nullable=False),
    sa.Column("language", sa.String(2), nullable=False),
    sa.Column("raw", JSONB(), nullable=False),
)
# ---------------------------------------------------------------------------
# Normalisation helpers
# ---------------------------------------------------------------------------
def _normalise_pos(pos_raw: str) -> str | None:
    """Map a raw kaikki POS label to its Universal Dependencies tag, or None."""
    key = pos_raw.strip().lower()
    return _POS_MAP.get(key)
def _normalise_gender(tags: list) -> str | None:
    """Return the first recognised gender among *tags*, or None if absent."""
    mapped = (_GENDER_MAP.get(tag) for tag in tags)
    return next((gender for gender in mapped if gender), None)
# ---------------------------------------------------------------------------
# Parsing
# ---------------------------------------------------------------------------
def _parse_entry(record: dict, lang_code: str) -> dict | None:
    """Parse one kaikki JSONL record into insertion-ready row dicts.

    Returns None when the record belongs to another language or has no
    headword; otherwise a dict with "lemma", "senses", "wordforms" and
    "raw" keys matching the four dictionary tables.
    """
    if record.get("lang_code") != lang_code:
        return None

    headword = (record.get("word") or "").strip()
    if not headword:
        return None

    raw_pos = (record.get("pos") or "").strip()
    lemma_tags = record.get("tags") or []
    lemma_id = uuid.uuid4()

    # One row per sense, preserving the record's sense order.
    sense_rows = [
        {
            "id": uuid.uuid4(),
            "lemma_id": lemma_id,
            "sense_index": idx,
            "gloss": (sense.get("glosses") or [""])[0],
            "topics": sense.get("topics") or [],
            "tags": sense.get("tags") or [],
        }
        for idx, sense in enumerate(record.get("senses") or [])
    ]

    # Inflected forms; skip empties and the headword itself.
    form_rows = [
        {
            "id": uuid.uuid4(),
            "lemma_id": lemma_id,
            "form": text,
            "tags": form_rec.get("tags") or [],
        }
        for form_rec in record.get("forms") or []
        if (text := (form_rec.get("form") or "").strip()) and text != headword
    ]

    return {
        "lemma": {
            "id": lemma_id,
            "headword": headword,
            "language": lang_code,
            "pos_raw": raw_pos,
            "pos_normalised": _normalise_pos(raw_pos),
            "gender": _normalise_gender(lemma_tags),
            "tags": lemma_tags,
        },
        "senses": sense_rows,
        "wordforms": form_rows,
        "raw": {
            "id": uuid.uuid4(),
            "lemma_id": lemma_id,
            "language": lang_code,
            "raw": record,
        },
    }
# ---------------------------------------------------------------------------
# DB operations
# ---------------------------------------------------------------------------
async def _flush_batch(conn: AsyncConnection, batch: list[dict]) -> None:
    """Insert one batch of parsed entries into all four tables and commit.

    The annotation uses the directly imported ``AsyncConnection``: the old
    ``sa.ext.asyncio.AsyncConnection`` only resolved because another
    ``from sqlalchemy.ext.asyncio import ...`` happened to set the submodule
    attribute on the ``sa`` package — a fragile import side effect.
    """
    lemma_rows = [e["lemma"] for e in batch]
    sense_rows = [s for e in batch for s in e["senses"]]
    wordform_rows = [w for e in batch for w in e["wordforms"]]
    raw_rows = [e["raw"] for e in batch]
    # Parents before children: lemmas first so FK references resolve.
    if lemma_rows:
        await conn.execute(_lemma_table.insert(), lemma_rows)
    if sense_rows:
        await conn.execute(_sense_table.insert(), sense_rows)
    if wordform_rows:
        await conn.execute(_wordform_table.insert(), wordform_rows)
    if raw_rows:
        await conn.execute(_raw_table.insert(), raw_rows)
    await conn.commit()
async def run_import(lang_code: str, batch_size: int = 1000) -> None:
    """Stream a kaikki JSONL file into Postgres in committed batches.

    Deletes any previously imported rows for *lang_code*, then parses the
    JSONL line by line, flushing every *batch_size* lemmas. Exits the
    process with status 1 when the language mapping or file is missing.
    """
    lang_file = _LANG_FILE_MAP.get(lang_code)
    if not lang_file:
        print(
            f"No file mapping for lang_code={lang_code!r}. Known: {list(_LANG_FILE_MAP)}",
            file=sys.stderr,
        )
        sys.exit(1)
    jsonl_path = _DICT_DIR / lang_file
    if not jsonl_path.exists():
        print(f"JSONL file not found: {jsonl_path}", file=sys.stderr)
        sys.exit(1)
    # NOTE(review): the module docstring advertises a default password of
    # "langlearn" but this fallback uses "changeme" — confirm which matches
    # the docker-compose dev credentials and align the two.
    database_url = os.environ.get(
        "DATABASE_URL",
        "postgresql+asyncpg://langlearn:changeme@localhost:5432/langlearn",
    )
    engine = create_async_engine(database_url, echo=False)
    try:
        async with engine.connect() as conn:
            # NOTE(review): deleting only from dictionary_lemma assumes the
            # sense/wordform/raw tables cascade via FK ON DELETE — confirm
            # in the 0007 migration.
            print(f"Deleting existing entries for language={lang_code!r}...")
            await conn.execute(
                _lemma_table.delete().where(_lemma_table.c.language == lang_code)
            )
            await conn.commit()
            print(f"Importing {jsonl_path} ...")
            batch: list[dict] = []
            total_lemmas = 0  # lemmas committed so far
            skipped = 0  # blank/unparseable/foreign-language lines
            with open(jsonl_path, encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    line = line.strip()
                    if not line:
                        continue
                    # A bad line is reported and skipped, not fatal.
                    try:
                        record = json.loads(line)
                    except json.JSONDecodeError as exc:
                        print(
                            f" Line {line_num}: JSON parse error: {exc}",
                            file=sys.stderr,
                        )
                        skipped += 1
                        continue
                    parsed = _parse_entry(record, lang_code)
                    if parsed is None:
                        skipped += 1
                        continue
                    batch.append(parsed)
                    # Flush in batches to bound memory and give progress output.
                    if len(batch) >= batch_size:
                        await _flush_batch(conn, batch)
                        total_lemmas += len(batch)
                        print(f" Committed {total_lemmas} lemmas...")
                        batch = []
            # Flush whatever is left after the final partial batch.
            if batch:
                await _flush_batch(conn, batch)
                total_lemmas += len(batch)
            print(f"Done. Imported {total_lemmas} lemmas, skipped {skipped} lines.")
    finally:
        await engine.dispose()
def main() -> None:
    """CLI entry point: parse arguments and run the async import."""
    cli = argparse.ArgumentParser(
        description="Import kaikki dictionary JSONL into Postgres."
    )
    cli.add_argument("--lang", required=True, help="Language code to import (e.g. fr)")
    cli.add_argument(
        "--batch-size", type=int, default=1000, help="Rows per commit (default: 1000)"
    )
    options = cli.parse_args()
    asyncio.run(run_import(options.lang, options.batch_size))


if __name__ == "__main__":
    main()

1
dictionaries/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.jsonl

3
dictionaries/README.md Normal file
View file

@ -0,0 +1,3 @@
# Dictionaries
This module contains dictionaries of words, namely from the [Kaikki](https://kaikki.org/dictionary/index.html) project. It is responsible for generating lexical information about words, for both the system and the user, to help describe the language they are using.