#!/usr/bin/env python
"""
Clear all rows from dictionary tables and re-import from source JSONL files.

Usage (from api/ directory):

    uv run ./scripts/clear_dictionary.py

    # Dry-run: clear only, no re-import
    uv run ./scripts/clear_dictionary.py --no-import

DATABASE_URL defaults to
postgresql+asyncpg://langlearn:changeme@localhost:5432/langlearn
which matches the docker-compose dev credentials when the DB port is exposed
on the host.
"""

import argparse
import asyncio
import os
import sys
from pathlib import Path

import sqlalchemy as sa
from sqlalchemy.ext.asyncio import create_async_engine

# Re-use table definitions and run_import from the sibling script so there is
# no duplication of schema knowledge.
sys.path.insert(0, str(Path(__file__).parent))

from import_dictionary import (  # noqa: E402
    _LANG_FILE_MAP,
    _lemma_table,
    _raw_table,
    _sense_link_table,
    _sense_table,
    _wordform_table,
    run_import,
)

# Delete order respects foreign-key dependencies:
#   sense_link → sense
#   sense → lemma
#   wordform → lemma
#   raw → lemma
#   lemma (parent)
_DELETE_ORDER = [
    _sense_link_table,
    _sense_table,
    _wordform_table,
    _raw_table,
    _lemma_table,
]


async def clear_all(database_url: str) -> None:
    """Delete every row from the dictionary tables, in FK-safe order.

    Args:
        database_url: SQLAlchemy async connection URL (asyncpg driver).
    """
    engine = create_async_engine(database_url, echo=False)
    try:
        # engine.begin() opens a transaction that commits on success and
        # rolls back on error — same semantics as the previous explicit
        # connect()/commit() pair, but idiomatic and leak-proof.
        async with engine.begin() as conn:
            print("Clearing all dictionary tables...")
            for table in _DELETE_ORDER:
                result = await conn.execute(sa.delete(table))
                print(f" Deleted {result.rowcount} rows from {table.name}")
        print("All dictionary tables cleared.")
    finally:
        await engine.dispose()


async def main(run_reimport: bool, batch_size: int) -> None:
    """Clear all dictionary tables, then optionally re-import each language.

    Args:
        run_reimport: When True, re-import every language in _LANG_FILE_MAP
            after clearing; when False, clear only.
        batch_size: Rows per commit passed through to run_import.
    """
    database_url = os.environ.get(
        "DATABASE_URL",
        "postgresql+asyncpg://langlearn:changeme@localhost:5432/langlearn",
    )
    await clear_all(database_url)
    if not run_reimport:
        return
    for lang_code in _LANG_FILE_MAP:
        print(f"\nRe-importing language={lang_code!r}...")
        await run_import(lang_code, batch_size)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Clear all dictionary tables and optionally re-import."
    )
    parser.add_argument(
        "--no-import",
        action="store_true",
        help="Clear tables only; skip re-import.",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=1000,
        help="Rows per commit during re-import (default: 1000)",
    )
    args = parser.parse_args()
    asyncio.run(main(run_reimport=not args.no_import, batch_size=args.batch_size))