100 lines
2.7 KiB
Python
100 lines
2.7 KiB
Python
|
|
#!/usr/bin/env python
|
||
|
|
"""
|
||
|
|
Clear all rows from dictionary tables and re-import from source JSONL files.
|
||
|
|
|
||
|
|
Usage (from api/ directory):
|
||
|
|
uv run ./scripts/clear_dictionary.py
|
||
|
|
|
||
|
|
# Clear only, no re-import (still deletes data — not a true dry run)
|
||
|
|
uv run ./scripts/clear_dictionary.py --no-import
|
||
|
|
|
||
|
|
DATABASE_URL defaults to postgresql+asyncpg://langlearn:langlearn@localhost:5432/langlearn
|
||
|
|
which matches the docker-compose dev credentials when the DB port is exposed on the host.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import asyncio
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
import sqlalchemy as sa
|
||
|
|
from sqlalchemy.ext.asyncio import create_async_engine
|
||
|
|
|
||
|
|
# Re-use table definitions and run_import from the sibling script so there is
# no duplication of schema knowledge.
# Prepend (not append) this script's directory so the sibling module wins over
# any same-named module elsewhere on sys.path.
sys.path.insert(0, str(Path(__file__).parent))

# E402 suppressed deliberately: this import must come after the sys.path edit.
from import_dictionary import (  # noqa: E402
    _LANG_FILE_MAP,
    _lemma_table,
    _raw_table,
    _sense_link_table,
    _sense_table,
    _wordform_table,
    run_import,
)
|
||
|
|
|
||
|
|
# Delete order respects foreign-key dependencies (children before parents, so
# no DELETE ever violates an FK constraint):
#   sense_link → sense
#   sense → lemma
#   wordform → lemma
#   raw → lemma
#   lemma (parent)
# NOTE: the order of this list is load-bearing — do not sort or reorder.
_DELETE_ORDER = [
    _sense_link_table,
    _sense_table,
    _wordform_table,
    _raw_table,
    _lemma_table,
]
|
||
|
|
|
||
|
|
|
||
|
|
async def clear_all(database_url: str) -> None:
    """Delete every row from all dictionary tables.

    Tables are emptied in ``_DELETE_ORDER`` (children before parents) inside a
    single transaction, committed once at the end. The engine is always
    disposed, even if a delete fails.

    Args:
        database_url: SQLAlchemy async connection URL (asyncpg driver).
    """
    engine = create_async_engine(database_url, echo=False)
    try:
        async with engine.connect() as conn:
            print("Clearing all dictionary tables...")
            for tbl in _DELETE_ORDER:
                outcome = await conn.execute(sa.delete(tbl))
                print(f" Deleted {outcome.rowcount} rows from {tbl.name}")
            # One commit for the whole batch: either everything clears or
            # nothing does.
            await conn.commit()
            print("All dictionary tables cleared.")
    finally:
        await engine.dispose()
|
||
|
|
|
||
|
|
|
||
|
|
async def main(run_reimport: bool, batch_size: int) -> None:
    """Clear all dictionary tables, then optionally re-import every language.

    Args:
        run_reimport: When False, stop after clearing (no re-import).
        batch_size: Rows per commit, passed through to ``run_import``.
    """
    # Default matches the docker-compose dev credentials documented in the
    # module docstring (user/password "langlearn"). The previous fallback used
    # a stale "changeme" password that contradicted the docs and failed
    # against the dev database.
    database_url = os.environ.get(
        "DATABASE_URL",
        "postgresql+asyncpg://langlearn:langlearn@localhost:5432/langlearn",
    )

    await clear_all(database_url)

    if not run_reimport:
        return

    # Re-import every known language from its source JSONL file.
    for lang_code in _LANG_FILE_MAP:
        print(f"\nRe-importing language={lang_code!r}...")
        await run_import(lang_code, batch_size)
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # CLI entry point: parse flags, then drive the async workflow.
    cli = argparse.ArgumentParser(
        description="Clear all dictionary tables and optionally re-import."
    )
    cli.add_argument(
        "--no-import",
        action="store_true",
        help="Clear tables only; skip re-import.",
    )
    cli.add_argument(
        "--batch-size",
        type=int,
        default=1000,
        help="Rows per commit during re-import (default: 1000)",
    )
    opts = cli.parse_args()

    # --no-import inverts into run_reimport so main() reads positively.
    asyncio.run(main(run_reimport=not opts.no_import, batch_size=opts.batch_size))
|