commit 6bc1efd333473d83096e721a1006b1adba58223d Author: wilson Date: Wed Mar 18 20:55:02 2026 +0000 initial commit diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..e843bc9 --- /dev/null +++ b/.env.example @@ -0,0 +1,16 @@ +# Postgres +POSTGRES_USER=langlearn +POSTGRES_PASSWORD=changeme +POSTGRES_DB=langlearn + +# API +API_PORT=8000 + +# Auth — sign JWTs with this secret (use a long random string in production) +JWT_SECRET=replace-with-a-long-random-secret + +# Anthropic +ANTHROPIC_API_KEY=sk-ant-... + +# DeepL (https://www.deepl.com/pro-api) +DEEPL_API_KEY=your-deepl-api-key-here diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..596b93a --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +todo.md +.env diff --git a/Language Learning API/Health.yml b/Language Learning API/Health.yml new file mode 100644 index 0000000..375f968 --- /dev/null +++ b/Language Learning API/Health.yml @@ -0,0 +1,31 @@ +info: + name: Health + type: http + seq: 1 + +http: + method: GET + url: "{{baseUrl}}/health" + auth: inherit + +settings: + encodeUrl: true + timeout: 0 + followRedirects: true + maxRedirects: 5 + +examples: + - name: 200 Response + description: Successful Response + request: + url: "{{baseUrl}}/health" + method: GET + response: + status: 200 + statusText: OK + headers: + - name: Content-Type + value: application/json + body: + type: json + data: "{}" diff --git a/Language Learning API/analysis/Analyze Pos.yml b/Language Learning API/analysis/Analyze Pos.yml new file mode 100644 index 0000000..56023c8 --- /dev/null +++ b/Language Learning API/analysis/Analyze Pos.yml @@ -0,0 +1,99 @@ +info: + name: Analyze Pos + type: http + seq: 1 + tags: + - analysis + +http: + method: POST + url: "{{baseUrl}}/analyze/pos" + body: + type: json + data: |- + { + "text": "This is a test", + "language": "en" + } + auth: + type: bearer + token: "{{token}}" + +runtime: + variables: + - name: baseUrl + value: http://localhost:8000 + +settings: + 
encodeUrl: true + timeout: 0 + followRedirects: true + maxRedirects: 5 + +examples: + - name: 200 Response + description: Successful Response + request: + url: "{{baseUrl}}/analyze/pos" + method: POST + body: + type: json + data: |- + { + "text": "", + "language": "" + } + response: + status: 200 + statusText: OK + headers: + - name: Content-Type + value: application/json + body: + type: json + data: |- + { + "language": "", + "tokens": [ + { + "text": "", + "lemma": "", + "pos": "", + "tag": "", + "dep": "", + "is_stop": false + } + ] + } + - name: 422 Response + description: Validation Error + request: + url: "{{baseUrl}}/analyze/pos" + method: POST + body: + type: json + data: |- + { + "text": "", + "language": "" + } + response: + status: 422 + statusText: Unprocessable Entity + headers: + - name: Content-Type + value: application/json + body: + type: json + data: |- + { + "detail": [ + { + "loc": [], + "msg": "", + "type": "", + "input": "", + "ctx": {} + } + ] + } diff --git a/Language Learning API/analysis/folder.yml b/Language Learning API/analysis/folder.yml new file mode 100644 index 0000000..2d5b3eb --- /dev/null +++ b/Language Learning API/analysis/folder.yml @@ -0,0 +1,7 @@ +info: + name: analysis + type: folder + seq: 1 + +request: + auth: inherit diff --git a/Language Learning API/generation/Create Generation Job.yml b/Language Learning API/generation/Create Generation Job.yml new file mode 100644 index 0000000..06e048c --- /dev/null +++ b/Language Learning API/generation/Create Generation Job.yml @@ -0,0 +1,90 @@ +info: + name: Create Generation Job + type: http + seq: 1 + tags: + - generation + +http: + method: POST + url: "{{baseUrl}}/generate" + body: + type: json + data: |- + { + "target_language": "", + "complexity_level": "", + "input_texts": [], + "topic": "" + } + auth: + type: bearer + token: "{{token}}" + +settings: + encodeUrl: true + timeout: 0 + followRedirects: true + maxRedirects: 5 + +examples: + - name: 202 Response + description: 
Successful Response + request: + url: "{{baseUrl}}/generate" + method: POST + body: + type: json + data: |- + { + "target_language": "", + "complexity_level": "", + "input_texts": [], + "topic": "" + } + response: + status: 202 + statusText: Accepted + headers: + - name: Content-Type + value: application/json + body: + type: json + data: |- + { + "job_id": "" + } + - name: 422 Response + description: Validation Error + request: + url: "{{baseUrl}}/generate" + method: POST + body: + type: json + data: |- + { + "target_language": "", + "complexity_level": "", + "input_texts": [], + "topic": "" + } + response: + status: 422 + statusText: Unprocessable Entity + headers: + - name: Content-Type + value: application/json + body: + type: json + data: |- + { + "detail": [ + { + "loc": [], + "msg": "", + "type": "", + "input": "", + "ctx": {} + } + ] + } diff --git a/Language Learning API/generation/folder.yml b/Language Learning API/generation/folder.yml new file mode 100644 index 0000000..b213120 --- /dev/null +++ b/Language Learning API/generation/folder.yml @@ -0,0 +1,7 @@ +info: + name: generation + type: folder + seq: 1 + +request: + auth: inherit diff --git a/Language Learning API/jobs/Get Job.yml b/Language Learning API/jobs/Get Job.yml new file mode 100644 index 0000000..d091827 --- /dev/null +++ b/Language Learning API/jobs/Get Job.yml @@ -0,0 +1,82 @@ +info: + name: Get Job + type: http + seq: 1 + tags: + - jobs + +http: + method: GET + url: "{{baseUrl}}/jobs/:job_id" + params: + - name: job_id + value: "" + type: path + auth: + type: bearer + token: "{{token}}" + +settings: + encodeUrl: true + timeout: 0 + followRedirects: true + maxRedirects: 5 + +examples: + - name: 200 Response + description: Successful Response + request: + url: "{{baseUrl}}/jobs/:job_id" + method: GET + params: + - name: job_id + value: "" + type: path + response: + status: 200 + statusText: OK + headers: + - name: Content-Type + value: application/json + body: + type: json + data: |- + { + 
"id": "", + "status": "", + "target_language": "", + "complexity_level": "", + "created_at": "", + "generated_text": "", + "input_summary": "", + "error_message": "" + } + - name: 422 Response + description: Validation Error + request: + url: "{{baseUrl}}/jobs/:job_id" + method: GET + params: + - name: job_id + value: "" + type: path + response: + status: 422 + statusText: Unprocessable Entity + headers: + - name: Content-Type + value: application/json + body: + type: json + data: |- + { + "detail": [ + { + "loc": [], + "msg": "", + "type": "", + "input": "", + "ctx": {} + } + ] + } diff --git a/Language Learning API/jobs/Get jobs.yml b/Language Learning API/jobs/Get jobs.yml new file mode 100644 index 0000000..dd2aacd --- /dev/null +++ b/Language Learning API/jobs/Get jobs.yml @@ -0,0 +1,82 @@ +info: + name: Get jobs + type: http + seq: 2 + tags: + - jobs + +http: + method: GET + url: "{{baseUrl}}/jobs/:job_id" + params: + - name: job_id + value: d2130df6-cab2-4407-b35b-45e90dca8555 + type: path + auth: + type: bearer + token: "{{token}}" + +settings: + encodeUrl: true + timeout: 0 + followRedirects: true + maxRedirects: 5 + +examples: + - name: 200 Response + description: Successful Response + request: + url: "{{baseUrl}}/jobs/:job_id" + method: GET + params: + - name: job_id + value: "" + type: path + response: + status: 200 + statusText: OK + headers: + - name: Content-Type + value: application/json + body: + type: json + data: |- + { + "id": "", + "status": "", + "target_language": "", + "complexity_level": "", + "created_at": "", + "generated_text": "", + "input_summary": "", + "error_message": "" + } + - name: 422 Response + description: Validation Error + request: + url: "{{baseUrl}}/jobs/:job_id" + method: GET + params: + - name: job_id + value: "" + type: path + response: + status: 422 + statusText: Unprocessable Entity + headers: + - name: Content-Type + value: application/json + body: + type: json + data: |- + { + "detail": [ + { + "loc": [], + "msg": 
"", + "type": "", + "input": "", + "ctx": {} + } + ] + } diff --git a/Language Learning API/jobs/folder.yml b/Language Learning API/jobs/folder.yml new file mode 100644 index 0000000..3c5f4b9 --- /dev/null +++ b/Language Learning API/jobs/folder.yml @@ -0,0 +1,7 @@ +info: + name: jobs + type: folder + seq: 1 + +request: + auth: inherit diff --git a/Language Learning API/opencollection.yml b/Language Learning API/opencollection.yml new file mode 100644 index 0000000..cf11f5d --- /dev/null +++ b/Language Learning API/opencollection.yml @@ -0,0 +1,31 @@ +opencollection: 1.0.0 + +info: + name: Language Learning API +config: + proxy: + inherit: true + config: + protocol: http + hostname: "" + port: "" + auth: + username: "" + password: "" + bypassProxy: "" + +request: + auth: + type: bearer + token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.sW8dZVeROpNxCHL2HEXym6aDzaobFW17mLPaYbtlyYs + variables: + - name: baseUrl + value: http://localhost:8000 + - name: token + value: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.sW8dZVeROpNxCHL2HEXym6aDzaobFW17mLPaYbtlyYs +bundled: false +extensions: + bruno: + ignore: + - node_modules + - .git diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5efae3f --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +.PHONY: build up down logs shell lock + +build: + docker compose build + +up: + docker compose up -d + +down: + docker compose down + +logs: + docker compose logs -f api + +shell: + docker compose exec api bash + +# Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally) +lock: + cd api && uv pip compile pyproject.toml -o requirements.txt diff --git a/api/Dockerfile b/api/Dockerfile new file mode 100644 index 0000000..14c172d --- /dev/null +++ b/api/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install uv for fast, reproducible installs +RUN pip install --no-cache-dir uv + +# Install Python dependencies from pyproject.toml +COPY 
def verify_token(
    credentials: HTTPAuthorizationCredentials = Depends(security),
) -> dict:
    """Validate the request's bearer JWT and return its decoded claims.

    FastAPI dependency: ``security`` (HTTPBearer) extracts the
    ``Authorization: Bearer <token>`` header and supplies *credentials*.

    Returns:
        The decoded JWT payload as a dict.

    Raises:
        HTTPException: 401 when the token is malformed, expired, or its
            signature does not verify against ``settings.jwt_secret``.
    """
    try:
        # Pin the accepted algorithms to HS256 so tokens claiming
        # ``alg: none`` (or any attacker-chosen algorithm) are rejected.
        payload = jwt.decode(
            credentials.credentials,
            settings.jwt_secret,
            algorithms=["HS256"],
        )
        return payload
    except jwt.InvalidTokenError as exc:
        # RFC 6750: a 401 for bearer-token auth should carry a
        # WWW-Authenticate challenge header. Chain the original error so
        # server logs show *why* decoding failed without leaking it to
        # the client.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token",
            headers={"WWW-Authenticate": "Bearer"},
        ) from exc
from contextlib import asynccontextmanager

from fastapi import FastAPI

from .database import engine, Base
from .routers import pos, generation, jobs


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create any missing database tables before the app starts serving."""
    # NOTE(review): schema creation at startup works for a fresh install but
    # does not migrate existing tables — presumably Alembic comes later.
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)
    yield


app = FastAPI(title="Language Learning API", lifespan=lifespan)

# Mount every feature router on the application.
for _router_module in (pos, generation, jobs):
    app.include_router(_router_module.router)


@app.get("/health")
async def health() -> dict:
    """Unauthenticated liveness probe; always returns a static payload."""
    return {"status": "ok"}
mapped_column(Text, nullable=True) + error_message: Mapped[str | None] = mapped_column(Text, nullable=True) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + default=lambda: datetime.now(timezone.utc), + ) + started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) + completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + default=lambda: datetime.now(timezone.utc), + ) diff --git a/api/app/routers/__init__.py b/api/app/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/app/routers/generation.py b/api/app/routers/generation.py new file mode 100644 index 0000000..23e67f6 --- /dev/null +++ b/api/app/routers/generation.py @@ -0,0 +1,178 @@ +import uuid +from datetime import datetime, timezone + +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException +from pydantic import BaseModel +from sqlalchemy.ext.asyncio import AsyncSession +import anthropic +import deepl + +from ..auth import verify_token +from ..database import get_db, AsyncSessionLocal +from ..models import Job +from ..config import settings + +router = APIRouter(prefix="/generate", tags=["generation"]) + +SUPPORTED_LANGUAGES: dict[str, str] = { + "en": "English", + "fr": "French", + "es": "Spanish", + "it": "Italian", + "de": "German", +} +SUPPORTED_LEVELS = {"A1", "A2", "B1", "B2", "C1", "C2"} + +# Maps our language codes to DeepL source/target language codes +DEEPL_SOURCE_LANG: dict[str, str] = { + "en": "EN", + "fr": "FR", + "es": "ES", + "it": "IT", + "de": "DE", +} +# DeepL target codes (English needs a regional variant) +DEEPL_TARGET_LANG: dict[str, str] = { + "en": "EN-US", + "fr": "FR", + "es": "ES", + "it": "IT", + "de": "DE", +} + + +class GenerationRequest(BaseModel): + target_language: str + complexity_level: str + input_texts: list[str] + topic: str | None = None + 
class GenerationResponse(BaseModel):
    """Body of the 202 response: the id to poll via GET /jobs/{job_id}."""

    job_id: str


async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
    """Background worker: generate learner text for the given job.

    Opens its own session (the request-scoped one is closed by the time a
    BackgroundTask runs), marks the job ``processing``, asks Anthropic to
    generate target-language text and then to translate it back into the
    learner's source language, and records success or failure on the job
    row. Never raises: any error is persisted in ``job.error_message``.
    """
    async with AsyncSessionLocal() as db:
        job = await db.get(Job, job_id)
        if job is None:
            # The row was deleted between scheduling and execution;
            # there is nothing left to report against.
            return

        job.status = "processing"
        job.started_at = datetime.now(timezone.utc)
        await db.commit()

        try:
            from_language = SUPPORTED_LANGUAGES[request.source_language]
            language_name = SUPPORTED_LANGUAGES[request.target_language]

            # Store only a short preview of the input, not the full text.
            topic_part = f"Topic: {request.topic}. " if request.topic else ""
            combined_preview = " ".join(request.input_texts)[:300]
            input_summary = (
                f"{topic_part}Based on {len(request.input_texts)} input text(s): "
                f"{combined_preview}..."
            )

            # Cap the source material at three texts to bound prompt size.
            source_material = "\n\n".join(request.input_texts[:3])
            topic_line = f"\nTopic focus: {request.topic}" if request.topic else ""

            # NOTE(review): the final "Topic focus" sentence reads like a
            # leftover instruction (it describes the *input* topic line as if
            # it were output) — confirm intent before removing.
            prompt = (
                f"You are a language learning content creator. "
                f"Using the input provided, you generate engaging realistic text in {language_name} "
                f"at {request.complexity_level} proficiency level (CEFR scale).\n\n"
                f"The text should:\n"
                f"- Be appropriate for a {request.complexity_level} learner\n"
                f"- Maintain a similar tone to the input text. Where appropriate, use idioms\n"
                f"- Feel natural and authentic, like content a native speaker would read\n"
                f"- Be formatted in markdown with paragraphs and line breaks\n"
                f"- Be 200–400 words long\n"
                f"- Be inspired by the following source material "
                f"(but written originally in {language_name}):\n\n"
                f"{source_material}"
                f"{topic_line}\n\n"
                f"Respond with ONLY the generated text in {language_name}, "
                f"no explanations or translations.\n"
                f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}."
            )

            client = anthropic.Anthropic(api_key=settings.anthropic_api_key)

            message = client.messages.create(
                model="claude-sonnet-4-6",
                max_tokens=1024,
                messages=[{"role": "user", "content": prompt}],
            )
            generated_text = message.content[0].text

            # TODO: Come back to this when DeepL unblock my account for being "high risk"
            # Translate generated text back into the learner's source language via DeepL
            # translator = deepl.Translator(settings.deepl_api_key)
            # translation = translator.translate_text(
            #     generated_text,
            #     source_lang=DEEPL_SOURCE_LANG[request.target_language],
            #     target_lang=DEEPL_TARGET_LANG[request.source_language],
            # )

            translate_prompt = (
                f"You are a helpful assistant that translates text. Translate just the previous summary "
                f"content in {language_name} text you generated based on the input I gave you. Translate "
                f"it back into {from_language}.\n"
                f"- Keep the translation as close as possible to the original meaning and tone\n"
                f"- Send through only the translated text, no explanations or notes\n"
            )

            # Replay the first exchange as conversation history so the model
            # translates its own output rather than a re-pasted copy.
            translate_message = client.messages.create(
                model="claude-sonnet-4-6",
                max_tokens=1024,
                messages=[
                    {"role": "user", "content": prompt},
                    {"role": "assistant", "content": message.content},
                    {"role": "user", "content": translate_prompt},
                ],
            )

            job.status = "succeeded"
            job.generated_text = generated_text
            job.translated_text = translate_message.content[0].text
            job.input_summary = input_summary[:500]  # keep within column size
            job.completed_at = datetime.now(timezone.utc)

        except Exception as exc:
            # A background task has no caller to surface errors to, so the
            # failure is persisted on the job for GET /jobs/{id} to expose.
            job.status = "failed"
            job.error_message = str(exc)
            job.completed_at = datetime.now(timezone.utc)

        await db.commit()


@router.post("", response_model=GenerationResponse, status_code=202)
async def create_generation_job(
    request: GenerationRequest,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> GenerationResponse:
    """Validate the request, persist a pending job, and schedule generation.

    Returns 202 immediately with the job id; clients poll GET /jobs/{id}
    for progress and results.

    Raises:
        HTTPException: 400 for an unsupported source language, target
            language, or CEFR complexity level.
    """
    # Validate source_language too: the worker indexes SUPPORTED_LANGUAGES
    # with it, so an unknown code would otherwise fail the job at runtime
    # with a raw KeyError instead of a 400 here.
    if request.source_language not in SUPPORTED_LANGUAGES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language '{request.source_language}'. "
            f"Supported: {list(SUPPORTED_LANGUAGES)}",
        )
    if request.target_language not in SUPPORTED_LANGUAGES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language '{request.target_language}'. "
            f"Supported: {list(SUPPORTED_LANGUAGES)}",
        )
    if request.complexity_level not in SUPPORTED_LEVELS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported level '{request.complexity_level}'. "
            f"Supported: {sorted(SUPPORTED_LEVELS)}",
        )

    job = Job(
        source_language=request.source_language,
        target_language=request.target_language,
        complexity_level=request.complexity_level,
    )
    db.add(job)
    await db.commit()
    await db.refresh(job)  # populate DB-generated defaults (id, created_at)

    background_tasks.add_task(_run_generation, job.id, request)

    return GenerationResponse(job_id=str(job.id))
@router.get("/{job_id}", response_model=JobResponse)
async def get_job(
    job_id: str,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> JobResponse:
    """Fetch a single generation job by its UUID.

    Result fields are gated by status: ``generated_text`` /
    ``translated_text`` / ``input_summary`` appear only on success,
    ``error_message`` only on failure.

    Raises:
        HTTPException: 400 if *job_id* is not a valid UUID,
            404 if no job with that id exists.
    """
    try:
        uid = uuid.UUID(job_id)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Invalid job ID format") from exc

    job: Job | None = await db.get(Job, uid)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")

    # Pass job.id as-is: the field is typed uuid.UUID, so stringifying it
    # only forces pydantic to re-parse the value.
    response = JobResponse(
        id=job.id,
        status=job.status,
        source_language=job.source_language,
        target_language=job.target_language,
        complexity_level=job.complexity_level,
        created_at=job.created_at,
        started_at=job.started_at,
        completed_at=job.completed_at,
    )

    if job.status == "succeeded":
        response.generated_text = job.generated_text
        response.translated_text = job.translated_text
        response.input_summary = job.input_summary
    elif job.status == "failed":
        response.error_message = job.error_message

    return response
class POSRequest(BaseModel):
    """Request body: the text to analyze and its language code."""

    text: str
    language: str


class TokenInfo(BaseModel):
    """Per-token analysis produced by the spaCy pipeline."""

    text: str
    lemma: str
    pos: str
    tag: str
    dep: str
    is_stop: bool


class POSResponse(BaseModel):
    """Response body: the echoed language code plus analyzed tokens."""

    language: str
    tokens: list[TokenInfo]


@router.post("/pos", response_model=POSResponse)
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
    """Run part-of-speech analysis on *request.text*.

    Loads (or reuses) the spaCy pipeline for *request.language* and
    returns one TokenInfo per non-whitespace token. Defined as a sync
    endpoint, so FastAPI runs the CPU-bound spaCy work in its threadpool.
    """
    nlp = _get_nlp(request.language)
    doc = nlp(request.text)

    tokens: list[TokenInfo] = []
    for tok in doc:
        # Skip pure-whitespace tokens; they carry no linguistic content.
        if tok.is_space:
            continue
        tokens.append(
            TokenInfo(
                text=tok.text,
                lemma=tok.lemma_,
                pos=tok.pos_,
                tag=tok.tag_,
                dep=tok.dep_,
                is_stop=tok.is_stop,
            )
        )

    return POSResponse(language=request.language, tokens=tokens)
"pg_isready -U ${POSTGRES_USER:-langlearn}"] + interval: 5s + timeout: 5s + retries: 10 + + api: + build: ./api + ports: + - "${API_PORT:-8000}:8000" + environment: + DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn} + JWT_SECRET: ${JWT_SECRET} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + DEEPL_API_KEY: ${DEEPL_API_KEY} + depends_on: + db: + condition: service_healthy + restart: unless-stopped + +volumes: + pgdata: