initial commit

This commit is contained in:
wilson 2026-03-18 20:55:02 +00:00
commit 6bc1efd333
25 changed files with 995 additions and 0 deletions

16
.env.example Normal file
View file

@ -0,0 +1,16 @@
# Postgres — user, password and database name read by docker-compose.yml
# (used for the db service and to build the api service's DATABASE_URL)
POSTGRES_USER=langlearn
# Placeholder; docker-compose.yml defines no fallback for this value
POSTGRES_PASSWORD=changeme
POSTGRES_DB=langlearn
# API — host port that docker-compose.yml maps to container port 8000
API_PORT=8000
# Auth — sign JWTs with this secret (use a long random string in production)
JWT_SECRET=replace-with-a-long-random-secret
# Anthropic
ANTHROPIC_API_KEY=sk-ant-...
# DeepL (https://www.deepl.com/pro-api)
DEEPL_API_KEY=your-deepl-api-key-here

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
todo.md
# Local environment file holding real values for the keys in .env.example
.env

View file

@ -0,0 +1,31 @@
# Bruno request: GET {{baseUrl}}/health, the API's liveness endpoint.
# Auth is inherited from the parent; the endpoint itself declares no auth dependency.
info:
  name: Health
  type: http
  seq: 1

http:
  method: GET
  url: "{{baseUrl}}/health"
  auth: inherit

settings:
  encodeUrl: true
  timeout: 0
  followRedirects: true
  maxRedirects: 5

examples:
  - name: 200 Response
    description: Successful Response
    request:
      url: "{{baseUrl}}/health"
      method: GET
    response:
      status: 200
      statusText: OK
      headers:
        - name: Content-Type
          value: application/json
      body:
        type: json
        # Matches what the /health handler returns
        data: '{"status": "ok"}'

View file

@ -0,0 +1,99 @@
# Bruno request: POST {{baseUrl}}/analyze/pos with a bearer token, sending a
# JSON body that carries the text to analyse and its language code.
info:
name: Analyze Pos
type: http
seq: 1
tags:
- analysis
http:
method: POST
url: "{{baseUrl}}/analyze/pos"
body:
type: json
data: |-
{
"text": "This is a test",
"language": "en"
}
auth:
type: bearer
token: "{{token}}"
# NOTE(review): repeats the value of the collection-level baseUrl variable;
# confirm this request-level override is intentional.
runtime:
variables:
- name: baseUrl
value: http://localhost:8000
settings:
encodeUrl: true
timeout: 0
followRedirects: true
maxRedirects: 5
# Saved example exchanges; field values are empty placeholders.
# NOTE(review): the API answers 400 (not 422) for an unsupported language code,
# and no example covers that case.
examples:
- name: 200 Response
description: Successful Response
request:
url: "{{baseUrl}}/analyze/pos"
method: POST
body:
type: json
data: |-
{
"text": "",
"language": ""
}
response:
status: 200
statusText: OK
headers:
- name: Content-Type
value: application/json
body:
type: json
data: |-
{
"language": "",
"tokens": [
{
"text": "",
"lemma": "",
"pos": "",
"tag": "",
"dep": "",
"is_stop": false
}
]
}
- name: 422 Response
description: Validation Error
request:
url: "{{baseUrl}}/analyze/pos"
method: POST
body:
type: json
data: |-
{
"text": "",
"language": ""
}
response:
status: 422
statusText: Unprocessable Entity
headers:
- name: Content-Type
value: application/json
body:
type: json
data: |-
{
"detail": [
{
"loc": [],
"msg": "",
"type": "",
"input": "",
"ctx": {}
}
]
}

View file

@ -0,0 +1,7 @@
# Bruno folder "analysis"; requests inside inherit auth from the parent.
info:
name: analysis
type: folder
seq: 1
request:
auth: inherit

View file

@ -0,0 +1,90 @@
# Bruno request: POST {{baseUrl}}/generate with a bearer token to queue a
# text-generation job. Body values are empty placeholders to fill in before sending.
# NOTE(review): the body omits source_language, which the API accepts and
# defaults to "en".
info:
name: Create Generation Job
type: http
seq: 1
tags:
- generation
http:
method: POST
url: "{{baseUrl}}/generate"
body:
type: json
data: |-
{
"target_language": "",
"complexity_level": "",
"input_texts": [],
"topic": ""
}
auth:
type: bearer
token: "{{token}}"
settings:
encodeUrl: true
timeout: 0
followRedirects: true
maxRedirects: 5
# Saved example exchanges; field values are empty placeholders.
# NOTE(review): the API answers 400 (not 422) for an unsupported language or
# level, and no example covers that case.
examples:
- name: 202 Response
description: Successful Response
request:
url: "{{baseUrl}}/generate"
method: POST
body:
type: json
data: |-
{
"target_language": "",
"complexity_level": "",
"input_texts": [],
"topic": ""
}
response:
status: 202
statusText: Accepted
headers:
- name: Content-Type
value: application/json
body:
type: json
data: |-
{
"job_id": ""
}
- name: 422 Response
description: Validation Error
request:
url: "{{baseUrl}}/generate"
method: POST
body:
type: json
data: |-
{
"target_language": "",
"complexity_level": "",
"input_texts": [],
"topic": ""
}
response:
status: 422
statusText: Unprocessable Entity
headers:
- name: Content-Type
value: application/json
body:
type: json
data: |-
{
"detail": [
{
"loc": [],
"msg": "",
"type": "",
"input": "",
"ctx": {}
}
]
}

View file

@ -0,0 +1,7 @@
# Bruno folder "generation"; requests inside inherit auth from the parent.
info:
name: generation
type: folder
seq: 1
request:
auth: inherit

View file

@ -0,0 +1,82 @@
# Bruno request: GET {{baseUrl}}/jobs/:job_id with a bearer token to fetch one
# job. The job_id path parameter is left empty and must be filled in before sending.
info:
name: Get Job
type: http
seq: 1
tags:
- jobs
http:
method: GET
url: "{{baseUrl}}/jobs/:job_id"
params:
- name: job_id
value: ""
type: path
auth:
type: bearer
token: "{{token}}"
settings:
encodeUrl: true
timeout: 0
followRedirects: true
maxRedirects: 5
# Saved example exchanges; field values are empty placeholders.
# NOTE(review): the 200 example omits source_language, started_at, completed_at
# and translated_text, which the API's JobResponse model also declares. The API
# answers 400 for a malformed id and 404 for an unknown one; neither is covered.
examples:
- name: 200 Response
description: Successful Response
request:
url: "{{baseUrl}}/jobs/:job_id"
method: GET
params:
- name: job_id
value: ""
type: path
response:
status: 200
statusText: OK
headers:
- name: Content-Type
value: application/json
body:
type: json
data: |-
{
"id": "",
"status": "",
"target_language": "",
"complexity_level": "",
"created_at": "",
"generated_text": "",
"input_summary": "",
"error_message": ""
}
- name: 422 Response
description: Validation Error
request:
url: "{{baseUrl}}/jobs/:job_id"
method: GET
params:
- name: job_id
value: ""
type: path
response:
status: 422
statusText: Unprocessable Entity
headers:
- name: Content-Type
value: application/json
body:
type: json
data: |-
{
"detail": [
{
"loc": [],
"msg": "",
"type": "",
"input": "",
"ctx": {}
}
]
}

View file

@ -0,0 +1,82 @@
# Bruno request: GET {{baseUrl}}/jobs/ with a bearer token to list jobs.
# Previously this was a copy of the single-job request pinned to one locally
# generated job id; it now targets the list endpoint its name refers to.
info:
  name: Get jobs
  type: http
  seq: 2
  tags:
    - jobs

http:
  method: GET
  url: "{{baseUrl}}/jobs/"
  auth:
    type: bearer
    token: "{{token}}"

settings:
  encodeUrl: true
  timeout: 0
  followRedirects: true
  maxRedirects: 5

# Saved example exchange; field values are empty placeholders that mirror the
# id/status/created_at summary the list endpoint returns per job.
examples:
  - name: 200 Response
    description: Successful Response
    request:
      url: "{{baseUrl}}/jobs/"
      method: GET
    response:
      status: 200
      statusText: OK
      headers:
        - name: Content-Type
          value: application/json
      body:
        type: json
        data: |-
          {
            "jobs": [
              {
                "id": "",
                "status": "",
                "created_at": ""
              }
            ]
          }

View file

@ -0,0 +1,7 @@
# Bruno folder "jobs"; requests inside inherit auth from the parent.
info:
name: jobs
type: folder
seq: 1
request:
auth: inherit

View file

@ -0,0 +1,31 @@
# Collection root for the Language Learning API requests.
opencollection: 1.0.0

info:
  name: Language Learning API

config:
  proxy:
    inherit: true
    config:
      protocol: http
      hostname: ""
      port: ""
      auth:
        username: ""
        password: ""
      bypassProxy: ""

request:
  # Default auth for requests/folders that declare `auth: inherit`
  auth:
    type: bearer
    token: "{{token}}"
  variables:
    - name: baseUrl
      value: http://localhost:8000
    # Deliberately blank: paste a JWT signed with your local JWT_SECRET here
    # (or supply it from a local environment). Signed tokens must not be committed.
    - name: token
      value: ""

bundled: false

extensions:
  bruno:
    ignore:
      - node_modules
      - .git

20
Makefile Normal file
View file

@ -0,0 +1,20 @@
# Thin wrappers around Docker Compose for day-to-day development.
COMPOSE := docker compose

.PHONY: build up down logs shell lock

# Build the images defined in the compose file
build:
	$(COMPOSE) build

# Start the stack detached
up:
	$(COMPOSE) up -d

# Stop and remove the stack's containers
down:
	$(COMPOSE) down

# Follow the api service's log output
logs:
	$(COMPOSE) logs -f api

# Open a bash shell inside the running api container
shell:
	$(COMPOSE) exec api bash

# Generate a pinned requirements.txt from pyproject.toml (requires uv installed locally)
lock:
	cd api && uv pip compile pyproject.toml -o requirements.txt

23
api/Dockerfile Normal file
View file

@ -0,0 +1,23 @@
# Image for the API service: Python 3.11, dependencies installed via uv,
# spaCy pipelines baked in, served by uvicorn on port 8000.
FROM python:3.11-slim

WORKDIR /app

# uv is the installer used for the project dependencies below
RUN pip install --no-cache-dir uv

# Only pyproject.toml is copied before installing, so this layer is reused
# as long as the dependency list is unchanged
COPY pyproject.toml .
RUN uv pip install --system --no-cache .

# Download one small spaCy pipeline per supported language; abort on the first failure
RUN set -e; \
    for model in en_core_web_sm fr_core_news_sm es_core_news_sm it_core_news_sm de_core_news_sm; do \
        python -m spacy download "$model"; \
    done

# Application source goes in last
COPY app/ ./app/

EXPOSE 8000

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

0
api/app/__init__.py Normal file
View file

24
api/app/auth.py Normal file
View file

@ -0,0 +1,24 @@
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import jwt
from .config import settings
# Extracts the bearer credentials from the request's Authorization header.
security = HTTPBearer()


def verify_token(
    credentials: HTTPAuthorizationCredentials = Depends(security),
) -> dict:
    """Validate the request's bearer JWT and return its decoded claims.

    The token is verified with ``settings.jwt_secret`` and only the HS256
    algorithm is accepted.

    Args:
        credentials: Bearer credentials supplied by the ``HTTPBearer`` dependency.

    Returns:
        The decoded JWT payload.

    Raises:
        HTTPException: 401 with a ``WWW-Authenticate: Bearer`` challenge when
            PyJWT rejects the token (any ``jwt.InvalidTokenError``).
    """
    try:
        return jwt.decode(
            credentials.credentials,
            settings.jwt_secret,
            algorithms=["HS256"],
        )
    except jwt.InvalidTokenError as exc:
        # Chain the cause so server-side tracebacks show why the token failed,
        # while the client only sees the generic message.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token",
            headers={"WWW-Authenticate": "Bearer"},
        ) from exc

13
api/app/config.py Normal file
View file

@ -0,0 +1,13 @@
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application configuration loaded from environment variables and ``.env``.

    All four fields are required strings with no defaults, so constructing
    ``Settings`` fails if any of them is missing.
    """

    # SQLAlchemy connection URL handed to create_async_engine
    database_url: str
    # Secret used to verify HS256-signed JWTs
    jwt_secret: str
    anthropic_api_key: str
    deepl_api_key: str

    # "extra": "ignore" lets the same .env file carry keys meant for other
    # consumers (POSTGRES_*, API_PORT for docker-compose); without it,
    # pydantic-settings rejects unknown dotenv keys at construction time.
    model_config = {"env_file": ".env", "extra": "ignore"}


# Module-level singleton imported by the rest of the application.
settings = Settings()

16
api/app/database.py Normal file
View file

@ -0,0 +1,16 @@
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.orm import DeclarativeBase
from .config import settings
# One engine and one session factory for the whole process. Sessions keep
# their loaded attributes usable after commit (expire_on_commit=False).
engine = create_async_engine(settings.database_url)
AsyncSessionLocal = async_sessionmaker(bind=engine, expire_on_commit=False)


class Base(DeclarativeBase):
    """Declarative base class for the application's ORM models."""


async def get_db():
    """Yield a fresh ``AsyncSession`` that is closed once the caller is done with it."""
    db_session = AsyncSessionLocal()
    async with db_session:
        yield db_session

25
api/app/main.py Normal file
View file

@ -0,0 +1,25 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI
from .database import engine, Base
from .routers import pos, generation, jobs
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: create tables on startup, release the pool on shutdown.

    Startup runs ``Base.metadata.create_all`` inside a transaction, which
    creates any tables that do not exist yet. On shutdown the engine is
    disposed so pooled database connections are closed rather than abandoned.

    Args:
        app: The FastAPI application (unused, required by the lifespan protocol).
    """
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)
    try:
        yield
    finally:
        await engine.dispose()
app = FastAPI(title="Language Learning API", lifespan=lifespan)

# Routers are mounted in the same order as before: pos, generation, jobs.
for _router_module in (pos, generation, jobs):
    app.include_router(_router_module.router)


@app.get("/health")
async def health() -> dict:
    # Liveness check: fixed payload, no auth dependency, no database access.
    return dict(status="ok")

34
api/app/models.py Normal file
View file

@ -0,0 +1,34 @@
import uuid
from datetime import datetime, timezone
from sqlalchemy import String, Text, DateTime
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID
from .database import Base
class Job(Base):
    """A text-generation job and its lifecycle state, stored in the ``jobs`` table."""

    __tablename__ = "jobs"

    # Primary key, generated client-side as a random UUID
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    # Lifecycle state; the application writes "pending", "processing",
    # "succeeded" and "failed"
    status: Mapped[str] = mapped_column(String(20), nullable=False, default="pending")
    source_language: Mapped[str] = mapped_column(String(10), nullable=False, default="en")
    target_language: Mapped[str] = mapped_column(String(10), nullable=False)
    complexity_level: Mapped[str] = mapped_column(String(5), nullable=False)

    # Result fields, filled in by the background generation task
    input_summary: Mapped[str | None] = mapped_column(Text, nullable=True)
    generated_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    translated_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Timestamps are timezone-aware and generated in UTC
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        # Without onupdate the column kept its insert-time value forever
        onupdate=lambda: datetime.now(timezone.utc),
    )

View file

View file

@ -0,0 +1,178 @@
import uuid
from datetime import datetime, timezone
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
import anthropic
import deepl
from ..auth import verify_token
from ..database import get_db, AsyncSessionLocal
from ..models import Job
from ..config import settings
# Routes in this module are served under /generate and tagged "generation".
router = APIRouter(prefix="/generate", tags=["generation"])
# Accepted language codes, mapped to the English language name used in prompts
SUPPORTED_LANGUAGES: dict[str, str] = dict(
    en="English",
    fr="French",
    es="Spanish",
    it="Italian",
    de="German",
)

# Accepted CEFR proficiency levels
SUPPORTED_LEVELS = set("A1 A2 B1 B2 C1 C2".split())

# Maps our language codes to DeepL source language codes (the upper-cased code)
DEEPL_SOURCE_LANG: dict[str, str] = {
    code: code.upper() for code in SUPPORTED_LANGUAGES
}

# DeepL target codes (English needs a regional variant)
DEEPL_TARGET_LANG: dict[str, str] = {**DEEPL_SOURCE_LANG, "en": "EN-US"}
# Request body for POST /generate.
class GenerationRequest(BaseModel):
    # Language code of the text to generate; checked against SUPPORTED_LANGUAGES
    target_language: str
    # CEFR level of the text to generate; checked against SUPPORTED_LEVELS
    complexity_level: str
    # Source material the generated text is based on
    input_texts: list[str]
    # Optional topic hint that is added to the prompt when present
    topic: str | None = None
    # Language code the generated text is translated back into
    source_language: str = "en"


# Response body for POST /generate: the id of the queued job.
class GenerationResponse(BaseModel):
    job_id: str
def _build_input_summary(request: GenerationRequest) -> str:
    """Return a short description of the request to store (not the full input text)."""
    topic_part = f"Topic: {request.topic}. " if request.topic else ""
    combined_preview = " ".join(request.input_texts)[:300]
    return (
        f"{topic_part}Based on {len(request.input_texts)} input text(s): "
        f"{combined_preview}..."
    )


def _build_generation_prompt(request: GenerationRequest, language_name: str) -> str:
    """Compose the generation prompt from at most the first three input texts."""
    source_material = "\n\n".join(request.input_texts[:3])
    topic_line = f"\nTopic focus: {request.topic}" if request.topic else ""
    return (
        f"You are a language learning content creator. "
        f"Using the input provided, you generate engaging realistic text in {language_name} "
        f"at {request.complexity_level} proficiency level (CEFR scale).\n\n"
        f"The text should:\n"
        f"- Be appropriate for a {request.complexity_level} learner\n"
        f"- Maintain a similar tone to the input text. Where appropriate, use idioms\n"
        f"- Feel natural and authentic, like content a native speaker would read\n"
        f"- Be formatted in markdown with paragraphs and line breaks\n"
        # Was "200400": the range separator had been lost
        f"- Be 200-400 words long\n"
        f"- Be inspired by the following source material "
        f"(but written originally in {language_name}):\n\n"
        f"{source_material}"
        f"{topic_line}\n\n"
        f"Respond with ONLY the generated text in {language_name}, "
        f"no explanations or translations.\n"
        f"The 'Topic focus' should be a comma-separated list of up to three topics, in {language_name}."
    )


def _build_translation_prompt(language_name: str, from_language: str) -> str:
    """Compose the follow-up prompt asking for a translation back into ``from_language``."""
    return (
        f"You are a helpful assistant that translates text. Translate just the previous summary "
        f"content in {language_name} text you generated based on the input I gave you. Translate "
        f"it back into {from_language}.\n"
        f"- Keep the translation as close as possible to the original meaning and tone\n"
        f"- Send through only the translated text, no explanations or notes\n"
    )


async def _run_generation(job_id: uuid.UUID, request: GenerationRequest) -> None:
    """Background task: generate the text for a job, translate it, and store the outcome.

    The job is marked "processing" first. On success it ends as "succeeded"
    with the generated text, its translation and an input summary; on any
    exception it ends as "failed" with the exception text in ``error_message``.
    Nothing is raised to the caller.

    Args:
        job_id: Primary key of the ``Job`` row to process.
        request: The original generation request.
    """
    async with AsyncSessionLocal() as db:
        job = await db.get(Job, job_id)
        if job is None:
            # No row to update (e.g. it was removed before the task ran), so
            # bail out instead of failing on attribute access.
            return

        job.status = "processing"
        job.started_at = datetime.now(timezone.utc)
        await db.commit()

        try:
            from_language = SUPPORTED_LANGUAGES[request.source_language]
            language_name = SUPPORTED_LANGUAGES[request.target_language]

            input_summary = _build_input_summary(request)
            prompt = _build_generation_prompt(request, language_name)

            # The async client keeps the event loop free while waiting on the
            # API; the context manager closes its HTTP connections afterwards.
            async with anthropic.AsyncAnthropic(
                api_key=settings.anthropic_api_key
            ) as client:
                message = await client.messages.create(
                    model="claude-sonnet-4-6",
                    max_tokens=1024,
                    messages=[{"role": "user", "content": prompt}],
                )
                generated_text = message.content[0].text

                # TODO: Come back to this when DeepL unblock my account for being "high risk"
                # Translate generated text back into the learner's source language via DeepL
                # translator = deepl.Translator(settings.deepl_api_key)
                # translation = translator.translate_text(
                #     generated_text,
                #     source_lang=DEEPL_SOURCE_LANG[request.target_language],
                #     target_lang=DEEPL_TARGET_LANG[request.source_language],
                # )

                # Until then, ask the model to translate its own output by
                # replaying the first exchange as conversation history.
                translate_message = await client.messages.create(
                    model="claude-sonnet-4-6",
                    max_tokens=1024,
                    messages=[
                        {"role": "user", "content": prompt},
                        {"role": "assistant", "content": message.content},
                        {
                            "role": "user",
                            "content": _build_translation_prompt(
                                language_name, from_language
                            ),
                        },
                    ],
                )

            job.status = "succeeded"
            job.generated_text = generated_text
            job.translated_text = translate_message.content[0].text
            job.input_summary = input_summary[:500]
            job.completed_at = datetime.now(timezone.utc)
        except Exception as exc:
            # Deliberate catch-all: a background task has no caller to report
            # to, so the failure is recorded on the job instead.
            job.status = "failed"
            job.error_message = str(exc)
            job.completed_at = datetime.now(timezone.utc)

        await db.commit()
@router.post("", response_model=GenerationResponse, status_code=202)
async def create_generation_job(
    request: GenerationRequest,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> GenerationResponse:
    """Validate the request, store a new job and schedule its generation.

    Responds 202 with the new job's id; the generation itself runs as a
    background task after the response is sent.

    Raises:
        HTTPException: 400 when the target language, complexity level or
            source language is not supported.
    """
    if request.target_language not in SUPPORTED_LANGUAGES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language '{request.target_language}'. "
            f"Supported: {list(SUPPORTED_LANGUAGES)}",
        )
    if request.complexity_level not in SUPPORTED_LEVELS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported level '{request.complexity_level}'. "
            f"Supported: {sorted(SUPPORTED_LEVELS)}",
        )
    # Reject an unknown source language up front; otherwise the job is created
    # and only fails later in the background task with a bare KeyError message.
    if request.source_language not in SUPPORTED_LANGUAGES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported source language '{request.source_language}'. "
            f"Supported: {list(SUPPORTED_LANGUAGES)}",
        )

    job = Job(
        source_language=request.source_language,
        target_language=request.target_language,
        complexity_level=request.complexity_level,
    )
    db.add(job)
    await db.commit()
    await db.refresh(job)

    background_tasks.add_task(_run_generation, job.id, request)
    return GenerationResponse(job_id=str(job.id))

89
api/app/routers/jobs.py Normal file
View file

@ -0,0 +1,89 @@
import uuid
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from ..auth import verify_token
from ..database import get_db
from ..models import Job
# Routes in this module are served under /jobs and tagged "jobs".
router = APIRouter(prefix="/jobs", tags=["jobs"])


# Full representation of a single job, returned by GET /jobs/{job_id}.
class JobResponse(BaseModel):
    id: uuid.UUID
    status: str
    source_language: str
    target_language: str
    complexity_level: str
    created_at: datetime
    started_at: datetime | None = None
    completed_at: datetime | None = None
    # only present on success
    generated_text: str | None = None
    translated_text: str | None = None
    input_summary: str | None = None
    # only present on failure
    error_message: str | None = None

    model_config = {"from_attributes": True}


# Lightweight per-job entry used in the job listing.
class JobSummary(BaseModel):
    id: uuid.UUID
    status: str
    created_at: datetime

    # Summaries are built from ORM Job rows, so attribute-based validation
    # belongs on this model and not only on the wrapper below.
    model_config = {"from_attributes": True}


# Envelope returned by GET /jobs/.
class JobListResponse(BaseModel):
    jobs: list[JobSummary]

    model_config = {"from_attributes": True}
@router.get("/", response_model=JobListResponse)
async def get_jobs(
    db: AsyncSession = Depends(get_db),
    _: dict = Depends(verify_token),
) -> JobListResponse:
    """Return a summary (id, status, created_at) of every job, newest first.

    Database errors are no longer caught and echoed back as the 500 detail:
    they propagate to the framework, which still answers 500 but without
    exposing internal exception text to the client.
    """
    result = await db.execute(select(Job).order_by(Job.created_at.desc()))
    summaries = [
        JobSummary(id=job.id, status=job.status, created_at=job.created_at)
        for job in result.scalars().all()
    ]
    # Return the declared model rather than a raw dict so the value matches
    # the function's annotation.
    return JobListResponse(jobs=summaries)
@router.get("/{job_id}", response_model=JobResponse)
async def get_job(
job_id: str,
db: AsyncSession = Depends(get_db),
_: dict = Depends(verify_token),
) -> JobResponse:
try:
uid = uuid.UUID(job_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid job ID format")
job: Job | None = await db.get(Job, uid)
if job is None:
raise HTTPException(status_code=404, detail="Job not found")
response = JobResponse(
id=str(job.id),
status=job.status,
source_language=job.source_language,
target_language=job.target_language,
complexity_level=job.complexity_level,
created_at=job.created_at,
started_at=job.started_at,
completed_at=job.completed_at,
)
if job.status == "succeeded":
response.generated_text = job.generated_text
response.translated_text = job.translated_text
response.input_summary = job.input_summary
elif job.status == "failed":
response.error_message = job.error_message
return response

66
api/app/routers/pos.py Normal file
View file

@ -0,0 +1,66 @@
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
import spacy
from ..auth import verify_token
router = APIRouter(prefix="/analyze", tags=["analysis"])

# spaCy pipeline package to load for each supported language code
LANGUAGE_MODELS: dict[str, str] = dict(
    en="en_core_web_sm",
    fr="fr_core_news_sm",
    es="es_core_news_sm",
    it="it_core_news_sm",
    de="de_core_news_sm",
)

# Pipelines already loaded by this process, keyed by language code
_nlp_cache: dict[str, spacy.Language] = {}


def _get_nlp(language: str) -> spacy.Language:
    """Return the spaCy pipeline for ``language``, loading it on first use.

    Raises:
        HTTPException: 400 when ``language`` is not a key of ``LANGUAGE_MODELS``.
    """
    model_name = LANGUAGE_MODELS.get(language)
    if model_name is None:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language '{language}'. Supported: {list(LANGUAGE_MODELS)}",
        )

    pipeline = _nlp_cache.get(language)
    if pipeline is None:
        pipeline = spacy.load(model_name)
        _nlp_cache[language] = pipeline
    return pipeline
# Request body for POST /analyze/pos.
class POSRequest(BaseModel):
    # Text to run through the spaCy pipeline
    text: str
    # Language code selecting the pipeline; must be a key of LANGUAGE_MODELS
    language: str


# One token of the analysed text; each field is copied from the spaCy token
# attribute named in the comment beside it.
class TokenInfo(BaseModel):
    text: str  # token.text
    lemma: str  # token.lemma_
    pos: str  # token.pos_
    tag: str  # token.tag_
    dep: str  # token.dep_
    is_stop: bool  # token.is_stop


# Response body for POST /analyze/pos: the requested language and its tokens.
class POSResponse(BaseModel):
    language: str
    tokens: list[TokenInfo]
@router.post("/pos", response_model=POSResponse)
def analyze_pos(request: POSRequest, _: dict = Depends(verify_token)) -> POSResponse:
    # Run the text through the pipeline for the requested language and report
    # every token except whitespace-only ones.
    pipeline = _get_nlp(request.language)

    token_infos: list[TokenInfo] = []
    for tok in pipeline(request.text):
        if tok.is_space:
            continue
        token_infos.append(
            TokenInfo(
                text=tok.text,
                lemma=tok.lemma_,
                pos=tok.pos_,
                tag=tok.tag_,
                dep=tok.dep_,
                is_stop=tok.is_stop,
            )
        )

    return POSResponse(language=request.language, tokens=token_infos)

22
api/pyproject.toml Normal file
View file

@ -0,0 +1,22 @@
# Package metadata and dependencies for the API service.
[project]
name = "language-learning-api"
version = "0.1.0"
requires-python = ">=3.11"
# Minimum versions only; `make lock` compiles a pinned requirements.txt from this list
dependencies = [
"fastapi>=0.115.0",
"uvicorn[standard]>=0.30.0",
"sqlalchemy[asyncio]>=2.0.0",
"asyncpg>=0.30.0",
"spacy>=3.8.0",
"anthropic>=0.40.0",
"pyjwt>=2.10.0",
"pydantic-settings>=2.0.0",
"deepl>=1.18.0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# The wheel ships the app/ package
[tool.hatch.build.targets.wheel]
packages = ["app"]

31
docker-compose.yml Normal file
View file

@ -0,0 +1,31 @@
# Local stack: a Postgres database plus the API image built from ./api.
services:
  db:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-langlearn}
      # Required: `:?` aborts with this message when the variable is unset or
      # empty, instead of interpolating a blank password.
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD in .env}"
      POSTGRES_DB: ${POSTGRES_DB:-langlearn}
    volumes:
      - pgdata:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-langlearn}"]
      interval: 5s
      timeout: 5s
      retries: 10

  api:
    build: ./api
    ports:
      - "${API_PORT:-8000}:8000"
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-langlearn}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-langlearn}
      # Required: a blank secret would otherwise be passed to the API for
      # JWT verification.
      JWT_SECRET: "${JWT_SECRET:?set JWT_SECRET in .env}"
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      DEEPL_API_KEY: ${DEEPL_API_KEY}
    # Start the API only after the database healthcheck passes
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped

volumes:
  pgdata: