A compounding LLM-maintained knowledge wiki.

Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's mempalace, with an automation layer on top for conversation mining, URL harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage, hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
115 lines
3.8 KiB
YAML
115 lines
3.8 KiB
YAML
# Example configuration — copy to config.yaml and edit for your setup.
#
# This file is NOT currently read by any script (see docs/CUSTOMIZE.md
# "What I'd change if starting over" #1). The scripts use inline
# constants with "CONFIGURE ME" comments instead. This file is a
# template for a future refactor and a reference for what the
# configurable surface looks like.
#
# For now, edit the constants directly in:
#   scripts/extract-sessions.py            (PROJECT_MAP)
#   scripts/update-conversation-index.py   (PROJECT_NAMES, PROJECT_ORDER)
#   scripts/wiki-harvest.py                (SKIP_DOMAIN_PATTERNS)
|
|
|
|
# ─── Project / wing configuration ──────────────────────────────────────────
projects:
  # Map Claude Code directory suffixes to short project codes (wings).
  # Keys that begin with a hyphen are single-quoted so they can never be
  # misread as block-sequence entries; the parsed key strings are unchanged.
  map:
    projects-wiki: wiki   # this wiki's own sessions
    '-claude': cl         # ~/.claude config repo
    my-webapp: web        # your project dirs
    mobile-app: mob
    work-monorepo: work
    '-home': general      # catch-all
    '-Users': general

  # Display names for each project code (keys are the codes used in `map`)
  names:
    wiki: WIKI — This Wiki
    cl: CL — Claude Config
    web: WEB — My Webapp
    mob: MOB — Mobile App
    work: WORK — Day Job
    general: General — Cross-Project

  # Display order (most-active first); entries are project codes
  order:
    - work
    - web
    - mob
    - wiki
    - cl
    - general
|
|
|
|
# ─── URL harvesting configuration ──────────────────────────────────────────
harvest:
  # Domains to always skip (internal, ephemeral, personal).
  # Patterns use re.search, so unanchored suffixes like \.example\.com$ work.
  # Patterns are single-quoted so backslashes and regex metacharacters are
  # taken literally by the YAML parser.
  skip_domains:
    - '\.atlassian\.net$'
    - '^app\.asana\.com$'
    - '^(www\.)?slack\.com$'
    - '^(www\.)?discord\.com$'
    - '^mail\.google\.com$'
    - '^calendar\.google\.com$'
    - '^.+\.local$'
    - '^.+\.internal$'
    # Add your own:
    - '\.mycompany\.com$'
    - '^git\.mydomain\.com$'

  # Type C URLs (issue trackers, Q&A) — only harvested if topic covered
  c_type_patterns:
    - '^https?://github\.com/[^/]+/[^/]+/issues/\d+'
    - '^https?://github\.com/[^/]+/[^/]+/pull/\d+'
    - '^https?://(www\.)?stackoverflow\.com/questions/\d+'

  # Fetch behavior
  fetch_delay_seconds: 2
  max_failed_attempts: 3
  min_content_length: 100
  fetch_timeout: 45
|
|
|
|
# ─── Hygiene / staleness configuration ─────────────────────────────────────
hygiene:
  # Confidence decay thresholds (days since last_verified)
  decay:
    high_to_medium: 180   # 6 months
    medium_to_low: 270    # 9 months (6+3)
    low_to_stale: 365     # 12 months (6+3+3)

  # Pages with body shorter than this are flagged as stubs
  empty_stub_threshold_chars: 100

  # Version regex for technology lifecycle checks (which tools to track).
  # Single-quoted so the backslash escapes reach the regex engine unchanged.
  version_regex: '\b(?:Node(?:\.js)?|Python|Docker|PostgreSQL|MySQL|Redis|Next\.js|NestJS)\s+(\d+(?:\.\d+)?)'
|
|
|
|
# ─── LLM configuration ─────────────────────────────────────────────────────
llm:
  # Which backend to use for summarization and compilation
  # Options: claude | openai | local | ollama
  backend: claude

  # Routing threshold — sessions/content above this use the larger model
  long_threshold_chars: 20000
  long_threshold_messages: 200

  # Per-backend settings. Each backend gets its own nested mapping so that
  # repeated key names (short_model, long_model, base_url, model) do not
  # collide as duplicate keys.
  claude:
    short_model: haiku
    long_model: sonnet
    timeout: 600

  openai:
    short_model: gpt-4o-mini
    long_model: gpt-4o
    api_key_env: OPENAI_API_KEY

  local:
    base_url: http://localhost:8080/v1
    model: Phi-4-14B-Q4_K_M

  ollama:
    base_url: http://localhost:11434/v1
    model: phi4:14b
|