# Files
# memex/config.example.yaml
# Eric Turner ee54a2f5d4 Initial commit — memex
# A compounding LLM-maintained knowledge wiki.
#
# Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's
# mempalace, with an automation layer on top for conversation mining, URL
# harvesting, human-in-the-loop staging, staleness decay, and hygiene.
#
# Includes:
# - 11 pipeline scripts (extract, summarize, index, harvest, stage,
#   hygiene, maintain, sync, + shared library)
# - Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
# - Example CLAUDE.md files (wiki schema + global instructions) tuned for
#   the three-collection qmd setup
# - 171-test pytest suite (cross-platform, runs in ~1.3s)
# - MIT licensed
# 2026-04-12 21:16:02 -06:00
#
# 115 lines
# 3.8 KiB
# YAML

# Example configuration — copy to config.yaml and edit for your setup.
#
# This file is NOT currently read by any script (see docs/CUSTOMIZE.md
# "What I'd change if starting over" #1). The scripts use inline
# constants with "CONFIGURE ME" comments instead. This file is a
# template for a future refactor and a reference for what the
# configurable surface looks like.
#
# For now, edit the constants directly in:
#   scripts/extract-sessions.py            (PROJECT_MAP)
#   scripts/update-conversation-index.py   (PROJECT_NAMES, PROJECT_ORDER)
#   scripts/wiki-harvest.py                (SKIP_DOMAIN_PATTERNS)
# ─── Project / wing configuration ──────────────────────────────────────────
projects:
  # Map Claude Code directory suffixes to short project codes (wings).
  # Keys that begin with "-" are quoted so they can never be mistaken for
  # sequence entries.
  map:
    projects-wiki: wiki  # this wiki's own sessions
    '-claude': cl  # ~/.claude config repo
    my-webapp: web  # your project dirs
    mobile-app: mob
    work-monorepo: work
    '-home': general  # catch-all
    '-Users': general

  # Display names for each project code
  names:
    wiki: WIKI — This Wiki
    cl: CL — Claude Config
    web: WEB — My Webapp
    mob: MOB — Mobile App
    work: WORK — Day Job
    general: General — Cross-Project

  # Display order (most-active first)
  order:
    - work
    - web
    - mob
    - wiki
    - cl
    - general
# ─── URL harvesting configuration ──────────────────────────────────────────
harvest:
  # Domains to always skip (internal, ephemeral, personal).
  # Patterns use re.search, so a pattern need not be anchored at the start:
  # a suffix pattern like \.example\.com$ matches the domain and all of its
  # subdomains. Patterns are single-quoted so backslashes stay literal.
  skip_domains:
    - '\.atlassian\.net$'
    - '^app\.asana\.com$'
    - '^(www\.)?slack\.com$'
    - '^(www\.)?discord\.com$'
    - '^mail\.google\.com$'
    - '^calendar\.google\.com$'
    - '^.+\.local$'
    - '^.+\.internal$'
    # Add your own:
    - '\.mycompany\.com$'
    - '^git\.mydomain\.com$'

  # Type C URLs (issue trackers, Q&A) — only harvested if topic covered
  c_type_patterns:
    - '^https?://github\.com/[^/]+/[^/]+/issues/\d+'
    - '^https?://github\.com/[^/]+/[^/]+/pull/\d+'
    - '^https?://(www\.)?stackoverflow\.com/questions/\d+'

  # Fetch behavior
  fetch_delay_seconds: 2
  max_failed_attempts: 3
  min_content_length: 100
  # NOTE(review): unit is not stated; presumably seconds, like
  # fetch_delay_seconds — confirm against scripts/wiki-harvest.py.
  fetch_timeout: 45
# ─── Hygiene / staleness configuration ─────────────────────────────────────
hygiene:
  # Confidence decay thresholds (days since last_verified)
  decay:
    high_to_medium: 180  # 6 months
    medium_to_low: 270  # 9 months (6+3)
    low_to_stale: 365  # 12 months (6+3+3)

  # Pages with body shorter than this are flagged as stubs
  empty_stub_threshold_chars: 100

  # Version regex for technology lifecycle checks (which tools to track).
  # Kept single-quoted and on one line so the backslashes stay literal.
  version_regex: '\b(?:Node(?:\.js)?|Python|Docker|PostgreSQL|MySQL|Redis|Next\.js|NestJS)\s+(\d+(?:\.\d+)?)'
# ─── LLM configuration ─────────────────────────────────────────────────────
llm:
  # Which backend to use for summarization and compilation
  # Options: claude | openai | local | ollama
  backend: claude

  # Routing threshold — sessions/content above this use the larger model
  long_threshold_chars: 20000
  long_threshold_messages: 200

  # Per-backend settings. Each backend gets its own nested mapping so that
  # repeated setting names (short_model, long_model, base_url, model) do not
  # collide as duplicate keys.
  claude:
    short_model: haiku
    long_model: sonnet
    timeout: 600
  openai:
    short_model: gpt-4o-mini
    long_model: gpt-4o
    api_key_env: OPENAI_API_KEY
  local:
    base_url: 'http://localhost:8080/v1'
    model: Phi-4-14B-Q4_K_M
  ollama:
    base_url: 'http://localhost:11434/v1'
    model: 'phi4:14b'