# memex/config.example.yaml

# Example configuration — copy to config.yaml and edit for your setup.
#
# This file is NOT currently read by any script (see docs/CUSTOMIZE.md
# "What I'd change if starting over" #1). The scripts use inline
# constants with "CONFIGURE ME" comments instead. This file is a
# template for a future refactor and a reference for what the
# configurable surface looks like.
#
# For now, edit the constants directly in:
#   scripts/extract-sessions.py       (PROJECT_MAP)
#   scripts/update-conversation-index.py  (PROJECT_NAMES, PROJECT_ORDER)
#   scripts/wiki-harvest.py           (SKIP_DOMAIN_PATTERNS)

# ─── Project / wing configuration ──────────────────────────────────────────
projects:
  # Claude Code directory suffix -> short project code ("wing").
  # Keys and values are single-quoted so they are always read as strings,
  # including the keys that begin with "-".
  map:
    'projects-wiki': 'wiki'    # sessions belonging to this wiki itself
    '-claude': 'cl'            # the ~/.claude config repo
    'my-webapp': 'web'         # your own project directories
    'mobile-app': 'mob'
    'work-monorepo': 'work'
    '-home': 'general'         # catch-all
    '-Users': 'general'

  # Human-readable display name for every project code used above.
  names:
    wiki: 'WIKI — This Wiki'
    cl: 'CL — Claude Config'
    web: 'WEB — My Webapp'
    mob: 'MOB — Mobile App'
    work: 'WORK — Day Job'
    general: 'General — Cross-Project'

  # Order in which projects are displayed; list the most active ones first.
  order:
    - 'work'
    - 'web'
    - 'mob'
    - 'wiki'
    - 'cl'
    - 'general'

# ─── URL harvesting configuration ──────────────────────────────────────────
harvest:
  # Domains that are never harvested (internal, ephemeral, personal).
  # Patterns are matched with re.search, so a pattern without a leading ^
  # (for example \.example\.com$) matches on the suffix alone.
  # Each regex is single-quoted: backslashes and other special characters
  # stay literal and cannot be reinterpreted as YAML syntax.
  skip_domains:
    - '\.atlassian\.net$'
    - '^app\.asana\.com$'
    - '^(www\.)?slack\.com$'
    - '^(www\.)?discord\.com$'
    - '^mail\.google\.com$'
    - '^calendar\.google\.com$'
    - '^.+\.local$'
    - '^.+\.internal$'
    # Append your own entries below:
    - '\.mycompany\.com$'
    - '^git\.mydomain\.com$'

  # "Type C" URLs (issue trackers, Q&A sites): harvested only when the
  # topic is covered.
  c_type_patterns:
    - '^https?://github\.com/[^/]+/[^/]+/issues/\d+'
    - '^https?://github\.com/[^/]+/[^/]+/pull/\d+'
    - '^https?://(www\.)?stackoverflow\.com/questions/\d+'

  # Fetch behavior
  # NOTE(review): only fetch_delay_seconds names its unit; confirm the units
  # of fetch_timeout and min_content_length against the consuming script.
  fetch_delay_seconds: 2
  max_failed_attempts: 3
  min_content_length: 100
  fetch_timeout: 45

# ─── Hygiene / staleness configuration ─────────────────────────────────────
hygiene:
  # Confidence decay: each threshold is a number of days since last_verified.
  decay:
    high_to_medium: 180   # about 6 months
    medium_to_low: 270    # about 9 months (6 + 3)
    low_to_stale: 365     # about 12 months (6 + 3 + 3)

  # A page whose body is shorter than this many characters is flagged
  # as a stub.
  empty_stub_threshold_chars: 100

  # Regex used by the technology lifecycle checks; the alternation lists the
  # tools to track and the capture group holds the version number.
  # Keep it single-quoted: inside double quotes YAML would treat the
  # backslash sequences as escapes.
  version_regex: '\b(?:Node(?:\.js)?|Python|Docker|PostgreSQL|MySQL|Redis|Next\.js|NestJS)\s+(\d+(?:\.\d+)?)'

# ─── LLM configuration ─────────────────────────────────────────────────────
llm:
  # Backend used for summarization and compilation.
  # One of: claude | openai | local | ollama
  backend: 'claude'

  # Routing thresholds: sessions/content above these use the larger model.
  # NOTE(review): this file does not say whether exceeding either limit or
  # both limits triggers the larger model; confirm against the scripts.
  long_threshold_chars: 20000
  long_threshold_messages: 200

  # Settings for each backend, keyed by the backend names listed above.
  claude:
    short_model: 'haiku'
    long_model: 'sonnet'
    timeout: 600

  openai:
    short_model: 'gpt-4o-mini'
    long_model: 'gpt-4o'
    # Presumably the name of the environment variable that holds the API
    # key, not the key itself (TODO confirm).
    api_key_env: 'OPENAI_API_KEY'

  local:
    base_url: 'http://localhost:8080/v1'
    model: 'Phi-4-14B-Q4_K_M'

  ollama:
    base_url: 'http://localhost:11434/v1'
    model: 'phi4:14b'