Initial commit — memex
A compounding LLM-maintained knowledge wiki. Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's mempalace, with an automation layer on top for conversation mining, URL harvesting, human-in-the-loop staging, staleness decay, and hygiene. Includes: - 11 pipeline scripts (extract, summarize, index, harvest, stage, hygiene, maintain, sync, + shared library) - Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE - Example CLAUDE.md files (wiki schema + global instructions) tuned for the three-collection qmd setup - 171-test pytest suite (cross-platform, runs in ~1.3s) - MIT licensed
This commit is contained in:
476
scripts/update-conversation-index.py
Executable file
476
scripts/update-conversation-index.py
Executable file
@@ -0,0 +1,476 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Update conversation index and context files from summarized conversations.
|
||||
|
||||
Phase C of the conversation mining pipeline. Reads all conversation markdown
|
||||
files and regenerates:
|
||||
- conversations/index.md — catalog organized by project
|
||||
- context/wake-up.md — world briefing from recent conversations
|
||||
- context/active-concerns.md — current blockers and open threads
|
||||
|
||||
Usage:
|
||||
python3 update-conversation-index.py
|
||||
python3 update-conversation-index.py --reindex # Also triggers qmd update
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Root of the wiki checkout; override via the WIKI_DIR environment variable.
WIKI_DIR = Path(os.environ.get("WIKI_DIR", str(Path.home() / "projects" / "wiki")))
# Mined conversation markdown lives here, one subdirectory per project code.
CONVERSATIONS_DIR = WIKI_DIR / "conversations"
# Auto-generated context briefings live here.
CONTEXT_DIR = WIKI_DIR / "context"
INDEX_FILE = CONVERSATIONS_DIR / "index.md"  # catalog of all conversations
WAKEUP_FILE = CONTEXT_DIR / "wake-up.md"  # world-state briefing
CONCERNS_FILE = CONTEXT_DIR / "active-concerns.md"  # blockers / open questions

# ════════════════════════════════════════════════════════════════════════════
# CONFIGURE ME — Project code to display name mapping
# ════════════════════════════════════════════════════════════════════════════
#
# Every project code you use in `extract-sessions.py`'s PROJECT_MAP should
# have a display name here. The conversation index groups conversations by
# these codes and renders them under sections named by the display name.
#
# NOTE: generate_wakeup() splits the display name on " — " (em-dash) to get
# the short project name for its roster table, so keep that separator.
#
# Examples — replace with your own:
PROJECT_NAMES: dict[str, str] = {
    "wiki": "WIKI — This Wiki",
    "cl": "CL — Claude Config",
    # "web": "WEB — My Webapp",
    # "mob": "MOB — My Mobile App",
    # "work": "WORK — Day Job",
    "general": "General — Cross-Project",
}

# Order for display — put your most-active projects first.
# Codes present in PROJECT_NAMES but missing here are counted in totals yet
# never rendered as an index section.
PROJECT_ORDER = [
    # "work", "web", "mob",
    "wiki", "cl", "general",
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Frontmatter parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def parse_frontmatter(file_path: Path) -> dict[str, str]:
    """Parse flat YAML-ish frontmatter from a markdown file.

    Only the simple ``key: value`` lines this pipeline writes are handled;
    nested YAML structures are not supported, and values are kept verbatim
    (surrounding quotes, if any, are not stripped).

    Returns an empty dict when the file has no leading ``---`` block.
    """
    fm: dict[str, str] = {}
    # Explicit UTF-8: plain read_text() uses the platform default encoding
    # (e.g. cp1252 on Windows) and can raise on non-ASCII content.
    content = file_path.read_text(encoding="utf-8")

    # Frontmatter is the text between a leading pair of --- markers.
    match = re.match(r"^---\n(.*?)\n---", content, re.DOTALL)
    if not match:
        return fm

    for line in match.group(1).splitlines():
        if ":" in line:
            # partition() splits on the FIRST colon, so values that contain
            # colons themselves (URLs, timestamps) survive intact.
            key, _, value = line.partition(":")
            fm[key.strip()] = value.strip()

    return fm
|
||||
|
||||
|
||||
def get_summary_line(file_path: Path) -> str:
    """Return the first sentence of the file's ``## Summary`` section.

    The sentence is guaranteed to end with a period and is truncated to at
    most 120 characters. Returns a placeholder string when no Summary
    section is found.
    """
    content = file_path.read_text(encoding="utf-8")
    # The section body ends at a blank line, the next heading, or EOF.
    # \Z handles files whose Summary is the final section with no trailing
    # blank line — previously those fell through to the placeholder.
    match = re.search(r"## Summary\n\n(.+?)(?:\n\n|\n##|\Z)", content, re.DOTALL)
    if not match:
        return "No summary available."

    summary = match.group(1).strip()
    # Naive first-sentence split; good enough for generated summaries.
    first_sentence = summary.split(". ")[0]
    if not first_sentence.endswith("."):
        first_sentence += "."
    # Keep index lines scannable: hard cap at 120 characters.
    if len(first_sentence) > 120:
        first_sentence = first_sentence[:117] + "..."
    return first_sentence
|
||||
|
||||
|
||||
def get_decisions(file_path: Path) -> list[str]:
    """Return the bullet items under the ``## Decisions`` heading.

    Only lines formatted as ``- item`` are collected; the leading ``- `` is
    stripped. Returns an empty list when the section is absent.
    """
    # Explicit UTF-8 so non-ASCII content reads the same on every platform.
    content = file_path.read_text(encoding="utf-8")
    decisions: list[str] = []
    # Section body runs until the next heading, a horizontal rule, or EOF.
    match = re.search(r"## Decisions.*?\n(.*?)(?:\n##|\n---|\Z)", content, re.DOTALL)
    if match:
        for line in match.group(1).strip().splitlines():
            line = line.strip()
            if line.startswith("- "):
                decisions.append(line[2:])
    return decisions
|
||||
|
||||
|
||||
def get_discoveries(file_path: Path) -> list[str]:
    """Return the bullet items under the ``## Discoveries`` heading.

    Mirrors :func:`get_decisions`: only ``- item`` lines are collected, with
    the leading ``- `` stripped; empty list when the section is absent.
    """
    # Explicit UTF-8 so non-ASCII content reads the same on every platform.
    content = file_path.read_text(encoding="utf-8")
    discoveries: list[str] = []
    # Section body runs until the next heading, a horizontal rule, or EOF.
    match = re.search(r"## Discoveries.*?\n(.*?)(?:\n##|\n---|\Z)", content, re.DOTALL)
    if match:
        for line in match.group(1).strip().splitlines():
            line = line.strip()
            if line.startswith("- "):
                discoveries.append(line[2:])
    return discoveries
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Conversation discovery
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def discover_conversations() -> dict[str, list[dict[str, Any]]]:
    """Discover all conversation files under CONVERSATIONS_DIR, by project.

    Only subdirectories whose name appears in PROJECT_NAMES are scanned;
    stray files and unknown project codes are ignored. Each entry carries
    the parsed frontmatter fields plus the file path.

    Returns a mapping of project code -> list of entries.
    """
    by_project: dict[str, list[dict[str, Any]]] = defaultdict(list)

    # A fresh checkout may not have the conversations tree yet; treat that
    # as "no conversations" rather than crashing in iterdir().
    if not CONVERSATIONS_DIR.is_dir():
        return by_project

    for project_dir in sorted(CONVERSATIONS_DIR.iterdir()):
        if not project_dir.is_dir():
            continue

        project_code = project_dir.name
        if project_code not in PROJECT_NAMES:
            continue

        # Reverse lexical order — newest first, assuming date-prefixed
        # filenames (pipeline convention; TODO confirm against extractor).
        # glob("*.md") can never match ".gitkeep", so no explicit skip needed.
        for md_file in sorted(project_dir.glob("*.md"), reverse=True):
            fm = parse_frontmatter(md_file)

            by_project[project_code].append({
                "file": md_file,
                "relative": md_file.relative_to(CONVERSATIONS_DIR),
                "title": fm.get("title", md_file.stem),
                "date": fm.get("date", "unknown"),
                "status": fm.get("status", "extracted"),
                "messages": fm.get("messages", "0"),
                "halls": fm.get("halls", ""),
                "topics": fm.get("topics", ""),
                "project": project_code,
            })

    return by_project
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Index generation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def generate_index(by_project: dict[str, list[dict[str, Any]]]) -> str:
    """Generate the conversations/index.md content.

    Projects are rendered in PROJECT_ORDER; trivial sessions are counted
    in the totals but not listed individually.
    """
    total = sum(len(convos) for convos in by_project.values())
    summarized = sum(
        1
        for convos in by_project.values()
        for c in convos
        if c["status"] == "summarized"
    )
    trivial = sum(
        1
        for convos in by_project.values()
        for c in convos
        if c["status"] == "trivial"
    )
    # Everything that is neither summarized nor trivial is still pending.
    extracted = total - summarized - trivial

    lines = [
        "---",
        "title: Conversation Index",
        "type: index",
        f"last_updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d')}",
        "---",
        "",
        "# Conversation Index",
        "",
        "Mined conversations from Claude Code sessions, organized by project (wing).",
        "",
        f"**{total} conversations** — {summarized} summarized, {extracted} pending, {trivial} trivial.",
        "",
        "---",
        "",
    ]

    # NOTE: only codes listed in PROJECT_ORDER get a section; conversations
    # under other codes are counted in the totals above but never listed.
    for project_code in PROJECT_ORDER:
        convos = by_project.get(project_code, [])
        display_name = PROJECT_NAMES.get(project_code, project_code.upper())

        lines.append(f"## {display_name}")
        lines.append("")

        if not convos:
            lines.append("_No conversations mined yet._")
            lines.append("")
            continue

        # List entries in discovery order; trivial sessions are only counted.
        shown = 0
        for c in convos:
            if c["status"] == "trivial":
                continue

            status_tag = " _(pending summary)_" if c["status"] == "extracted" else ""

            # One-line teaser only for conversations that have a summary.
            summary_text = ""
            if c["status"] == "summarized":
                summary_text = f" — {get_summary_line(c['file'])}"

            lines.append(
                f"- [{c['title']}]({c['relative']})"
                f" ({c['date']}, {c['messages']} msgs)"
                f"{summary_text}{status_tag}"
            )
            shown += 1

        trivial_count = len(convos) - shown
        if trivial_count > 0:
            # The embedded \n yields a blank line before this note once the
            # list is "\n".join()ed.
            lines.append(f"\n_{trivial_count} trivial session(s) not listed._")

        lines.append("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Context generation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def generate_wakeup(by_project: dict[str, list[dict[str, Any]]]) -> str:
    """Generate context/wake-up.md from recent conversations.

    Builds a per-project activity roster plus the most recent decisions and
    discoveries pulled from summarized conversations.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    # --- Per-project activity roster ------------------------------------
    project_activity: dict[str, dict[str, Any]] = {}
    for code in PROJECT_ORDER:
        convos = by_project.get(code, [])
        summarized = [c for c in convos if c["status"] == "summarized"]

        if summarized:
            latest = max(summarized, key=lambda c: c["date"])
            last_date = latest["date"]
            # Activity heuristic: a session within 7 days = Active, within
            # 30 days = Quiet, otherwise Inactive.
            try:
                dt = datetime.strptime(last_date, "%Y-%m-%d")
                days_ago = (datetime.now() - dt).days
                if days_ago <= 7:
                    status = "Active"
                elif days_ago <= 30:
                    status = "Quiet"
                else:
                    status = "Inactive"
            except ValueError:
                # Unparseable (e.g. "unknown") dates land here.
                status = "Unknown"
                last_date = "—"
        elif convos:
            # Extracted-only project: active if the newest session falls in
            # the current month. Comparing the "YYYY-MM" prefix (instead of
            # `date >= today[:7]`) keeps "unknown" dates from lexically
            # sorting above real dates and being misread as recent.
            latest = max(convos, key=lambda c: c["date"])
            last_date = latest["date"]
            status = "Active" if latest["date"][:7] == today[:7] else "Quiet"
        else:
            status = "—"
            last_date = "—"

        project_activity[code] = {
            "status": status,
            "last_date": last_date,
            "count": len(convos),
        }

    # --- Recent decisions / discoveries ---------------------------------
    def _collect_recent(extract, limit: int) -> list[tuple[str, str, str]]:
        """Gather (date, project, text) items from summarized conversations
        via *extract*, newest first, capped at *limit*."""
        items: list[tuple[str, str, str]] = []
        for code, convos in by_project.items():
            for c in convos:
                if c["status"] != "summarized":
                    continue
                for text in extract(c["file"]):
                    items.append((c["date"], code, text))
        items.sort(key=lambda x: x[0], reverse=True)
        return items[:limit]

    recent_decisions = _collect_recent(get_decisions, 10)
    recent_discoveries = _collect_recent(get_discoveries, 5)

    # --- Render ----------------------------------------------------------
    lines = [
        "---",
        "title: Wake-Up Briefing",
        "type: context",
        f"last_updated: {today}",
        "---",
        "",
        "# Wake-Up Briefing",
        "",
        "Auto-generated world state for AI session context.",
        "",
        "## Active Projects",
        "",
        "| Code | Project | Status | Last Activity | Sessions |",
        "|------|---------|--------|---------------|----------|",
    ]

    for code in PROJECT_ORDER:
        if code == "general":
            continue  # Cross-project bucket is not a real project; skip it.
        info = project_activity.get(code, {"status": "—", "last_date": "—", "count": 0})
        # Display name is the part after the em-dash in "CODE — Name".
        display = PROJECT_NAMES.get(code, code).split(" — ")[1] if " — " in PROJECT_NAMES.get(code, "") else code
        lines.append(
            f"| {code.upper()} | {display} | {info['status']} | {info['last_date']} | {info['count']} |"
        )

    lines.append("")

    if recent_decisions:
        lines.append("## Recent Decisions")
        lines.append("")
        for date, proj, decision in recent_decisions[:7]:
            lines.append(f"- **[{proj.upper()}]** {decision} ({date})")
        lines.append("")

    if recent_discoveries:
        lines.append("## Recent Discoveries")
        lines.append("")
        for date, proj, disc in recent_discoveries[:5]:
            lines.append(f"- **[{proj.upper()}]** {disc} ({date})")
        lines.append("")

    if not recent_decisions and not recent_discoveries:
        # Placeholder so the briefing always has a Decisions heading.
        lines.append("## Recent Decisions")
        lines.append("")
        lines.append("_Populated after summarization runs._")
        lines.append("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_concerns(by_project: dict[str, list[dict[str, Any]]]) -> str:
    """Generate context/active-concerns.md from recent conversations.

    Currently renders a focus-area roster plus placeholder Blockers /
    Open Questions sections; the placeholders are meant to be filled once
    summaries carry structured blocker/question data.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    lines = [
        "---",
        "title: Active Concerns",
        "type: context",
        f"last_updated: {today}",
        "---",
        "",
        "# Active Concerns",
        "",
        "Auto-generated from recent conversations. Current blockers, deadlines, and open questions.",
        "",
    ]

    # Rank projects by session count in the current calendar month.
    # Comparing the "YYYY-MM" prefix (rather than `date >= today[:7]`)
    # keeps "unknown" dates from lexically sorting as recent.
    active_projects: list[tuple[str, int]] = []
    for code in PROJECT_ORDER:
        convos = by_project.get(code, [])
        recent = [c for c in convos if c["date"][:7] == today[:7]]
        if recent:
            active_projects.append((code, len(recent)))

    if active_projects:
        active_projects.sort(key=lambda x: x[1], reverse=True)
        lines.append("## Current Focus Areas")
        lines.append("")
        for code, count in active_projects[:5]:
            display = PROJECT_NAMES.get(code, code)
            lines.append(f"- **{display}** — {count} session(s) this month")
        lines.append("")

    lines.extend([
        "## Blockers",
        "",
        "_Populated from conversation analysis._",
        "",
        "## Open Questions",
        "",
        "_Populated from conversation analysis._",
        "",
    ])

    return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main() -> None:
    """Regenerate the conversation index and context files, then optionally
    trigger a qmd reindex (``--reindex``)."""
    parser = argparse.ArgumentParser(
        description="Update conversation index and context files",
    )
    parser.add_argument(
        "--reindex",
        action="store_true",
        help="Also trigger qmd update and embed after updating files",
    )
    args = parser.parse_args()

    # Discover all conversations
    by_project = discover_conversations()

    total = sum(len(v) for v in by_project.values())
    print(f"Found {total} conversation(s) across {len(by_project)} projects.")

    # conversations/index.md — explicit UTF-8 on all writes so output
    # round-trips with the UTF-8 reads regardless of platform default.
    index_content = generate_index(by_project)
    INDEX_FILE.parent.mkdir(parents=True, exist_ok=True)
    INDEX_FILE.write_text(index_content, encoding="utf-8")
    print(f"Updated {INDEX_FILE.relative_to(WIKI_DIR)}")

    # context/wake-up.md
    WAKEUP_FILE.parent.mkdir(parents=True, exist_ok=True)
    wakeup_content = generate_wakeup(by_project)
    WAKEUP_FILE.write_text(wakeup_content, encoding="utf-8")
    print(f"Updated {WAKEUP_FILE.relative_to(WIKI_DIR)}")

    # context/active-concerns.md (same directory today, but create
    # defensively in case the two context paths ever diverge)
    CONCERNS_FILE.parent.mkdir(parents=True, exist_ok=True)
    concerns_content = generate_concerns(by_project)
    CONCERNS_FILE.write_text(concerns_content, encoding="utf-8")
    print(f"Updated {CONCERNS_FILE.relative_to(WIKI_DIR)}")

    # Optionally trigger qmd reindex
    if args.reindex:
        print("Triggering qmd reindex...")
        try:
            subprocess.run(["qmd", "update"], check=True, capture_output=True)
            subprocess.run(["qmd", "embed"], check=True, capture_output=True)
            print("qmd index updated.")
        except FileNotFoundError:
            print("qmd not found — skipping reindex.", file=sys.stderr)
        except subprocess.CalledProcessError as e:
            # capture_output=True swallowed qmd's own stderr; surface it so
            # the failure is actually diagnosable, not just the exit code.
            detail = e.stderr.decode("utf-8", errors="replace").strip() if e.stderr else str(e)
            print(f"qmd reindex failed: {detail}", file=sys.stderr)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user