#!/usr/bin/env python3
"""Update conversation index and context files from summarized conversations.

Phase C of the conversation mining pipeline. Reads all conversation markdown
files and regenerates:

- conversations/index.md — catalog organized by project
- context/wake-up.md — world briefing from recent conversations
- context/active-concerns.md — current blockers and open threads

Usage:
    python3 update-conversation-index.py
    python3 update-conversation-index.py --reindex  # Also triggers qmd update
"""

from __future__ import annotations

import argparse
import os
import re
import subprocess
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Wiki root — overridable through the WIKI_DIR environment variable.
WIKI_DIR = Path(os.environ.get("WIKI_DIR", str(Path.home() / "projects" / "wiki")))
CONVERSATIONS_DIR = WIKI_DIR / "conversations"
CONTEXT_DIR = WIKI_DIR / "context"

# Files regenerated on every run.
INDEX_FILE = CONVERSATIONS_DIR / "index.md"
WAKEUP_FILE = CONTEXT_DIR / "wake-up.md"
CONCERNS_FILE = CONTEXT_DIR / "active-concerns.md"

# ════════════════════════════════════════════════════════════════════════════
# CONFIGURE ME — Project code to display name mapping
# ════════════════════════════════════════════════════════════════════════════
#
# Every project code you use in `extract-sessions.py`'s PROJECT_MAP should
# have a display name here. The conversation index groups conversations by
# these codes and renders them under sections named by the display name.
#
# Examples — replace with your own:
PROJECT_NAMES: dict[str, str] = {
    "wiki": "WIKI — This Wiki",
    "cl": "CL — Claude Config",
    # "web": "WEB — My Webapp",
    # "mob": "MOB — My Mobile App",
    # "work": "WORK — Day Job",
    "general": "General — Cross-Project",
}

# Order for display — put your most-active projects first
PROJECT_ORDER = [
    # "work", "web", "mob",
    "wiki",
    "cl",
    "general",
]

# ---------------------------------------------------------------------------
# Frontmatter parsing
# ---------------------------------------------------------------------------


def parse_frontmatter(file_path: Path) -> dict[str, str]:
    """Parse YAML frontmatter from a markdown file.

    Only flat ``key: value`` pairs are recognized; nested structures and
    lines without a colon are silently skipped.

    Returns an empty dict when the file has no leading ``---`` block.
    """
    fm: dict[str, str] = {}
    content = file_path.read_text(encoding="utf-8")
    # Frontmatter is the region between the leading pair of --- markers.
    match = re.match(r"^---\n(.*?)\n---", content, re.DOTALL)
    if not match:
        return fm
    for line in match.group(1).splitlines():
        if ":" in line:
            # partition() keeps any further colons inside the value intact.
            key, _, value = line.partition(":")
            fm[key.strip()] = value.strip()
    return fm


def get_summary_line(file_path: Path) -> str:
    """Extract the first sentence of the ``## Summary`` section.

    The sentence is truncated to 120 characters for index readability.
    Returns ``"No summary available."`` when no Summary section is found.
    """
    content = file_path.read_text(encoding="utf-8")
    # \Z handles a Summary section that runs to EOF (bug fix: the section no
    # longer needs a trailing blank line or following heading to be found).
    match = re.search(r"## Summary\n\n(.+?)(?:\n\n|\n##|\Z)", content, re.DOTALL)
    if not match:
        return "No summary available."
    summary = match.group(1).strip()
    # First sentence only.
    first_sentence = summary.split(". ")[0]
    if not first_sentence.endswith("."):
        first_sentence += "."
    # Truncate if too long
    if len(first_sentence) > 120:
        first_sentence = first_sentence[:117] + "..."
    return first_sentence
def _extract_section_bullets(file_path: Path, heading: str) -> list[str]:
    """Return the "- " bullet items under a ``## <heading>`` section.

    Shared implementation for get_decisions()/get_discoveries() — the two
    were previously duplicated. The section body runs until the next
    heading, a horizontal rule, or end of file.
    """
    content = file_path.read_text(encoding="utf-8")
    items: list[str] = []
    match = re.search(
        rf"## {re.escape(heading)}.*?\n(.*?)(?:\n##|\n---|\Z)", content, re.DOTALL
    )
    if match:
        for line in match.group(1).strip().splitlines():
            line = line.strip()
            if line.startswith("- "):
                items.append(line[2:])  # drop the "- " bullet marker
    return items


def get_decisions(file_path: Path) -> list[str]:
    """Extract decisions from a conversation file."""
    return _extract_section_bullets(file_path, "Decisions")


def get_discoveries(file_path: Path) -> list[str]:
    """Extract discoveries from a conversation file."""
    return _extract_section_bullets(file_path, "Discoveries")


# ---------------------------------------------------------------------------
# Conversation discovery
# ---------------------------------------------------------------------------


def discover_conversations() -> dict[str, list[dict[str, Any]]]:
    """Discover all conversation files organized by project.

    Scans each project subdirectory of CONVERSATIONS_DIR for markdown files,
    newest first (filenames are assumed to sort by date — TODO confirm
    against extract-sessions.py's naming scheme). Directories whose name is
    not a key of PROJECT_NAMES are ignored.
    """
    by_project: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for project_dir in sorted(CONVERSATIONS_DIR.iterdir()):
        if not project_dir.is_dir():
            continue
        project_code = project_dir.name
        if project_code not in PROJECT_NAMES:
            continue
        for md_file in sorted(project_dir.glob("*.md"), reverse=True):
            if md_file.name == ".gitkeep":
                continue
            fm = parse_frontmatter(md_file)
            entry = {
                "file": md_file,
                "relative": md_file.relative_to(CONVERSATIONS_DIR),
                "title": fm.get("title", md_file.stem),
                "date": fm.get("date", "unknown"),
                "status": fm.get("status", "extracted"),
                "messages": fm.get("messages", "0"),
                "halls": fm.get("halls", ""),
                "topics": fm.get("topics", ""),
                "project": project_code,
            }
            by_project[project_code].append(entry)
    return by_project


# ---------------------------------------------------------------------------
# Index generation
# ---------------------------------------------------------------------------
def generate_index(by_project: dict[str, list[dict[str, Any]]]) -> str:
    """Generate the conversations/index.md content.

    Sections follow PROJECT_ORDER. Trivial sessions are counted but not
    listed; unsummarized sessions get a "pending summary" tag.
    """
    # Flatten once, then derive the header counts.
    every = [entry for group in by_project.values() for entry in group]
    total = len(every)
    summarized = sum(entry["status"] == "summarized" for entry in every)
    trivial = sum(entry["status"] == "trivial" for entry in every)
    extracted = total - summarized - trivial

    out: list[str] = [
        "---",
        "title: Conversation Index",
        "type: index",
        f"last_updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d')}",
        "---",
        "",
        "# Conversation Index",
        "",
        "Mined conversations from Claude Code sessions, organized by project (wing).",
        "",
        f"**{total} conversations** — {summarized} summarized, {extracted} pending, {trivial} trivial.",
        "",
        "---",
        "",
    ]

    for code in PROJECT_ORDER:
        group = by_project.get(code, [])
        out.append(f"## {PROJECT_NAMES.get(code, code.upper())}")
        out.append("")

        if not group:
            out.append("_No conversations mined yet._")
            out.append("")
            continue

        # List summarized and pending entries; trivial ones are only counted.
        listed = 0
        for entry in group:
            if entry["status"] == "trivial":
                continue
            pending = " _(pending summary)_" if entry["status"] == "extracted" else ""
            blurb = (
                f" — {get_summary_line(entry['file'])}"
                if entry["status"] == "summarized"
                else ""
            )
            out.append(
                f"- [{entry['title']}]({entry['relative']})"
                f" ({entry['date']}, {entry['messages']} msgs){blurb}{pending}"
            )
            listed += 1

        hidden = len(group) - listed
        if hidden > 0:
            # The embedded \n yields a blank line before the footnote after join.
            out.append(f"\n_{hidden} trivial session(s) not listed._")
        out.append("")

    return "\n".join(out)


# ---------------------------------------------------------------------------
# Context generation
# ---------------------------------------------------------------------------
def generate_wakeup(by_project: dict[str, list[dict[str, Any]]]) -> str:
    """Generate context/wake-up.md from recent conversations.

    Builds a project-activity roster plus the most recent decisions and
    discoveries pulled from summarized conversations.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    this_month = today[:7]

    # Determine activity level per project
    project_activity: dict[str, dict[str, Any]] = {}
    for code in PROJECT_ORDER:
        convos = by_project.get(code, [])
        summarized = [c for c in convos if c["status"] == "summarized"]
        if summarized:
            latest = max(summarized, key=lambda c: c["date"])
            last_date = latest["date"]
            # Simple activity heuristic: sessions in last 7 days = active
            try:
                dt = datetime.strptime(last_date, "%Y-%m-%d")
                days_ago = (datetime.now() - dt).days
                if days_ago <= 7:
                    status = "Active"
                elif days_ago <= 30:
                    status = "Quiet"
                else:
                    status = "Inactive"
            except ValueError:
                # Unparseable date in frontmatter (e.g. "unknown").
                status = "Unknown"
                last_date = "—"
        elif convos:
            # Extracted-only: active if the latest session is from this month.
            # Bug fix: the previous lexicographic `date >= today[:7]` compare
            # wrongly classified "unknown" dates as Active ("u" sorts after "2").
            latest = max(convos, key=lambda c: c["date"])
            last_date = latest["date"]
            status = "Active" if last_date[:7] == this_month else "Quiet"
        else:
            status = "—"
            last_date = "—"
        project_activity[code] = {
            "status": status,
            "last_date": last_date,
            "count": len(convos),
        }

    def _recent(extractor, limit: int) -> list[tuple[str, str, str]]:
        """Collect (date, project, item) tuples from summarized conversations,
        newest first, capped at *limit*. Deduplicates the previously copy-pasted
        decision/discovery gathering loops."""
        items: list[tuple[str, str, str]] = []
        for code, convos in by_project.items():
            for c in convos:
                if c["status"] != "summarized":
                    continue
                for item in extractor(c["file"]):
                    items.append((c["date"], code, item))
        items.sort(key=lambda x: x[0], reverse=True)
        return items[:limit]

    recent_decisions = _recent(get_decisions, 10)
    recent_discoveries = _recent(get_discoveries, 5)

    lines = [
        "---",
        "title: Wake-Up Briefing",
        "type: context",
        f"last_updated: {today}",
        "---",
        "",
        "# Wake-Up Briefing",
        "",
        "Auto-generated world state for AI session context.",
        "",
        "## Active Projects",
        "",
        "| Code | Project | Status | Last Activity | Sessions |",
        "|------|---------|--------|---------------|----------|",
    ]
    for code in PROJECT_ORDER:
        if code == "general":
            continue  # Skip general from roster
        info = project_activity.get(code, {"status": "—", "last_date": "—", "count": 0})
        # Display the part after the em-dash of the configured name, falling
        # back to the bare code when no configured name (or no em-dash) exists.
        name = PROJECT_NAMES.get(code, "")
        display = name.split(" — ")[1] if " — " in name else code
        lines.append(
            f"| {code.upper()} | {display} | {info['status']} | {info['last_date']} | {info['count']} |"
        )
    lines.append("")

    if recent_decisions:
        lines.append("## Recent Decisions")
        lines.append("")
        for date, proj, decision in recent_decisions[:7]:
            lines.append(f"- **[{proj.upper()}]** {decision} ({date})")
        lines.append("")

    if recent_discoveries:
        lines.append("## Recent Discoveries")
        lines.append("")
        for date, proj, disc in recent_discoveries[:5]:
            lines.append(f"- **[{proj.upper()}]** {disc} ({date})")
        lines.append("")

    if not recent_decisions and not recent_discoveries:
        lines.append("## Recent Decisions")
        lines.append("")
        lines.append("_Populated after summarization runs._")
        lines.append("")

    return "\n".join(lines)
def generate_concerns(by_project: dict[str, list[dict[str, Any]]]) -> str:
    """Generate context/active-concerns.md from recent conversations.

    Currently a template plus a "focus areas" roster ranked by this month's
    session count. Future enhancement: parse "blockers" and "open questions"
    from the summaries themselves.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    lines = [
        "---",
        "title: Active Concerns",
        "type: context",
        f"last_updated: {today}",
        "---",
        "",
        "# Active Concerns",
        "",
        "Auto-generated from recent conversations. Current blockers, deadlines, and open questions.",
        "",
    ]

    # Count recent activity to give a sense of what's hot.
    # Bug fix: match the month prefix exactly — the previous lexicographic
    # `c["date"] >= today[:7]` comparison counted "unknown" dates as recent.
    active_projects: list[tuple[str, int]] = []
    for code in PROJECT_ORDER:
        convos = by_project.get(code, [])
        recent = [c for c in convos if c["date"][:7] == today[:7]]  # This month
        if recent:
            active_projects.append((code, len(recent)))

    if active_projects:
        active_projects.sort(key=lambda x: x[1], reverse=True)
        lines.append("## Current Focus Areas")
        lines.append("")
        for code, count in active_projects[:5]:
            display = PROJECT_NAMES.get(code, code)
            lines.append(f"- **{display}** — {count} session(s) this month")
        lines.append("")

    lines.extend([
        "## Blockers",
        "",
        "_Populated from conversation analysis._",
        "",
        "## Open Questions",
        "",
        "_Populated from conversation analysis._",
        "",
    ])
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------


def main() -> None:
    """Regenerate index and context files; optionally reindex with qmd."""
    parser = argparse.ArgumentParser(
        description="Update conversation index and context files",
    )
    parser.add_argument(
        "--reindex",
        action="store_true",
        help="Also trigger qmd update and embed after updating files",
    )
    args = parser.parse_args()

    # Discover all conversations
    by_project = discover_conversations()
    total = sum(len(v) for v in by_project.values())
    print(f"Found {total} conversation(s) across {len(by_project)} projects.")

    # Generate and write index
    index_content = generate_index(by_project)
    INDEX_FILE.parent.mkdir(parents=True, exist_ok=True)
    INDEX_FILE.write_text(index_content)
    print(f"Updated {INDEX_FILE.relative_to(WIKI_DIR)}")

    # Generate and write context files (create dir if needed)
    WAKEUP_FILE.parent.mkdir(parents=True, exist_ok=True)
    wakeup_content = generate_wakeup(by_project)
    WAKEUP_FILE.write_text(wakeup_content)
    print(f"Updated {WAKEUP_FILE.relative_to(WIKI_DIR)}")

    concerns_content = generate_concerns(by_project)
    CONCERNS_FILE.write_text(concerns_content)
    print(f"Updated {CONCERNS_FILE.relative_to(WIKI_DIR)}")

    # Optionally trigger qmd reindex
    if args.reindex:
        print("Triggering qmd reindex...")
        try:
            subprocess.run(["qmd", "update"], check=True, capture_output=True, text=True)
            subprocess.run(["qmd", "embed"], check=True, capture_output=True, text=True)
            print("qmd index updated.")
        except FileNotFoundError:
            print("qmd not found — skipping reindex.", file=sys.stderr)
        except subprocess.CalledProcessError as e:
            # Bug fix: capture_output=True swallows qmd's own diagnostics;
            # surface captured stderr so failures are actionable.
            detail = (e.stderr or "").strip()
            msg = f"qmd reindex failed: {e}"
            if detail:
                msg += f"\n{detail}"
            print(msg, file=sys.stderr)


if __name__ == "__main__":
    main()