A compounding LLM-maintained knowledge wiki. Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's mempalace, with an automation layer on top for conversation mining, URL harvesting, human-in-the-loop staging, staleness decay, and hygiene. Includes: - 11 pipeline scripts (extract, summarize, index, harvest, stage, hygiene, maintain, sync, + shared library) - Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE - Example CLAUDE.md files (wiki schema + global instructions) tuned for the three-collection qmd setup - 171-test pytest suite (cross-platform, runs in ~1.3s) - MIT licensed
119 lines
3.3 KiB
Bash
Executable File
119 lines
3.3 KiB
Bash
Executable File
#!/usr/bin/env bash
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail

# mine-conversations.sh — Top-level orchestrator for conversation mining pipeline
#
# Chains: Extract (Python) → Summarize (llama.cpp) → Index (Python)
#
# Usage:
#   mine-conversations.sh                     # Full pipeline
#   mine-conversations.sh --extract-only      # Phase A only (no LLM)
#   mine-conversations.sh --summarize-only    # Phase B only (requires llama-server)
#   mine-conversations.sh --index-only        # Phase C only
#   mine-conversations.sh --project mc        # Filter to one project
#   mine-conversations.sh --dry-run           # Show what would be done
#
# Any other argument is forwarded unchanged to the extract and summarize
# scripts.
#
# Environment:
#   WIKI_DIR   Wiki root directory; defaults to the parent of this script's
#              directory.

# Resolve script location first so sibling scripts are found regardless of WIKI_DIR.
# CDPATH is cleared and cd's own output is discarded so that the command
# substitution captures only the path printed by pwd.
SCRIPTS_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"

# Wiki root: a caller-supplied WIKI_DIR wins; otherwise use the parent of the
# directory that holds this script.
WIKI_DIR="${WIKI_DIR:-$(dirname -- "${SCRIPTS_DIR}")}"

# Log file kept next to the scripts; the script appends to it with `tee -a`.
LOG_FILE="${SCRIPTS_DIR}/.mine.log"

# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------

# Phase switches: every phase runs unless an --*-only flag disables the others.
EXTRACT=true
SUMMARIZE=true
INDEX=true
# Project filter value from --project; empty means no filter is passed on.
PROJECT=""
# Set to "--dry-run" when that flag is given; empty otherwise.
DRY_RUN=""
# Unrecognized arguments, collected in their original order.
EXTRA_ARGS=()

#######################################
# Parse command-line arguments into the globals declared above.
# Globals:   EXTRACT, SUMMARIZE, INDEX, PROJECT, DRY_RUN, EXTRA_ARGS (written)
# Arguments: the script's command-line arguments
# Outputs:   a usage error on stderr when --project has no value
# Returns:   0 on success; exits with status 2 when --project has no value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --extract-only)
        SUMMARIZE=false
        INDEX=false
        shift
        ;;
      --summarize-only)
        EXTRACT=false
        INDEX=false
        shift
        ;;
      --index-only)
        EXTRACT=false
        SUMMARIZE=false
        shift
        ;;
      --project)
        # Without this check a trailing --project would read an unset $2 and
        # abort with a bare "unbound variable" message under `set -u`.
        if [[ $# -lt 2 ]]; then
          printf '%s: --project requires a value\n' "${0##*/}" >&2
          exit 2
        fi
        PROJECT="$2"
        shift 2
        ;;
      --dry-run)
        DRY_RUN="--dry-run"
        shift
        ;;
      *)
        EXTRA_ARGS+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

#######################################
# Print a timestamped message and append the same line to the log file.
# Globals:   LOG_FILE (read)
# Arguments: message words, joined into one line via "$*"
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in LOG_FILE
#######################################
log() {
  local msg
  msg="[$(date '+%Y-%m-%d %H:%M:%S')] $*"
  # printf instead of echo so the text is emitted verbatim whatever it contains.
  printf '%s\n' "${msg}" | tee -a "${LOG_FILE}"
}

# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------

#######################################
# Run python3 with the given script and arguments, merging stderr into stdout
# and appending the combined stream to the log file while still printing it.
# Globals:   LOG_FILE (read)
# Arguments: $1 - path of the Python script; remaining - arguments for it
# Returns:   status of the python3 | tee pipeline
#######################################
run_phase() {
  python3 "$@" 2>&1 | tee -a "${LOG_FILE}"
}

mkdir -p "${WIKI_DIR}/scripts"

log "=== Conversation mining started ==="

# Arguments shared by the extract and summarize phases.
filter_args=()
if [[ -n "${PROJECT}" ]]; then
  filter_args+=(--project "${PROJECT}")
fi
if [[ -n "${DRY_RUN}" ]]; then
  filter_args+=(--dry-run)
fi

# NOTE: arrays are expanded as ${arr[@]+"${arr[@]}"} because bash releases
# before 4.4 treat "${arr[@]}" on an empty array as an unbound variable under
# `set -u`, which would abort a plain run with no flags.

# Phase A: Extract
if [[ "${EXTRACT}" == true ]]; then
  log "Phase A: Extracting sessions..."
  run_phase "${SCRIPTS_DIR}/extract-sessions.py" \
    ${filter_args[@]+"${filter_args[@]}"} \
    ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}
fi

# Phase B: Summarize
if [[ "${SUMMARIZE}" == true ]]; then
  log "Phase B: Summarizing conversations..."
  run_phase "${SCRIPTS_DIR}/summarize-conversations.py" \
    ${filter_args[@]+"${filter_args[@]}"} \
    ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}
fi

# Phase C: Index
if [[ "${INDEX}" == true ]]; then
  log "Phase C: Updating index and context..."
  # --reindex is passed only on a real run; a dry run calls the script with
  # no arguments. Neither the project filter nor EXTRA_ARGS is forwarded here.
  index_args=()
  if [[ -z "${DRY_RUN}" ]]; then
    index_args+=(--reindex)
  fi
  run_phase "${SCRIPTS_DIR}/update-conversation-index.py" \
    ${index_args[@]+"${index_args[@]}"}
fi

log "=== Conversation mining complete ==="