Files
memex/scripts/mine-conversations.sh
Eric Turner ee54a2f5d4 Initial commit — memex
A compounding LLM-maintained knowledge wiki.

Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's
mempalace, with an automation layer on top for conversation mining, URL
harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage,
  hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for
  the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
2026-04-12 21:16:02 -06:00

119 lines
3.3 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail
# mine-conversations.sh — Top-level orchestrator for conversation mining pipeline
#
# Chains: Extract (Python) → Summarize (llama.cpp) → Index (Python)
#
# Usage:
# mine-conversations.sh # Full pipeline
# mine-conversations.sh --extract-only # Phase A only (no LLM)
# mine-conversations.sh --summarize-only # Phase B only (requires llama-server)
# mine-conversations.sh --index-only # Phase C only
# mine-conversations.sh --project mc # Filter to one project
# mine-conversations.sh --dry-run # Show what would be done
# Resolve this script's own directory first so sibling scripts are found
# regardless of WIKI_DIR.
#   - CDPATH is cleared for the cd: when cd resolves a relative directory via
#     CDPATH it prints the resulting path, which would otherwise be captured
#     twice by the command substitution.
#   - "--" ends option parsing so a path beginning with "-" is handled safely.
SCRIPTS_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# WIKI_DIR may be supplied by the environment; when unset or empty it defaults
# to the parent directory of SCRIPTS_DIR.
WIKI_DIR="${WIKI_DIR:-$(dirname -- "${SCRIPTS_DIR}")}"
# Log file appended to by every phase; it lives next to the scripts.
LOG_FILE="${SCRIPTS_DIR}/.mine.log"
# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------
#######################################
# Parse command-line options into the global pipeline settings.
# Globals (written):
#   EXTRACT, SUMMARIZE, INDEX - "true"/"false" switches, one per phase
#   PROJECT                   - value given to --project ("" when absent)
#   DRY_RUN                   - "--dry-run" when requested, otherwise ""
#   EXTRA_ARGS                - array of every argument not recognised here
# Arguments:
#   The script's command-line arguments.
# Outputs:
#   Writes an error to stderr when --project has no value.
# Returns:
#   0 on success; exits the shell with status 1 when --project has no value.
#######################################
parse_args() {
  EXTRACT=true
  SUMMARIZE=true
  INDEX=true
  PROJECT=""
  DRY_RUN=""
  EXTRA_ARGS=()

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --extract-only)
        SUMMARIZE=false
        INDEX=false
        shift
        ;;
      --summarize-only)
        EXTRACT=false
        INDEX=false
        shift
        ;;
      --index-only)
        EXTRACT=false
        SUMMARIZE=false
        shift
        ;;
      --project)
        # Without this check a trailing "--project" dereferences an unset $2
        # and, under `set -u`, dies with an opaque "unbound variable" error.
        # Status 1 matches what that failure produced.
        if [[ $# -lt 2 ]]; then
          printf '%s: --project requires a value\n' "${0##*/}" >&2
          exit 1
        fi
        PROJECT="$2"
        shift 2
        ;;
      --dry-run)
        DRY_RUN="--dry-run"
        shift
        ;;
      *)
        # Unrecognised arguments are collected for the pipeline to forward.
        EXTRA_ARGS+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
#######################################
# Emit a timestamped message.
# Globals:   LOG_FILE (read) - file the message is appended to
# Arguments: the words of the message; joined with "$*"
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout and appended to LOG_FILE
#######################################
log() {
  local stamp line
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  line="[${stamp}] $*"
  printf '%s\n' "${line}" | tee -a "${LOG_FILE}"
}
# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------
# Kept as in the original: make sure ${WIKI_DIR}/scripts exists.
# NOTE(review): LOG_FILE lives under SCRIPTS_DIR, which can differ from
# ${WIKI_DIR}/scripts when WIKI_DIR is overridden — confirm this is still needed.
mkdir -p "${WIKI_DIR}/scripts"

log "=== Conversation mining started ==="

# Options forwarded to both Phase A and Phase B.
phase_args=()
if [[ -n "${PROJECT}" ]]; then
  phase_args+=(--project "${PROJECT}")
fi
if [[ -n "${DRY_RUN}" ]]; then
  phase_args+=(--dry-run)
fi

# Array expansions below use ${arr[@]+"${arr[@]}"}: under `set -u`, Bash older
# than 4.4 treats "${arr[@]}" on an empty array as an unbound variable and
# aborts. The "+" form expands to nothing in that case and to the quoted
# elements otherwise.

# Phase A: Extract
if [[ "${EXTRACT}" == true ]]; then
  log "Phase A: Extracting sessions..."
  python3 "${SCRIPTS_DIR}/extract-sessions.py" \
    ${phase_args[@]+"${phase_args[@]}"} \
    ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"} 2>&1 \
    | tee -a "${LOG_FILE}"
fi

# Phase B: Summarize
if [[ "${SUMMARIZE}" == true ]]; then
  log "Phase B: Summarizing conversations..."
  python3 "${SCRIPTS_DIR}/summarize-conversations.py" \
    ${phase_args[@]+"${phase_args[@]}"} \
    ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"} 2>&1 \
    | tee -a "${LOG_FILE}"
fi

# Phase C: Index
if [[ "${INDEX}" == true ]]; then
  log "Phase C: Updating index and context..."
  # --reindex is passed only on a real run; a dry run passes no options.
  # This phase receives neither --project nor EXTRA_ARGS.
  index_args=()
  if [[ -z "${DRY_RUN}" ]]; then
    index_args+=(--reindex)
  fi
  python3 "${SCRIPTS_DIR}/update-conversation-index.py" \
    ${index_args[@]+"${index_args[@]}"} 2>&1 \
    | tee -a "${LOG_FILE}"
fi

log "=== Conversation mining complete ==="