Initial commit — memex
A compounding LLM-maintained knowledge wiki. Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's mempalace, with an automation layer on top for conversation mining, URL harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage, hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
This commit is contained in:
118
scripts/mine-conversations.sh
Executable file
118
scripts/mine-conversations.sh
Executable file
@@ -0,0 +1,118 @@
#!/usr/bin/env bash
set -euo pipefail

# mine-conversations.sh — Top-level orchestrator for conversation mining pipeline
#
# Chains: Extract (Python) → Summarize (llama.cpp) → Index (Python)
#
# Usage:
#   mine-conversations.sh                    # Full pipeline
#   mine-conversations.sh --extract-only     # Phase A only (no LLM)
#   mine-conversations.sh --summarize-only   # Phase B only (requires llama-server)
#   mine-conversations.sh --index-only       # Phase C only
#   mine-conversations.sh --project mc       # Filter to one project
#   mine-conversations.sh --dry-run          # Show what would be done
#
# Any argument not listed above is passed through unchanged to the extract
# and summarize scripts.
#
# Environment:
#   WIKI_DIR   Optional override; defaults to the parent of the directory
#              that contains this script.

# Resolve script location first so sibling scripts are found regardless of WIKI_DIR
SCRIPTS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WIKI_DIR="${WIKI_DIR:-$(dirname "${SCRIPTS_DIR}")}"
# The log sits next to the scripts themselves, independent of WIKI_DIR.
LOG_FILE="${SCRIPTS_DIR}/.mine.log"

# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------

# Phase toggles: every phase runs unless a --*-only flag switches the others off.
EXTRACT=true
SUMMARIZE=true
INDEX=true
PROJECT=""     # value given to --project; empty means no project filter
DRY_RUN=""     # set to "--dry-run" when that flag is present, otherwise empty
EXTRA_ARGS=()  # unrecognized arguments, kept in order for pass-through

# parse_args ARG...
#
# Walks the command line and updates the globals declared above.
#   --extract-only / --summarize-only / --index-only  disable the other phases
#   --project NAME                                    store NAME in PROJECT
#   --dry-run                                         set DRY_RUN
#   anything else                                     appended to EXTRA_ARGS
# Exits with status 2 if --project is given without a value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --extract-only)
        SUMMARIZE=false
        INDEX=false
        shift
        ;;
      --summarize-only)
        EXTRACT=false
        INDEX=false
        shift
        ;;
      --index-only)
        EXTRACT=false
        SUMMARIZE=false
        shift
        ;;
      --project)
        # Without this guard, "$2" is unset when --project is the last
        # argument and `set -u` aborts with a bare "unbound variable" error.
        if [[ $# -lt 2 ]]; then
          printf '%s: --project requires a value\n' "${0##*/}" >&2
          exit 2
        fi
        PROJECT="$2"
        shift 2
        ;;
      --dry-run)
        DRY_RUN="--dry-run"
        shift
        ;;
      *)
        EXTRA_ARGS+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# log MESSAGE...
#
# Joins the arguments into one line, prefixes it with a
# "[YYYY-MM-DD HH:MM:SS]" timestamp, prints it to stdout and appends the
# same line to LOG_FILE.
log() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  # printf writes the message literally, whatever characters it contains.
  printf '[%s] %s\n' "${stamp}" "$*" | tee -a "${LOG_FILE}"
}

# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------

# NOTE(review): LOG_FILE is written under SCRIPTS_DIR, which can differ from
# ${WIKI_DIR}/scripts when WIKI_DIR is overridden, so this directory does not
# look required for logging — confirm whether anything else depends on it.
mkdir -p "${WIKI_DIR}/scripts"

log "=== Conversation mining started ==="

# Phase A: Extract

# run_extract_phase
#
# Invokes extract-sessions.py from SCRIPTS_DIR. Passes "--project PROJECT"
# when PROJECT is non-empty, "--dry-run" when DRY_RUN is non-empty, and then
# every pass-through argument held in EXTRA_ARGS. Combined stdout/stderr is
# echoed and appended to LOG_FILE.
run_extract_phase() {
  local -a phase_args=()
  if [[ -n "${PROJECT}" ]]; then
    phase_args+=(--project "${PROJECT}")
  fi
  if [[ -n "${DRY_RUN}" ]]; then
    phase_args+=(--dry-run)
  fi
  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array. A plain
  # "${arr[@]}" is an "unbound variable" error under `set -u` on bash < 4.4.
  python3 "${SCRIPTS_DIR}/extract-sessions.py" \
    ${phase_args[@]+"${phase_args[@]}"} \
    ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"} 2>&1 \
    | tee -a "${LOG_FILE}"
}

if [[ "${EXTRACT}" == true ]]; then
  log "Phase A: Extracting sessions..."
  run_extract_phase
fi

# Phase B: Summarize

# run_summarize_phase
#
# Invokes summarize-conversations.py from SCRIPTS_DIR. Passes
# "--project PROJECT" when PROJECT is non-empty, "--dry-run" when DRY_RUN is
# non-empty, and then every pass-through argument held in EXTRA_ARGS.
# Combined stdout/stderr is echoed and appended to LOG_FILE.
run_summarize_phase() {
  local -a phase_args=()
  if [[ -n "${PROJECT}" ]]; then
    phase_args+=(--project "${PROJECT}")
  fi
  if [[ -n "${DRY_RUN}" ]]; then
    phase_args+=(--dry-run)
  fi
  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array. A plain
  # "${arr[@]}" is an "unbound variable" error under `set -u` on bash < 4.4.
  python3 "${SCRIPTS_DIR}/summarize-conversations.py" \
    ${phase_args[@]+"${phase_args[@]}"} \
    ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"} 2>&1 \
    | tee -a "${LOG_FILE}"
}

if [[ "${SUMMARIZE}" == true ]]; then
  log "Phase B: Summarizing conversations..."
  run_summarize_phase
fi

# Phase C: Index

# run_index_phase
#
# Invokes update-conversation-index.py from SCRIPTS_DIR, adding --reindex
# only when this is not a dry run. PROJECT and EXTRA_ARGS are not forwarded
# to this phase. Combined stdout/stderr is echoed and appended to LOG_FILE.
run_index_phase() {
  local -a phase_args=()
  if [[ -z "${DRY_RUN}" ]]; then
    phase_args+=(--reindex)
  fi
  # On a dry run the array is empty. ${arr[@]+"${arr[@]}"} expands to nothing
  # then, where a plain "${arr[@]}" fails under `set -u` on bash < 4.4.
  python3 "${SCRIPTS_DIR}/update-conversation-index.py" \
    ${phase_args[@]+"${phase_args[@]}"} 2>&1 \
    | tee -a "${LOG_FILE}"
}

if [[ "${INDEX}" == true ]]; then
  log "Phase C: Updating index and context..."
  run_index_phase
fi

# Closing banner; reached only if no enabled phase aborted the script
# (the script runs under `set -euo pipefail`).
log "=== Conversation mining complete ==="
Reference in New Issue
Block a user