Files
memex/scripts/wiki-maintain.sh
Eric Turner ee54a2f5d4 Initial commit — memex
A compounding LLM-maintained knowledge wiki.

Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's
mempalace, with an automation layer on top for conversation mining, URL
harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage,
  hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for
  the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
2026-04-12 21:16:02 -06:00

199 lines
6.6 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail
# wiki-maintain.sh — Top-level orchestrator for wiki maintenance.
#
# Chains the three maintenance scripts in the correct order:
# 1. wiki-harvest.py (URL harvesting from summarized conversations)
# 2. wiki-hygiene.py (quick or full hygiene checks)
# 3. qmd update && qmd embed (reindex after changes)
#
# Usage:
# wiki-maintain.sh # Harvest + quick hygiene
# wiki-maintain.sh --full # Harvest + full hygiene (LLM-powered)
# wiki-maintain.sh --harvest-only # URL harvesting only
# wiki-maintain.sh --hygiene-only # Quick hygiene only
# wiki-maintain.sh --hygiene-only --full # Full hygiene only
# wiki-maintain.sh --dry-run # Show what would run (no writes)
# wiki-maintain.sh --no-compile # Harvest without claude -p compilation step
# wiki-maintain.sh --no-reindex # Skip qmd update/embed after
#
# Log file: scripts/.maintain.log (rotated manually)
# Work out where this script lives before anything else: sibling scripts are
# always located relative to it, whatever WIKI_DIR is. WIKI_DIR itself falls
# back to the directory above scripts/ when the caller has not supplied a
# non-empty value (tests and alternate installs may supply their own).
script_path="${BASH_SOURCE[0]}"
SCRIPTS_DIR="$(cd "$(dirname "${script_path}")" && pwd)"
if [[ -z "${WIKI_DIR:-}" ]]; then
  WIKI_DIR="$(dirname "${SCRIPTS_DIR}")"
fi
LOG_FILE="${SCRIPTS_DIR}/.maintain.log"
# -----------------------------------------------------------------------------
# Argument parsing
# -----------------------------------------------------------------------------
# Every option is a plain switch stored as the literal string "true" or
# "false"; all of them default to "false". Unknown options and the combination
# --harvest-only + --hygiene-only are rejected with exit status 1 and a message
# on stderr. -h/--help prints the usage header of this file and exits 0.
FULL_MODE=false
HARVEST_ONLY=false
HYGIENE_ONLY=false
DRY_RUN=false
NO_COMPILE=false
NO_REINDEX=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --full)         FULL_MODE=true ;;
    --harvest-only) HARVEST_ONLY=true ;;
    --hygiene-only) HYGIENE_ONLY=true ;;
    --dry-run)      DRY_RUN=true ;;
    --no-compile)   NO_COMPILE=true ;;
    --no-reindex)   NO_REINDEX=true ;;
    -h|--help)
      # Emit lines 3-20 of this file with the leading "# " (or bare "#")
      # removed. -E selects extended regular expressions, where `?` is a
      # standard operator; the basic-regex spelling `\?` is a GNU extension
      # and is not reliably understood by other sed implementations.
      sed -n '3,20p' "$0" | sed -E 's/^# ?//'
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
  shift
done

if [[ "${HARVEST_ONLY}" == "true" && "${HYGIENE_ONLY}" == "true" ]]; then
  echo "--harvest-only and --hygiene-only are mutually exclusive" >&2
  exit 1
fi
# -----------------------------------------------------------------------------
# Logging
# -----------------------------------------------------------------------------
# Print one line to stdout: "[YYYY-MM-DD HH:MM:SS] " followed by all arguments
# joined into a single string.
log() {
  local message="$*"
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] %s\n' "${stamp}" "${message}"
}
# Print a blank separator line, then hand the arguments to log wrapped in a
# heavy-rule banner so each pipeline stage is easy to spot in the output.
section() {
  printf '\n'
  log "━━━ $* ━━━"
}
# -----------------------------------------------------------------------------
# Sanity checks
# -----------------------------------------------------------------------------
# Abort early (exit 1, message on stderr) when the wiki directory or a required
# external command is missing, and make the wiki directory the working
# directory for everything that follows.
if [[ ! -d "${WIKI_DIR}" ]]; then
  echo "Wiki directory not found: ${WIKI_DIR}" >&2
  exit 1
fi
cd "${WIKI_DIR}"

# python3 runs the harvest and hygiene scripts. qmd is only invoked by the
# reindex phase, and that phase is skipped under --no-reindex as well as under
# --dry-run, so qmd is demanded only when the reindex will actually happen.
required_cmds=(python3)
if [[ "${NO_REINDEX}" != "true" && "${DRY_RUN}" != "true" ]]; then
  required_cmds+=(qmd)
fi
for req in "${required_cmds[@]}"; do
  if ! command -v "${req}" >/dev/null 2>&1; then
    echo "Required command not found: ${req}" >&2
    exit 1
  fi
done
# -----------------------------------------------------------------------------
# Pipeline
# -----------------------------------------------------------------------------
# Record the start time (epoch seconds, used later for the duration) and log
# the effective configuration of this run.
START_TS="$(date '+%s')"

# Run the flag value given as $1 (the builtin `true` or `false`) and echo $2
# when it succeeds, $3 otherwise. $1 is deliberately left unquoted so it is
# executed as a command word.
pick_label() {
  if $1; then
    echo "$2"
  else
    echo "$3"
  fi
}

section "wiki-maintain.sh starting"
log "mode: $(pick_label "${FULL_MODE}" full quick)"
log "harvest: $(pick_label "${HYGIENE_ONLY}" skipped enabled)"
log "hygiene: $(pick_label "${HARVEST_ONLY}" skipped enabled)"
log "reindex: $(pick_label "${NO_REINDEX}" skipped enabled)"
log "dry-run: ${DRY_RUN}"
log "wiki: ${WIKI_DIR}"
# -----------------------------------------------------------------------------
# Phase 1: Harvest
# -----------------------------------------------------------------------------
# Runs wiki-harvest.py from SCRIPTS_DIR unless --hygiene-only was given,
# forwarding --dry-run and --no-compile when set. A failing harvest is logged
# with its exit status and the pipeline carries on.
if [[ "${HYGIENE_ONLY}" != "true" ]]; then
  section "Phase 1: URL harvesting"

  harvest_args=()
  if [[ "${DRY_RUN}" == "true" ]]; then
    harvest_args+=(--dry-run)
  fi
  if [[ "${NO_COMPILE}" == "true" ]]; then
    harvest_args+=(--no-compile)
  fi

  # ${arr[@]+"${arr[@]}"} expands to nothing when the array is empty. A bare
  # "${arr[@]}" on an empty array is an "unbound variable" error under
  # `set -u` on bash older than 4.4, and the array is empty in the default
  # invocation.
  harvest_rc=0
  python3 "${SCRIPTS_DIR}/wiki-harvest.py" \
    ${harvest_args[@]+"${harvest_args[@]}"} || harvest_rc=$?

  if [[ "${harvest_rc}" -eq 0 ]]; then
    log "harvest completed"
  else
    log "[error] harvest failed (exit ${harvest_rc}) — continuing to hygiene"
  fi
else
  section "Phase 1: URL harvesting (skipped)"
fi
# -----------------------------------------------------------------------------
# Phase 2: Hygiene
# -----------------------------------------------------------------------------
# Runs wiki-hygiene.py from SCRIPTS_DIR unless --harvest-only was given,
# forwarding --full and --dry-run when set. A failing hygiene run is logged
# with its exit status and the pipeline carries on.
if [[ "${HARVEST_ONLY}" != "true" ]]; then
  section "Phase 2: Hygiene checks"

  hygiene_args=()
  if [[ "${FULL_MODE}" == "true" ]]; then
    hygiene_args+=(--full)
  fi
  if [[ "${DRY_RUN}" == "true" ]]; then
    hygiene_args+=(--dry-run)
  fi

  # ${arr[@]+"${arr[@]}"} expands to nothing when the array is empty. A bare
  # "${arr[@]}" on an empty array is an "unbound variable" error under
  # `set -u` on bash older than 4.4, and the array is empty in the default
  # (quick, non-dry-run) invocation.
  hygiene_rc=0
  python3 "${SCRIPTS_DIR}/wiki-hygiene.py" \
    ${hygiene_args[@]+"${hygiene_args[@]}"} || hygiene_rc=$?

  if [[ "${hygiene_rc}" -eq 0 ]]; then
    log "hygiene completed"
  else
    log "[error] hygiene failed (exit ${hygiene_rc}) — continuing to reindex"
  fi
else
  section "Phase 2: Hygiene checks (skipped)"
fi
# -----------------------------------------------------------------------------
# Phase 3: qmd reindex
# -----------------------------------------------------------------------------
# Skipped under --no-reindex or --dry-run. Otherwise runs `qmd update` and then
# `qmd embed`, passing their combined stdout/stderr through sed to prefix each
# line. Success of each pipeline is judged by its overall status, so a qmd
# failure is only seen when pipefail is in effect (set at the top of the
# script). Neither failure stops the run; each is just logged.
if [[ "${NO_REINDEX}" == "true" || "${DRY_RUN}" == "true" ]]; then
  section "Phase 3: qmd reindex (skipped)"
else
  section "Phase 3: qmd reindex"

  update_rc=0
  qmd update 2>&1 | sed 's/^/ /' || update_rc=$?
  case "${update_rc}" in
    0) log "qmd update completed" ;;
    *) log "[error] qmd update failed (exit ${update_rc})" ;;
  esac

  if ! qmd embed 2>&1 | sed 's/^/ /'; then
    log "[warn] qmd embed failed or produced warnings"
  else
    log "qmd embed completed"
  fi
fi
# -----------------------------------------------------------------------------
# Summary
# -----------------------------------------------------------------------------
# Log the elapsed wall-clock time, then name the most recently modified hygiene
# report of each kind, if a reports/ directory exists and contains any.
END_TS="$(date '+%s')"
DURATION=$((END_TS - START_TS))
section "wiki-maintain.sh finished in ${DURATION}s"

# Plain `if` blocks are used throughout (never `[[ ... ]] && action`): under
# `set -e`, a false test as the last command would become the exit status.
if [[ -d "${WIKI_DIR}/reports" ]]; then
  # Each entry is "<label used in the log line>:<file-name suffix>".
  for report_spec in "fixed:fixed" "review:needs-review"; do
    report_label="${report_spec%%:*}"
    report_suffix="${report_spec#*:}"
    # ls fails when nothing matches the glob; `|| true` keeps that from
    # tripping errexit/pipefail and simply leaves the result empty.
    newest_report="$(ls -t "${WIKI_DIR}"/reports/hygiene-*-"${report_suffix}".md 2>/dev/null | head -n 1 || true)"
    if [[ -n "${newest_report}" ]]; then
      log "latest ${report_label} report: $(basename "${newest_report}")"
    fi
  done
fi
# Reaching this point always reports success: harvest, hygiene and reindex
# failures above are logged and do not change the exit status.
exit 0