Initial commit — memex
A compounding LLM-maintained knowledge wiki. Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's mempalace, with an automation layer on top for conversation mining, URL harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage, hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
This commit is contained in:
198
scripts/wiki-maintain.sh
Executable file
198
scripts/wiki-maintain.sh
Executable file
@@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# wiki-maintain.sh — Top-level orchestrator for wiki maintenance.
|
||||
#
|
||||
# Chains the three maintenance scripts in the correct order:
|
||||
# 1. wiki-harvest.py (URL harvesting from summarized conversations)
|
||||
# 2. wiki-hygiene.py (quick or full hygiene checks)
|
||||
# 3. qmd update && qmd embed (reindex after changes)
|
||||
#
|
||||
# Usage:
|
||||
# wiki-maintain.sh # Harvest + quick hygiene
|
||||
# wiki-maintain.sh --full # Harvest + full hygiene (LLM-powered)
|
||||
# wiki-maintain.sh --harvest-only # URL harvesting only
|
||||
# wiki-maintain.sh --hygiene-only # Quick hygiene only
|
||||
# wiki-maintain.sh --hygiene-only --full # Full hygiene only
|
||||
# wiki-maintain.sh --dry-run # Show what would run (no writes)
|
||||
# wiki-maintain.sh --no-compile # Harvest without claude -p compilation step
|
||||
# wiki-maintain.sh --no-reindex # Skip qmd update/embed after
|
||||
#
|
||||
# Log file: scripts/.maintain.log (rotated manually)
|
||||
|
||||
# Resolve this script's own directory first so sibling scripts can be found
# regardless of how WIKI_DIR is set. WIKI_DIR defaults to the parent of
# scripts/ but may be overridden for tests or alternate installs.
#
# CDPATH is emptied for the `cd` and its stdout is discarded: with CDPATH set,
# `cd` to a relative path may print the directory it resolved, and that text
# would otherwise be captured in front of the `pwd` output.
SCRIPTS_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
WIKI_DIR="${WIKI_DIR:-$(dirname -- "${SCRIPTS_DIR}")}"
# NOTE(review): nothing else in this script reads LOG_FILE — presumably the
# caller (cron or a wrapper) redirects output to this path; confirm.
LOG_FILE="${SCRIPTS_DIR}/.maintain.log"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Argument parsing
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Option flags, all off by default. Each holds the literal string "true" or
# "false"; the later phases of this script compare against those strings.
FULL_MODE=false
HARVEST_ONLY=false
HYGIENE_ONLY=false
DRY_RUN=false
NO_COMPILE=false
NO_REINDEX=false

#######################################
# Print the help text: lines 3-20 of this script with the leading "#" comment
# marker (and one following space, if present) removed.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  # ERE "# ?" via -E rather than the BRE "\?": POSIX leaves "\?" undefined in
  # basic regular expressions, so it is not honoured by every sed.
  sed -n '3,20p' "$0" | sed -E 's/^# ?//'
}

#######################################
# Parse command-line options into the flag variables above.
# Globals:   FULL_MODE, HARVEST_ONLY, HYGIENE_ONLY, DRY_RUN, NO_COMPILE,
#            NO_REINDEX (written)
# Arguments: the script's command-line arguments
# Outputs:   help text on stdout for -h/--help; error messages on stderr
# Exits:     0 after -h/--help; 1 on an unknown option or when both
#            --harvest-only and --hygiene-only were given
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --full) FULL_MODE=true ;;
      --harvest-only) HARVEST_ONLY=true ;;
      --hygiene-only) HYGIENE_ONLY=true ;;
      --dry-run) DRY_RUN=true ;;
      --no-compile) NO_COMPILE=true ;;
      --no-reindex) NO_REINDEX=true ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
    shift
  done

  # Each *-only flag skips the other phase, so together they would skip both.
  if [[ "${HARVEST_ONLY}" == "true" && "${HYGIENE_ONLY}" == "true" ]]; then
    echo "--harvest-only and --hygiene-only are mutually exclusive" >&2
    exit 1
  fi
}

parse_args "$@"
# Leave the positional parameters empty, as a top-level shift loop would.
set --
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Logging
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
#######################################
# Print one timestamped log line.
# Arguments: message words; they are joined via "$*" (first character of IFS,
#            a space by default) and printed verbatim, never as a format
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" plus a newline on stdout
#######################################
log() {
  local ts
  ts="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] %s\n' "${ts}" "$*"
}
|
||||
|
||||
#######################################
# Print a section banner: an empty line, then the title wrapped in "━━━"
# rules and emitted through log().
# Arguments: title words, joined via "$*"
# Outputs:   a blank line followed by one log line on stdout
#######################################
section() {
  printf '\n'
  log "━━━ $* ━━━"
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Sanity checks
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# The wiki root must exist before anything runs; the script then works from
# inside it.
if [[ ! -d "${WIKI_DIR}" ]]; then
  echo "Wiki directory not found: ${WIKI_DIR}" >&2
  exit 1
fi

cd -- "${WIKI_DIR}"

# python3 runs the harvest and hygiene scripts. qmd is only invoked by the
# reindex phase, and that phase is skipped under both --no-reindex and
# --dry-run, so a missing qmd is tolerated in either of those modes.
for req in python3 qmd; do
  if command -v "${req}" >/dev/null 2>&1; then
    continue
  fi
  if [[ "${req}" == "qmd" ]] \
    && [[ "${NO_REINDEX}" == "true" || "${DRY_RUN}" == "true" ]]; then
    continue
  fi
  echo "Required command not found: ${req}" >&2
  exit 1
done
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Pipeline
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
#######################################
# Map a "true"/"false" flag value to a display label.
# Arguments: $1 - flag value
#            $2 - label printed when the flag is exactly "true"
#            $3 - label printed otherwise
# Outputs:   the chosen label plus a newline on stdout
#######################################
flag_label() {
  if [[ "$1" == "true" ]]; then
    printf '%s\n' "$2"
  else
    printf '%s\n' "$3"
  fi
}

# Start time in epoch seconds; the summary subtracts it to report duration.
START_TS="$(date '+%s')"
section "wiki-maintain.sh starting"
log "mode: $(flag_label "${FULL_MODE}" full quick)"
# The *-only flags are inverted: --hygiene-only is what skips harvesting, and
# --harvest-only is what skips hygiene.
log "harvest: $(flag_label "${HYGIENE_ONLY}" skipped enabled)"
log "hygiene: $(flag_label "${HARVEST_ONLY}" skipped enabled)"
log "reindex: $(flag_label "${NO_REINDEX}" skipped enabled)"
log "dry-run: ${DRY_RUN}"
log "wiki: ${WIKI_DIR}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Phase 1: Harvest
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ "${HYGIENE_ONLY}" != "true" ]]; then
  section "Phase 1: URL harvesting"

  # Forward only the flags that were requested on the command line.
  harvest_args=()
  if [[ "${DRY_RUN}" == "true" ]]; then
    harvest_args+=(--dry-run)
  fi
  if [[ "${NO_COMPILE}" == "true" ]]; then
    harvest_args+=(--no-compile)
  fi

  # ${arr[@]+"${arr[@]}"} expands to nothing when the array is empty. A plain
  # "${arr[@]}" on an empty array is an "unbound variable" error under
  # `set -u` in Bash releases before 4.4.
  if python3 "${SCRIPTS_DIR}/wiki-harvest.py" ${harvest_args[@]+"${harvest_args[@]}"}; then
    log "harvest completed"
  else
    # $? here is still the exit status of the python3 command tested above.
    # A harvest failure is logged but does not stop the run.
    log "[error] harvest failed (exit $?) — continuing to hygiene"
  fi
else
  section "Phase 1: URL harvesting (skipped)"
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Phase 2: Hygiene
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ "${HARVEST_ONLY}" != "true" ]]; then
  section "Phase 2: Hygiene checks"

  # Forward only the flags that were requested on the command line.
  hygiene_args=()
  if [[ "${FULL_MODE}" == "true" ]]; then
    hygiene_args+=(--full)
  fi
  if [[ "${DRY_RUN}" == "true" ]]; then
    hygiene_args+=(--dry-run)
  fi

  # ${arr[@]+"${arr[@]}"} expands to nothing when the array is empty. A plain
  # "${arr[@]}" on an empty array is an "unbound variable" error under
  # `set -u` in Bash releases before 4.4.
  if python3 "${SCRIPTS_DIR}/wiki-hygiene.py" ${hygiene_args[@]+"${hygiene_args[@]}"}; then
    log "hygiene completed"
  else
    # $? here is still the exit status of the python3 command tested above.
    # A hygiene failure is logged but does not stop the run.
    log "[error] hygiene failed (exit $?) — continuing to reindex"
  fi
else
  section "Phase 2: Hygiene checks (skipped)"
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Phase 3: qmd reindex
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Reindexing is skipped both on request (--no-reindex) and in dry-run mode.
if [[ "${NO_REINDEX}" != "true" && "${DRY_RUN}" != "true" ]]; then
  section "Phase 3: qmd reindex"

  # qmd's combined stdout/stderr is passed through sed, which prefixes each
  # line. The `if` only sees a qmd failure because the script runs with
  # `pipefail`; without it the pipeline status would be sed's alone.
  if qmd update 2>&1 | sed 's/^/ /'; then
    log "qmd update completed"
  else
    log "[error] qmd update failed (exit $?)"
  fi

  # The embed step runs whether or not the update step succeeded.
  if qmd embed 2>&1 | sed 's/^/ /'; then
    log "qmd embed completed"
  else
    log "[warn] qmd embed failed or produced warnings"
  fi
else
  section "Phase 3: qmd reindex (skipped)"
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Summary
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
END_TS="$(date '+%s')"
DURATION=$((END_TS - START_TS))
section "wiki-maintain.sh finished in ${DURATION}s"

#######################################
# Print the most recently modified path among the arguments.
# Arguments: candidate paths, typically the expansion of a glob; entries that
#            do not exist (e.g. an unmatched glob left as a literal pattern)
#            are ignored
# Outputs:   the newest existing path on stdout with no trailing newline, or
#            nothing when no candidate exists
# Returns:   0
#######################################
newest_file() {
  local candidate newest=""
  for candidate in "$@"; do
    if [[ ! -e "${candidate}" ]]; then
      continue
    fi
    # Strictly-newer comparison: on equal mtimes the earlier argument wins.
    if [[ -z "${newest}" || "${candidate}" -nt "${newest}" ]]; then
      newest="${candidate}"
    fi
  done
  printf '%s' "${newest}"
}

# Report the most recent hygiene reports, if any. Use `if` statements (not
# `[[ ]] && action`) because under `set -e` a false test at end-of-script
# becomes the process exit status.
if [[ -d "${WIKI_DIR}/reports" ]]; then
  # Globs are compared by mtime in the shell rather than parsing `ls -t`
  # output, so unusual file names cannot split or truncate the result.
  latest_fixed="$(newest_file "${WIKI_DIR}"/reports/hygiene-*-fixed.md)"
  latest_review="$(newest_file "${WIKI_DIR}"/reports/hygiene-*-needs-review.md)"
  if [[ -n "${latest_fixed}" ]]; then
    log "latest fixed report: $(basename "${latest_fixed}")"
  fi
  if [[ -n "${latest_review}" ]]; then
    log "latest review report: $(basename "${latest_review}")"
  fi
fi
|
||||
|
||||
exit 0
|
||||
Reference in New Issue
Block a user