#!/usr/bin/env bash
set -euo pipefail

# wiki-maintain.sh — Top-level orchestrator for wiki maintenance.
#
# Chains the maintenance scripts in the correct order:
#   1a. wiki-distill.py  (closet summaries → wiki pages via claude -p)
#   1b. wiki-harvest.py  (URL content from conversations → wiki pages)
#   2.  wiki-hygiene.py  (quick or full hygiene checks)
#   3.  qmd update && qmd embed (reindex after changes)
#
# Distill runs BEFORE harvest: conversation content takes priority over
# URL content. If a topic is already discussed in the conversations, we
# want the conversation rollup to drive the page, not a cited URL.
#
# Usage:
#   wiki-maintain.sh                      # Distill + harvest + quick hygiene + reindex
#   wiki-maintain.sh --full               # Everything with full hygiene (LLM)
#   wiki-maintain.sh --distill-only       # Conversation distillation only
#   wiki-maintain.sh --harvest-only       # URL harvesting only
#   wiki-maintain.sh --hygiene-only       # Hygiene only
#   wiki-maintain.sh --no-distill         # Skip distillation phase
#   wiki-maintain.sh --distill-first-run  # Bootstrap distill with last 7 days
#   wiki-maintain.sh --dry-run            # Show what would run (no writes, no LLM)
#   wiki-maintain.sh --no-compile         # Skip claude -p in harvest AND distill
#   wiki-maintain.sh --no-reindex         # Skip qmd update/embed after
#   wiki-maintain.sh -h | --help          # Show this help
#
# Exit status: 0 once the pipeline has run, even if an individual phase
# failed (failures are logged and the next phase still runs); non-zero on
# usage or environment errors.
#
# Log file: scripts/.maintain.log (rotated manually)

# Resolve script location first so we can find sibling scripts regardless of
# how WIKI_DIR is set. WIKI_DIR defaults to the parent of scripts/ but may be
# overridden for tests or alternate installs.
SCRIPTS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WIKI_DIR="${WIKI_DIR:-$(dirname "${SCRIPTS_DIR}")}"
# NOTE(review): nothing in this script writes to LOG_FILE; presumably the
# caller (cron/launchd) redirects stdout here — confirm before removing.
# shellcheck disable=SC2034
LOG_FILE="${SCRIPTS_DIR}/.maintain.log"

# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------

# Print the header comment block of this file as help text.
# Skips the shebang and `set` lines, then prints consecutive comment lines
# (with the leading "# " stripped) and stops at the first non-comment line,
# so the output does not depend on hard-coded line numbers or GNU-only sed
# syntax.
usage() {
  awk '
    NR <= 2 { next }
    /^#/    { sub(/^# ?/, ""); print; started = 1; next }
    started { exit }
  ' "${BASH_SOURCE[0]}"
}

# Print a timestamped log line to stdout.
# Arguments: the message words (joined with spaces).
log() {
  local ts
  ts="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] %s\n' "${ts}" "$*"
}

# Print a blank line followed by a timestamped section banner.
section() {
  echo ""
  log "━━━ $* ━━━"
}

# Print $2 when the boolean flag $1 ("true"/"false") is true, otherwise $3.
flag_label() {
  if "$1"; then
    printf '%s' "$2"
  else
    printf '%s' "$3"
  fi
}

# Run one Python maintenance script and log the outcome without aborting.
# Arguments:
#   $1 - phase name used in log messages (e.g. "distill")
#   $2 - name of the following phase, for the "continuing to ..." message
#   $3 - script file name inside SCRIPTS_DIR
#   $4... - optional arguments forwarded to the script
# Returns: always 0; a failing script is logged, not propagated.
run_phase() {
  local name="$1" next="$2" script="$3"
  shift 3
  local rc=0
  # ${1+"$@"} forwards zero arguments safely even on old bash with `set -u`.
  python3 "${SCRIPTS_DIR}/${script}" ${1+"$@"} || rc=$?
  if ((rc == 0)); then
    log "${name} completed"
  else
    log "[error] ${name} failed (exit ${rc}) — continuing to ${next}"
  fi
}

# Print the most recently modified existing path among the arguments, or
# nothing if none exist (e.g. an unmatched glob left as a literal pattern).
newest_file() {
  local candidate newest=""
  for candidate in "$@"; do
    [[ -e "${candidate}" ]] || continue
    if [[ -z "${newest}" || "${candidate}" -nt "${newest}" ]]; then
      newest="${candidate}"
    fi
  done
  printf '%s' "${newest}"
}

# -----------------------------------------------------------------------------
# Argument parsing
# -----------------------------------------------------------------------------

FULL_MODE=false
DISTILL_ONLY=false
HARVEST_ONLY=false
HYGIENE_ONLY=false
NO_DISTILL=false
DISTILL_FIRST_RUN=false
DRY_RUN=false
NO_COMPILE=false
NO_REINDEX=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --full) FULL_MODE=true; shift ;;
    --distill-only) DISTILL_ONLY=true; shift ;;
    --harvest-only) HARVEST_ONLY=true; shift ;;
    --hygiene-only) HYGIENE_ONLY=true; shift ;;
    --no-distill) NO_DISTILL=true; shift ;;
    --distill-first-run) DISTILL_FIRST_RUN=true; shift ;;
    --dry-run) DRY_RUN=true; shift ;;
    --no-compile) NO_COMPILE=true; shift ;;
    --no-reindex) NO_REINDEX=true; shift ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

# Mutex check — only one "only" flag at a time
only_count=0
if ${DISTILL_ONLY}; then only_count=$((only_count + 1)); fi
if ${HARVEST_ONLY}; then only_count=$((only_count + 1)); fi
if ${HYGIENE_ONLY}; then only_count=$((only_count + 1)); fi

if [[ ${only_count} -gt 1 ]]; then
  echo "--distill-only, --harvest-only, and --hygiene-only are mutually exclusive" >&2
  exit 1
fi

# -----------------------------------------------------------------------------
# Determine which phases to run
# -----------------------------------------------------------------------------

run_distill=true
run_harvest=true
run_hygiene=true
run_reindex=true

if ${NO_DISTILL}; then
  run_distill=false
fi

if ${DISTILL_ONLY}; then
  run_harvest=false
  run_hygiene=false
fi

if ${HARVEST_ONLY}; then
  run_distill=false
  run_hygiene=false
fi

if ${HYGIENE_ONLY}; then
  run_distill=false
  run_harvest=false
fi

# Reindexing is skipped on request and on dry runs (nothing was written).
if ${NO_REINDEX} || ${DRY_RUN}; then
  run_reindex=false
fi

# -----------------------------------------------------------------------------
# Sanity checks
# -----------------------------------------------------------------------------

if [[ ! -d "${WIKI_DIR}" ]]; then
  echo "Wiki directory not found: ${WIKI_DIR}" >&2
  exit 1
fi

cd "${WIKI_DIR}"

# qmd is only needed when the reindex phase will actually run.
required_cmds=(python3)
if ${run_reindex}; then
  required_cmds+=(qmd)
fi

for req in "${required_cmds[@]}"; do
  if ! command -v "${req}" >/dev/null 2>&1; then
    echo "Required command not found: ${req}" >&2
    exit 1
  fi
done

# -----------------------------------------------------------------------------
# Pipeline
# -----------------------------------------------------------------------------

START_TS="$(date '+%s')"
section "wiki-maintain.sh starting"
log "mode: $(flag_label "${FULL_MODE}" full quick)"
log "distill: $(flag_label "${run_distill}" enabled skipped)"
log "harvest: $(flag_label "${run_harvest}" enabled skipped)"
log "hygiene: $(flag_label "${run_hygiene}" enabled skipped)"
log "reindex: $(flag_label "${run_reindex}" enabled skipped)"
log "dry-run: ${DRY_RUN}"
log "wiki: ${WIKI_DIR}"

# The ${arr[@]+"${arr[@]}"} form below expands an empty array to nothing
# instead of tripping `set -u` on bash older than 4.4 (e.g. macOS /bin/bash).

# -----------------------------------------------------------------------------
# Phase 1a: Distill — conversations → wiki pages
# -----------------------------------------------------------------------------

if ${run_distill}; then
  section "Phase 1a: Conversation distillation"
  distill_args=()
  if ${DRY_RUN}; then distill_args+=(--dry-run); fi
  if ${NO_COMPILE}; then distill_args+=(--no-compile); fi
  if ${DISTILL_FIRST_RUN}; then distill_args+=(--first-run); fi

  run_phase distill harvest wiki-distill.py \
    ${distill_args[@]+"${distill_args[@]}"}
else
  section "Phase 1a: Conversation distillation (skipped)"
fi

# -----------------------------------------------------------------------------
# Phase 1b: Harvest — URLs cited in conversations → raw/ → wiki pages
# -----------------------------------------------------------------------------

if ${run_harvest}; then
  section "Phase 1b: URL harvesting"
  harvest_args=()
  if ${DRY_RUN}; then harvest_args+=(--dry-run); fi
  if ${NO_COMPILE}; then harvest_args+=(--no-compile); fi

  run_phase harvest hygiene wiki-harvest.py \
    ${harvest_args[@]+"${harvest_args[@]}"}
else
  section "Phase 1b: URL harvesting (skipped)"
fi

# -----------------------------------------------------------------------------
# Phase 2: Hygiene
# -----------------------------------------------------------------------------

if ${run_hygiene}; then
  section "Phase 2: Hygiene checks"
  hygiene_args=()
  if ${FULL_MODE}; then hygiene_args+=(--full); fi
  if ${DRY_RUN}; then hygiene_args+=(--dry-run); fi

  run_phase hygiene reindex wiki-hygiene.py \
    ${hygiene_args[@]+"${hygiene_args[@]}"}
else
  section "Phase 2: Hygiene checks (skipped)"
fi

# -----------------------------------------------------------------------------
# Phase 3: qmd reindex
# -----------------------------------------------------------------------------

if ${run_reindex}; then
  section "Phase 3: qmd reindex"

  # With pipefail, a qmd failure makes the whole pipeline (and thus `if`) fail.
  if qmd update 2>&1 | sed 's/^/ /'; then
    log "qmd update completed"
  else
    log "[error] qmd update failed (exit $?)"
  fi

  if qmd embed 2>&1 | sed 's/^/ /'; then
    log "qmd embed completed"
  else
    log "[warn] qmd embed failed or produced warnings"
  fi
else
  section "Phase 3: qmd reindex (skipped)"
fi

# -----------------------------------------------------------------------------
# Summary
# -----------------------------------------------------------------------------

END_TS="$(date '+%s')"
DURATION=$((END_TS - START_TS))
section "wiki-maintain.sh finished in ${DURATION}s"

# Report the most recent hygiene reports, if any. Use `if` statements (not
# `[[ ]] && action`) because under `set -e` a false test at end-of-script
# becomes the process exit status.
if [[ -d "${WIKI_DIR}/reports" ]]; then
  latest_fixed="$(newest_file "${WIKI_DIR}"/reports/hygiene-*-fixed.md)"
  latest_review="$(newest_file "${WIKI_DIR}"/reports/hygiene-*-needs-review.md)"
  if [[ -n "${latest_fixed}" ]]; then
    log "latest fixed report: $(basename "${latest_fixed}")"
  fi
  if [[ -n "${latest_review}" ]]; then
    log "latest review report: $(basename "${latest_review}")"
  fi
fi

exit 0