#!/usr/bin/env bash
set -euo pipefail

# wiki-maintain.sh — Top-level orchestrator for wiki maintenance.
#
# Chains the three maintenance scripts in the correct order:
#   1. wiki-harvest.py  (URL harvesting from summarized conversations)
#   2. wiki-hygiene.py  (quick or full hygiene checks)
#   3. qmd update && qmd embed  (reindex after changes)
#
# Usage:
#   wiki-maintain.sh                        # Harvest + quick hygiene
#   wiki-maintain.sh --full                 # Harvest + full hygiene (LLM-powered)
#   wiki-maintain.sh --harvest-only         # URL harvesting only
#   wiki-maintain.sh --hygiene-only         # Quick hygiene only
#   wiki-maintain.sh --hygiene-only --full  # Full hygiene only
#   wiki-maintain.sh --dry-run              # Show what would run (no writes)
#   wiki-maintain.sh --no-compile           # Harvest without claude -p compilation step
#   wiki-maintain.sh --no-reindex           # Skip qmd update/embed after
#
# Log file: scripts/.maintain.log (rotated manually)
# NOTE(review): LOG_FILE is defined below but nothing in this script writes to
# it; presumably the caller redirects output there — confirm.
#
# Every phase is best-effort: a failing phase is logged and the next phase
# still runs, and the script exits 0 once the pipeline has been attempted.
# Only usage errors and failed sanity checks exit non-zero.

# Resolve script location first so we can find sibling scripts regardless of
# how WIKI_DIR is set. WIKI_DIR defaults to the parent of scripts/ but may be
# overridden for tests or alternate installs.
SCRIPTS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WIKI_DIR="${WIKI_DIR:-$(dirname "${SCRIPTS_DIR}")}"
# shellcheck disable=SC2034 — kept for reference; see NOTE(review) above.
LOG_FILE="${SCRIPTS_DIR}/.maintain.log"

# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------

# Print the help text to stdout. Kept as a heredoc so that --help depends
# neither on source line numbers nor on GNU-only sed syntax.
usage() {
  cat <<'EOF'
wiki-maintain.sh — Top-level orchestrator for wiki maintenance.

Chains the three maintenance scripts in the correct order:
  1. wiki-harvest.py  (URL harvesting from summarized conversations)
  2. wiki-hygiene.py  (quick or full hygiene checks)
  3. qmd update && qmd embed  (reindex after changes)

Usage:
  wiki-maintain.sh                        # Harvest + quick hygiene
  wiki-maintain.sh --full                 # Harvest + full hygiene (LLM-powered)
  wiki-maintain.sh --harvest-only         # URL harvesting only
  wiki-maintain.sh --hygiene-only         # Quick hygiene only
  wiki-maintain.sh --hygiene-only --full  # Full hygiene only
  wiki-maintain.sh --dry-run              # Show what would run (no writes)
  wiki-maintain.sh --no-compile           # Harvest without claude -p compilation step
  wiki-maintain.sh --no-reindex           # Skip qmd update/embed after
EOF
}

# Print a timestamped log line to stdout.
# Arguments: message words (joined with spaces).
log() {
  local ts
  ts="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] %s\n' "${ts}" "$*"
}

# Print a blank line followed by a timestamped section banner.
# Arguments: section title.
section() {
  printf '\n'
  log "━━━ $* ━━━"
}

# Print $2 when flag $1 is the string "true", otherwise print $3.
# Arguments: $1 - flag value, $2 - label if true, $3 - label if false.
label_for() {
  if [[ "$1" == "true" ]]; then
    printf '%s' "$2"
  else
    printf '%s' "$3"
  fi
}

# Print the most recently modified existing path among the arguments, or
# nothing when none exist. Callers pass a glob; an unmatched glob arrives as
# the literal pattern, which the -e test filters out.
# Arguments: candidate paths.
newest_file() {
  local newest="" candidate
  for candidate in "$@"; do
    [[ -e "${candidate}" ]] || continue
    if [[ -z "${newest}" || "${candidate}" -nt "${newest}" ]]; then
      newest="${candidate}"
    fi
  done
  printf '%s' "${newest}"
}

# -----------------------------------------------------------------------------
# Argument parsing
# -----------------------------------------------------------------------------

FULL_MODE=false
HARVEST_ONLY=false
HYGIENE_ONLY=false
DRY_RUN=false
NO_COMPILE=false
NO_REINDEX=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --full)         FULL_MODE=true; shift ;;
    --harvest-only) HARVEST_ONLY=true; shift ;;
    --hygiene-only) HYGIENE_ONLY=true; shift ;;
    --dry-run)      DRY_RUN=true; shift ;;
    --no-compile)   NO_COMPILE=true; shift ;;
    --no-reindex)   NO_REINDEX=true; shift ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

if [[ "${HARVEST_ONLY}" == "true" && "${HYGIENE_ONLY}" == "true" ]]; then
  echo "--harvest-only and --hygiene-only are mutually exclusive" >&2
  exit 1
fi

# Phase 3 runs only when reindexing was not disabled and this is not a dry
# run; compute that once so the sanity check, the startup log and the phase
# itself all agree.
RUN_REINDEX=true
if [[ "${NO_REINDEX}" == "true" || "${DRY_RUN}" == "true" ]]; then
  RUN_REINDEX=false
fi

# -----------------------------------------------------------------------------
# Sanity checks
# -----------------------------------------------------------------------------

if [[ ! -d "${WIKI_DIR}" ]]; then
  echo "Wiki directory not found: ${WIKI_DIR}" >&2
  exit 1
fi

cd "${WIKI_DIR}" || {
  echo "Cannot enter wiki directory: ${WIKI_DIR}" >&2
  exit 1
}

# qmd is only required when Phase 3 will actually invoke it.
required_cmds=(python3)
if [[ "${RUN_REINDEX}" == "true" ]]; then
  required_cmds+=(qmd)
fi

for req in "${required_cmds[@]}"; do
  if ! command -v "${req}" >/dev/null 2>&1; then
    echo "Required command not found: ${req}" >&2
    exit 1
  fi
done

# -----------------------------------------------------------------------------
# Pipeline
# -----------------------------------------------------------------------------

START_TS="$(date '+%s')"

section "wiki-maintain.sh starting"
log "mode: $(label_for "${FULL_MODE}" full quick)"
log "harvest: $(label_for "${HYGIENE_ONLY}" skipped enabled)"
log "hygiene: $(label_for "${HARVEST_ONLY}" skipped enabled)"
log "reindex: $(label_for "${RUN_REINDEX}" enabled skipped)"
log "dry-run: ${DRY_RUN}"
log "wiki: ${WIKI_DIR}"

# -----------------------------------------------------------------------------
# Phase 1: Harvest
# -----------------------------------------------------------------------------

if [[ "${HYGIENE_ONLY}" != "true" ]]; then
  section "Phase 1: URL harvesting"

  harvest_args=()
  if [[ "${DRY_RUN}" == "true" ]]; then
    harvest_args+=(--dry-run)
  fi
  if [[ "${NO_COMPILE}" == "true" ]]; then
    harvest_args+=(--no-compile)
  fi

  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a plain
  # "${arr[@]}" trips `set -u` on bash older than 4.4.
  harvest_rc=0
  python3 "${SCRIPTS_DIR}/wiki-harvest.py" \
    ${harvest_args[@]+"${harvest_args[@]}"} || harvest_rc=$?

  if [[ "${harvest_rc}" -eq 0 ]]; then
    log "harvest completed"
  else
    log "[error] harvest failed (exit ${harvest_rc}) — continuing to hygiene"
  fi
else
  section "Phase 1: URL harvesting (skipped)"
fi

# -----------------------------------------------------------------------------
# Phase 2: Hygiene
# -----------------------------------------------------------------------------

if [[ "${HARVEST_ONLY}" != "true" ]]; then
  section "Phase 2: Hygiene checks"

  hygiene_args=()
  if [[ "${FULL_MODE}" == "true" ]]; then
    hygiene_args+=(--full)
  fi
  if [[ "${DRY_RUN}" == "true" ]]; then
    hygiene_args+=(--dry-run)
  fi

  hygiene_rc=0
  python3 "${SCRIPTS_DIR}/wiki-hygiene.py" \
    ${hygiene_args[@]+"${hygiene_args[@]}"} || hygiene_rc=$?

  if [[ "${hygiene_rc}" -eq 0 ]]; then
    log "hygiene completed"
  else
    log "[error] hygiene failed (exit ${hygiene_rc}) — continuing to reindex"
  fi
else
  section "Phase 2: Hygiene checks (skipped)"
fi

# -----------------------------------------------------------------------------
# Phase 3: qmd reindex
# -----------------------------------------------------------------------------

if [[ "${RUN_REINDEX}" == "true" ]]; then
  section "Phase 3: qmd reindex"

  # With pipefail the pipeline status reflects a qmd failure even though sed
  # (which only indents the output) is the last stage.
  update_rc=0
  qmd update 2>&1 | sed 's/^/ /' || update_rc=$?
  if [[ "${update_rc}" -eq 0 ]]; then
    log "qmd update completed"
  else
    log "[error] qmd update failed (exit ${update_rc})"
  fi

  if qmd embed 2>&1 | sed 's/^/ /'; then
    log "qmd embed completed"
  else
    log "[warn] qmd embed failed or produced warnings"
  fi
else
  section "Phase 3: qmd reindex (skipped)"
fi

# -----------------------------------------------------------------------------
# Summary
# -----------------------------------------------------------------------------

END_TS="$(date '+%s')"
DURATION=$((END_TS - START_TS))
section "wiki-maintain.sh finished in ${DURATION}s"

# Report the most recent hygiene reports, if any. Use `if` statements (not
# `[[ ]] && action`) because under `set -e` a false test at end-of-script
# becomes the process exit status.
if [[ -d "${WIKI_DIR}/reports" ]]; then
  latest_fixed="$(newest_file "${WIKI_DIR}"/reports/hygiene-*-fixed.md)"
  latest_review="$(newest_file "${WIKI_DIR}"/reports/hygiene-*-needs-review.md)"

  if [[ -n "${latest_fixed}" ]]; then
    log "latest fixed report: $(basename "${latest_fixed}")"
  fi
  if [[ -n "${latest_review}" ]]; then
    log "latest review report: $(basename "${latest_review}")"
  fi
fi

exit 0