Script 17 · Cluster Control Plane

17. Master Orchestration (Cluster Management) v3.0 Hardened

Installs a hardened multi-node orchestration toolkit for RAIN/Erupe clusters with SSH, sync, database, health, audit, and rolling-update workflows.

Category: Cluster Control Plane | Risk: High | Lines: not yet calculated | Language: Bash / Linux
Back to index

What this script does

  • Manage many nodes from one master VPS.
  • Run safer rolling updates with snapshots and failure thresholds.
  • Centralize config editing, DB operations, sync, audit, and health checks.

Prerequisites

  • Dedicated master VPS
  • Root access for installation
  • SSH reachability to nodes
  • Staging test before production
  • Backup strategy

Execution flow

  1. Installs dependencies
  2. Creates /opt/erupe-orchestrator state
  3. Generates command suite
  4. Registers nodes
  5. Runs preflight/health/update/sync workflows

Validation checklist

  • sudo node-manager
  • node-preflight
  • node-health
  • node-audit
  • node-registry

Operational cautions

  • The master becomes a high-value administrative control point.
  • Use non-root SSH users with sudo where possible.
  • Test in staging before touching production nodes.

Original script notes

Script Info (v3.0): hardened cluster management system with fixed heredoc formatting, safer SSH defaults, input validation, lock files, preflight checks, canary/rolling updates, snapshots, audit logs, registry export/import, node groups, health monitoring, notifications, sync dry-run, and database pre-backups before destructive actions.

Script source
sudo tee /usr/local/bin/setup-orchestration >/dev/null <<'EOF'
#!/usr/bin/env bash
set -euo pipefail

# ============================================================================
# MASTER ORCHESTRATION v3.0 - RAIN/Erupe Cluster Management
# ============================================================================
# Major fixes:
#   - Heredocs and line breaks are laid out so the script survives copy/paste from the HTML page.
#   - No glued chmod/comment lines.
#   - set -euo pipefail enabled.
#   - StrictHostKeyChecking=accept-new by default.
#   - No chmod 777 defaults.
#   - Input validation for IP/host, port, path, node name, group tags.
#   - DB destructive operations create backups first.
#
# Added features:
#   - Preflight checks.
#   - Lock file to prevent overlapping update/sync/edit jobs.
#   - Snapshot before node update/mass update.
#   - Canary update and rolling update with failure threshold.
#   - Audit log + report files.
#   - Optional append-only audit mode with chattr.
#   - Export/import registry bundle.
#   - Non-root SSH user support with sudo wrapper.
#   - Known-host bootstrap via ssh-keyscan.
#   - jq-based JSON path editing with backup/rollback.
#   - Post-action reports for update and sync.
# ============================================================================

# Release tag of the orchestration toolkit.
ORCH_VERSION="3.0"

# Installation root and the working directories beneath it.
ORCH_ROOT="/opt/erupe-orchestrator"
STATE_DIR="${ORCH_ROOT}/state"
REPORT_DIR="${ORCH_ROOT}/reports"
SNAPSHOT_DIR="${ORCH_ROOT}/snapshots"

# Flat-file registries kept directly under the installation root.
NODE_DB="${ORCH_ROOT}/nodes.db"
GROUP_DB="${ORCH_ROOT}/node_groups.db"
PG_DB="${ORCH_ROOT}/databases.db"
PATH_CONF="${ORCH_ROOT}/paths.conf"
BACKUP_INDEX="${ORCH_ROOT}/config_backups.db"
NOTIFY_CONF="${ORCH_ROOT}/notify.conf"

# Locations outside the installation root.
CRYPTO_KEY="/etc/erupe-orchestration.key"
AUDIT_LOG="/var/log/erupe-orchestration.log"
LOCK_DIR="/run/erupe-orchestration"
SYNC_STAGING="/ERUPE-SYNC"

# Console helpers. Each prints one line with a coloured tag; err writes to
# stderr, the others to stdout.

# log MESSAGE... - cyan [HH:MM:SS] prefix.
log() {
  local now
  now="$(date '+%H:%M:%S')"
  printf '\033[1;36m[%s]\033[0m %s\n' "$now" "$*"
}

# ok MESSAGE... - green [OK] prefix.
ok() {
  printf '\033[1;32m[OK]\033[0m %s\n' "$*"
}

# warn MESSAGE... - yellow [WARN] prefix.
warn() {
  printf '\033[1;33m[WARN]\033[0m %s\n' "$*"
}

# err MESSAGE... - red [ERR] prefix, on stderr.
err() {
  printf '\033[1;31m[ERR]\033[0m %s\n' "$*" 1>&2
}

# Abort with exit status 1 unless the effective user is root.
need_root() {
  local uid="${EUID:-$(id -u)}"
  [ "$uid" -eq 0 ] && return 0
  err "Run as root: sudo setup-orchestration"
  exit 1
}

# Install the packages the orchestration toolkit relies on when any required
# command is absent from PATH.
# Globals:  none
# Outputs:  progress via log; a diagnostic via err when installation is impossible
# Returns:  0 when nothing is missing or installation succeeded; exits 1 when
#           commands are missing and apt-get is unavailable
install_dependencies() {
  # openssh-client is listed because ssh, ssh-keygen and ssh-copy-id are probed
  # below; without it a missing ssh could never be repaired by this function.
  local pkgs=(openssh-client sshpass rsync postgresql-client openssl jq curl coreutils findutils gawk)
  # sshpass, pg_dump and pg_restore are probed as well: the generated
  # node-preflight and node-db tools call them.
  local required=(
    ssh ssh-keygen ssh-copy-id sshpass rsync
    psql pg_dump pg_restore
    openssl jq curl awk sed grep find tar
  )
  local missing=()
  local cmd

  for cmd in "${required[@]}"; do
    command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd")
  done

  if [ "${#missing[@]}" -eq 0 ]; then
    return 0
  fi

  if ! command -v apt-get >/dev/null 2>&1; then
    err "Missing commands (${missing[*]}) and apt-get is not available to install them."
    exit 1
  fi

  log "Missing commands: ${missing[*]}"
  log "Installing dependencies: ${pkgs[*]}"
  apt-get update -y -qq
  DEBIAN_FRONTEND=noninteractive apt-get install -y -qq "${pkgs[@]}"
}

# Create the orchestrator's directories, registry files, encryption key and
# root SSH key, and (re)apply their permissions. Safe to run repeatedly.
# Globals:  ORCH_ROOT STATE_DIR REPORT_DIR SNAPSHOT_DIR LOCK_DIR SYNC_STAGING
#           NODE_DB GROUP_DB PG_DB PATH_CONF BACKUP_INDEX NOTIFY_CONF
#           CRYPTO_KEY AUDIT_LOG (all read)
# Returns:  0 on success; exits 1 if the encryption key cannot be generated
bootstrap_storage() {
  mkdir -p "$ORCH_ROOT" "$STATE_DIR" "$REPORT_DIR" "$SNAPSHOT_DIR" "$LOCK_DIR" "$SYNC_STAGING" "$(dirname "$AUDIT_LOG")"
  touch "$NODE_DB" "$GROUP_DB" "$PG_DB" "$PATH_CONF" "$BACKUP_INDEX" "$NOTIFY_CONF" "$AUDIT_LOG"
  chmod 700 "$ORCH_ROOT" "$STATE_DIR" "$SNAPSHOT_DIR"
  chmod 755 "$REPORT_DIR"
  chmod 775 "$SYNC_STAGING"
  # notify.conf stores the webhook URL, which works as a bearer secret, so it
  # gets the same mode as the other registries instead of being reset to 0644
  # on every run.
  chmod 600 "$NODE_DB" "$GROUP_DB" "$PG_DB" "$BACKUP_INDEX" "$NOTIFY_CONF"
  chmod 644 "$PATH_CONF"
  # chmod is refused on a file carrying the append-only attribute (node-audit
  # option 7); that must not abort a re-run of the installer.
  chmod 640 "$AUDIT_LOG" 2>/dev/null \
    || warn "Could not change mode of $AUDIT_LOG (append-only attribute set?)"

  # -s rather than -f: an empty key left behind by an interrupted run is
  # regenerated instead of being kept forever.
  if [ ! -s "$CRYPTO_KEY" ]; then
    local key_tmp="${CRYPTO_KEY}.new.$$"
    # The key is written under umask 077 and moved into place only once
    # complete, so it is never readable by other users and never half-written.
    if (umask 077 && openssl rand -base64 48 > "$key_tmp") && [ -s "$key_tmp" ]; then
      mv -f "$key_tmp" "$CRYPTO_KEY"
      chmod 600 "$CRYPTO_KEY"
    else
      rm -f "$key_tmp"
      err "Failed to generate $CRYPTO_KEY"
      exit 1
    fi
  fi

  if [ ! -f /root/.ssh/id_ed25519 ]; then
    mkdir -p /root/.ssh
    chmod 700 /root/.ssh
    ssh-keygen -t ed25519 -N "" -f /root/.ssh/id_ed25519 -q
  fi
}

write_common_lib() {
  cat > /usr/local/lib/erupe-orchestrator-common <<'LIBEOF'
#!/usr/bin/env bash
set -euo pipefail

# Toolkit release tag.
ORCH_VERSION="3.0"

# Installation root and its working directories.
ORCH_ROOT="/opt/erupe-orchestrator"
STATE_DIR="${ORCH_ROOT}/state"
REPORT_DIR="${ORCH_ROOT}/reports"
SNAPSHOT_DIR="${ORCH_ROOT}/snapshots"

# Registry files stored in the installation root.
NODE_DB="${ORCH_ROOT}/nodes.db"
GROUP_DB="${ORCH_ROOT}/node_groups.db"
PG_DB="${ORCH_ROOT}/databases.db"
PATH_CONF="${ORCH_ROOT}/paths.conf"
BACKUP_INDEX="${ORCH_ROOT}/config_backups.db"
NOTIFY_CONF="${ORCH_ROOT}/notify.conf"

# Paths that live outside the installation root.
CRYPTO_KEY="/etc/erupe-orchestration.key"
AUDIT_LOG="/var/log/erupe-orchestration.log"
LOCK_DIR="/run/erupe-orchestration"
SYNC_STAGING="/ERUPE-SYNC"

# die MESSAGE... - print "ERROR: MESSAGE" on stderr and exit with status 1.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

# Exit through die unless the effective user is root.
need_root() {
  local uid="${EUID:-$(id -u)}"
  if [ "$uid" -ne 0 ]; then
    die "Run with sudo/root."
  fi
}

# Print the current local time as YYYYmmdd_HHMMSS.
timestamp() {
  date '+%Y%m%d_%H%M%S'
}

# Append one entry to the audit log.
# Arguments: $1 action (default "unknown"), $2 target, $3 result, $4 detail
# Globals:   AUDIT_LOG (appended to), SUDO_USER / USER (read)
# Every field is flattened to a single line first: callers pass operator
# input such as file paths, and an embedded line break would otherwise let a
# single call write additional, forged entries.
audit_log() {
  local action="${1:-unknown}" target="${2:-}" result="${3:-}" detail="${4:-}"
  local actor="${SUDO_USER:-${USER:-root}}"
  local stamp

  action="${action//[$'\r\n']/ }"
  target="${target//[$'\r\n']/ }"
  result="${result//[$'\r\n']/ }"
  detail="${detail//[$'\r\n']/ }"
  actor="${actor//[$'\r\n']/ }"

  # Fall back to the default date format where -Iseconds is not supported.
  stamp="$(date -Iseconds 2>/dev/null || date)"

  mkdir -p "$(dirname "$AUDIT_LOG")"
  printf '[%s] user=%s action=%s target=%s result=%s detail="%s"\n' \
    "$stamp" "$actor" "$action" "$target" "$result" "$detail" >> "$AUDIT_LOG"
}

# Best-effort webhook notification.
# Arguments: $1 level (default "info"), $2 message
# Posts {"text":"<level>: <message>"} to the last WEBHOOK_URL= entry of
# NOTIFY_CONF. Always returns 0: a missing config, an empty URL and a failed
# request are all silently ignored.
notify_send() {
  local level="${1:-info}" msg="${2:-}"
  local url body

  if [ ! -f "$NOTIFY_CONF" ]; then
    return 0
  fi

  url="$(grep '^WEBHOOK_URL=' "$NOTIFY_CONF" 2>/dev/null | tail -n 1 | cut -d= -f2- || true)"
  if [ -z "$url" ]; then
    return 0
  fi

  # jq -Rs turns the raw text into a JSON string literal.
  body="$(printf '{"text":%s}' "$(printf '%s: %s' "$level" "$msg" | jq -Rs .)")"
  curl -fsS -m 5 --connect-timeout 3 \
    -H "Content-Type: application/json" \
    -d "$body" \
    "$url" >/dev/null 2>&1 || true
}

# Ask the operator to type CONFIRM.
# Arguments: $1 - text shown before the prompt
# Returns:   0 only when the reply is exactly "CONFIRM"; end of input counts
#            as a refusal
confirm_word() {
  # ans is local so the reply does not leak into, or overwrite, a caller's
  # variable of the same name.
  local label="$1" ans=""
  # printf instead of echo: a label such as "-n" must be shown verbatim.
  printf '%s\n' "$label"
  read -r -p "Type CONFIRM to continue: " ans || true
  [ "$ans" = "CONFIRM" ]
}

# Input validators. Each returns 0 when its single argument is acceptable.

# Host name or IP literal. A leading '-' is rejected so the value can never
# be mistaken for a command-line option by the tools it is handed to.
valid_host() { [[ "${1:-}" =~ ^[A-Za-z0-9._:][A-Za-z0-9._:-]*$ ]]; }

# TCP port, 1-65535.
valid_port() { [[ "${1:-}" =~ ^[0-9]+$ ]] && [ "$1" -ge 1 ] 2>/dev/null && [ "$1" -le 65535 ] 2>/dev/null; }

# Login name.
valid_user() { [[ "${1:-}" =~ ^[A-Za-z_][A-Za-z0-9._-]*$ ]]; }

# Node, database or alias name.
valid_name() { [[ "${1:-}" =~ ^[A-Za-z0-9._-]+$ ]]; }

# Comma-separated group tags; the empty string is allowed.
valid_groups() { [[ "${1:-}" =~ ^[A-Za-z0-9._,-]*$ ]]; }

# Absolute path without "..". Quotes, backslashes, '$', backticks and line
# breaks are refused as well: paths are interpolated into single-quoted
# remote shell commands, where any of these would break out of the quoting.
valid_abs_path() {
  local candidate="${1:-}"
  [[ "$candidate" == /* ]] || return 1
  [[ "$candidate" != *".."* ]] || return 1
  case "$candidate" in
    *"'"* | *'"'* | *'`'* | *'$'* | *'\'* | *$'\n'* | *$'\r'*) return 1 ;;
  esac
  return 0
}

# Guard wrappers around the validators: each returns quietly for acceptable
# input and otherwise terminates the script through die.

require_host() {
  if ! valid_host "$1"; then
    die "Invalid host/IP: $1"
  fi
}

require_port() {
  if ! valid_port "$1"; then
    die "Invalid port: $1"
  fi
}

require_user() {
  if ! valid_user "$1"; then
    die "Invalid user: $1"
  fi
}

require_name() {
  if ! valid_name "$1"; then
    die "Invalid name. Use A-Z a-z 0-9 . _ - only."
  fi
}

require_groups() {
  if ! valid_groups "$1"; then
    die "Invalid groups. Use comma-separated tags only."
  fi
}

require_abs_path() {
  if ! valid_abs_path "$1"; then
    die "Invalid path. Use an absolute path without '..'."
  fi
}

# Take an exclusive, named lock for the remainder of the calling script.
# Arguments: $1 - lock name
# Globals:   LOCK_DIR (read), ORCH_LOCK_PATH (written)
# The lock is a directory created with mkdir, which fails if it already
# exists; in that case the script dies. The directory is removed again when
# the script exits.
with_lock() {
  local lock_name="$1"
  local lock_path="$LOCK_DIR/$lock_name.lock"
  mkdir -p "$LOCK_DIR"
  if ! mkdir "$lock_path" 2>/dev/null; then
    die "Another operation is running: $lock_name"
  fi
  # The EXIT trap runs long after this function has returned, when its locals
  # no longer exist, so the path is kept in a global for the trap to read.
  ORCH_LOCK_PATH="$lock_path"
  trap 'rm -rf -- "${ORCH_LOCK_PATH:?}"' EXIT
  # A trapped signal does not end the script by itself; exit explicitly so
  # Ctrl-C still stops the run, with the EXIT trap doing the cleanup.
  trap 'exit 130' INT
  trap 'exit 143' TERM
}

# Print the value of the last GLOBAL= entry in PATH_CONF (everything after
# the first '='). Prints nothing, and still returns 0, when the file or the
# entry is missing.
get_global_path() {
  sed -n 's/^GLOBAL=//p' "$PATH_CONF" 2>/dev/null | tail -n 1 || true
}

# Print the Erupe directory for a node.
# Arguments: $1 - node host/IP (optional)
# Globals:   PATH_CONF (read)
# Outputs:   the value of the last "<host>=" entry in PATH_CONF, or the
#            global path when the host has no entry of its own
get_erupe_dir() {
  local ip="${1:-}" specific="" entry
  if [ -n "$ip" ] && [ -r "$PATH_CONF" ]; then
    # Literal prefix comparison rather than a regular expression: the dots of
    # an IP address must not behave as wildcards and match another key.
    while IFS= read -r entry || [ -n "$entry" ]; do
      if [[ "$entry" == "$ip="* ]]; then
        specific="${entry#*=}"
      fi
    done < "$PATH_CONF"
  fi
  if [ -n "$specific" ]; then
    printf '%s\n' "$specific"
  else
    get_global_path
  fi
}

# Print the backup directory: the last BACKUP_DIR= entry of PATH_CONF when
# present, otherwise "<global path>/BACKUP", otherwise "$ORCH_ROOT/backups".
get_backup_dir() {
  local configured base
  configured="$(grep '^BACKUP_DIR=' "$PATH_CONF" 2>/dev/null | tail -n 1 | cut -d= -f2- || true)"
  if [ -n "$configured" ]; then
    echo "$configured"
    return
  fi

  base="$(get_global_path)"
  if [ -n "$base" ]; then
    echo "$base/BACKUP"
  else
    echo "$ORCH_ROOT/backups"
  fi
}

# Die unless a GLOBAL Erupe path has been configured.
check_path_setup() {
  local configured
  configured="$(get_global_path)"
  if [ -z "$configured" ]; then
    die "GLOBAL Erupe path is not configured. Open node-manager option 1."
  fi
}

# Print the number of lines in NODE_DB, or 0 when the file is missing or empty.
node_line_count() {
  if [ -s "$NODE_DB" ]; then
    wc -l < "$NODE_DB" | tr -d ' '
  else
    echo 0
  fi
}

# Print line N of NODE_DB.
# Arguments: $1 - 1-based line number, usually typed by the operator
# Outputs:   the record, or nothing when $1 is not a positive number or is
#            past the end of the file
# The argument is checked before it reaches sed: it is spliced into the sed
# program, where an empty value would print every line and arbitrary text
# would be executed as sed commands.
get_node_line() {
  local n="${1:-}"
  [[ "$n" =~ ^[0-9]{1,9}$ ]] || return 0
  # 10# forces base ten so input such as "08" is not read as octal.
  n=$((10#$n))
  [ "$n" -ge 1 ] || return 0
  sed -n "${n}p" "$NODE_DB"
}

# Extract one field from a '|'-separated nodes.db record.
# Arguments: $1 - the record, $2 - one of ip, port, user, name
# Records with four or more fields are read as ip|port|user|name; shorter
# ones as ip|port|name, with the user reported as "root". Any other field
# name prints nothing.
node_field() {
  local record="$1" wanted="$2"
  local width has_user column

  width="$(awk -F'|' '{print NF}' <<< "$record")"
  if [ "$width" -ge 4 ]; then
    has_user=1
  else
    has_user=0
  fi

  case "$wanted" in
    ip) column=1 ;;
    port) column=2 ;;
    user)
      if [ "$has_user" -eq 0 ]; then
        echo "root"
        return 0
      fi
      column=3
      ;;
    name)
      if [ "$has_user" -eq 1 ]; then
        column=4
      else
        column=3
      fi
      ;;
    *) return 0 ;;
  esac

  cut -d'|' -f"$column" <<< "$record"
}

# Print the "user@host" SSH destination for a nodes.db record.
node_target() {
  local record="$1" login host
  login="$(node_field "$record" user)"
  host="$(node_field "$record" ip)"
  echo "${login}@${host}"
}

# Print the ssh options used for non-interactive calls as one
# space-separated string; callers expand it unquoted so it splits into words.
ssh_opts() {
  printf '%s %s %s %s %s\n' \
    "-o BatchMode=yes" \
    "-o ConnectTimeout=10" \
    "-o ServerAliveInterval=15" \
    "-o ServerAliveCountMax=2" \
    "-o StrictHostKeyChecking=accept-new"
}

# Run a command on a node over non-interactive SSH.
# Arguments: $1 - nodes.db record; remaining arguments - the remote command
# Returns:   0 on success, otherwise the exit status reported by ssh
# Only transport failures are retried (up to three attempts, waiting 2 s and
# then 4 s). ssh exits with 255 when the connection itself fails; any other
# non-zero status is the remote command's own result, and repeating a command
# that already ran - an update, for example - could apply it more than once.
ssh_exec() {
  local line="$1"; shift
  local port target rc
  local attempt=1 delay=2 max_attempts=3
  port="$(node_field "$line" port)"
  target="$(node_target "$line")"
  while :; do
    rc=0
    # shellcheck disable=SC2046 # ssh_opts is split into words on purpose
    ssh $(ssh_opts) -p "$port" "$target" "$@" || rc=$?
    if [ "$rc" -eq 0 ]; then
      return 0
    fi
    if [ "$rc" -ne 255 ] || [ "$attempt" -ge "$max_attempts" ]; then
      return "$rc"
    fi
    sleep "$delay"
    delay=$((delay * 2))
    attempt=$((attempt + 1))
  done
}

# Open a terminal-attached SSH session to a node.
# Arguments: $1 - nodes.db record; remaining arguments - optional remote command
ssh_interactive() {
  local record="$1"; shift
  local -a argv=(-t -o StrictHostKeyChecking=accept-new)
  argv+=(-p "$(node_field "$record" port)")
  argv+=("$(node_target "$record")")
  ssh "${argv[@]}" "$@"
}

# Run a command on a node with root privileges.
# Arguments: $1 - nodes.db record; remaining arguments - the command
# When the node's SSH user is root the arguments go to ssh_exec unchanged;
# for any other user they are joined, shell-quoted with printf %q and run
# through "sudo -n bash -lc".
remote_sudo() {
  local record="$1"; shift
  local login quoted
  login="$(node_field "$record" user)"
  case "$login" in
    root)
      ssh_exec "$record" "$@"
      ;;
    *)
      quoted="$(printf '%q' "$*")"
      ssh_exec "$record" "sudo -n bash -lc $quoted"
      ;;
  esac
}

# encrypt_secret TEXT - print TEXT encrypted with AES-256-CBC (PBKDF2,
# 100000 iterations, passphrase read from CRYPTO_KEY) as single-line base64.
# openssl's diagnostics are discarded.
encrypt_secret() {
  local plaintext="$1"
  local -a cipher_args=(
    enc -aes-256-cbc -salt -pbkdf2 -iter 100000
    -pass "file:$CRYPTO_KEY" -base64 -A
  )
  printf '%s' "$plaintext" | openssl "${cipher_args[@]}" 2>/dev/null
}

# decrypt_secret BASE64 - inverse of encrypt_secret; prints the plaintext.
decrypt_secret() {
  local ciphertext="$1"
  local -a cipher_args=(
    enc -d -aes-256-cbc -pbkdf2 -iter 100000
    -pass "file:$CRYPTO_KEY" -base64 -A
  )
  printf '%s' "$ciphertext" | openssl "${cipher_args[@]}" 2>/dev/null
}

# Replace root's known_hosts entry for a node with freshly scanned host keys.
# Arguments: $1 - host, $2 - SSH port
# Both the removal and the scan are best effort and never fail the caller.
# NOTE(review): the scanned keys are trusted as-is; compare fingerprints out
# of band where that matters.
known_host_bootstrap() {
  local host="$1" port="$2"
  local known_hosts="/root/.ssh/known_hosts"
  mkdir -p /root/.ssh
  touch "$known_hosts"
  chmod 600 "$known_hosts"
  # known_hosts stores a host on the default port as the bare name and only
  # uses the "[host]:port" form for other ports, so the bare entry has to be
  # removed too or a stale key for port 22 would survive.
  if [ "$port" = "22" ]; then
    ssh-keygen -R "$host" -f "$known_hosts" >/dev/null 2>&1 || true
  fi
  # -f pins the removal to the same file the scan appends to.
  ssh-keygen -R "[$host]:$port" -f "$known_hosts" >/dev/null 2>&1 || true
  ssh-keyscan -p "$port" -T 5 "$host" >> "$known_hosts" 2>/dev/null || true
}

# make_report NAME - ensure REPORT_DIR exists and print a fresh report path
# of the form "$REPORT_DIR/NAME_<timestamp>.log". The file is not created.
make_report() {
  mkdir -p "$REPORT_DIR"
  local label="$1" stamp
  stamp="$(timestamp)"
  printf '%s/%s_%s.log\n' "$REPORT_DIR" "$label" "$stamp"
}

# Archive config.json and erupe-ce on a node before a risky operation.
# Arguments: $1 - nodes.db record, $2 - reason tag (default "manual")
# Returns:   always 0; a snapshot problem is reported, not fatal
# The outcome is written to the audit log as "created" or "failed". The
# remote command no longer ends in "|| true", so a snapshot that could not be
# written is not recorded as created.
snapshot_node() {
  local line="$1" reason="${2:-manual}"
  local ip name dir remote_base archive
  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  dir="$(get_erupe_dir "$ip")"
  [ -n "$dir" ] || return 0
  remote_base="$dir/BACKUP/orchestrator_snapshots"
  archive="$remote_base/snapshot_${reason}_$(timestamp).tar.gz"
  # tar also exits non-zero when one of the two members is missing, so
  # "failed" covers an incomplete archive as well.
  if remote_sudo "$line" "mkdir -p '$remote_base' && tar -czf '$archive' -C '$dir' config.json erupe-ce 2>/dev/null"; then
    audit_log "SNAPSHOT" "$ip" "created" "node=$name reason=$reason"
  else
    echo "WARNING: snapshot failed or is incomplete on $name ($ip)" >&2
    audit_log "SNAPSHOT" "$ip" "failed" "node=$name reason=$reason"
  fi
  return 0
}

# Copy a node's config.json to a timestamped backup beside it and record the
# backup in BACKUP_INDEX.
# Arguments: $1 - nodes.db record
# Outputs:   stdout carries only the remote backup path, because callers
#            capture it; diagnostics go to stderr
# Returns:   0 when the backup exists; 1 when no path is configured or the
#            remote copy failed, in which case nothing is indexed or printed
backup_config_remote() {
  local line="$1"
  local ip dir conf bak size stamp
  ip="$(node_field "$line" ip)"
  dir="$(get_erupe_dir "$ip")"
  if [ -z "$dir" ]; then
    echo "ERROR: no Erupe path configured for $ip" >&2
    return 1
  fi
  conf="$dir/config.json"
  stamp="$(timestamp)"
  bak="${conf}.bak.${stamp}"
  # Remote output is diverted to stderr so it cannot end up inside the path
  # this function returns. -p keeps the owner and mode of the original.
  if ! remote_sudo "$line" "test -f '$conf' && cp -p '$conf' '$bak'" >&2; then
    echo "ERROR: could not back up $conf on $ip" >&2
    return 1
  fi
  # The size is informational only; it is read with the same privileges that
  # created the backup and falls back to 0.
  size="$(remote_sudo "$line" "stat -c%s '$bak' 2>/dev/null || echo 0" | tail -n 1)" || size=0
  echo "${ip}|${bak}|${size:-0}|${stamp}" >> "$BACKUP_INDEX"
  echo "$bak"
}

# Dump a database before a destructive operation.
# Arguments: $1 host, $2 port, $3 user, $4 password, $5 database,
#            $6 label naming the operation
# Outputs:   path of the dump on stdout
# Returns:   0 only when pg_dump succeeded and left a non-empty file;
#            otherwise 1 with nothing on stdout
# Callers capture the path with $(...), and bash switches errexit off inside
# command substitutions, so a pg_dump failure has to be turned into a return
# status here or the destructive step would run without a backup.
db_pre_backup() {
  local ip="$1" port="$2" user="$3" pass="$4" db="$5" label="$6"
  local dir out
  dir="$(get_backup_dir)/pre_destructive_${label}_${db}"
  out="$dir/${db}_$(timestamp).dump"
  # umask 077: the dump holds the full database contents.
  if ! (umask 077 && mkdir -p "$dir" \
    && PGPASSWORD="$pass" pg_dump -h "$ip" -p "$port" -U "$user" -Fc -d "$db" -f "$out") >&2; then
    rm -f -- "$out"
    echo "ERROR: pre-destructive backup of $db on $ip failed." >&2
    return 1
  fi
  if [ ! -s "$out" ]; then
    echo "ERROR: pre-destructive backup $out is empty." >&2
    return 1
  fi
  echo "$out"
}
LIBEOF
  chmod 755 /usr/local/lib/erupe-orchestrator-common
}

# Generate /usr/local/bin/node-list.
write_node_list() {
  cat > /usr/local/bin/node-list <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-list [GROUP]
# Print the registered nodes as a table. With GROUP, only nodes whose group
# tags include GROUP are shown; row numbers still refer to nodes.db lines.
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common

filter="${1:-}"
if [ ! -s "$NODE_DB" ]; then
  echo "No nodes registered."
  exit 0
fi

# host -> comma-separated group tags
declare -A groups
if [ -s "$GROUP_DB" ]; then
  while IFS='|' read -r ip g; do
    groups["$ip"]="$g"
  done < "$GROUP_DB"
fi

printf '%-4s | %-18s | %-6s | %-12s | %-24s | %s\n' "NO" "HOST" "PORT" "USER" "NAME" "GROUPS"
echo "------------------------------------------------------------------------------------------------"

n=0
while IFS= read -r line; do
  n=$((n + 1))
  ip="$(node_field "$line" ip)"
  g="${groups[$ip]:--}"
  # Commas are added on both sides so only whole tags can match the filter.
  [[ -z "$filter" || ",$g," == *",$filter,"* ]] || continue
  port="$(node_field "$line" port)"
  user="$(node_field "$line" user)"
  name="$(node_field "$line" name)"
  printf '%-4s | %-18s | %-6s | %-12s | %-24s | %s\n' "$n" "$ip" "$port" "$user" "$name" "$g"
done < "$NODE_DB"
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-list
}

# Generate /usr/local/bin/node-preflight.
write_preflight() {
  cat > /usr/local/bin/node-preflight <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-preflight
# Report which required tools exist on this master, print the configured
# paths, then probe every registered node over SSH and audit the result.
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common

echo "LOCAL PREFLIGHT"
echo "==============="
for cmd in ssh sshpass ssh-copy-id rsync psql pg_dump pg_restore jq openssl curl tar; do
  if command -v "$cmd" >/dev/null 2>&1; then
    echo "[OK] $cmd"
  else
    echo "[MISS] $cmd"
  fi
done
echo "Global path : $(get_global_path || true)"
echo "Backup path : $(get_backup_dir || true)"
echo "Node count  : $(node_line_count)"
echo "Audit log   : $AUDIT_LOG"
echo ""

[ -s "$NODE_DB" ] || exit 0
echo "REMOTE PREFLIGHT"
echo "================"
remote_probe='echo SSH_OK; command -v jq >/dev/null 2>&1 && echo jq=OK || echo jq=MISS; systemctl is-active erupe 2>/dev/null || true; df -h / | tail -1'
while IFS= read -r line; do
  [ -n "$line" ] || continue
  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  echo "--- $name ($ip) ---"
  # ssh reads its standard input, which inside this loop is the rest of
  # nodes.db; without the redirect the first probe swallows the remaining
  # records and only one node is ever checked.
  if ssh_exec "$line" "$remote_probe" </dev/null; then
    audit_log "PREFLIGHT" "$ip" "success" ""
  else
    audit_log "PREFLIGHT" "$ip" "failed" ""
  fi
done < "$NODE_DB"
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-preflight
}

# Generate /usr/local/bin/node-groups.
write_node_groups() {
  cat > /usr/local/bin/node-groups <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-groups
# Interactive menu for assigning comma-separated group tags to nodes.
# Assignments are stored in GROUP_DB as "host|tag1,tag2".
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root

# Drop every GROUP_DB row whose host column equals $1 exactly. The host is
# compared as a string, not used as a regular expression, so the dots of an
# IP address cannot match other hosts.
remove_group_entry() {
  local host="$1" tmp
  [ -s "$GROUP_DB" ] || return 0
  tmp="$(mktemp "${GROUP_DB}.XXXXXX")"
  awk -F'|' -v host="$host" '($1 "") != (host "")' "$GROUP_DB" > "$tmp"
  # Written back through redirection so the file keeps its inode and mode.
  cat "$tmp" > "$GROUP_DB"
  rm -f "$tmp"
}

while true; do
  echo "1. View groups"
  echo "2. Set groups for node"
  echo "3. Clear node groups"
  echo "4. List nodes in group"
  echo "5. Exit"
  read -r -p "Select: " opt
  case "$opt" in
    1)
      if [ -s "$GROUP_DB" ]; then
        cat "$GROUP_DB"
      else
        echo "No groups assigned."
      fi
      ;;
    2)
      node-list
      read -r -p "Node number: " n
      # Checked here so a typo returns to the menu instead of ending the
      # script through a failing lookup.
      if ! [[ "$n" =~ ^[1-9][0-9]*$ ]]; then
        echo "Invalid node."
        continue
      fi
      line="$(get_node_line "$n")"
      [ -n "$line" ] || { echo "Invalid node."; continue; }
      ip="$(node_field "$line" ip)"
      read -r -p "Groups comma-separated: " g
      g="$(echo "$g" | tr -d ' ' | sed 's/,,*/,/g; s/^,*//; s/,*$//')"
      require_groups "$g"
      remove_group_entry "$ip"
      if [ -n "$g" ]; then
        echo "$ip|$g" >> "$GROUP_DB"
      fi
      chmod 600 "$GROUP_DB"
      audit_log "GROUP_SET" "$ip" "success" "groups=$g"
      ;;
    3)
      read -r -p "Node host/IP: " ip
      require_host "$ip"
      remove_group_entry "$ip"
      audit_log "GROUP_CLEAR" "$ip" "success" ""
      ;;
    4)
      read -r -p "Group: " g
      require_groups "$g"
      node-list "$g"
      ;;
    5) exit 0 ;;
    *) echo "Unknown option: $opt" ;;
  esac
done
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-groups
}

# Generate /usr/local/bin/node-health.
write_node_health() {
  cat > /usr/local/bin/node-health <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-health [GROUP] [REFRESH_SECONDS]
# Print service state, CPU, RAM, disk and load for every node, or only for
# nodes tagged GROUP. A REFRESH_SECONDS above 0 redraws the table forever.
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common

filter="${1:-}"
refresh="${2:-0}"
[[ "$refresh" =~ ^[0-9]+$ ]] || die "Refresh interval must be a whole number of seconds."

# Remote one-liner that prints "service|cpu|ram|disk|load" on a single line.
# The service state is captured without "|| echo unknown": is-active prints
# its state and exits non-zero for a stopped unit, so the fallback used to add
# a second line and the metrics of stopped nodes were lost.
probe='cpu=$(top -bn1 | awk -F"[, ]+" "/Cpu\\(s\\)/{print 100-\$8}" 2>/dev/null || echo "?"); ram=$(free | awk "/^Mem:/{printf \"%.0f\", \$3/\$2*100}" 2>/dev/null || echo "?"); disk=$(df / | awk "END{print \$5}" | tr -d "%"); svc=$(systemctl is-active erupe 2>/dev/null); svc=${svc:-unknown}; load=$(uptime | awk -F"load average: " "{print \$2}" 2>/dev/null || echo "?"); echo "$svc|$cpu|$ram|$disk|$load"'

display_once() {
  local line ip name g data svc cpu ram disk loadavg
  clear || true
  echo "CLUSTER HEALTH $(date '+%F %T')"
  printf '%-22s | %-18s | %-10s | %-8s | %-8s | %-8s | %s\n' "NODE" "HOST" "ERUPE" "CPU" "RAM" "DISK" "LOAD"
  echo "------------------------------------------------------------------------------------------------"
  while IFS= read -r line; do
    [ -n "$line" ] || continue
    ip="$(node_field "$line" ip)"
    name="$(node_field "$line" name)"
    if [ -n "$filter" ]; then
      # Exact string match on the host column; the last entry wins.
      g="$(awk -F'|' -v host="$ip" '($1 "") == (host "") { v = substr($0, length($1) + 2) } END { print v }' "$GROUP_DB" 2>/dev/null || true)"
      [[ ",$g," == *",$filter,"* ]] || continue
    fi
    # stdin comes from /dev/null: ssh would otherwise read the rest of
    # nodes.db from the loop's input and the table would stop after one row.
    data="$(ssh_exec "$line" "$probe" </dev/null 2>/dev/null || echo "unreachable|?|?|?|?")"
    IFS='|' read -r svc cpu ram disk loadavg <<< "$data"
    printf '%-22s | %-18s | %-10s | %-8s | %-8s | %-8s | %s\n' "$name" "$ip" "$svc" "$cpu%" "$ram%" "$disk%" "$loadavg"
  done < "$NODE_DB"
}

if [ "$refresh" -gt 0 ]; then
  while true; do display_once; sleep "$refresh"; done
else
  display_once
fi
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-health
}

# Generate /usr/local/bin/node-audit and /usr/local/bin/node-notify.
write_audit_notify() {
  cat > /usr/local/bin/node-audit <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-audit
# Menu for viewing, filtering, archiving and protecting the audit log.
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common

# set_append_only FLAG LABEL - apply chattr FLAG (+a or -a) to the audit log.
# A missing chattr binary and a failing chattr call are reported separately,
# so a refused change is not described as "chattr unavailable".
set_append_only() {
  local flag="$1" label="$2"
  if ! command -v chattr >/dev/null 2>&1; then
    echo "chattr unavailable"
    return 0
  fi
  if chattr "$flag" "$AUDIT_LOG"; then
    echo "append-only $label"
  else
    echo "chattr $flag failed on $AUDIT_LOG" >&2
    return 1
  fi
}

touch "$AUDIT_LOG"
echo "1. Last 80 entries"
echo "2. Filter keyword"
echo "3. Today"
echo "4. Statistics"
echo "5. Live tail"
echo "6. Rotate/archive log"
echo "7. Enable append-only audit log (chattr +a)"
echo "8. Disable append-only audit log (chattr -a)"
read -r -p "Select: " opt
case "$opt" in
  1) tail -n 80 "$AUDIT_LOG" ;;
  2) read -r -p "Keyword: " kw; grep -- "$kw" "$AUDIT_LOG" | tail -n 150 || true ;;
  3) grep "^\\[$(date +%F)" "$AUDIT_LOG" | tail -n 150 || true ;;
  4) awk -F'action=' '{print $2}' "$AUDIT_LOG" | awk '{print $1}' | sort | uniq -c | sort -rn | head -25 ;;
  5) tail -f "$AUDIT_LOG" ;;
  6)
    confirm_word "Archive current audit log?" || exit 0
    out="$ORCH_ROOT/audit_$(timestamp).log"
    cp "$AUDIT_LOG" "$out"
    # Truncation is refused while the append-only attribute is set; say so
    # instead of ending with a bare shell error after the copy was made.
    if ! : > "$AUDIT_LOG"; then
      die "Copied to $out but could not empty $AUDIT_LOG (append-only? use option 8 first)."
    fi
    echo "Archived to $out"
    ;;
  7) set_append_only +a "enabled" ;;
  8) set_append_only -a "disabled" ;;
  *) echo "Unknown option: $opt" >&2; exit 1 ;;
esac
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-audit

  cat > /usr/local/bin/node-notify <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-notify
# Menu for configuring and testing the webhook used by notify_send.
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common

echo "1. Set webhook URL"
echo "2. Test notification"
echo "3. View config"
echo "4. Disable"
read -r -p "Select: " opt
case "$opt" in
  1)
    read -r -p "Webhook URL: " url
    [[ "$url" =~ ^https?://[^[:space:]]+$ ]] || die "Webhook URL must start with http:// or https:// and contain no spaces."
    # The mode is tightened before the URL is written, so the secret is never
    # stored in a file that is still world-readable.
    touch "$NOTIFY_CONF"
    chmod 600 "$NOTIFY_CONF"
    printf 'WEBHOOK_URL=%s\n' "$url" > "$NOTIFY_CONF"
    ;;
  2)
    if ! grep -q '^WEBHOOK_URL=.' "$NOTIFY_CONF" 2>/dev/null; then
      echo "No webhook URL configured."
      exit 0
    fi
    notify_send "info" "Test notification from Master Orchestration v$ORCH_VERSION"
    # notify_send swallows delivery errors, so success cannot be claimed here.
    echo "Test notification sent (delivery is not confirmed)."
    ;;
  3) sed 's/\(WEBHOOK_URL=.\{20\}\).*/\1...redacted/' "$NOTIFY_CONF" 2>/dev/null || true ;;
  4) : > "$NOTIFY_CONF" ;;
  *) echo "Unknown option: $opt" >&2; exit 1 ;;
esac
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-notify
}

# Generate /usr/local/bin/node-registry.
write_registry() {
  cat > /usr/local/bin/node-registry <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-registry
# Export the registry files to a tar.gz bundle, or import such a bundle.
# NOTE(review): databases.db holds passwords encrypted with this master's
# key file; a bundle imported on another master presumably needs that key
# too - confirm before relying on it for migration.
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root

# The only file names a bundle may contain.
registry_files=(nodes.db node_groups.db databases.db paths.conf config_backups.db notify.conf)

echo "1. Export registry"
echo "2. Import registry"
read -r -p "Select: " opt
case "$opt" in
  1)
    out="$ORCH_ROOT/registry_export_$(timestamp).tar.gz"
    members=()
    for f in "${registry_files[@]}"; do
      if [ -f "$ORCH_ROOT/$f" ]; then
        members+=("$f")
      fi
    done
    [ "${#members[@]}" -gt 0 ] || die "No registry files to export."
    # The bundle is created under umask 077 and a tar failure is reported
    # rather than followed by a success message.
    if ! (umask 077 && tar -czf "$out" -C "$ORCH_ROOT" "${members[@]}"); then
      rm -f "$out"
      audit_log "REGISTRY_EXPORT" "local" "failed" "file=$out"
      die "Export failed."
    fi
    chmod 600 "$out"
    echo "Exported: $out"
    audit_log "REGISTRY_EXPORT" "local" "success" "file=$out"
    ;;
  2)
    read -r -p "Export file path: " file
    [ -f "$file" ] || die "File not found."
    # Every member is checked against the known registry names before
    # anything is unpacked, so a crafted archive cannot drop other files into
    # the orchestrator root.
    listing="$(tar -tzf "$file")" || die "Not a readable tar.gz archive."
    while IFS= read -r member; do
      member="${member#./}"
      case "$member" in
        nodes.db | node_groups.db | databases.db | paths.conf | config_backups.db | notify.conf) ;;
        *) die "Unexpected entry in archive: $member" ;;
      esac
    done <<< "$listing"
    confirm_word "Import will overwrite local registry files." || exit 0
    tar -xzf "$file" -C "$ORCH_ROOT" --no-same-owner
    # A registry file must be a regular file; a symlink smuggled in under a
    # valid name is removed and the import is rejected.
    for f in "${registry_files[@]}"; do
      if [ -L "$ORCH_ROOT/$f" ]; then
        rm -f "$ORCH_ROOT/$f"
        die "Archive contained a symlink for $f; import rejected."
      fi
    done
    # notify.conf stores the webhook URL and is kept private like the rest.
    chmod 600 "$NODE_DB" "$GROUP_DB" "$PG_DB" "$BACKUP_INDEX" "$NOTIFY_CONF" 2>/dev/null || true
    chmod 644 "$PATH_CONF" 2>/dev/null || true
    audit_log "REGISTRY_IMPORT" "local" "success" "file=$file"
    ;;
  *) echo "Unknown option: $opt" >&2; exit 1 ;;
esac
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-registry
}

# Generate /usr/local/bin/node-editconfig.
write_config_editor() {
  cat > /usr/local/bin/node-editconfig <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-editconfig
# Edit, view or roll back config.json on one node, a group, or all nodes.
# Each edit is preceded by a remote backup, followed by a JSON check, and
# undone when the edit command fails or the result is not valid JSON.
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root
check_path_setup
with_lock "config-edit"

[ -s "$NODE_DB" ] || die "No nodes registered."

# Show the node table, read a row number and store that record in $line.
# Dies on anything that is not the number of an existing row.
choose_node() {
  node-list
  read -r -p "Node number: " n
  if ! [[ "$n" =~ ^[1-9][0-9]*$ ]]; then
    die "Invalid node"
  fi
  line="$(get_node_line "$n")"
  [ -n "$line" ] || die "Invalid node"
}

# Fill the TARGETS array with the nodes.db records to act on.
select_targets() {
  TARGETS=()
  echo "1. One node"
  echo "2. All nodes"
  echo "3. Group"
  read -r -p "Target: " mode
  case "$mode" in
    1)
      choose_node
      TARGETS+=("$line")
      ;;
    2)
      confirm_word "Edit config.json on ALL nodes." || exit 0
      while IFS= read -r line; do
        if [ -n "$line" ]; then TARGETS+=("$line"); fi
      done < "$NODE_DB"
      ;;
    3)
      read -r -p "Group: " g
      # An empty group would match every node that has no tags at all.
      [ -n "$g" ] || die "Group is required."
      while IFS= read -r line; do
        [ -n "$line" ] || continue
        ip="$(node_field "$line" ip)"
        gs="$(grep "^${ip}|" "$GROUP_DB" 2>/dev/null | cut -d'|' -f2- || true)"
        if [[ ",$gs," == *",$g,"* ]]; then TARGETS+=("$line"); fi
      done < "$NODE_DB"
      ;;
    *) die "Invalid target" ;;
  esac
  [ "${#TARGETS[@]}" -gt 0 ] || die "No target nodes."
}

# Restore config.json on one node from a backup path typed by the operator.
rollback_menu() {
  choose_node
  ip="$(node_field "$line" ip)"
  grep "^${ip}|" "$BACKUP_INDEX" 2>/dev/null | tail -n 15 | nl -w2 -s'. ' || true
  read -r -p "Remote backup full path: " backup
  require_abs_path "$backup"
  dir="$(get_erupe_dir "$ip")"
  conf="$dir/config.json"
  confirm_word "Rollback $ip config.json from $backup?" || exit 0
  # The backup is validated before it replaces the live file, so an unusable
  # backup never overwrites a working config.
  remote_sudo "$line" "python3 -m json.tool '$backup' >/dev/null && cp '$conf' '${conf}.pre-rollback.$(timestamp)' && cp '$backup' '$conf'"
  remote_sudo "$line" "systemctl restart erupe"
  audit_log "CONFIG_ROLLBACK" "$ip" "success" "backup=$backup"
}

echo "1. jq path set"
echo "2. Default Active Feature"
echo "3. All Active Feature"
echo "4. Literal find/replace"
echo "5. View config"
echo "6. Rollback"
read -r -p "Action: " action
# The action is checked before any target is selected or confirmed.
case "$action" in
  1 | 2 | 3 | 4 | 5) ;;
  6)
    rollback_menu
    exit 0
    ;;
  *) die "Invalid action" ;;
esac

select_targets
case "$action" in
  1)
    read -r -p "JSON path, dot form (example: Server.Log.Level): " jpath
    [[ "$jpath" =~ ^[A-Za-z0-9_.-]+$ ]] || die "Invalid JSON path."
    read -r -p "JSON value (example: 123, true, \"text\"): " jval
    # Rejected locally so a malformed value never reaches a node.
    jq -n --argjson v "$jval" '$v' >/dev/null 2>&1 || die "Value is not valid JSON."
    jpath64="$(printf '%s' "$jpath" | base64 -w0)"
    jval64="$(printf '%s' "$jval" | base64 -w0)"
    # The remote function returns jq's status instead of rm's, so a failed
    # edit is not reported as a success.
    edit_cmd='edit_json(){ P=$(printf %s '"$jpath64"' | base64 -d); V=$(printf %s '"$jval64"' | base64 -d); F="$1"; T=$(mktemp) || return 1; rc=0; jq --arg p "$P" --argjson v "$V" '\''setpath(($p|split(".")); $v)'\'' "$F" > "$T" && cat "$T" > "$F" || rc=$?; rm -f "$T"; return $rc; }; edit_json'
    ;;
  2) edit_cmd='edit_json(){ sed -i -E "s|\"MinFeatureWeapons\"[[:space:]]*:[[:space:]]*[0-9]+|\"MinFeatureWeapons\": 3|; s|\"MaxFeatureWeapons\"[[:space:]]*:[[:space:]]*[0-9]+|\"MaxFeatureWeapons\": 4|" "$1"; }; edit_json' ;;
  3) edit_cmd='edit_json(){ sed -i -E "s|\"MinFeatureWeapons\"[[:space:]]*:[[:space:]]*[0-9]+|\"MinFeatureWeapons\": 14|; s|\"MaxFeatureWeapons\"[[:space:]]*:[[:space:]]*[0-9]+|\"MaxFeatureWeapons\": 14|" "$1"; }; edit_json' ;;
  4)
    read -r -p "Find: " find
    [ -n "$find" ] || die "Find text must not be empty."
    read -r -p "Replace: " repl
    find64="$(printf '%s' "$find" | base64 -w0)"
    repl64="$(printf '%s' "$repl" | base64 -w0)"
    # Both strings reach perl through the environment and are read as
    # $ENV{...}, so they are data: a '/' or regex syntax in either one cannot
    # change the substitution or inject Perl code.
    edit_cmd='edit_json(){ FND=$(printf %s '"$find64"' | base64 -d) REP=$(printf %s '"$repl64"' | base64 -d) perl -0777 -pi -e '\''s/\Q$ENV{FND}\E/$ENV{REP}/g'\'' "$1"; }; edit_json'
    ;;
  5)
    for line in "${TARGETS[@]}"; do
      ip="$(node_field "$line" ip)"
      conf="$(get_erupe_dir "$ip")/config.json"
      echo "===== $ip:$conf ====="
      ssh_exec "$line" "cat '$conf'" || echo "Could not read $conf on $ip"
    done
    exit 0
    ;;
esac

read -r -p "Dry-run? [Y/n]: " dry
dry="${dry:-Y}"
for line in "${TARGETS[@]}"; do
  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  conf="$(get_erupe_dir "$ip")/config.json"
  echo "Target: $name ($ip) $conf"
  if [[ "${dry,,}" != "n" ]]; then
    echo "Would backup and run: $edit_cmd '$conf'"
    continue
  fi
  # No backup, no edit.
  if ! bak="$(backup_config_remote "$line")" || [ -z "$bak" ]; then
    echo "Backup failed on $name, skipping."
    audit_log "CONFIG_EDIT" "$ip" "backup_failed" ""
    continue
  fi
  if ! remote_sudo "$line" "$edit_cmd '$conf'"; then
    echo "Edit command failed, restoring backup."
    remote_sudo "$line" "cp '$bak' '$conf'" || echo "WARNING: could not restore $bak on $name"
    audit_log "CONFIG_EDIT" "$ip" "failed_rollback" "backup=$bak"
    continue
  fi
  if remote_sudo "$line" "python3 -m json.tool '$conf' >/dev/null"; then
    echo "JSON valid."
    audit_log "CONFIG_EDIT" "$ip" "success" "backup=$bak"
  else
    echo "Invalid JSON, rolling back."
    remote_sudo "$line" "cp '$bak' '$conf'" || echo "WARNING: could not restore $bak on $name"
    audit_log "CONFIG_EDIT" "$ip" "failed_rollback" "backup=$bak"
    continue
  fi
  read -r -p "Restart erupe on $name? [y/N]: " r
  if [[ "${r,,}" = "y" ]]; then
    if remote_sudo "$line" "systemctl restart erupe"; then
      sleep 3
      remote_sudo "$line" "systemctl is-active erupe" || echo "WARNING: erupe is not active on $name"
    else
      echo "WARNING: restart failed on $name"
    fi
  fi
done
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-editconfig
}

# Generate /usr/local/bin/node-update and /usr/local/bin/node-updateall.
write_update_tools() {
  cat > /usr/local/bin/node-update <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-update
# Run /usr/local/bin/erupe-update on operator-selected nodes. Each node is
# snapshotted first; results go to the console, a report file and the audit
# log.
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root
check_path_setup
with_lock "node-update"

# Prompt answers are spliced into a single-quoted remote printf format, so
# only characters that cannot end the quoting or act as a format directive
# are accepted. An empty answer is allowed and passed through as an empty line.
require_answer() {
  [[ "$2" =~ ^[A-Za-z0-9._-]*$ ]] || die "Invalid value for $1. Use A-Z a-z 0-9 . _ - only."
}

require_log_level() {
  [[ "$1" =~ ^[1-5]$ ]] || die "Log level must be between 1 and 5."
}

# Fill TARGETS from a space-separated list of node numbers.
select_nodes() {
  TARGETS=()
  node-list
  read -r -p "Node numbers separated by space: " nums
  for n in $nums; do
    [[ "$n" =~ ^[1-9][0-9]*$ ]] || continue
    line="$(get_node_line "$n" 2>/dev/null || true)"
    if [ -n "$line" ]; then TARGETS+=("$line"); fi
  done
  [ "${#TARGETS[@]}" -gt 0 ] || die "No valid nodes selected."
}

# Build PAYLOAD, the remote command that feeds the answers to erupe-update.
build_payload() {
  read -r -p "Mode [1=Express, 2=Manual, 3=Base]: " mode
  mode="${mode:-1}"
  case "$mode" in
    1) PAYLOAD="printf '1\n' | /usr/local/bin/erupe-update" ;;
    2)
      read -r -p "RAM percent: " ram; require_answer "RAM percent" "$ram"
      read -r -p "GOGC: " gogc; require_answer "GOGC" "$gogc"
      read -r -p "Log level [1-5 default 4]: " ll; ll="${ll:-4}"; require_log_level "$ll"
      read -r -p "Rebuild? [y/N]: " rb; require_answer "Rebuild" "$rb"
      read -r -p "Save logs? [y/N]: " ls; require_answer "Save logs" "$ls"
      PAYLOAD="printf '2\n${ram}\n${gogc}\n${ll}\n${rb}\n${ls}\n' | /usr/local/bin/erupe-update"
      ;;
    3)
      read -r -p "Log level [1-5 default 4]: " ll; ll="${ll:-4}"; require_log_level "$ll"
      read -r -p "Rebuild? [y/N]: " rb; require_answer "Rebuild" "$rb"
      read -r -p "Save logs? [y/N]: " ls; require_answer "Save logs" "$ls"
      PAYLOAD="printf '3\n${ll}\n${rb}\n${ls}\n' | /usr/local/bin/erupe-update"
      ;;
    *) die "Invalid mode" ;;
  esac
}

select_nodes
build_payload
read -r -p "Dry-run? [Y/n]: " dry; dry="${dry:-Y}"
report="$(make_report update)"
success=0; failed=0

for line in "${TARGETS[@]}"; do
  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  echo "=== $name ($ip) ===" | tee -a "$report"
  if [[ "${dry,,}" != "n" ]]; then
    echo "DRY-RUN: $PAYLOAD" | tee -a "$report"
    continue
  fi
  snapshot_node "$line" "update"
  if remote_sudo "$line" "$PAYLOAD"; then
    sleep 3
    # --quiet makes the exit status the answer. Searching the output for
    # "active" also matched "inactive", so a dead service passed the check.
    if remote_sudo "$line" "systemctl is-active --quiet erupe"; then
      echo "OK active" | tee -a "$report"
      success=$((success + 1))
      audit_log "NODE_UPDATE" "$ip" "success" ""
    else
      echo "FAILED service inactive" | tee -a "$report"
      failed=$((failed + 1))
      audit_log "NODE_UPDATE" "$ip" "service_inactive" ""
    fi
  else
    echo "FAILED command" | tee -a "$report"
    failed=$((failed + 1))
    audit_log "NODE_UPDATE" "$ip" "failed" ""
  fi
done
echo "Report: $report"
echo "Success=$success Failed=$failed" | tee -a "$report"
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-update

  cat > /usr/local/bin/node-updateall <<'SCRIPT_EOF'
#!/usr/bin/env bash
# node-updateall
# Rolling update of every registered node, one after another. With the canary
# option the first node is updated alone and the rollout continues only after
# it succeeded and the operator confirmed. The rollout also stops once the
# number of failed nodes reaches the chosen threshold.
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root
check_path_setup
with_lock "node-updateall"

# Prompt answers are spliced into a single-quoted remote printf format, so
# only characters that cannot end the quoting or act as a format directive
# are accepted. An empty answer is allowed and passed through as an empty line.
require_answer() {
  [[ "$2" =~ ^[A-Za-z0-9._-]*$ ]] || die "Invalid value for $1. Use A-Z a-z 0-9 . _ - only."
}

require_log_level() {
  [[ "$1" =~ ^[1-5]$ ]] || die "Log level must be between 1 and 5."
}

[ -s "$NODE_DB" ] || die "No nodes registered."
read -r -p "Canary first? [Y/n]: " canary; canary="${canary:-Y}"
read -r -p "Failure threshold before stopping [default 1]: " threshold; threshold="${threshold:-1}"
[[ "$threshold" =~ ^[0-9]+$ ]] || threshold=1
read -r -p "Dry-run? [Y/n]: " dry; dry="${dry:-Y}"

read -r -p "Mode [1=Express, 2=Manual, 3=Base]: " mode
mode="${mode:-1}"
case "$mode" in
  1) PAYLOAD="printf '1\n' | /usr/local/bin/erupe-update" ;;
  2)
    read -r -p "RAM percent: " ram; require_answer "RAM percent" "$ram"
    read -r -p "GOGC: " gogc; require_answer "GOGC" "$gogc"
    read -r -p "Log level [1-5 default 4]: " ll; ll="${ll:-4}"; require_log_level "$ll"
    read -r -p "Rebuild? [y/N]: " rb; require_answer "Rebuild" "$rb"
    read -r -p "Save logs? [y/N]: " ls; require_answer "Save logs" "$ls"
    PAYLOAD="printf '2\n${ram}\n${gogc}\n${ll}\n${rb}\n${ls}\n' | /usr/local/bin/erupe-update"
    ;;
  3)
    read -r -p "Log level [1-5 default 4]: " ll; ll="${ll:-4}"; require_log_level "$ll"
    read -r -p "Rebuild? [y/N]: " rb; require_answer "Rebuild" "$rb"
    read -r -p "Save logs? [y/N]: " ls; require_answer "Save logs" "$ls"
    PAYLOAD="printf '3\n${ll}\n${rb}\n${ls}\n' | /usr/local/bin/erupe-update"
    ;;
  *) die "Invalid mode" ;;
esac

# Blank lines in nodes.db are not nodes.
targets=()
while IFS= read -r line; do
  if [ -n "$line" ]; then targets+=("$line"); fi
done < "$NODE_DB"
[ "${#targets[@]}" -gt 0 ] || die "No nodes registered."

use_canary=0
if [[ "${canary,,}" != "n" ]] && [ "${#targets[@]}" -gt 1 ]; then
  echo "Canary node:"
  line="${targets[0]}"
  echo "$(node_field "$line" name) ($(node_field "$line" ip))"
  confirm_word "Run canary update first?" || exit 0
  use_canary=1
else
  confirm_word "Rolling update ALL nodes?" || exit 0
fi

report="$(make_report updateall)"
failed=0; success=0; idx=0
for line in "${targets[@]}"; do
  idx=$((idx + 1))
  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  echo "=== [$idx/${#targets[@]}] $name ($ip) ===" | tee -a "$report"
  if [[ "${dry,,}" != "n" ]]; then
    echo "DRY-RUN: $PAYLOAD" | tee -a "$report"
    continue
  fi
  snapshot_node "$line" "rolling_update"
  # --quiet makes the exit status the answer. Searching the output for
  # "active" also matched "inactive", so a dead service passed the check.
  if remote_sudo "$line" "$PAYLOAD" && sleep 3 && remote_sudo "$line" "systemctl is-active --quiet erupe"; then
    echo "OK active" | tee -a "$report"
    success=$((success + 1))
    audit_log "ROLLING_UPDATE" "$ip" "success" ""
  else
    echo "FAILED" | tee -a "$report"
    failed=$((failed + 1))
    audit_log "ROLLING_UPDATE" "$ip" "failed" ""
    notify_send "error" "Rolling update failed on $name ($ip)"
    # A failed canary ends the rollout whatever the threshold is.
    if [ "$use_canary" -eq 1 ] && [ "$idx" -eq 1 ]; then
      echo "Canary failed. Stopping rollout." | tee -a "$report"
      break
    fi
    if [ "$failed" -ge "$threshold" ]; then
      echo "Failure threshold reached. Stopping rollout." | tee -a "$report"
      break
    fi
    continue
  fi
  # After a good canary the operator decides whether the rest follows.
  if [ "$use_canary" -eq 1 ] && [ "$idx" -eq 1 ]; then
    if ! confirm_word "Canary succeeded. Continue with the remaining $(( ${#targets[@]} - 1 )) node(s)?"; then
      echo "Rollout stopped after canary." | tee -a "$report"
      break
    fi
  fi
done
echo "Report: $report"
echo "Success=$success Failed=$failed" | tee -a "$report"
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-updateall
}

write_node_db() {
  cat > /usr/local/bin/node-db <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root

show_db_list() {
  [ -s "$PG_DB" ] || { echo "No DB registered."; return; }
  printf '%-4s | %-18s | %-6s | %-12s | %-18s | %s\n' "NO" "HOST" "PORT" "USER" "DB" "ALIAS"
  nl -w1 -s'|' "$PG_DB" | while IFS='|' read -r n ip port user enc db alias; do
    printf '%-4s | %-18s | %-6s | %-12s | %-18s | %s\n' "$n" "$ip" "$port" "$user" "$db" "$alias"
  done
}

add_db() {
  read -r -p "DB host: " ip; require_host "$ip"
  read -r -p "Port [5432]: " port; port="${port:-5432}"; require_port "$port"
  read -r -p "User [postgres]: " user; user="${user:-postgres}"; require_user "$user"
  read -r -p "Database [erupe]: " db; db="${db:-erupe}"; require_name "$db"
  read -r -p "Alias: " alias; alias="${alias:-$ip}"; require_name "$alias"
  read -r -s -p "Password: " pass; echo
  if PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -c '\q' >/dev/null 2>&1; then
    enc="$(encrypt_secret "$pass")"
    echo "$ip|$port|$user|$enc|$db|$alias" >> "$PG_DB"
    chmod 600 "$PG_DB"
    audit_log "DB_ADD" "$ip" "success" "alias=$alias"
  else
    echo "Connection failed."
    audit_log "DB_ADD" "$ip" "failed" ""
  fi
}

# Loads a dump file into a database, picking the loader from the file itself.
# `pg_restore -l` only succeeds on archive-format dumps, so it decides up
# front between pg_restore and psql. (Falling back to psql only after a
# failed pg_restore would feed a binary archive to psql as SQL text.)
# Arguments: $1 host, $2 port, $3 user, $4 password, $5 database, $6 dump file
# Returns:   exit status of the loader that was run
_db_load_dump() {
  local host="$1" port="$2" user="$3" pass="$4" db="$5" file="$6"
  if pg_restore -l "$file" >/dev/null 2>&1; then
    PGPASSWORD="$pass" pg_restore -h "$host" -p "$port" -U "$user" -d "$db" \
      --no-owner --no-privileges "$file"
  else
    PGPASSWORD="$pass" psql -h "$host" -p "$port" -U "$user" -d "$db" -f "$file"
  fi
}

# Prints the schema that owns a table, or nothing when the table is absent.
# Arguments: $1 host, $2 port, $3 user, $4 password, $5 database, $6 table
# Returns:   psql's exit status
_db_table_schema() {
  local host="$1" port="$2" user="$3" pass="$4" db="$5" table="$6"
  PGPASSWORD="$pass" psql -h "$host" -p "$port" -U "$user" -d "$db" -A -t \
    -c "SELECT schemaname FROM pg_tables WHERE tablename='$table' LIMIT 1;"
}

# Interactive operations menu for one registered database.
# Asks for a registry line number, decrypts the stored password and then
# loops over the menu until "Back" is chosen. A failing operation is reported
# (and audited as "failed" where an audit entry exists) and the menu stays
# open, instead of errexit ending the whole tool.
# Globals: PG_DB (read)
manage_db() {
  local n line ip port user enc db alias pass opt
  local dir out pre file rot bdir schema rights rc
  local -a conn

  show_db_list
  read -r -p "DB number: " n
  # The value is spliced into a sed script, so accept a plain line number only.
  [[ "$n" =~ ^[1-9][0-9]*$ ]] || die "Invalid DB."
  line="$(sed -n "${n}p" "$PG_DB")"
  [ -n "$line" ] || die "Invalid DB."
  IFS='|' read -r ip port user enc db alias <<< "$line"
  pass="$(decrypt_secret "$enc")" || pass=""
  [ -n "$pass" ] || die "Could not decrypt password."
  # Connection arguments shared by every psql/pg_dump call below.
  conn=(-h "$ip" -p "$port" -U "$user" -d "$db")

  while true; do
    echo "DB: $alias ($ip:$port/$db)"
    echo "1. psql shell"
    echo "2. Backup"
    echo "3. Restore"
    echo "4. Rotation 01"
    echo "5. Rotation 02"
    echo "6. Truncate feature_weapon"
    echo "7. Update users.rights"
    echo "8. REINDEX + VACUUM"
    echo "9. Stats"
    echo "10. Back"
    read -r -p "Select: " opt
    case "$opt" in
      1)
        PGPASSWORD="$pass" psql "${conn[@]}" || echo "psql exited with an error."
        ;;
      2)
        dir="$(get_backup_dir)/BackupDB_${alias}_${db}"
        mkdir -p "$dir"
        out="$dir/${db}_$(timestamp).dump"
        if PGPASSWORD="$pass" pg_dump "${conn[@]}" -Fc -f "$out"; then
          echo "Saved: $out"
          audit_log "DB_BACKUP" "$ip" "success" "file=$out"
        else
          echo "Backup failed."
          audit_log "DB_BACKUP" "$ip" "failed" "file=$out"
        fi
        ;;
      3)
        # Validate the input first so a typo does not cost a full pre-backup.
        read -r -p "Restore file: " file
        [ -f "$file" ] || { echo "File not found."; continue; }
        pre="$(db_pre_backup "$ip" "$port" "$user" "$pass" "$db" "restore")" \
          || { echo "Pre-backup failed; nothing was changed."; continue; }
        echo "Pre-restore backup: $pre"
        confirm_word "Restore into $db?" || continue
        if _db_load_dump "$ip" "$port" "$user" "$pass" "$db" "$file"; then
          audit_log "DB_RESTORE" "$ip" "success" "file=$file pre=$pre"
        else
          echo "Restore failed."
          audit_log "DB_RESTORE" "$ip" "failed" "file=$file pre=$pre"
        fi
        ;;
      4|5)
        rot="0$((opt-3))"
        bdir="$(get_backup_dir)"
        # -quit stops at the first hit; an unreadable or missing backup
        # directory simply ends up in the "not found" branch below.
        file="$(find "$bdir" -maxdepth 2 -type f -name "shop_items_rotation${rot}.*" -print -quit 2>/dev/null)" || true
        [ -n "$file" ] || { echo "Rotation file not found in $bdir"; continue; }
        pre="$(db_pre_backup "$ip" "$port" "$user" "$pass" "$db" "rotation${rot}")" \
          || { echo "Pre-backup failed; nothing was changed."; continue; }
        confirm_word "TRUNCATE shop_items and restore rotation ${rot}? Pre-backup: $pre" || continue
        # TRUNCATE has no IF EXISTS clause, so look the table up first and
        # skip the truncate when it does not exist yet.
        rc=0
        schema="$(_db_table_schema "$ip" "$port" "$user" "$pass" "$db" "shop_items")" || rc=1
        if [ "$rc" -eq 0 ] && [ -n "$schema" ]; then
          PGPASSWORD="$pass" psql "${conn[@]}" -c "TRUNCATE TABLE \"$schema\".shop_items CASCADE;" || rc=1
        fi
        if [ "$rc" -eq 0 ] && _db_load_dump "$ip" "$port" "$user" "$pass" "$db" "$file"; then
          audit_log "DB_ROTATION" "$ip" "success" "rotation=$rot pre=$pre"
        else
          echo "Rotation failed."
          audit_log "DB_ROTATION" "$ip" "failed" "rotation=$rot pre=$pre"
        fi
        ;;
      6)
        pre="$(db_pre_backup "$ip" "$port" "$user" "$pass" "$db" "truncate_feature_weapon")" \
          || { echo "Pre-backup failed; nothing was changed."; continue; }
        confirm_word "TRUNCATE feature_weapon? Pre-backup: $pre" || continue
        schema="$(_db_table_schema "$ip" "$port" "$user" "$pass" "$db" "feature_weapon")" || schema=""
        # Explicit branches: a failed TRUNCATE must not be reported as "not found".
        if [ -z "$schema" ]; then
          echo "feature_weapon not found"
        elif PGPASSWORD="$pass" psql "${conn[@]}" -c "TRUNCATE TABLE \"$schema\".feature_weapon;"; then
          audit_log "DB_TRUNCATE" "$ip" "success" "table=feature_weapon pre=$pre"
        else
          echo "TRUNCATE feature_weapon failed."
          audit_log "DB_TRUNCATE" "$ip" "failed" "table=feature_weapon pre=$pre"
        fi
        ;;
      7)
        read -r -p "New rights numeric value: " rights
        [[ "$rights" =~ ^[0-9]+$ ]] || { echo "Invalid number"; continue; }
        pre="$(db_pre_backup "$ip" "$port" "$user" "$pass" "$db" "update_rights")" \
          || { echo "Pre-backup failed; nothing was changed."; continue; }
        confirm_word "Update all users.rights to $rights? Pre-backup: $pre" || continue
        schema="$(_db_table_schema "$ip" "$port" "$user" "$pass" "$db" "users")" || schema=""
        if [ -z "$schema" ]; then
          echo "users not found"
        elif PGPASSWORD="$pass" psql "${conn[@]}" -c "UPDATE \"$schema\".users SET rights=$rights; ALTER TABLE \"$schema\".users ALTER COLUMN rights SET DEFAULT $rights;"; then
          audit_log "DB_RIGHTS" "$ip" "success" "rights=$rights pre=$pre"
        else
          echo "Updating users.rights failed."
          audit_log "DB_RIGHTS" "$ip" "failed" "rights=$rights pre=$pre"
        fi
        ;;
      8)
        # One -c per statement: a multi-command -c string is executed as a
        # single transaction, which VACUUM and REINDEX DATABASE refuse.
        PGPASSWORD="$pass" psql "${conn[@]}" \
          -c "REINDEX DATABASE \"$db\";" \
          -c "VACUUM (VERBOSE, ANALYZE);" \
          || echo "Maintenance failed."
        ;;
      9)
        PGPASSWORD="$pass" psql "${conn[@]}" -c "SELECT pg_size_pretty(pg_database_size('$db')) AS db_size;" \
          || echo "Stats query failed."
        ;;
      10) return 0 ;;
      *) echo "Invalid option." ;;
    esac
  done
}

# Top-level menu of node-db: dispatches to the DB helpers until "Exit".
while true; do
  echo "1. Manage DB"
  echo "2. Add DB"
  echo "3. Remove DB entry"
  echo "4. List"
  echo "5. Exit"
  # End of input (Ctrl-D) leaves the menu cleanly.
  read -r -p "Select: " opt || { echo; exit 0; }
  case "$opt" in
    1) manage_db ;;
    2) add_db ;;
    3)
      show_db_list
      read -r -p "Number: " n || { echo; exit 0; }
      # The number becomes part of a sed script. An empty value would turn
      # "${n}d" into "d" and delete every entry, so only a plain line number
      # that points at an existing entry is accepted.
      entry=""
      if [[ "$n" =~ ^[1-9][0-9]*$ ]] && [ -f "$PG_DB" ]; then
        entry="$(sed -n "${n}p" "$PG_DB")"
      fi
      if [ -z "$entry" ]; then
        echo "Invalid DB."
        continue
      fi
      if confirm_word "Remove DB entry $n?"; then
        sed -i "${n}d" "$PG_DB"
        # First registry field is the host.
        audit_log "DB_REMOVE" "${entry%%|*}" "success" "entry=$n"
      fi
      ;;
    4) show_db_list ;;
    5) exit 0 ;;
    *) echo "Invalid option." ;;
  esac
done
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-db
}

write_sync_manager() {
  cat > /usr/local/bin/sync-manager <<'SCRIPT_EOF'
#!/usr/bin/env bash
# Strict mode: stop on failed commands, unset variables and failed pipeline stages.
set -euo pipefail
# Shared library. The helpers and variables used below that this script does
# not define itself (need_root, with_lock, die, NODE_DB, ...) are expected to
# come from it.
source /usr/local/lib/erupe-orchestrator-common
# NOTE(review): presumably aborts unless running as root — confirm in the library.
need_root
# NOTE(review): presumably prevents concurrent runs under the "sync" lock name — confirm in the library.
with_lock "sync"
# -s: the node registry must exist and be non-empty before anything is synced.
[ -s "$NODE_DB" ] || die "No nodes registered."

# Asks which nodes to sync to and fills the global TARGETS array with the
# matching registry lines.
# Modes: 1 = a single node by number, 2 = every node (after confirmation),
#        3 = every node whose group list contains the given group.
# Globals: TARGETS (written), NODE_DB and GROUP_DB (read)
# Exits:   status 0 when "all nodes" is declined; via die when nothing matched
select_targets() {
  local mode n line g ip gs
  TARGETS=()
  echo "1. One node"
  echo "2. All nodes"
  echo "3. Group"
  read -r -p "Target: " mode
  case "$mode" in
    1)
      node-list
      read -r -p "Node number: " n
      line="$(get_node_line "$n")"
      if [ -n "$line" ]; then TARGETS+=("$line"); fi
      ;;
    2)
      confirm_word "Sync to ALL nodes?" || exit 0
      while IFS= read -r line; do TARGETS+=("$line"); done < "$NODE_DB"
      ;;
    3)
      read -r -p "Group: " g
      # An empty name would match ",," and select every node without groups.
      [ -n "$g" ] || die "Group name required."
      while IFS= read -r line; do
        ip="$(node_field "$line" ip)"
        # Compare the host as a literal string: used as a regex, the dots in
        # an address would match any character. Prints everything after the
        # first "|" of the matching line; a missing group file yields nothing.
        gs="$(awk -F'|' -v host="$ip" '$1 == host { print substr($0, length($1) + 2) }' "$GROUP_DB" 2>/dev/null || true)"
        if [[ ",$gs," == *",$g,"* ]]; then TARGETS+=("$line"); fi
      done < "$NODE_DB"
      ;;
  esac
  [ "${#TARGETS[@]}" -gt 0 ] || die "No targets."
}

# ---------------------------------------------------------------------------
# Interactive sync run: collect options, pick targets, rsync to every target
# and optionally normalise permissions on the destination afterwards.
# ---------------------------------------------------------------------------
read -r -p "Source path [/ERUPE-SYNC/]: " src
src="${src:-/ERUPE-SYNC/}"
[ -e "${src%/}" ] || die "Source not found."
read -r -p "Destination absolute path: " dest
require_abs_path "$dest"
read -r -p "Dry-run? [Y/n]: " dry; dry="${dry:-Y}"
read -r -p "Conflict [1=overwrite, 2=skip existing]: " conflict; conflict="${conflict:-1}"
read -r -p "Bandwidth limit, empty=unlimited (example 10M): " bw
# The value ends up on the rsync command line, so accept only a number with
# an optional size suffix.
bw_re='^[0-9]+([.][0-9]+)?([KkMmGgTtPp]([Ii]?[Bb])?)?$'
if [ -n "$bw" ] && [[ ! "$bw" =~ $bw_re ]]; then
  die "Invalid bandwidth limit."
fi

# Only an explicit "n" turns the dry run off.
live=0
if [[ "${dry,,}" = "n" ]]; then live=1; fi

# Options are kept in an array so each one reaches rsync as exactly one word.
flags=(-azh --stats --info=progress2)
if [ "$conflict" = "2" ]; then flags+=(--ignore-existing); fi
if [ -n "$bw" ]; then flags+=("--bwlimit=$bw"); fi
if [ "$live" -eq 0 ]; then flags+=(--dry-run); fi
select_targets
report="$(make_report sync)"

for line in "${TARGETS[@]}"; do
  ip="$(node_field "$line" ip)"
  port="$(node_field "$line" port)"
  user="$(node_field "$line" user)"
  name="$(node_field "$line" name)"
  echo "=== $name ($ip) ===" | tee -a "$report"
  # A failure on one node is recorded and the run moves on to the next node.
  status="completed"
  # A dry run must not change the nodes, so the directory is only created
  # for a real transfer.
  if [ "$live" -eq 1 ] && ! remote_sudo "$line" "mkdir -p '$dest'"; then
    echo "Could not create $dest on $name; skipping." | tee -a "$report"
    status="failed"
  elif ! rsync "${flags[@]}" -e "ssh -p $port -o StrictHostKeyChecking=accept-new" "$src" "$user@$ip:$dest/" | tee -a "$report"; then
    status="failed"
  fi
  audit_log "SYNC" "$ip" "$status" "src=$src dest=$dest dry=$dry"
done

if [ "$live" -eq 1 ]; then
  read -r -p "Apply chmod? [skip/755/750/644]: " perm
  case "$perm" in
    755|750)
      chmod_cmd="chmod -R $perm '$dest'"
      ;;
    644)
      # A recursive 644 would also strip the search bit from directories and
      # make the tree untraversable; directories get 755, files get 644.
      chmod_cmd="find '$dest' -type d -exec chmod 755 {} + && find '$dest' -type f -exec chmod 644 {} +"
      ;;
    *)
      chmod_cmd=""
      ;;
  esac
  if [ -n "$chmod_cmd" ]; then
    for line in "${TARGETS[@]}"; do
      remote_sudo "$line" "$chmod_cmd" || echo "chmod failed on $(node_field "$line" name)"
    done
  fi
fi
echo "Report: $report"
SCRIPT_EOF
  chmod 755 /usr/local/bin/sync-manager
}

write_node_manager() {
  cat > /usr/local/bin/node-manager <<'SCRIPT_EOF'
#!/usr/bin/env bash
# Strict mode: stop on failed commands, unset variables and failed pipeline stages.
set -euo pipefail
# Shared library. Helpers and variables that this script uses without
# defining them (need_root, audit_log, NODE_DB, PATH_CONF, ...) are expected
# to come from it.
source /usr/local/lib/erupe-orchestrator-common
# NOTE(review): presumably aborts unless running as root — confirm in the library.
need_root

# Replaces (or adds) the "key=value" line in the path configuration file.
# Keys are compared as literal prefixes, so characters such as "." in a host
# name cannot match unrelated entries. The new entry is written last.
# Arguments: $1 key, $2 value
# Globals:   PATH_CONF (rewritten)
_path_conf_set() {
  local key="$1" value="$2" row
  local -a kept=()
  if [ -f "$PATH_CONF" ]; then
    # "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
    while IFS= read -r row || [ -n "$row" ]; do
      [[ "$row" == "$key="* ]] || kept+=("$row")
    done < "$PATH_CONF"
  fi
  # ${kept[@]+...} keeps an empty array safe under `set -u` on older bash.
  printf '%s\n' ${kept[@]+"${kept[@]}"} "$key=$value" > "$PATH_CONF"
}

# Interactive menu for the global Erupe path, per-node paths and the backup
# directory. Loops until "Back" is chosen.
# Globals: PATH_CONF (read and rewritten)
configure_paths() {
  local opt p ip
  while true; do
    echo "Global path: $(get_global_path || true)"
    echo "Backup path: $(get_backup_dir || true)"
    echo "1. Set global Erupe path"
    echo "2. Set node-specific path"
    echo "3. Set backup path"
    echo "4. View config"
    echo "5. Back"
    read -r -p "Select: " opt
    case "$opt" in
      1)
        read -r -p "Path: " p
        require_abs_path "$p"
        _path_conf_set "GLOBAL" "${p%/}"
        ;;
      2)
        node-list
        read -r -p "Host/IP: " ip
        require_host "$ip"
        read -r -p "Path: " p
        require_abs_path "$p"
        _path_conf_set "$ip" "${p%/}"
        ;;
      3)
        read -r -p "Backup path: " p
        require_abs_path "$p"
        _path_conf_set "BACKUP_DIR" "${p%/}"
        ;;
      4)
        # A missing file just means nothing has been configured yet.
        if [ -f "$PATH_CONF" ]; then
          cat "$PATH_CONF"
        else
          echo "No paths configured yet."
        fi
        ;;
      5) return 0 ;;
    esac
  done
}

# Registers a node: prompts for its connection details, installs the
# master's public key on it with a one-time password login and, on success,
# appends "host|port|user|name" to the node registry.
# Globals: NODE_DB (appended to)
# Outputs: prompts; a failure notice when the key could not be installed
add_node() {
  # Function-local so the password does not stay in the script's globals.
  local ip port user name pass
  read -r -p "Host/IP: " ip; require_host "$ip"
  read -r -p "SSH port [22]: " port; port="${port:-22}"; require_port "$port"
  read -r -p "SSH user [root]: " user; user="${user:-root}"; require_user "$user"
  read -r -p "Node name: " name; require_name "$name"
  known_host_bootstrap "$ip" "$port"
  read -r -s -p "SSH password for initial key install: " pass; echo
  # sshpass -e takes the password from $SSHPASS. With -p it would be part of
  # the command line, where any local user can read it from the process list.
  if SSHPASS="$pass" sshpass -e ssh-copy-id -o StrictHostKeyChecking=accept-new -i /root/.ssh/id_ed25519.pub -p "$port" "$user@$ip"; then
    echo "$ip|$port|$user|$name" >> "$NODE_DB"
    chmod 600 "$NODE_DB"
    audit_log "NODE_ADD" "$ip" "success" "user=$user name=$name"
  else
    echo "Key installation failed; node not registered."
    audit_log "NODE_ADD" "$ip" "failed" "user=$user"
  fi
}

# Edits or deletes one entry of the node registry, selected by line number.
# Always returns 0 for "nothing done" outcomes (invalid number, declined
# confirmation) so that the calling menu is not ended by errexit.
# Globals: NODE_DB (rewritten)
modify_node() {
  local n line a ip port user name
  local -a rows
  node-list
  read -r -p "Node number: " n
  # The number is used in a sed script and as an array index: digits only.
  [[ "$n" =~ ^[1-9][0-9]*$ ]] || { echo "Invalid."; return 0; }
  line="$(get_node_line "$n")"
  [ -n "$line" ] || { echo "Invalid."; return 0; }
  echo "1. Edit"
  echo "2. Delete"
  read -r -p "Action: " a
  if [ "$a" = "2" ]; then
    # A bare `return` would pass on confirm_word's non-zero status.
    confirm_word "Delete node entry $n?" || return 0
    sed -i "${n}d" "$NODE_DB"
    # First registry field is the host.
    audit_log "NODE_DELETE" "${line%%|*}" "success" "entry=$n"
    return 0
  fi
  read -r -p "Host/IP: " ip; require_host "$ip"
  read -r -p "Port [22]: " port; port="${port:-22}"; require_port "$port"
  read -r -p "User [root]: " user; user="${user:-root}"; require_user "$user"
  read -r -p "Name: " name; require_name "$name"
  # Replace the line by index rather than through a sed substitution, where
  # "&", "/" or "\" in the new values would be interpreted by sed.
  mapfile -t rows < "$NODE_DB"
  if (( n > ${#rows[@]} )); then
    echo "Invalid."
    return 0
  fi
  rows[n-1]="$ip|$port|$user|$name"
  printf '%s\n' "${rows[@]}" > "$NODE_DB"
  audit_log "NODE_EDIT" "$ip" "success" "entry=$n user=$user name=$name"
}

# SSH sub-menu: open an interactive session on one node, broadcast a
# one-line command to every node, or push a local script to every node and
# run it there through remote_sudo.
# Always returns 0, so a declined confirmation, an invalid node number or a
# failing node does not end the calling menu through errexit.
# Globals: NODE_DB (read)
ssh_menu() {
  local opt n line cmd script ip port user
  echo "1. Interactive SSH"
  echo "2. Broadcast one-line command"
  echo "3. Broadcast local script file"
  read -r -p "Select: " opt
  case "$opt" in
    1)
      node-list
      read -r -p "Node number: " n
      line="$(get_node_line "$n")"
      if [ -n "$line" ]; then
        ssh_interactive "$line" || echo "SSH session ended with status $?."
      fi
      ;;
    2)
      read -r -p "Command: " cmd
      confirm_word "Broadcast command to all nodes?" || return 0
      # The registry is read on descriptor 3: a command that reads stdin
      # inside the loop (ssh does by default) would otherwise swallow the
      # remaining node lines and end the broadcast after the first node.
      while IFS= read -r -u 3 line; do
        echo "== $(node_field "$line" name) =="
        ssh_exec "$line" "$cmd" || true
      done 3< "$NODE_DB"
      ;;
    3)
      read -r -p "Local script path: " script
      [ -f "$script" ] || { echo "File not found."; return 0; }
      confirm_word "Run this script on all nodes?" || return 0
      # NOTE(review): the remote copy uses a fixed name in /tmp and is then
      # run via remote_sudo; consider a per-run unique path on the node.
      while IFS= read -r -u 3 line; do
        ip="$(node_field "$line" ip)"; port="$(node_field "$line" port)"; user="$(node_field "$line" user)"
        # One unreachable or failing node must not stop the remaining ones.
        if ! rsync -az -e "ssh -p $port -o StrictHostKeyChecking=accept-new" "$script" "$user@$ip:/tmp/erupe_remote_exec.sh"; then
          echo "Upload to $ip failed; skipping."
          continue
        fi
        remote_sudo "$line" "bash /tmp/erupe_remote_exec.sh; rm -f /tmp/erupe_remote_exec.sh" \
          || echo "Script failed on $ip."
      done 3< "$NODE_DB"
      ;;
  esac
  return 0
}

# Runs one menu action and always comes back to the menu.
# The action runs in a subshell with errexit switched on inside it, so a
# failing tool, a failed validation or an `exit` in a helper ends only that
# action instead of terminating node-manager.
# Arguments: $1.. command or function to run, with its arguments
menu_run() {
  local rc=0
  set +e
  ( set -e; "$@" )
  rc=$?
  set -e
  if [ "$rc" -ne 0 ]; then
    echo "'$1' ended with status $rc; returning to menu."
  fi
  return 0
}

# Menu entry 7: asks for an optional group filter and refresh interval, then
# starts node-health with them.
menu_health() {
  local g r
  read -r -p "Group filter empty=all: " g
  read -r -p "Refresh seconds [0]: " r
  node-health "$g" "${r:-0}"
}

# Menu entry 15: opens erupe-manager on a node chosen by number.
menu_remote_manager() {
  local n line
  node-list
  read -r -p "Node number: " n
  line="$(get_node_line "$n")"
  if [ -n "$line" ]; then
    ssh_interactive "$line" "/usr/local/bin/erupe-manager"
  fi
}

# Main menu of node-manager.
while true; do
  echo "================================================================"
  echo "RAIN/Erupe Master Orchestration v$ORCH_VERSION"
  echo "================================================================"
  echo "1. Configure paths"
  echo "2. Add node"
  echo "3. Modify/delete node"
  echo "4. Node list"
  echo "5. Node groups"
  echo "6. Preflight check"
  echo "7. Health monitor"
  echo "8. Audit log"
  echo "9. Notifications"
  echo "10. Edit config.json"
  echo "11. Database manager"
  echo "12. Update selected nodes"
  echo "13. Rolling update all nodes"
  echo "14. SSH/broadcast"
  echo "15. Remote erupe-manager"
  echo "16. Sync manager"
  echo "17. Registry export/import"
  echo "18. Exit"
  # End of input (Ctrl-D) leaves the menu cleanly.
  read -r -p "Select: " opt || { echo; exit 0; }
  case "$opt" in
    1) menu_run configure_paths ;;
    2) menu_run add_node ;;
    3) menu_run modify_node ;;
    4) menu_run node-list ;;
    5) menu_run node-groups ;;
    6) menu_run node-preflight ;;
    7) menu_run menu_health ;;
    8) menu_run node-audit ;;
    9) menu_run node-notify ;;
    10) menu_run node-editconfig ;;
    11) menu_run node-db ;;
    12) menu_run node-update ;;
    13) menu_run node-updateall ;;
    14) menu_run ssh_menu ;;
    15) menu_run menu_remote_manager ;;
    16) menu_run sync-manager ;;
    17) menu_run node-registry ;;
    18) exit 0 ;;
    *) echo "Invalid option." ;;
  esac
done
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-manager
}

# Installer entry point: checks for root, runs every installation step in a
# fixed order, then prints the success banner and the start hint.
# Globals: ORCH_VERSION (read)
main() {
  need_root

  # Steps run strictly in this order, one call per name.
  local _installer_step
  for _installer_step in \
    install_dependencies \
    bootstrap_storage \
    write_common_lib \
    write_node_list \
    write_preflight \
    write_node_groups \
    write_node_health \
    write_audit_notify \
    write_registry \
    write_config_editor \
    write_update_tools \
    write_node_db \
    write_sync_manager \
    write_node_manager
  do
    "$_installer_step"
  done

  ok "Master Orchestration v$ORCH_VERSION installed."
  echo "Start: sudo node-manager"
}

# Entry point: run the installer, forwarding any command-line arguments.
main "$@"
EOF
sudo chmod +x /usr/local/bin/setup-orchestration
sudo /usr/local/bin/setup-orchestration
Done