Script 17 · Cluster Control Plane
17. Master Orchestration (Cluster Management) v3.0 Hardened
Installs a hardened multi-node orchestration toolkit for RAIN/Erupe clusters with SSH, sync, database, health, audit, and rolling-update workflows.
Category: Cluster Control Plane
Risk: High
Lines: calculating
Language: Bash / Linux
What this script does
- Manage many nodes from one master VPS.
- Run safer rolling updates with snapshots and failure thresholds.
- Centralize config editing, DB operations, sync, audit, and health checks.
Prerequisites
- Dedicated master VPS
- Root access for installation
- SSH reachability to nodes
- Staging test before production
- Backup strategy
Execution flow
- Installs dependencies
- Creates /opt/erupe-orchestrator state
- Generates command suite
- Registers nodes
- Runs preflight/health/update/sync workflows
Validation checklist
- sudo node-manager
- node-preflight
- node-health
- node-audit
- node-registry
Operational cautions
- The master becomes a high-value administrative control point.
- Use non-root SSH users with sudo where possible.
- Test in staging before touching production nodes.
Original script notes
Script Info (v3.0): hardened cluster management system with fixed heredoc formatting, safer SSH defaults, input validation, lock files, preflight checks, canary/rolling updates, snapshots, audit logs, registry export/import, node groups, health monitoring, notifications, sync dry-run, and database pre-backups before destructive actions.
sudo tee /usr/local/bin/setup-orchestration >/dev/null <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
# ============================================================================
# MASTER ORCHESTRATION v3.0 - RAIN/Erupe Cluster Management
# ============================================================================
# Major fixes:
# - Heredoc and newline layout stays valid when the script is copied from the HTML page.
# - chmod commands and comments are no longer glued together on one line.
# - set -euo pipefail enabled.
# - StrictHostKeyChecking=accept-new by default.
# - No chmod 777 defaults.
# - Input validation for IP/host, port, path, node name, group tags.
# - DB destructive operations create backups first.
#
# Added features:
# - Preflight checks.
# - Lock file to prevent overlapping update/sync/edit jobs.
# - Snapshot before node update/mass update.
# - Canary update and rolling update with failure threshold.
# - Audit log + report files.
# - Optional append-only audit mode with chattr.
# - Export/import registry bundle.
# - Non-root SSH user support with sudo wrapper.
# - Known-host bootstrap via ssh-keyscan.
# - jq-based JSON path editing with backup/rollback.
# - Post-action reports for update and sync.
# ============================================================================
# Release identifier of the orchestration toolkit.
ORCH_VERSION=3.0
# Root directory; the state, report, snapshot and registry paths below hang off it.
ORCH_ROOT=/opt/erupe-orchestrator
STATE_DIR="${ORCH_ROOT}/state"
REPORT_DIR="${ORCH_ROOT}/reports"
SNAPSHOT_DIR="${ORCH_ROOT}/snapshots"
# Flat-file registries kept under ORCH_ROOT.
NODE_DB="${ORCH_ROOT}/nodes.db"
GROUP_DB="${ORCH_ROOT}/node_groups.db"
PG_DB="${ORCH_ROOT}/databases.db"
PATH_CONF="${ORCH_ROOT}/paths.conf"
BACKUP_INDEX="${ORCH_ROOT}/config_backups.db"
NOTIFY_CONF="${ORCH_ROOT}/notify.conf"
# Locations outside ORCH_ROOT.
CRYPTO_KEY=/etc/erupe-orchestration.key
AUDIT_LOG=/var/log/erupe-orchestration.log
LOCK_DIR=/run/erupe-orchestration
SYNC_STAGING=/ERUPE-SYNC
# Console helpers. Each prints one line with a colored tag in front of the
# message; err writes to stderr, the others to stdout.

# Cyan [HH:MM:SS] progress line.
log() {
  local now
  now="$(date '+%H:%M:%S')"
  printf '\033[1;36m[%s]\033[0m %s\n' "$now" "$*"
}

# Green [OK] line.
ok() {
  printf '\033[1;32m[OK]\033[0m %s\n' "$*"
}

# Yellow [WARN] line.
warn() {
  printf '\033[1;33m[WARN]\033[0m %s\n' "$*"
}

# Red [ERR] line on stderr.
err() {
  printf '\033[1;31m[ERR]\033[0m %s\n' "$*" 1>&2
}
# Aborts the installer with exit status 1 unless the effective UID is 0.
need_root() {
  local uid="${EUID:-$(id -u)}"
  [ "$uid" -eq 0 ] && return 0
  err "Run as root: sudo setup-orchestration"
  exit 1
}
# Installs the toolkit's command-line dependencies through apt when any
# required command is missing.
#
# The probe list covers every command the generated tools call, including
# sshpass, pg_dump/pg_restore and ssh-keyscan. The package list includes
# openssh-client, sed, grep and tar so that each probed command has a package
# that provides it.
# Returns: 0 when everything is present (before or after installing),
#          1 when commands are still missing after the install.
install_dependencies() {
  local pkgs=(openssh-client sshpass rsync postgresql-client openssl jq curl coreutils findutils gawk sed grep tar)
  local required=(ssh ssh-keygen ssh-copy-id ssh-keyscan sshpass rsync psql pg_dump pg_restore openssl jq curl awk sed grep find tar)
  local missing=()
  local cmd

  for cmd in "${required[@]}"; do
    command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd")
  done
  if [ "${#missing[@]}" -eq 0 ]; then
    return 0
  fi

  log "Installing dependencies: ${pkgs[*]}"
  apt-get update -y -qq
  DEBIAN_FRONTEND=noninteractive apt-get install -y -qq "${pkgs[@]}"

  # Re-probe so a partial install is reported here instead of surfacing later
  # as an obscure failure (for example ssh-keygen missing during bootstrap).
  missing=()
  for cmd in "${required[@]}"; do
    command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd")
  done
  if [ "${#missing[@]}" -gt 0 ]; then
    err "Still missing after install: ${missing[*]}"
    return 1
  fi
}
# Creates the orchestrator directory tree, registry files, encryption key and
# root SSH key, and applies their permissions.
#
# Differences from a plain mkdir/touch/chmod sequence:
#   - notify.conf holds the webhook URL, so it is kept at 600 like the other
#     registries (node-notify also sets 600 when it writes the file).
#   - The encryption key is generated under umask 077 so it is never readable
#     by other users, and an empty key file is regenerated.
#   - /root/.ssh is forced to 700 when this function creates the SSH key.
bootstrap_storage() {
  mkdir -p "$ORCH_ROOT" "$STATE_DIR" "$REPORT_DIR" "$SNAPSHOT_DIR" "$LOCK_DIR" "$SYNC_STAGING" "$(dirname "$AUDIT_LOG")"
  touch "$NODE_DB" "$GROUP_DB" "$PG_DB" "$PATH_CONF" "$BACKUP_INDEX" "$NOTIFY_CONF" "$AUDIT_LOG"

  chmod 700 "$ORCH_ROOT" "$STATE_DIR" "$SNAPSHOT_DIR"
  chmod 755 "$REPORT_DIR"
  chmod 775 "$SYNC_STAGING"
  chmod 600 "$NODE_DB" "$GROUP_DB" "$PG_DB" "$BACKUP_INDEX" "$NOTIFY_CONF"
  chmod 640 "$AUDIT_LOG"
  chmod 644 "$PATH_CONF"

  # -s instead of -f: a zero-length key left by an interrupted run is useless.
  if [ ! -s "$CRYPTO_KEY" ]; then
    (
      umask 077
      openssl rand -base64 48 > "$CRYPTO_KEY"
    )
    chmod 600 "$CRYPTO_KEY"
  fi

  if [ ! -f /root/.ssh/id_ed25519 ]; then
    mkdir -p /root/.ssh
    chmod 700 /root/.ssh
    ssh-keygen -t ed25519 -N "" -f /root/.ssh/id_ed25519 -q
  fi
}
write_common_lib() {
cat > /usr/local/lib/erupe-orchestrator-common <<'LIBEOF'
#!/usr/bin/env bash
set -euo pipefail
# Shared settings for every generated command; the values mirror the ones the
# installer itself uses.
ORCH_VERSION=3.0
ORCH_ROOT=/opt/erupe-orchestrator
# Directories under the orchestrator root.
STATE_DIR="${ORCH_ROOT}/state"
REPORT_DIR="${ORCH_ROOT}/reports"
SNAPSHOT_DIR="${ORCH_ROOT}/snapshots"
# Flat-file registries under the orchestrator root.
NODE_DB="${ORCH_ROOT}/nodes.db"
GROUP_DB="${ORCH_ROOT}/node_groups.db"
PG_DB="${ORCH_ROOT}/databases.db"
PATH_CONF="${ORCH_ROOT}/paths.conf"
BACKUP_INDEX="${ORCH_ROOT}/config_backups.db"
NOTIFY_CONF="${ORCH_ROOT}/notify.conf"
# Locations outside the orchestrator root.
CRYPTO_KEY=/etc/erupe-orchestration.key
AUDIT_LOG=/var/log/erupe-orchestration.log
LOCK_DIR=/run/erupe-orchestration
SYNC_STAGING=/ERUPE-SYNC
# Prints "ERROR: <message>" on stderr and terminates the shell with status 1.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

# Dies unless the effective UID is 0.
need_root() {
  local uid="${EUID:-$(id -u)}"
  if [ "$uid" -ne 0 ]; then
    die "Run with sudo/root."
  fi
}

# Prints the current local time as YYYYmmdd_HHMMSS.
timestamp() {
  date '+%Y%m%d_%H%M%S'
}
# Appends one record to $AUDIT_LOG, creating its directory if needed.
# Arguments: $1 action (default "unknown"), $2 target, $3 result, $4 detail.
# The acting user is $SUDO_USER, else $USER, else "root".
audit_log() {
  local action="${1:-unknown}"
  local target="${2:-}"
  local result="${3:-}"
  local detail="${4:-}"
  local log_dir when who

  log_dir="$(dirname "$AUDIT_LOG")"
  mkdir -p "$log_dir"
  # Fall back to plain `date` where -Iseconds is not supported.
  when="$(date -Iseconds 2>/dev/null || date)"
  who="${SUDO_USER:-${USER:-root}}"
  printf '[%s] user=%s action=%s target=%s result=%s detail="%s"\n' "$when" "$who" "$action" "$target" "$result" "$detail" >> "$AUDIT_LOG"
}
# Posts "<level>: <message>" as {"text": ...} JSON to the WEBHOOK_URL found in
# $NOTIFY_CONF. Best effort: returns 0 when no webhook is configured and also
# when the HTTP request fails.
# Arguments: $1 level (default "info"), $2 message.
notify_send() {
  local level="${1:-info}" msg="${2:-}"
  local url text_json payload

  if [ ! -f "$NOTIFY_CONF" ]; then
    return 0
  fi
  # The last WEBHOOK_URL= line wins; the value may itself contain '='.
  url="$(grep '^WEBHOOK_URL=' "$NOTIFY_CONF" 2>/dev/null | tail -n 1 | cut -d= -f2- || true)"
  if [ -z "$url" ]; then
    return 0
  fi

  # jq -Rs turns the raw text into one JSON string literal.
  text_json="$(printf '%s: %s' "$level" "$msg" | jq -Rs . || true)"
  payload="$(printf '{"text":%s}' "$text_json")"
  curl -fsS -m 5 --connect-timeout 3 -H "Content-Type: application/json" \
    -d "$payload" \
    "$url" >/dev/null 2>&1 || true
}
# Shows a warning label and asks the operator to type the word CONFIRM.
# Arguments: $1 - text printed before the prompt.
# Returns:   0 only when the answer is exactly "CONFIRM"; 1 otherwise,
#            including on end-of-input.
confirm_word() {
  # The answer is local so it cannot clobber a variable in the calling script.
  local label="$1" ans=""
  printf '%s\n' "$label"
  # On EOF read fails and the empty answer is treated as "no".
  read -r -p "Type CONFIRM to continue: " ans || true
  [ "$ans" = "CONFIRM" ]
}
# Input validators. Each returns 0 when $1 is acceptable and non-zero otherwise.

# Host name, IPv4 or IPv6 literal. A leading '-' is rejected so the value can
# never be parsed as a command-line option by ssh-keyscan, psql and similar.
valid_host() {
  [[ "${1:-}" =~ ^[A-Za-z0-9._:-]+$ ]] && [[ "$1" != -* ]]
}

# TCP port in the range 1..65535.
valid_port() {
  [[ "${1:-}" =~ ^[0-9]+$ ]] && [ "$1" -ge 1 ] && [ "$1" -le 65535 ]
}

# Login name: starts with a letter or underscore.
valid_user() {
  [[ "${1:-}" =~ ^[A-Za-z_][A-Za-z0-9._-]*$ ]]
}

# Generic identifier (node name, database name, alias).
valid_name() {
  [[ "${1:-}" =~ ^[A-Za-z0-9._-]+$ ]]
}

# Comma-separated group tags; the empty string is allowed.
valid_groups() {
  [[ "${1:-}" =~ ^[A-Za-z0-9._,-]*$ ]]
}

# Absolute path without "..". Single quotes and control characters are also
# rejected because callers embed the path inside single-quoted remote shell
# commands, where a quote would end the quoting.
valid_abs_path() {
  local p="${1:-}"
  [[ "$p" == /* ]] || return 1
  [[ "$p" != *".."* ]] || return 1
  [[ "$p" != *"'"* ]] || return 1
  [[ "$p" != *[[:cntrl:]]* ]] || return 1
}
# Guard wrappers around the validators: return 0 for acceptable input and
# terminate through die otherwise.

require_host() {
  if ! valid_host "$1"; then
    die "Invalid host/IP: $1"
  fi
}

require_port() {
  if ! valid_port "$1"; then
    die "Invalid port: $1"
  fi
}

require_user() {
  if ! valid_user "$1"; then
    die "Invalid user: $1"
  fi
}

require_name() {
  if ! valid_name "$1"; then
    die "Invalid name. Use A-Z a-z 0-9 . _ - only."
  fi
}

require_groups() {
  if ! valid_groups "$1"; then
    die "Invalid groups. Use comma-separated tags only."
  fi
}

require_abs_path() {
  if ! valid_abs_path "$1"; then
    die "Invalid path. Use an absolute path without '..'."
  fi
}
# Takes an exclusive lock named $1 for the rest of the calling script.
#
# The lock is a directory under $LOCK_DIR, because mkdir is atomic. It is
# removed when the script exits. Dies when the lock is already held.
#
# The lock path is expanded into the trap text now, while it is in scope.
# Referring to the local variable from the trap would not work: the trap runs
# after this function has returned, when the local no longer exists.
# INT and TERM are turned into a normal exit so the EXIT trap runs for them.
# Note: this replaces any EXIT/INT/TERM trap the caller had installed.
with_lock() {
  local lock_name="$1"
  local lock_path="$LOCK_DIR/$lock_name.lock"
  local cleanup

  mkdir -p "$LOCK_DIR"
  if ! mkdir "$lock_path" 2>/dev/null; then
    # No trap is installed yet, so dying here cannot remove the holder's lock.
    die "Another operation is running: $lock_name"
  fi

  printf -v cleanup 'rm -rf -- %q' "$lock_path"
  trap "$cleanup" EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM
}
# Prints the value of the last GLOBAL= line in $PATH_CONF, or nothing when the
# key or the file is missing. Always returns 0.
get_global_path() {
  # "GLOBAL=" is 7 characters, so the value starts at column 8.
  awk '/^GLOBAL=/ { value = substr($0, 8); seen = 1 } END { if (seen) print value }' \
    "$PATH_CONF" 2>/dev/null || true
}
# Prints the Erupe install directory for a node.
# Arguments: $1 - node host/IP (optional).
# Outputs:   the last "<host>=<dir>" override in $PATH_CONF when there is one,
#            otherwise the GLOBAL path from get_global_path.
#
# The host is compared as a literal string. Using it as a grep pattern would
# treat every '.' as a wildcard, so a lookup for one host could return the
# override of a different, similar-looking key.
get_erupe_dir() {
  local ip="${1:-}" specific=""
  if [ -n "$ip" ] && [ -r "$PATH_CONF" ]; then
    specific="$(awk -v key="$ip" '
      {
        eq = index($0, "=")
        if (eq > 0 && substr($0, 1, eq - 1) == key) { val = substr($0, eq + 1) }
      }
      END { print val }' "$PATH_CONF" 2>/dev/null || true)"
  fi
  if [ -n "$specific" ]; then
    printf '%s\n' "$specific"
  else
    get_global_path
  fi
}
# Prints the backup directory, in order of preference:
#   1. the last BACKUP_DIR= value in $PATH_CONF,
#   2. "<GLOBAL path>/BACKUP",
#   3. "$ORCH_ROOT/backups".
get_backup_dir() {
  local configured erupe_root
  # "BACKUP_DIR=" is 11 characters, so the value starts at column 12.
  configured="$(awk '/^BACKUP_DIR=/ { value = substr($0, 12) } END { print value }' "$PATH_CONF" 2>/dev/null || true)"
  if [ -n "$configured" ]; then
    echo "$configured"
    return
  fi
  erupe_root="$(get_global_path)"
  if [ -n "$erupe_root" ]; then
    echo "$erupe_root/BACKUP"
  else
    echo "$ORCH_ROOT/backups"
  fi
}
# Dies unless a GLOBAL Erupe path has been configured.
check_path_setup() {
  local configured
  configured="$(get_global_path)" || true
  if [ -z "$configured" ]; then
    die "GLOBAL Erupe path is not configured. Open node-manager option 1."
  fi
}
# Prints the number of lines in $NODE_DB, or 0 when the file is empty or missing.
node_line_count() {
  if [ -s "$NODE_DB" ] && wc -l < "$NODE_DB" | tr -d ' '; then
    return 0
  fi
  echo 0
}
# Prints registry line number $1 (1-based) from $NODE_DB.
#
# Anything that is not a positive integer prints nothing and returns 0, so
# callers' "[ -n "$line" ]" checks report an invalid node. The number is
# validated before it reaches sed because it comes from operator input: an
# empty value would otherwise become the sed script "p" and print every line,
# and other text would be run as sed commands.
get_node_line() {
  local n="${1:-}"
  [[ "$n" =~ ^[0-9]+$ ]] || return 0
  [ -r "$NODE_DB" ] || return 0
  # Force base 10 so "08" is not read as an invalid octal number.
  n=$((10#$n))
  [ "$n" -ge 1 ] || return 0
  sed -n "${n}{p;q}" "$NODE_DB"
}
# Extracts one field from a '|'-separated registry line.
# Arguments: $1 - registry line; $2 - one of ip, port, user, name.
# Two layouts are understood:
#   four or more fields : ip|port|user|name
#   fewer than four     : ip|port|name   (user is reported as "root")
# An unknown field name prints nothing.
node_field() {
  local record="$1" wanted="$2"
  local width layout column

  width="$(awk -F'|' '{print NF}' <<< "$record")"
  if [ "$width" -ge 4 ]; then
    layout="current"
  else
    layout="legacy"
  fi

  case "$wanted:$layout" in
    ip:*) column=1 ;;
    port:*) column=2 ;;
    user:current) column=3 ;;
    user:legacy)
      echo "root"
      return
      ;;
    name:current) column=4 ;;
    name:legacy) column=3 ;;
    *) return 0 ;;
  esac
  cut -d'|' -f"$column" <<< "$record"
}
# Prints the "user@host" ssh destination for a registry line.
node_target() {
  local record="$1" login host
  login="$(node_field "$record" user)"
  host="$(node_field "$record" ip)"
  printf '%s@%s\n' "$login" "$host"
}
# Prints the default ssh options as one space-separated string; callers rely on
# word splitting when they use it.
ssh_opts() {
  local -a opts=(
    "-o BatchMode=yes"
    "-o ConnectTimeout=10"
    "-o ServerAliveInterval=15"
    "-o ServerAliveCountMax=2"
    "-o StrictHostKeyChecking=accept-new"
  )
  # Join with single spaces whatever IFS the caller has set.
  local IFS=' '
  echo "${opts[*]}"
}
# Runs a command on a node over non-interactive ssh.
# Arguments: $1 - registry line; remaining arguments - remote command.
# Returns:   the exit status of the remote command, or 255 when the
#            connection itself failed on all three attempts.
#
# Only connection failures are retried (ssh reports them as status 255), with
# waits of 2 and 4 seconds between attempts. A remote command that ran and
# failed is not repeated, because callers pass non-idempotent commands such as
# updates and restarts, and its real status is passed back.
ssh_exec() {
  local line="$1"; shift
  local port target rc
  local attempt=1 delay=2
  local -a opts

  port="$(node_field "$line" port)"
  target="$(node_target "$line")"
  # ssh_opts prints one space-separated string; split it without globbing.
  IFS=' ' read -r -a opts <<< "$(ssh_opts)"

  while :; do
    rc=0
    ssh "${opts[@]}" -p "$port" "$target" "$@" || rc=$?
    if [ "$rc" -ne 255 ] || [ "$attempt" -ge 3 ]; then
      return "$rc"
    fi
    sleep "$delay"
    delay=$((delay * 2))
    attempt=$((attempt + 1))
  done
}
# Opens an ssh session with a forced TTY (-t) to the node described by $1,
# passing any remaining arguments as the remote command.
ssh_interactive() {
  local record="$1"
  shift
  local -a cmd=(ssh -t -o StrictHostKeyChecking=accept-new)
  cmd+=(-p "$(node_field "$record" port)")
  cmd+=("$(node_target "$record")")
  "${cmd[@]}" "$@"
}
# Runs a command with root privileges on a node.
# Arguments: $1 - registry line; remaining arguments - remote command.
# When the node's SSH user is root the command is passed to ssh_exec unchanged.
# Otherwise the arguments are joined into one shell-quoted string and run
# through "sudo -n bash -lc".
remote_sudo() {
  local record="$1"
  shift
  local login quoted
  login="$(node_field "$record" user)"
  if [ "$login" != "root" ]; then
    quoted="$(printf '%q' "$*")"
    ssh_exec "$record" "sudo -n bash -lc $quoted"
    return
  fi
  ssh_exec "$record" "$@"
}
# Symmetric helpers for stored secrets: AES-256-CBC with PBKDF2 (100000
# iterations), passphrase read from $CRYPTO_KEY, single-line base64 text.
# openssl diagnostics are suppressed; a failure shows up as the exit status.

# Prints the base64 ciphertext of $1.
encrypt_secret() {
  local -a cipher=(openssl enc -aes-256-cbc -salt -pbkdf2 -iter 100000 -pass "file:$CRYPTO_KEY" -base64 -A)
  printf '%s' "$1" | "${cipher[@]}" 2>/dev/null
}

# Prints the plaintext for the base64 ciphertext $1.
decrypt_secret() {
  local -a cipher=(openssl enc -d -aes-256-cbc -pbkdf2 -iter 100000 -pass "file:$CRYPTO_KEY" -base64 -A)
  printf '%s' "$1" | "${cipher[@]}" 2>/dev/null
}
# Refreshes root's known_hosts entry for a node: removes any stored key for the
# host/port and appends the key currently offered by the server.
# Arguments: $1 - host/IP, $2 - SSH port.
# Both steps are best effort; failures are ignored.
#
# OpenSSH stores a host on port 22 under the bare host name and only uses the
# "[host]:port" form for other ports, so the entry to remove depends on the
# port. Removing "[host]:22" would never match anything.
# NOTE(review): this trusts whatever key ssh-keyscan returns (trust on first
# use); verify fingerprints out of band where that matters.
known_host_bootstrap() {
  local host="$1" port="$2"
  local ssh_dir=/root/.ssh
  local known_hosts="$ssh_dir/known_hosts"
  local entry

  mkdir -p "$ssh_dir"
  touch "$known_hosts"
  chmod 600 "$known_hosts"

  if [ "$port" = "22" ]; then
    entry="$host"
  else
    entry="[$host]:$port"
  fi
  # -f names the same file that the scan below appends to.
  ssh-keygen -R "$entry" -f "$known_hosts" >/dev/null 2>&1 || true
  ssh-keyscan -p "$port" -T 5 "$host" >> "$known_hosts" 2>/dev/null || true
}
# Ensures $REPORT_DIR exists and prints a fresh report path of the form
# "$REPORT_DIR/<name>_<timestamp>.log". The file itself is not created.
make_report() {
  mkdir -p "$REPORT_DIR"
  local name="$1" stamp
  stamp="$(timestamp)"
  printf '%s/%s_%s.log\n' "$REPORT_DIR" "$name" "$stamp"
}
# Archives config.json and the erupe-ce binary of a node into
# "<erupe dir>/BACKUP/orchestrator_snapshots" on that node, then records the
# action in the audit log.
# Arguments: $1 - registry line; $2 - reason tag used in the archive name
#            (default "manual").
# Does nothing when no Erupe directory is configured for the node. The remote
# command ends in "|| true", so a failing tar does not fail the snapshot step.
snapshot_node() {
  local line="$1" reason="${2:-manual}"
  local ip name dir remote_base archive

  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  dir="$(get_erupe_dir "$ip")"
  if [ -z "$dir" ]; then
    return 0
  fi

  remote_base="$dir/BACKUP/orchestrator_snapshots"
  archive="$remote_base/snapshot_${reason}_$(timestamp).tar.gz"
  remote_sudo "$line" "mkdir -p '$remote_base' && tar -czf '$archive' -C '$dir' config.json erupe-ce 2>/dev/null || true"
  audit_log "SNAPSHOT" "$ip" "created" "node=$name reason=$reason"
}
# Copies a node's config.json to "config.json.bak.<timestamp>" on the node,
# appends "ip|backup path|size|timestamp" to $BACKUP_INDEX and prints the
# backup path.
# Arguments: $1 - registry line.
backup_config_remote() {
  local record="$1"
  local host conf_path backup_path bytes

  host="$(node_field "$record" ip)"
  conf_path="$(get_erupe_dir "$host")/config.json"
  backup_path="${conf_path}.bak.$(timestamp)"

  remote_sudo "$record" "test -f '$conf_path' && cp '$conf_path' '$backup_path'"
  # Size as reported by the node; 0 when the backup cannot be stat'ed.
  bytes="$(ssh_exec "$record" "stat -c%s '$backup_path' 2>/dev/null || echo 0" | tail -n 1)"

  printf '%s|%s|%s|%s\n' "$host" "$backup_path" "$bytes" "$(timestamp)" >> "$BACKUP_INDEX"
  printf '%s\n' "$backup_path"
}
# Takes a custom-format pg_dump of a database before a destructive operation
# and prints the dump path.
# Arguments: $1 host, $2 port, $3 user, $4 password, $5 database,
#            $6 label naming the operation (part of the directory name).
# The dump is written to
# "<backup dir>/pre_destructive_<label>_<db>/<db>_<timestamp>.dump".
db_pre_backup() {
  local ip="$1" port="$2" user="$3" pass="$4" db="$5" label="$6"
  local target_dir dump_file

  target_dir="$(get_backup_dir)/pre_destructive_${label}_${db}"
  mkdir -p "$target_dir"
  dump_file="$target_dir/${db}_$(timestamp).dump"

  local -a dump_cmd=(pg_dump -h "$ip" -p "$port" -U "$user" -Fc -d "$db" -f "$dump_file")
  PGPASSWORD="$pass" "${dump_cmd[@]}"
  echo "$dump_file"
}
LIBEOF
chmod 755 /usr/local/lib/erupe-orchestrator-common
}
# Generates /usr/local/bin/node-list, which prints the node registry as a
# table, optionally restricted to nodes carrying the group tag given as $1.
write_node_list() {
  cat > /usr/local/bin/node-list <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common

# Optional group tag; empty means "show every node".
wanted_group="${1:-}"

# Prints one table row; the header uses the same layout as the data rows.
print_row() {
  printf '%-4s | %-18s | %-6s | %-12s | %-24s | %s\n' "$@"
}

if [ ! -s "$NODE_DB" ]; then
  echo "No nodes registered."
  exit 0
fi

# host -> comma-separated group tags, loaded from the group registry.
declare -A groups
if [ -s "$GROUP_DB" ]; then
  while IFS='|' read -r host tags; do
    groups["$host"]="$tags"
  done < "$GROUP_DB"
fi

print_row "NO" "HOST" "PORT" "USER" "NAME" "GROUPS"
echo "------------------------------------------------------------------------------------------------"

row_no=0
while IFS= read -r record; do
  # Every registry line is counted, filtered or not, so the number shown is
  # the line number that get_node_line expects.
  row_no=$((row_no + 1))
  host="$(node_field "$record" ip)"
  port="$(node_field "$record" port)"
  login="$(node_field "$record" user)"
  label="$(node_field "$record" name)"
  tags="${groups[$host]:--}"
  if [ -n "$wanted_group" ]; then
    case ",$tags," in
      *",$wanted_group,"*) ;;
      *) continue ;;
    esac
  fi
  print_row "$row_no" "$host" "$port" "$login" "$label" "$tags"
done < "$NODE_DB"
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-list
}
# Generates /usr/local/bin/node-preflight, which reports local tool
# availability and then runs a short remote check on every registered node.
write_preflight() {
  cat > /usr/local/bin/node-preflight <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common

echo "LOCAL PREFLIGHT"
echo "==============="
for cmd in ssh sshpass ssh-copy-id rsync psql pg_dump pg_restore jq openssl curl tar; do
  if command -v "$cmd" >/dev/null 2>&1; then
    echo "[OK] $cmd"
  else
    echo "[MISS] $cmd"
  fi
done
echo "Global path : $(get_global_path || true)"
echo "Backup path : $(get_backup_dir || true)"
echo "Node count : $(node_line_count)"
echo "Audit log : $AUDIT_LOG"
echo ""
[ -s "$NODE_DB" ] || exit 0

echo "REMOTE PREFLIGHT"
echo "================"
remote_checks='echo SSH_OK; command -v jq >/dev/null 2>&1 && echo jq=OK || echo jq=MISS; systemctl is-active erupe 2>/dev/null || true; df -h / | tail -1'
while IFS= read -r line; do
  [ -n "$line" ] || continue
  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  echo "--- $name ($ip) ---"
  # ssh must not inherit the loop's stdin: it would read the rest of the
  # registry and the loop would stop after the first node.
  if ssh_exec "$line" "$remote_checks" < /dev/null; then
    audit_log "PREFLIGHT" "$ip" "success" ""
  else
    audit_log "PREFLIGHT" "$ip" "failed" ""
  fi
done < "$NODE_DB"
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-preflight
}
# Generates /usr/local/bin/node-groups, an interactive menu for assigning
# comma-separated group tags to nodes (stored as "host|tags" in $GROUP_DB).
write_node_groups() {
  cat > /usr/local/bin/node-groups <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root

# Removes the registry entry whose host field equals $1 exactly.
# The host is compared as a string: in a sed/grep pattern the dots of an IP
# address are wildcards and could delete another host's entry.
remove_group_entry() {
  local host="$1" tmp
  [ -s "$GROUP_DB" ] || return 0
  tmp="$(mktemp)"
  awk -F'|' -v host="$host" '$1 != host' "$GROUP_DB" > "$tmp"
  # Rewrite in place so the registry keeps its inode and permissions.
  cat "$tmp" > "$GROUP_DB"
  rm -f "$tmp"
}

while true; do
  echo "1. View groups"
  echo "2. Set groups for node"
  echo "3. Clear node groups"
  echo "4. List nodes in group"
  echo "5. Exit"
  read -r -p "Select: " opt
  case "$opt" in
    1)
      if [ -s "$GROUP_DB" ]; then
        cat "$GROUP_DB"
      else
        echo "No groups assigned."
      fi
      ;;
    2)
      node-list
      read -r -p "Node number: " n
      # Checked here as well as in get_node_line: an empty or non-numeric
      # answer must never select a registry line.
      [[ "$n" =~ ^[1-9][0-9]*$ ]] || { echo "Invalid node."; continue; }
      line="$(get_node_line "$n")"
      [ -n "$line" ] || { echo "Invalid node."; continue; }
      ip="$(node_field "$line" ip)"
      read -r -p "Groups comma-separated: " g
      # Normalise: drop spaces, collapse repeated commas, trim edge commas.
      g="$(echo "$g" | tr -d ' ' | sed 's/,,*/,/g; s/^,*//; s/,*$//')"
      require_groups "$g"
      remove_group_entry "$ip"
      if [ -n "$g" ]; then
        echo "$ip|$g" >> "$GROUP_DB"
      fi
      chmod 600 "$GROUP_DB"
      audit_log "GROUP_SET" "$ip" "success" "groups=$g"
      ;;
    3)
      read -r -p "Node host/IP: " ip
      require_host "$ip"
      remove_group_entry "$ip"
      audit_log "GROUP_CLEAR" "$ip" "success" ""
      ;;
    4)
      read -r -p "Group: " g
      require_groups "$g"
      node-list "$g"
      ;;
    5) exit 0 ;;
    *) echo "Invalid option." ;;
  esac
done
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-groups
}
# Generates /usr/local/bin/node-health [GROUP] [REFRESH_SECONDS], a table of
# service state, CPU, RAM, disk and load per node. With a refresh value above
# zero the table is redrawn in a loop.
write_node_health() {
  cat > /usr/local/bin/node-health <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common

filter="${1:-}"
refresh="${2:-0}"
[[ "$refresh" =~ ^[0-9]+$ ]] || die "Refresh interval must be a whole number of seconds."
[ -s "$NODE_DB" ] || die "No nodes registered."

# Remote probe; prints one line "service|cpu|ram|disk|load".
# The service state falls back to "unknown" only when systemctl prints
# nothing. With "is-active || echo unknown" an inactive service produced two
# lines of output and the remaining columns were lost.
probe='cpu=$(top -bn1 | awk -F"[, ]+" "/Cpu\\(s\\)/{print 100-\$8}" 2>/dev/null || echo "?"); ram=$(free | awk "/^Mem:/{printf \"%.0f\", \$3/\$2*100}" 2>/dev/null || echo "?"); disk=$(df / | awk "END{print \$5}" | tr -d "%"); svc=$(systemctl is-active erupe 2>/dev/null || true); svc=${svc:-unknown}; load=$(uptime | awk -F"load average: " "{print \$2}" 2>/dev/null || echo "?"); echo "$svc|$cpu|$ram|$disk|$load"'

display_once() {
  local line ip name g data svc cpu ram disk loadavg
  clear || true
  echo "CLUSTER HEALTH $(date '+%F %T')"
  printf '%-22s | %-18s | %-10s | %-8s | %-8s | %-8s | %s\n' "NODE" "HOST" "ERUPE" "CPU" "RAM" "DISK" "LOAD"
  echo "------------------------------------------------------------------------------------------------"
  while IFS= read -r line; do
    [ -n "$line" ] || continue
    ip="$(node_field "$line" ip)"
    name="$(node_field "$line" name)"
    if [ -n "$filter" ]; then
      # Exact host comparison; as a grep pattern the dots would be wildcards.
      g="$(awk -F'|' -v host="$ip" '$1 == host { print substr($0, length(host) + 2) }' "$GROUP_DB" 2>/dev/null | tail -n 1 || true)"
      [[ ",$g," == *",$filter,"* ]] || continue
    fi
    # stdin comes from /dev/null so ssh cannot consume the rest of the node
    # registry that this loop is reading.
    data="$(ssh_exec "$line" "$probe" < /dev/null 2>/dev/null || echo "unreachable|?|?|?|?")"
    IFS='|' read -r svc cpu ram disk loadavg <<< "$data"
    printf '%-22s | %-18s | %-10s | %-8s | %-8s | %-8s | %s\n' "$name" "$ip" "$svc" "$cpu%" "$ram%" "$disk%" "$loadavg"
  done < "$NODE_DB"
}

if [ "$refresh" -gt 0 ]; then
  while true; do
    display_once
    sleep "$refresh"
  done
else
  display_once
fi
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-health
}
# Generates two commands:
#   /usr/local/bin/node-audit  - view, filter, rotate and protect the audit log
#   /usr/local/bin/node-notify - manage the webhook used by notify_send
write_audit_notify() {
  cat > /usr/local/bin/node-audit <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
touch "$AUDIT_LOG"

# Adds (+a) or removes (-a) the append-only attribute. A missing chattr and a
# failing chattr are reported separately.
set_append_only() {
  local flag="$1" label="$2"
  if ! command -v chattr >/dev/null 2>&1; then
    echo "chattr unavailable"
  elif chattr "$flag" "$AUDIT_LOG"; then
    echo "append-only $label"
  else
    echo "chattr failed (needs root and a filesystem that supports attributes)"
  fi
}

echo "1. Last 80 entries"
echo "2. Filter keyword"
echo "3. Today"
echo "4. Statistics"
echo "5. Live tail"
echo "6. Rotate/archive log"
echo "7. Enable append-only audit log (chattr +a)"
echo "8. Disable append-only audit log (chattr -a)"
read -r -p "Select: " opt
case "$opt" in
  1) tail -n 80 "$AUDIT_LOG" ;;
  2)
    read -r -p "Keyword: " kw
    grep -- "$kw" "$AUDIT_LOG" | tail -n 150 || true
    ;;
  3) grep "^\\[$(date +%F)" "$AUDIT_LOG" | tail -n 150 || true ;;
  4) awk -F'action=' '{print $2}' "$AUDIT_LOG" | awk '{print $1}' | sort | uniq -c | sort -rn | head -25 ;;
  5) tail -f "$AUDIT_LOG" ;;
  6)
    confirm_word "Archive current audit log?" || exit 0
    out="$ORCH_ROOT/audit_$(timestamp).log"
    cp "$AUDIT_LOG" "$out"
    chmod 600 "$out"
    # An append-only log cannot be truncated; say so instead of aborting
    # after the copy with a bare "Operation not permitted".
    if ! truncate -s 0 "$AUDIT_LOG" 2>/dev/null; then
      echo "Archived to $out, but the live log could not be cleared. Disable append-only mode (option 8) and retry." >&2
      exit 1
    fi
    echo "Archived to $out"
    audit_log "AUDIT_ROTATE" "local" "success" "archive=$out"
    ;;
  7) set_append_only +a "enabled" ;;
  8) set_append_only -a "disabled" ;;
  *) echo "Invalid option." ;;
esac
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-audit
  cat > /usr/local/bin/node-notify <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
echo "1. Set webhook URL"
echo "2. Test notification"
echo "3. View config"
echo "4. Disable"
read -r -p "Select: " opt
case "$opt" in
  1)
    read -r -p "Webhook URL: " url
    [[ "$url" =~ ^https?://[^[:space:]]+$ ]] || die "Webhook URL must start with http:// or https:// and contain no spaces."
    # The URL is a secret; create the file private rather than restricting
    # it after the fact.
    (
      umask 077
      printf 'WEBHOOK_URL=%s\n' "$url" > "$NOTIFY_CONF"
    )
    chmod 600 "$NOTIFY_CONF"
    ;;
  2) notify_send "info" "Test notification from Master Orchestration v$ORCH_VERSION" ;;
  3) sed 's/\(WEBHOOK_URL=.\{20\}\).*/\1...redacted/' "$NOTIFY_CONF" 2>/dev/null || true ;;
  4) : > "$NOTIFY_CONF" ;;
  *) echo "Invalid option." ;;
esac
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-notify
}
# Generates /usr/local/bin/node-registry, which exports the registry files to
# a tar.gz bundle or imports such a bundle over the local registry.
write_registry() {
  cat > /usr/local/bin/node-registry <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root

# The only files a registry bundle may contain.
REGISTRY_FILES=(nodes.db node_groups.db databases.db paths.conf config_backups.db notify.conf)

echo "1. Export registry"
echo "2. Import registry"
read -r -p "Select: " opt
case "$opt" in
  1)
    out="$ORCH_ROOT/registry_export_$(timestamp).tar.gz"
    members=()
    for f in "${REGISTRY_FILES[@]}"; do
      if [ -f "$ORCH_ROOT/$f" ]; then
        members+=("$f")
      fi
    done
    [ "${#members[@]}" -gt 0 ] || die "No registry files to export."
    # The bundle holds encrypted passwords and the webhook URL, so it is
    # created private. A tar failure is recorded instead of being logged
    # as success.
    if ( umask 077; tar -czf "$out" -C "$ORCH_ROOT" "${members[@]}" ); then
      chmod 600 "$out"
      echo "Exported: $out"
      echo "Note: $CRYPTO_KEY is not part of the bundle; stored DB passwords can only be decrypted where that key exists."
      audit_log "REGISTRY_EXPORT" "local" "success" "file=$out"
    else
      rm -f "$out"
      audit_log "REGISTRY_EXPORT" "local" "failed" "file=$out"
      die "Export failed."
    fi
    ;;
  2)
    read -r -p "Export file path: " file
    [ -f "$file" ] || die "File not found."
    # Accept only regular files with the expected names, so a crafted
    # archive cannot place symlinks or other files under $ORCH_ROOT.
    listing="$(tar -tvzf "$file" 2>/dev/null)" || die "Not a readable tar.gz archive: $file"
    [ -n "$listing" ] || die "Archive is empty."
    while IFS= read -r entry; do
      member="${entry##* }"
      case "${entry:0:1}:$member" in
        -:nodes.db | -:node_groups.db | -:databases.db | -:paths.conf | -:config_backups.db | -:notify.conf) ;;
        *) die "Unexpected archive member: $member" ;;
      esac
    done <<< "$listing"
    confirm_word "Import will overwrite local registry files." || exit 0
    tar -xzf "$file" -C "$ORCH_ROOT" --no-same-owner
    chmod 600 "$NODE_DB" "$GROUP_DB" "$PG_DB" "$BACKUP_INDEX" "$NOTIFY_CONF" 2>/dev/null || true
    chmod 644 "$PATH_CONF" 2>/dev/null || true
    audit_log "REGISTRY_IMPORT" "local" "success" "file=$file"
    ;;
  *) echo "Invalid option." ;;
esac
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-registry
}
# Generates /usr/local/bin/node-editconfig, an interactive editor for
# config.json on one node, all nodes or a group. It supports jq path
# assignment, two feature-weapon presets, literal find/replace, viewing and
# rollback. Every real edit is preceded by a remote backup and followed by a
# JSON validity check with automatic restore.
write_config_editor() {
  cat > /usr/local/bin/node-editconfig <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root
check_path_setup
with_lock "config-edit"
[ -s "$NODE_DB" ] || die "No nodes registered."

# Fills the global TARGETS array with the registry lines to operate on.
select_targets() {
  local mode n line g ip gs
  TARGETS=()
  echo "1. One node"
  echo "2. All nodes"
  echo "3. Group"
  read -r -p "Target: " mode
  case "$mode" in
    1)
      node-list
      read -r -p "Node number: " n
      # An empty or non-numeric answer must not select anything.
      [[ "$n" =~ ^[1-9][0-9]*$ ]] || die "Invalid node"
      line="$(get_node_line "$n")"
      [ -n "$line" ] || die "Invalid node"
      TARGETS+=("$line")
      ;;
    2)
      confirm_word "Edit config.json on ALL nodes." || exit 0
      while IFS= read -r line; do
        TARGETS+=("$line")
      done < "$NODE_DB"
      ;;
    3)
      read -r -p "Group: " g
      # An empty group would match every node that has no groups at all.
      require_name "$g"
      while IFS= read -r line; do
        ip="$(node_field "$line" ip)"
        gs="$(grep "^${ip}|" "$GROUP_DB" 2>/dev/null | cut -d'|' -f2- || true)"
        if [[ ",$gs," == *",$g,"* ]]; then
          TARGETS+=("$line")
        fi
      done < "$NODE_DB"
      ;;
    *) die "Invalid target" ;;
  esac
  [ "${#TARGETS[@]}" -gt 0 ] || die "No target nodes."
}

# Restores config.json on one node from a backup path typed by the operator,
# keeping a ".pre-rollback.<timestamp>" copy of the current file.
rollback_menu() {
  local n line ip backup dir conf
  node-list
  read -r -p "Node number: " n
  [[ "$n" =~ ^[1-9][0-9]*$ ]] || die "Invalid node"
  line="$(get_node_line "$n")"
  [ -n "$line" ] || die "Invalid node"
  ip="$(node_field "$line" ip)"
  grep "^${ip}|" "$BACKUP_INDEX" 2>/dev/null | tail -n 15 | nl -w2 -s'. ' || true
  read -r -p "Remote backup full path: " backup
  require_abs_path "$backup"
  dir="$(get_erupe_dir "$ip")"
  conf="$dir/config.json"
  confirm_word "Rollback $ip config.json from $backup?" || exit 0
  remote_sudo "$line" "cp '$conf' '${conf}.pre-rollback.$(timestamp)' && cp '$backup' '$conf' && python3 -m json.tool '$conf' >/dev/null"
  remote_sudo "$line" "systemctl restart erupe"
  audit_log "CONFIG_ROLLBACK" "$ip" "success" "backup=$backup"
}

echo "1. jq path set"
echo "2. Default Active Feature"
echo "3. All Active Feature"
echo "4. Literal find/replace"
echo "5. View config"
echo "6. Rollback"
read -r -p "Action: " action
if [ "$action" = "6" ]; then
  rollback_menu
  exit 0
fi
select_targets

# Each action builds edit_cmd: a shell snippet that defines edit_json() and
# ends with a call to it; the config path is appended when it is sent.
# Operator-supplied text travels base64-encoded so it cannot break the quoting.
case "$action" in
  1)
    read -r -p "JSON path, dot form (example: Server.Log.Level): " jpath
    [[ "$jpath" =~ ^[A-Za-z0-9_.-]+$ ]] || die "Invalid JSON path."
    read -r -p "JSON value (example: 123, true, \"text\"): " jval
    # Reject malformed values here instead of on every node.
    printf '%s' "$jval" | jq empty >/dev/null 2>&1 || die "Value is not valid JSON."
    jpath64="$(printf '%s' "$jpath" | base64 -w0)"
    jval64="$(printf '%s' "$jval" | base64 -w0)"
    # edit_json returns jq's failure status. Returning the status of the
    # final rm would report a failed edit as success.
    edit_cmd='edit_json(){ P=$(printf %s '"$jpath64"' | base64 -d); V=$(printf %s '"$jval64"' | base64 -d); F="$1"; T=$(mktemp); R=0; jq --arg p "$P" --argjson v "$V" '\''setpath(($p|split(".")); $v)'\'' "$F" > "$T" && cat "$T" > "$F" || R=$?; rm -f "$T"; return "$R"; }; edit_json'
    ;;
  2) edit_cmd='edit_json(){ sed -i -E "s|\"MinFeatureWeapons\"[[:space:]]*:[[:space:]]*[0-9]+|\"MinFeatureWeapons\": 3|; s|\"MaxFeatureWeapons\"[[:space:]]*:[[:space:]]*[0-9]+|\"MaxFeatureWeapons\": 4|" "$1"; }; edit_json' ;;
  3) edit_cmd='edit_json(){ sed -i -E "s|\"MinFeatureWeapons\"[[:space:]]*:[[:space:]]*[0-9]+|\"MinFeatureWeapons\": 14|; s|\"MaxFeatureWeapons\"[[:space:]]*:[[:space:]]*[0-9]+|\"MaxFeatureWeapons\": 14|" "$1"; }; edit_json' ;;
  4)
    read -r -p "Find: " find
    [ -n "$find" ] || die "Find text must not be empty."
    read -r -p "Replace: " repl
    find64="$(printf '%s' "$find" | base64 -w0)"
    repl64="$(printf '%s' "$repl" | base64 -w0)"
    # Both strings reach perl through the environment. Pasting them into
    # the s/// expression would let a '/' or '$' in the text change, or
    # inject into, the Perl code.
    edit_cmd='edit_json(){ FND=$(printf %s '"$find64"' | base64 -d); REP=$(printf %s '"$repl64"' | base64 -d); FND="$FND" REP="$REP" perl -0777 -pi -e '\''s/\Q$ENV{FND}\E/$ENV{REP}/g'\'' "$1"; }; edit_json'
    ;;
  5)
    for line in "${TARGETS[@]}"; do
      ip="$(node_field "$line" ip)"
      conf="$(get_erupe_dir "$ip")/config.json"
      echo "===== $ip:$conf ====="
      ssh_exec "$line" "cat '$conf'"
    done
    exit 0
    ;;
  *) die "Invalid action" ;;
esac

read -r -p "Dry-run? [Y/n]: " dry
dry="${dry:-Y}"
for line in "${TARGETS[@]}"; do
  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  conf="$(get_erupe_dir "$ip")/config.json"
  echo "Target: $name ($ip) $conf"
  if [[ "${dry,,}" != "n" ]]; then
    echo "Would backup and run: $edit_cmd '$conf'"
    continue
  fi
  bak="$(backup_config_remote "$line")"
  # A failing edit on one node is recorded and must not abort the run for
  # the remaining nodes.
  if ! remote_sudo "$line" "$edit_cmd '$conf'"; then
    echo "Edit command failed, restoring backup."
    remote_sudo "$line" "cp '$bak' '$conf'" || true
    audit_log "CONFIG_EDIT" "$ip" "failed_rollback" "backup=$bak"
    continue
  fi
  if remote_sudo "$line" "python3 -m json.tool '$conf' >/dev/null"; then
    echo "JSON valid."
    audit_log "CONFIG_EDIT" "$ip" "success" "backup=$bak"
  else
    echo "Invalid JSON, rolling back."
    remote_sudo "$line" "cp '$bak' '$conf'" || true
    audit_log "CONFIG_EDIT" "$ip" "failed_rollback" "backup=$bak"
    continue
  fi
  read -r -p "Restart erupe on $name? [y/N]: " r
  if [[ "${r,,}" = "y" ]]; then
    remote_sudo "$line" "systemctl restart erupe"
    sleep 3
    remote_sudo "$line" "systemctl is-active erupe"
  fi
done
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-editconfig
}
# Generates two commands:
#   /usr/local/bin/node-update    - update a hand-picked list of nodes
#   /usr/local/bin/node-updateall - canary/rolling update of every node with a
#                                   failure threshold
# Both snapshot a node first, pipe scripted answers into
# /usr/local/bin/erupe-update on the node and write a report file.
write_update_tools() {
  cat > /usr/local/bin/node-update <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root
check_path_setup
with_lock "node-update"

# Fills the global TARGETS array from a space-separated list of node numbers.
select_nodes() {
  local -a nums=()
  local n line
  TARGETS=()
  node-list
  read -r -p "Node numbers separated by space: " -a nums
  for n in ${nums[@]+"${nums[@]}"}; do
    [[ "$n" =~ ^[1-9][0-9]*$ ]] || continue
    line="$(get_node_line "$n")"
    if [ -n "$line" ]; then
      TARGETS+=("$line")
    fi
  done
  [ "${#TARGETS[@]}" -gt 0 ] || die "No valid nodes selected."
}

# Dies unless value $2 (described by $1) matches the regex $3.
require_match() {
  local label="$1" value="$2" pattern="$3"
  [[ "$value" =~ $pattern ]] || die "Invalid $label: '$value'"
}

# Builds the global PAYLOAD: a remote command that pipes the answers into
# erupe-update. The answers are placed inside a single-quoted printf format,
# so each one is checked first; a quote or '%' would otherwise corrupt the
# command run as root on the node. Empty answers are still allowed where the
# prompt has no default.
build_payload() {
  local mode ram gogc ll rb ls
  read -r -p "Mode [1=Express, 2=Manual, 3=Base]: " mode
  mode="${mode:-1}"
  case "$mode" in
    1) PAYLOAD="printf '1\n' | /usr/local/bin/erupe-update" ;;
    2)
      read -r -p "RAM percent: " ram
      require_match "RAM percent" "$ram" '^[0-9]*$'
      read -r -p "GOGC: " gogc
      require_match "GOGC" "$gogc" '^[0-9]*$'
      read -r -p "Log level [1-5 default 4]: " ll
      ll="${ll:-4}"
      require_match "log level" "$ll" '^[1-5]$'
      read -r -p "Rebuild? [y/N]: " rb
      require_match "rebuild answer" "$rb" '^[YyNn]?$'
      read -r -p "Save logs? [y/N]: " ls
      require_match "save-logs answer" "$ls" '^[YyNn]?$'
      PAYLOAD="printf '2\n${ram}\n${gogc}\n${ll}\n${rb}\n${ls}\n' | /usr/local/bin/erupe-update"
      ;;
    3)
      read -r -p "Log level [1-5 default 4]: " ll
      ll="${ll:-4}"
      require_match "log level" "$ll" '^[1-5]$'
      read -r -p "Rebuild? [y/N]: " rb
      require_match "rebuild answer" "$rb" '^[YyNn]?$'
      read -r -p "Save logs? [y/N]: " ls
      require_match "save-logs answer" "$ls" '^[YyNn]?$'
      PAYLOAD="printf '3\n${ll}\n${rb}\n${ls}\n' | /usr/local/bin/erupe-update"
      ;;
    *) die "Invalid mode" ;;
  esac
}

select_nodes
build_payload
read -r -p "Dry-run? [Y/n]: " dry
dry="${dry:-Y}"
report="$(make_report update)"
success=0
failed=0
for line in "${TARGETS[@]}"; do
  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  echo "=== $name ($ip) ===" | tee -a "$report"
  if [[ "${dry,,}" != "n" ]]; then
    echo "DRY-RUN: $PAYLOAD" | tee -a "$report"
    continue
  fi
  # Best effort: an unreachable node must not end the whole run here; the
  # update step below records the failure.
  snapshot_node "$line" "update" || echo "WARN snapshot failed" | tee -a "$report"
  if remote_sudo "$line" "$PAYLOAD"; then
    sleep 3
    # --quiet reports the state through the exit status. Grepping the
    # output for "active" would also match "inactive".
    if remote_sudo "$line" "systemctl is-active --quiet erupe"; then
      echo "OK active" | tee -a "$report"
      success=$((success + 1))
      audit_log "NODE_UPDATE" "$ip" "success" ""
    else
      echo "FAILED service inactive" | tee -a "$report"
      failed=$((failed + 1))
      audit_log "NODE_UPDATE" "$ip" "service_inactive" ""
    fi
  else
    echo "FAILED command" | tee -a "$report"
    failed=$((failed + 1))
    audit_log "NODE_UPDATE" "$ip" "failed" ""
  fi
done
echo "Report: $report"
echo "Success=$success Failed=$failed" | tee -a "$report"
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-update
  cat > /usr/local/bin/node-updateall <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root
check_path_setup
with_lock "node-updateall"
[ -s "$NODE_DB" ] || die "No nodes registered."

# Dies unless value $2 (described by $1) matches the regex $3.
require_match() {
  local label="$1" value="$2" pattern="$3"
  [[ "$value" =~ $pattern ]] || die "Invalid $label: '$value'"
}

read -r -p "Canary first? [Y/n]: " canary
canary="${canary:-Y}"
read -r -p "Failure threshold before stopping [default 1]: " threshold
threshold="${threshold:-1}"
[[ "$threshold" =~ ^[0-9]+$ ]] || threshold=1
read -r -p "Dry-run? [Y/n]: " dry
dry="${dry:-Y}"
read -r -p "Mode [1=Express, 2=Manual, 3=Base]: " mode
mode="${mode:-1}"
# The answers are placed inside a single-quoted printf format that runs as
# root on every node, so each one is validated before use.
case "$mode" in
  1) PAYLOAD="printf '1\n' | /usr/local/bin/erupe-update" ;;
  2)
    read -r -p "RAM percent: " ram
    require_match "RAM percent" "$ram" '^[0-9]*$'
    read -r -p "GOGC: " gogc
    require_match "GOGC" "$gogc" '^[0-9]*$'
    read -r -p "Log level [1-5 default 4]: " ll
    ll="${ll:-4}"
    require_match "log level" "$ll" '^[1-5]$'
    read -r -p "Rebuild? [y/N]: " rb
    require_match "rebuild answer" "$rb" '^[YyNn]?$'
    read -r -p "Save logs? [y/N]: " ls
    require_match "save-logs answer" "$ls" '^[YyNn]?$'
    PAYLOAD="printf '2\n${ram}\n${gogc}\n${ll}\n${rb}\n${ls}\n' | /usr/local/bin/erupe-update"
    ;;
  3)
    read -r -p "Log level [1-5 default 4]: " ll
    ll="${ll:-4}"
    require_match "log level" "$ll" '^[1-5]$'
    read -r -p "Rebuild? [y/N]: " rb
    require_match "rebuild answer" "$rb" '^[YyNn]?$'
    read -r -p "Save logs? [y/N]: " ls
    require_match "save-logs answer" "$ls" '^[YyNn]?$'
    PAYLOAD="printf '3\n${ll}\n${rb}\n${ls}\n' | /usr/local/bin/erupe-update"
    ;;
  *) die "Invalid mode" ;;
esac

mapfile -t targets < "$NODE_DB"
canary_mode=0
if [[ "${canary,,}" != "n" ]] && [ "${#targets[@]}" -gt 1 ]; then
  canary_mode=1
  echo "Canary node:"
  line="${targets[0]}"
  echo "$(node_field "$line" name) ($(node_field "$line" ip))"
  confirm_word "Run canary update first?" || exit 0
else
  confirm_word "Rolling update ALL nodes?" || exit 0
fi

report="$(make_report updateall)"
failed=0
success=0
idx=0
for line in "${targets[@]}"; do
  idx=$((idx + 1))
  ip="$(node_field "$line" ip)"
  name="$(node_field "$line" name)"
  echo "=== [$idx/${#targets[@]}] $name ($ip) ===" | tee -a "$report"
  if [[ "${dry,,}" != "n" ]]; then
    echo "DRY-RUN: $PAYLOAD" | tee -a "$report"
    continue
  fi
  # Best effort: an unreachable node is counted as a failed update below
  # instead of aborting the rollout here without a summary.
  snapshot_node "$line" "rolling_update" || echo "WARN snapshot failed" | tee -a "$report"
  # --quiet reports the state through the exit status. Grepping the output
  # for "active" would also match "inactive".
  if remote_sudo "$line" "$PAYLOAD" && sleep 3 && remote_sudo "$line" "systemctl is-active --quiet erupe"; then
    echo "OK active" | tee -a "$report"
    success=$((success + 1))
    audit_log "ROLLING_UPDATE" "$ip" "success" ""
    # The canary is the first node: pause so the operator can inspect it
    # before the update reaches the rest of the cluster.
    if [ "$canary_mode" -eq 1 ] && [ "$idx" -eq 1 ]; then
      if ! confirm_word "Canary $name updated and active. Continue with the remaining $(( ${#targets[@]} - 1 )) node(s)?"; then
        echo "Rollout stopped by operator after canary." | tee -a "$report"
        break
      fi
    fi
  else
    echo "FAILED" | tee -a "$report"
    failed=$((failed + 1))
    audit_log "ROLLING_UPDATE" "$ip" "failed" ""
    notify_send "error" "Rolling update failed on $name ($ip)"
    if [ "$canary_mode" -eq 1 ] && [ "$idx" -eq 1 ]; then
      echo "Canary failed. Stopping rollout." | tee -a "$report"
      break
    fi
    if [ "$failed" -ge "$threshold" ]; then
      echo "Failure threshold reached. Stopping rollout." | tee -a "$report"
      break
    fi
  fi
done
echo "Report: $report"
echo "Success=$success Failed=$failed" | tee -a "$report"
SCRIPT_EOF
  chmod 755 /usr/local/bin/node-updateall
}
write_node_db() {
cat > /usr/local/bin/node-db <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root
show_db_list() {
[ -s "$PG_DB" ] || { echo "No DB registered."; return; }
printf '%-4s | %-18s | %-6s | %-12s | %-18s | %s\n' "NO" "HOST" "PORT" "USER" "DB" "ALIAS"
nl -w1 -s'|' "$PG_DB" | while IFS='|' read -r n ip port user enc db alias; do
printf '%-4s | %-18s | %-6s | %-12s | %-18s | %s\n' "$n" "$ip" "$port" "$user" "$db" "$alias"
done
}
add_db() {
read -r -p "DB host: " ip; require_host "$ip"
read -r -p "Port [5432]: " port; port="${port:-5432}"; require_port "$port"
read -r -p "User [postgres]: " user; user="${user:-postgres}"; require_user "$user"
read -r -p "Database [erupe]: " db; db="${db:-erupe}"; require_name "$db"
read -r -p "Alias: " alias; alias="${alias:-$ip}"; require_name "$alias"
read -r -s -p "Password: " pass; echo
if PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -c '\q' >/dev/null 2>&1; then
enc="$(encrypt_secret "$pass")"
echo "$ip|$port|$user|$enc|$db|$alias" >> "$PG_DB"
chmod 600 "$PG_DB"
audit_log "DB_ADD" "$ip" "success" "alias=$alias"
else
echo "Connection failed."
audit_log "DB_ADD" "$ip" "failed" ""
fi
}
manage_db() {
show_db_list
read -r -p "DB number: " n
line="$(sed -n "${n}p" "$PG_DB")"
[ -n "$line" ] || die "Invalid DB."
IFS='|' read -r ip port user enc db alias <<< "$line"
pass="$(decrypt_secret "$enc")"
[ -n "$pass" ] || die "Could not decrypt password."
while true; do
echo "DB: $alias ($ip:$port/$db)"
echo "1. psql shell"
echo "2. Backup"
echo "3. Restore"
echo "4. Rotation 01"
echo "5. Rotation 02"
echo "6. Truncate feature_weapon"
echo "7. Update users.rights"
echo "8. REINDEX + VACUUM"
echo "9. Stats"
echo "10. Back"
read -r -p "Select: " opt
case "$opt" in
1) PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" ;;
2)
dir="$(get_backup_dir)/BackupDB_${alias}_${db}"; mkdir -p "$dir"
out="$dir/${db}_$(timestamp).dump"
PGPASSWORD="$pass" pg_dump -h "$ip" -p "$port" -U "$user" -Fc -d "$db" -f "$out"
echo "Saved: $out"
audit_log "DB_BACKUP" "$ip" "success" "file=$out"
;;
3)
pre="$(db_pre_backup "$ip" "$port" "$user" "$pass" "$db" "restore")"
echo "Pre-restore backup: $pre"
read -r -p "Restore file: " file
[ -f "$file" ] || { echo "File not found."; continue; }
confirm_word "Restore into $db?" || continue
PGPASSWORD="$pass" pg_restore -h "$ip" -p "$port" -U "$user" -d "$db" --no-owner --no-privileges "$file" || PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -f "$file"
audit_log "DB_RESTORE" "$ip" "success" "file=$file pre=$pre"
;;
4|5)
rot="0$((opt-3))"
pre="$(db_pre_backup "$ip" "$port" "$user" "$pass" "$db" "rotation${rot}")"
bdir="$(get_backup_dir)"
file="$(find "$bdir" -maxdepth 2 -type f -name "shop_items_rotation${rot}.*" | head -n1)"
[ -n "$file" ] || { echo "Rotation file not found in $bdir"; continue; }
confirm_word "TRUNCATE shop_items and restore rotation ${rot}? Pre-backup: $pre" || continue
PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -c "TRUNCATE TABLE IF EXISTS shop_items CASCADE;"
PGPASSWORD="$pass" pg_restore -h "$ip" -p "$port" -U "$user" -d "$db" --no-owner --no-privileges "$file" || PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -f "$file"
audit_log "DB_ROTATION" "$ip" "success" "rotation=$rot pre=$pre"
;;
6)
pre="$(db_pre_backup "$ip" "$port" "$user" "$pass" "$db" "truncate_feature_weapon")"
confirm_word "TRUNCATE feature_weapon? Pre-backup: $pre" || continue
schema="$(PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -A -t -c "SELECT schemaname FROM pg_tables WHERE tablename='feature_weapon' LIMIT 1;")"
[ -n "$schema" ] && PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -c "TRUNCATE TABLE \"$schema\".feature_weapon;" || echo "feature_weapon not found"
;;
7)
read -r -p "New rights numeric value: " rights
[[ "$rights" =~ ^[0-9]+$ ]] || { echo "Invalid number"; continue; }
pre="$(db_pre_backup "$ip" "$port" "$user" "$pass" "$db" "update_rights")"
confirm_word "Update all users.rights to $rights? Pre-backup: $pre" || continue
schema="$(PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -A -t -c "SELECT schemaname FROM pg_tables WHERE tablename='users' LIMIT 1;")"
[ -n "$schema" ] && PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -c "UPDATE \"$schema\".users SET rights=$rights; ALTER TABLE \"$schema\".users ALTER COLUMN rights SET DEFAULT $rights;"
;;
8) PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -c "REINDEX DATABASE \"$db\"; VACUUM (VERBOSE, ANALYZE);" ;;
9) PGPASSWORD="$pass" psql -h "$ip" -p "$port" -U "$user" -d "$db" -c "SELECT pg_size_pretty(pg_database_size('$db')) AS db_size;" ;;
10) return ;;
esac
done
}
# Top-level menu of node-db. Loops until the operator selects "Exit".
while true; do
  echo "1. Manage DB"
  echo "2. Add DB"
  echo "3. Remove DB entry"
  echo "4. List"
  echo "5. Exit"
  # Leave explicitly on EOF instead of relying on set -e to end the loop.
  read -r -p "Select: " opt || exit 1
  case "$opt" in
    1) manage_db ;;
    2) add_db ;;
    3)
      show_db_list
      read -r -p "Number: " n
      # The answer becomes a sed line address. An empty value would turn the
      # command into a bare "d" (delete every line) and a value such as "1,$"
      # would delete a whole range, so only a positive integer is accepted.
      if [[ "$n" =~ ^[1-9][0-9]*$ ]]; then
        confirm_word "Remove DB entry $n?" && sed -i "${n}d" "$PG_DB"
      else
        echo "Invalid number"
      fi
      ;;
    4) show_db_list ;;
    5) exit 0 ;;
  esac
done
SCRIPT_EOF
chmod 755 /usr/local/bin/node-db
}
# Generates /usr/local/bin/sync-manager (mode 755): an interactive rsync push
# from this machine to one node, all nodes, or the nodes of a group.
# The generated script is written verbatim (quoted heredoc, no expansion here).
write_sync_manager() {
cat > /usr/local/bin/sync-manager <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root
with_lock "sync"
[ -s "$NODE_DB" ] || die "No nodes registered."

# Fill the global TARGETS array with the registry lines of the nodes to sync.
# Calls die when the resulting selection is empty.
select_targets() {
  local mode n g line ip gs
  TARGETS=()
  echo "1. One node"
  echo "2. All nodes"
  echo "3. Group"
  read -r -p "Target: " mode
  case "$mode" in
    1)
      node-list
      read -r -p "Node number: " n
      line="$(get_node_line "$n")"
      [ -n "$line" ] && TARGETS+=("$line")
      ;;
    2)
      confirm_word "Sync to ALL nodes?" || exit 0
      while IFS= read -r line; do TARGETS+=("$line"); done < "$NODE_DB"
      ;;
    3)
      read -r -p "Group: " g
      # An empty name would match every node that has no group entry
      # (",," is contained in ",,"), silently widening the target set.
      [ -n "$g" ] || die "No group given."
      while IFS= read -r line; do
        ip="$(node_field "$line" ip)"
        gs="$(grep "^${ip}|" "$GROUP_DB" 2>/dev/null | cut -d'|' -f2- || true)"
        [[ ",$gs," == *",$g,"* ]] && TARGETS+=("$line")
      done < "$NODE_DB"
      ;;
  esac
  [ "${#TARGETS[@]}" -gt 0 ] || die "No targets."
}

read -r -p "Source path [/ERUPE-SYNC/]: " src
src="${src:-/ERUPE-SYNC/}"
[ -e "${src%/}" ] || die "Source not found."
read -r -p "Destination absolute path: " dest
require_abs_path "$dest"
# dest is embedded inside single quotes in the remote mkdir/chmod commands
# below; a quote character would break out of that quoting.
[[ "$dest" != *"'"* ]] || die "Destination must not contain a single quote."
read -r -p "Dry-run? [Y/n]: " dry; dry="${dry:-Y}"
read -r -p "Conflict [1=overwrite, 2=skip existing]: " conflict; conflict="${conflict:-1}"
read -r -p "Bandwidth limit, empty=unlimited (example 10M): " bw

# rsync options are kept in an array so every option stays a single word,
# whatever the operator typed.
rsync_flags=(-azh --stats --info=progress2)
if [ "$conflict" = "2" ]; then
  rsync_flags+=(--ignore-existing)
fi
if [ -n "$bw" ]; then
  # Number with an optional short unit suffix; rejects blanks and dashes that
  # could otherwise smuggle extra rsync options in.
  [[ "$bw" =~ ^[0-9]+([.][0-9]+)?[A-Za-z]{0,3}$ ]] || die "Invalid bandwidth limit: $bw"
  rsync_flags+=("--bwlimit=$bw")
fi
# Any answer other than n/N keeps the run a dry-run.
if [[ "${dry,,}" != "n" ]]; then
  rsync_flags+=(--dry-run)
fi

select_targets
report="$(make_report sync)"
for line in "${TARGETS[@]}"; do
  ip="$(node_field "$line" ip)"
  port="$(node_field "$line" port)"
  user="$(node_field "$line" user)"
  name="$(node_field "$line" name)"
  echo "=== $name ($ip) ===" | tee -a "$report"
  # The destination directory is created even for a dry-run, as before.
  # With pipefail the pipeline status reflects an rsync failure.
  if remote_sudo "$line" "mkdir -p '$dest'" &&
    rsync "${rsync_flags[@]}" -e "ssh -p $port -o StrictHostKeyChecking=accept-new" "$src" "$user@$ip:$dest/" | tee -a "$report"; then
    audit_log "SYNC" "$ip" "completed" "src=$src dest=$dest dry=$dry"
  else
    # Still stop at the first failing node, but leave an audit record
    # instead of exiting silently through set -e.
    audit_log "SYNC" "$ip" "failed" "src=$src dest=$dest dry=$dry"
    die "Sync failed on $name ($ip). Report: $report"
  fi
done

# Permissions are only offered after a real (non dry-run) sync.
if [[ "${dry,,}" = "n" ]]; then
  read -r -p "Apply chmod? [skip/755/750/644]: " perm
  case "$perm" in
    755|750|644)
      for line in "${TARGETS[@]}"; do remote_sudo "$line" "chmod -R $perm '$dest'"; done
      ;;
  esac
fi
echo "Report: $report"
SCRIPT_EOF
chmod 755 /usr/local/bin/sync-manager
}
# Generates /usr/local/bin/node-manager (mode 755): the interactive main menu
# that edits the node registry and path config and launches the other tools.
# The generated script is written verbatim (quoted heredoc, no expansion here).
write_node_manager() {
cat > /usr/local/bin/node-manager <<'SCRIPT_EOF'
#!/usr/bin/env bash
set -euo pipefail
source /usr/local/lib/erupe-orchestrator-common
need_root

# Run an external tool from the menu. A non-zero exit status is reported on
# stderr but not propagated, so set -e does not terminate node-manager just
# because a tool failed or refused to run.
run_tool() {
  local rc=0
  "$@" || rc=$?
  if [ "$rc" -ne 0 ]; then
    echo "$1 exited with status $rc" >&2
  fi
  return 0
}

# Sub-menu for the GLOBAL, per-host and BACKUP_DIR entries in $PATH_CONF.
# Each setter removes the previous line for that key before appending.
configure_paths() {
  local opt p ip
  while true; do
    echo "Global path: $(get_global_path || true)"
    echo "Backup path: $(get_backup_dir || true)"
    echo "1. Set global Erupe path"
    echo "2. Set node-specific path"
    echo "3. Set backup path"
    echo "4. View config"
    echo "5. Back"
    read -r -p "Select: " opt
    case "$opt" in
      1)
        read -r -p "Path: " p
        require_abs_path "$p"
        sed -i '/^GLOBAL=/d' "$PATH_CONF" 2>/dev/null || true
        echo "GLOBAL=${p%/}" >> "$PATH_CONF"
        ;;
      2)
        node-list
        read -r -p "Host/IP: " ip
        require_host "$ip"
        read -r -p "Path: " p
        require_abs_path "$p"
        sed -i "/^${ip}=/d" "$PATH_CONF" 2>/dev/null || true
        echo "$ip=${p%/}" >> "$PATH_CONF"
        ;;
      3)
        read -r -p "Backup path: " p
        require_abs_path "$p"
        sed -i '/^BACKUP_DIR=/d' "$PATH_CONF" 2>/dev/null || true
        echo "BACKUP_DIR=${p%/}" >> "$PATH_CONF"
        ;;
      4)
        # The setters above tolerate a missing file; viewing must too,
        # otherwise a failing cat would end node-manager under set -e.
        if [ -f "$PATH_CONF" ]; then
          cat "$PATH_CONF"
        else
          echo "No path configuration saved yet."
        fi
        ;;
      5) return 0 ;;
    esac
  done
}

# Prompt for a node, install the master's public key on it with the given
# password, and append "ip|port|user|name" to $NODE_DB on success.
add_node() {
  local ip port user name pass
  read -r -p "Host/IP: " ip; require_host "$ip"
  read -r -p "SSH port [22]: " port; port="${port:-22}"; require_port "$port"
  read -r -p "SSH user [root]: " user; user="${user:-root}"; require_user "$user"
  read -r -p "Node name: " name; require_name "$name"
  known_host_bootstrap "$ip" "$port"
  read -r -s -p "SSH password for initial key install: " pass; echo
  # Hand the password over through the SSHPASS environment variable (-e)
  # rather than -p, which would place it on the command line.
  if SSHPASS="$pass" sshpass -e ssh-copy-id -o StrictHostKeyChecking=accept-new -i /root/.ssh/id_ed25519.pub -p "$port" "$user@$ip"; then
    echo "$ip|$port|$user|$name" >> "$NODE_DB"
    chmod 600 "$NODE_DB"
    audit_log "NODE_ADD" "$ip" "success" "user=$user name=$name"
  else
    audit_log "NODE_ADD" "$ip" "failed" "user=$user"
  fi
}

# Edit or delete the registry line chosen by its number in node-list.
modify_node() {
  local n line a ip port user name entry escaped
  node-list
  read -r -p "Node number: " n
  # n is used as a sed line address below, so only a positive integer is
  # accepted.
  [[ "$n" =~ ^[1-9][0-9]*$ ]] || { echo "Invalid."; return 0; }
  line="$(get_node_line "$n")"
  [ -n "$line" ] || { echo "Invalid."; return 0; }
  echo "1. Edit"
  echo "2. Delete"
  read -r -p "Action: " a
  if [ "$a" = "2" ]; then
    # "return 0": a bare return would hand confirm_word's non-zero status to
    # the caller and end node-manager under set -e.
    confirm_word "Delete node entry $n?" || return 0
    sed -i "${n}d" "$NODE_DB"
    audit_log "NODE_DELETE" "$(node_field "$line" ip)" "success" "name=$(node_field "$line" name)"
    return 0
  fi
  read -r -p "Host/IP: " ip; require_host "$ip"
  read -r -p "Port [22]: " port; port="${port:-22}"; require_port "$port"
  read -r -p "User [root]: " user; user="${user:-root}"; require_user "$user"
  read -r -p "Name: " name; require_name "$name"
  entry="$ip|$port|$user|$name"
  # Escape the characters that are special in a sed replacement (backslash,
  # "&" and the "/" delimiter) so the entry is written literally.
  escaped="$(printf '%s' "$entry" | sed -e 's|[\\/&]|\\&|g')"
  sed -i "${n}s/.*/${escaped}/" "$NODE_DB"
  audit_log "NODE_MODIFY" "$ip" "success" "user=$user name=$name"
}

# Interactive SSH to one node, or broadcast of a command / local script file
# to every node in $NODE_DB.
ssh_menu() {
  local opt n line cmd script ip port user
  echo "1. Interactive SSH"
  echo "2. Broadcast one-line command"
  echo "3. Broadcast local script file"
  read -r -p "Select: " opt
  case "$opt" in
    1)
      node-list
      read -r -p "Node number: " n
      line="$(get_node_line "$n")"
      # Written as "if" so an empty selection does not make the function
      # return non-zero; a non-zero status from the session itself (presumably
      # ssh's exit status) must not end node-manager either.
      if [ -n "$line" ]; then
        ssh_interactive "$line" || true
      fi
      ;;
    2)
      read -r -p "Command: " cmd
      confirm_word "Broadcast command to all nodes?" || return 0
      # The registry is read on fd 3 so a remote command that reads stdin
      # cannot consume the remaining node lines.
      while IFS= read -r -u 3 line; do
        echo "== $(node_field "$line" name) =="
        ssh_exec "$line" "$cmd" || true
      done 3< "$NODE_DB"
      ;;
    3)
      read -r -p "Local script path: " script
      [ -f "$script" ] || { echo "File not found."; return 0; }
      confirm_word "Run this script on all nodes?" || return 0
      # NOTE(review): the remote copy uses a fixed name under /tmp.
      while IFS= read -r -u 3 line; do
        ip="$(node_field "$line" ip)"; port="$(node_field "$line" port)"; user="$(node_field "$line" user)"
        # Stop the broadcast at the first failing node, as before, but return
        # to the menu instead of exiting node-manager.
        if ! rsync -az -e "ssh -p $port -o StrictHostKeyChecking=accept-new" "$script" "$user@$ip:/tmp/erupe_remote_exec.sh"; then
          echo "Upload to $ip failed; stopping broadcast." >&2
          break
        fi
        if ! remote_sudo "$line" "bash /tmp/erupe_remote_exec.sh; rm -f /tmp/erupe_remote_exec.sh"; then
          echo "Remote execution on $ip failed; stopping broadcast." >&2
          break
        fi
      done 3< "$NODE_DB"
      ;;
  esac
  return 0
}

# Main menu. External tools go through run_tool so that their exit status
# cannot close the menu.
while true; do
  echo "================================================================"
  echo "RAIN/Erupe Master Orchestration v$ORCH_VERSION"
  echo "================================================================"
  echo "1. Configure paths"
  echo "2. Add node"
  echo "3. Modify/delete node"
  echo "4. Node list"
  echo "5. Node groups"
  echo "6. Preflight check"
  echo "7. Health monitor"
  echo "8. Audit log"
  echo "9. Notifications"
  echo "10. Edit config.json"
  echo "11. Database manager"
  echo "12. Update selected nodes"
  echo "13. Rolling update all nodes"
  echo "14. SSH/broadcast"
  echo "15. Remote erupe-manager"
  echo "16. Sync manager"
  echo "17. Registry export/import"
  echo "18. Exit"
  read -r -p "Select: " opt
  case "$opt" in
    1) configure_paths ;;
    2) add_node ;;
    3) modify_node ;;
    4) run_tool node-list ;;
    5) run_tool node-groups ;;
    6) run_tool node-preflight ;;
    7)
      read -r -p "Group filter empty=all: " g
      read -r -p "Refresh seconds [0]: " r
      run_tool node-health "$g" "${r:-0}"
      ;;
    8) run_tool node-audit ;;
    9) run_tool node-notify ;;
    10) run_tool node-editconfig ;;
    11) run_tool node-db ;;
    12) run_tool node-update ;;
    13) run_tool node-updateall ;;
    14) ssh_menu ;;
    15)
      node-list
      read -r -p "Node number: " n
      line="$(get_node_line "$n")"
      if [ -n "$line" ]; then
        ssh_interactive "$line" "/usr/local/bin/erupe-manager" || true
      fi
      ;;
    16) run_tool sync-manager ;;
    17) run_tool node-registry ;;
    18) exit 0 ;;
  esac
done
SCRIPT_EOF
chmod 755 /usr/local/bin/node-manager
}
# Installer entry point. Checks for root, then runs every install step in a
# fixed order and finally prints the success line and the start hint.
# Arguments are accepted but not used.
main() {
  # Steps in execution order; each name is a function of this installer.
  local -a install_steps=(
    install_dependencies
    bootstrap_storage
    write_common_lib
    write_node_list
    write_preflight
    write_node_groups
    write_node_health
    write_audit_notify
    write_registry
    write_config_editor
    write_update_tools
    write_node_db
    write_sync_manager
    write_node_manager
  )
  local step

  need_root
  for step in "${install_steps[@]}"; do
    "$step"
  done

  ok "Master Orchestration v$ORCH_VERSION installed."
  echo "Start: sudo node-manager"
}
main "$@"
EOF
sudo chmod +x /usr/local/bin/setup-orchestration
sudo /usr/local/bin/setup-orchestration