Files
deploy-felhom-compose/scripts/felhom-wipe.sh
T
admin 4a9ed71b7a fix(felhom-wipe): detect sys_drive and other backups-only storage paths
Two bugs prevented /mnt/sys_drive (and similar drives) from being detected:

1. controller.yaml is root-owned (permission denied from the host), so data_dir
   could not be read. As a result settings.json was never loaded and detection
   fell back to the /mnt/* scan only. Fix: also try
   `docker volume inspect felhom-controller_controller-data` to locate the
   actual settings.json in the Docker volume.

2. Fallback /mnt/* scan only checked for felhom-data/ or appdata/, missing
   drives that only have backups/ (e.g. sys_drive pre-v0.26.0). Fix: also
   check for backups/ in the scan condition.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-22 10:31:12 +01:00

415 lines
13 KiB
Bash

#!/usr/bin/env bash
# Strict mode: stop on unhandled command failures (-e), on use of unset
# variables (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# ===================================================================
# felhom-wipe.sh — Clean felhom data from a test node
# Usage: ./felhom-wipe.sh --level <soft|controller|full|nuclear> [--yes] [--include-protected]
# ===================================================================
# --- Colors ---
# Stored as literal backslash sequences; the output helpers print them with
# `echo -e`, which is what converts them into terminal escape codes.
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'
# --- Defaults ---
# LEVEL is filled in by parse_args from --level.
LEVEL=""
# Nothing is deleted unless --yes flips this to false.
DRY_RUN=true
# Set to true by --include-protected; protected stacks are kept otherwise.
INCLUDE_PROTECTED=false
# --- Configuration (auto-detected) ---
# These are starting values only: detect_paths may overwrite STACKS_DIR,
# DATA_DIR and SETTINGS_JSON.
CONTROLLER_YAML="/opt/docker/felhom-controller/controller.yaml"
DATA_DIR="/opt/docker/felhom-controller/data"
COMPOSE_DIR="/opt/docker/felhom-controller"
STACKS_DIR="/opt/docker/stacks"
SETTINGS_JSON="$DATA_DIR/settings.json"
# --- Helpers ---
# Report a fatal error in red on stderr and terminate the script.
# Arguments: $1 - message text (printed after an "ERROR: " prefix)
# Exits:     always, with status 1
die() {
  local message="$1"
  echo -e "${RED}ERROR: ${message}${NC}" >&2
  exit 1
}
# Print an informational message in green on stdout.
# Arguments: $1 - message text (backslash escapes are interpreted by echo -e)
info() {
  local message="$1"
  echo -e "${GREEN}${message}${NC}"
}
# Print a warning message in yellow on stdout.
# Arguments: $1 - message text (backslash escapes are interpreted by echo -e)
warn() {
  local message="$1"
  echo -e "${YELLOW}${message}${NC}"
}
# Print a message in bold on stdout.
# Arguments: $1 - message text (backslash escapes are interpreted by echo -e)
bold() {
  local message="$1"
  echo -e "${BOLD}${message}${NC}"
}
# Print the disk usage of a path in human-readable form.
# Arguments: $1 - file or directory to measure
# Outputs:   the size column of `du -sh`, "?" when du produced no size,
#            or "n/a" when the path does not exist
# Returns:   0 in all cases (the value is only used for display)
human_size() {
  local path="$1"
  local size
  if [ ! -e "$path" ]; then
    echo "n/a"
    return 0
  fi
  # Capture first, decide afterwards. du can print a size and still exit
  # non-zero (e.g. an unreadable subdirectory); chaining `|| echo "?"` onto the
  # pipeline would then emit both the size and "?" when pipefail is active.
  size=$(du -sh -- "$path" 2>/dev/null | cut -f1) || true
  echo "${size:-?}"
}
# Print the command-line help on stdout and terminate.
# Exits: always, with status 1
usage() {
  local prog
  # Resolve the script name up front; an empty name is tolerated, so a failing
  # basename must not stop the help text from being printed.
  prog=$(basename "$0") || true
  cat <<EOF
Usage: ${prog} --level <level> [--yes] [--include-protected]
Levels:
soft Controller state only (settings.json, metrics.db, session data)
controller Soft + remove all app containers, volumes, stack dirs, app.yaml files
full Controller + felhom-data/ on all drives (appdata, backups)
nuclear Full + controller.yaml, controller container, Traefik, Portainer, all Docker data
Options:
--yes Execute the wipe (default: dry run)
--include-protected Also remove protected stacks (controller level only)
EOF
  exit 1
}
# --- Parse Args ---
# Parse command-line arguments into the LEVEL, DRY_RUN and INCLUDE_PROTECTED
# globals.
# Arguments: "$@" - the script's command-line arguments
# Globals:   LEVEL, DRY_RUN, INCLUDE_PROTECTED (written)
# Exits:     via usage when -h/--help is given or no level was supplied;
#            via die on an unknown argument, a missing --level value or an
#            invalid level
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --level)
        # Check the count before touching $2: under `set -u` a trailing
        # `--level` would otherwise die with a bare "unbound variable".
        [[ $# -ge 2 ]] || die "--level requires a value (soft|controller|full|nuclear)"
        LEVEL="$2"
        shift 2
        ;;
      --level=*)
        LEVEL="${1#--level=}"
        shift
        ;;
      --yes)
        DRY_RUN=false
        shift
        ;;
      --include-protected)
        INCLUDE_PROTECTED=true
        shift
        ;;
      -h|--help)
        usage
        ;;
      *)
        die "Unknown argument: $1"
        ;;
    esac
  done
  if [[ -z "$LEVEL" ]]; then
    usage
  fi
  case "$LEVEL" in
    soft|controller|full|nuclear) ;;
    *) die "Invalid level: $LEVEL (must be soft|controller|full|nuclear)" ;;
  esac
}
# --- Detect Paths ---
# Resolve STACKS_DIR, DATA_DIR and SETTINGS_JSON for this node.
# Step 1: when controller.yaml is a readable regular file, take `stacks_dir`
#         and `data_dir` from it.
# Step 2: when settings.json is still not found, ask Docker for the mountpoint
#         of the felhom-controller_controller-data volume and use that
#         directory as DATA_DIR.
# Globals: CONTROLLER_YAML (read); STACKS_DIR, DATA_DIR, SETTINGS_JSON (written)
detect_paths() {
  local key value vol_path
  # Auto-detect from controller.yaml if readable (may be root-owned)
  if [ -f "$CONTROLLER_YAML" ] && [ -r "$CONTROLLER_YAML" ]; then
    for key in stacks_dir data_dir; do
      # Anchor on the start of the line and keep only the first hit, so that a
      # commented-out or repeated key cannot produce a multi-line value.
      value=$(sed -n "s/^[[:space:]]*${key}:[[:space:]]*\([^[:space:]]\{1,\}\).*/\1/p" \
        "$CONTROLLER_YAML" 2>/dev/null | head -n 1 || true)
      # YAML allows the scalar to be quoted; the quotes are not part of the path.
      value=${value#\"}
      value=${value%\"}
      value=${value#\'}
      value=${value%\'}
      [ -n "$value" ] || continue
      case "$key" in
        stacks_dir)
          STACKS_DIR="$value"
          ;;
        data_dir)
          DATA_DIR="$value"
          SETTINGS_JSON="$value/settings.json"
          ;;
      esac
    done
  fi
  # If settings.json not found at configured path, try the Docker volume directly.
  # The controller stores data in a named volume (felhom-controller_controller-data),
  # not at the container-internal path on the host filesystem.
  if [ ! -f "$SETTINGS_JSON" ]; then
    vol_path=$(docker volume inspect felhom-controller_controller-data --format '{{.Mountpoint}}' 2>/dev/null || true)
    if [ -n "$vol_path" ] && [ -d "$vol_path" ]; then
      DATA_DIR="$vol_path"
      SETTINGS_JSON="$vol_path/settings.json"
    fi
  fi
}
# --- Detect Storage Paths ---
# Storage roots collected by detect_storage_paths.
declare -a STORAGE_PATHS=()

# Append one path to STORAGE_PATHS unless it is empty or already present.
# Trailing slashes are dropped first so "/mnt/a" and "/mnt/a/" count as one.
# Arguments: $1 - candidate path
_add_storage_path() {
  local candidate="$1"
  local known
  while [[ "$candidate" == */ && "$candidate" != "/" ]]; do
    candidate=${candidate%/}
  done
  [[ -n "$candidate" ]] || return 0
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older bash.
  for known in ${STORAGE_PATHS[@]+"${STORAGE_PATHS[@]}"}; do
    if [[ "$known" == "$candidate" ]]; then
      return 0
    fi
  done
  STORAGE_PATHS+=("$candidate")
}

# Fill STORAGE_PATHS from two sources:
#   1. the "path" of every entry under "storage_paths" in settings.json;
#   2. every /mnt/<dir> that contains felhom-data/, appdata/ or backups/.
# Globals: SETTINGS_JSON (read), STORAGE_PATHS (appended to)
detect_storage_paths() {
  local path mount
  # From settings.json
  if [ -f "$SETTINGS_JSON" ]; then
    while IFS= read -r path; do
      _add_storage_path "$path"
    done < <(python3 -c '
import json
import sys

try:
    with open(sys.argv[1]) as handle:
        settings = json.load(handle)
except (OSError, ValueError):
    # Unreadable or malformed settings: report no paths (best effort).
    sys.exit(0)

entries = settings.get("storage_paths") if isinstance(settings, dict) else None
if not isinstance(entries, list):
    sys.exit(0)
for entry in entries:
    if isinstance(entry, dict) and isinstance(entry.get("path"), str):
        print(entry["path"])
' "$SETTINGS_JSON" 2>/dev/null || true)
    # The file name is passed as an argument, never pasted into the Python
    # source, so quotes or backslashes in the path cannot break the snippet.
  fi
  # Also scan /mnt/* for felhom-managed dirs not in registry
  for mount in /mnt/*/; do
    [ -d "${mount}felhom-data" ] || [ -d "${mount}appdata" ] || [ -d "${mount}backups" ] || continue
    _add_storage_path "${mount%/}"
  done
}
# --- List App Containers (non-infra) ---
# Print the names of all containers reported by `docker ps -a`, one per line,
# leaving out the infrastructure containers felhom-controller, traefik,
# cloudflared and portainer.
# Returns: 0, also when docker fails (output is then empty)
list_app_containers() {
  local -r infra_pattern='^(felhom-controller|traefik|cloudflared|portainer)$'
  local name
  while IFS= read -r name || [[ -n "$name" ]]; do
    [[ "$name" =~ $infra_pattern ]] && continue
    printf '%s\n' "$name"
  done < <(docker ps -a --format '{{.Names}}' 2>/dev/null)
}
# --- List App Volumes (non-infra) ---
# Print the names of all Docker volumes reported by `docker volume ls -q`,
# one per line, leaving out portainer_data.
# Returns: 0, also when docker fails (output is then empty)
list_app_volumes() {
  local -r infra_pattern='^(portainer_data)$'
  local volume
  while IFS= read -r volume || [[ -n "$volume" ]]; do
    [[ "$volume" =~ $infra_pattern ]] && continue
    printf '%s\n' "$volume"
  done < <(docker volume ls -q 2>/dev/null)
}
# --- Protected Stacks ---
# Print the entries of the `protected_stacks:` block list in controller.yaml,
# one name per line.
# Only "- item" lines that directly follow the key are reported; blank lines
# and comment lines inside the list are skipped, and the first other line ends
# the list. Surrounding quotes are removed from each name. Inline flow lists
# (`protected_stacks: [a, b]`) are not parsed.
# Globals: CONTROLLER_YAML (read)
# Returns: 0; prints nothing when the file or the key is missing
get_protected_stacks() {
  [ -f "$CONTROLLER_YAML" ] || return 0
  # A fixed "next 20 lines" window would truncate long lists and would also
  # pick up items of whatever list follows, so track list membership instead.
  awk '
    /^[ \t]*protected_stacks:/ { in_list = 1; next }
    !in_list { next }
    /^[ \t]*(#.*)?\r?$/ { next }
    /^[ \t]*-/ {
      item = $0
      sub(/^[ \t]*-[ \t]*/, "", item)
      sub(/[ \t\r].*$/, "", item)
      gsub(/^["\047]|["\047]$/, "", item)
      if (item != "") print item
      next
    }
    { in_list = 0 }
  ' "$CONTROLLER_YAML" 2>/dev/null || true
}
# --- Print Plan ---
# Print what the selected wipe level would delete and what it preserves.
# This function only reads state and prints; it deletes nothing.
# Globals: LEVEL, DATA_DIR, STACKS_DIR, STORAGE_PATHS, INCLUDE_PROTECTED (read)
# Outputs: the plan on stdout
print_plan() {
  local f c v sd sp stack_name
  local containers volumes protected_stacks
  echo ""
  bold "=== Felhom Wipe — Level: $LEVEL ==="
  echo ""
  # State files
  echo -e "${CYAN}Controller state:${NC}"
  local state_files=("$DATA_DIR/settings.json" "$DATA_DIR/metrics.db" "$DATA_DIR/setup-state.json" "$DATA_DIR/update-state.json" "$DATA_DIR/session-data.json" "$DATA_DIR/snapshot-history.json")
  for f in "${state_files[@]}"; do
    if [ -f "$f" ]; then
      echo -e " ${YELLOW}DELETE${NC} $f ($(human_size "$f"))"
    fi
  done
  if [[ "$LEVEL" == "controller" || "$LEVEL" == "full" || "$LEVEL" == "nuclear" ]]; then
    echo ""
    echo -e "${CYAN}Docker containers:${NC}"
    containers=$(list_app_containers)
    if [ -n "$containers" ]; then
      while read -r c; do
        echo -e " ${YELLOW}REMOVE${NC} $c"
      done <<<"$containers"
    else
      echo -e " ${GREEN}(none)${NC}"
    fi
    echo ""
    echo -e "${CYAN}Docker volumes:${NC}"
    volumes=$(list_app_volumes)
    if [ -n "$volumes" ]; then
      while read -r v; do
        echo -e " ${YELLOW}REMOVE${NC} $v"
      done <<<"$volumes"
    else
      echo -e " ${GREEN}(none)${NC}"
    fi
    echo ""
    echo -e "${CYAN}Stack directories:${NC}"
    if [ -d "$STACKS_DIR" ]; then
      # The protected list does not change between stacks; read it once.
      protected_stacks=$(get_protected_stacks)
      for sd in "$STACKS_DIR"/*/; do
        [ -d "$sd" ] || continue
        stack_name=$(basename "$sd")
        # -F: compare the name literally. As a regex, a stack called "my.app"
        # would also match a protected entry such as "myXapp".
        if grep -Fqx -- "$stack_name" <<<"$protected_stacks" && ! $INCLUDE_PROTECTED; then
          echo -e " ${GREEN}KEEP${NC} $sd (protected)"
        else
          echo -e " ${YELLOW}DELETE${NC} $sd"
        fi
      done
    else
      echo -e " ${GREEN}(not found)${NC}"
    fi
  fi
  if [[ "$LEVEL" == "full" || "$LEVEL" == "nuclear" ]]; then
    echo ""
    echo -e "${CYAN}Storage data:${NC}"
    if [ ${#STORAGE_PATHS[@]} -gt 0 ]; then
      for sp in "${STORAGE_PATHS[@]}"; do
        if [ -d "$sp/felhom-data" ]; then
          echo -e " ${YELLOW}DELETE${NC} $sp/felhom-data/ ($(human_size "$sp/felhom-data"))"
        fi
        # Old-style paths
        if [ -d "$sp/appdata" ]; then
          echo -e " ${YELLOW}DELETE${NC} $sp/appdata/ ($(human_size "$sp/appdata")) [old-style]"
        fi
        if [ -d "$sp/backups" ]; then
          echo -e " ${YELLOW}DELETE${NC} $sp/backups/ ($(human_size "$sp/backups")) [old-style]"
        fi
      done
    else
      echo -e " ${GREEN}(no storage paths found)${NC}"
    fi
  fi
  if [[ "$LEVEL" == "nuclear" ]]; then
    echo ""
    echo -e "${RED}Nuclear:${NC}"
    echo -e " ${RED}DELETE${NC} controller.yaml"
    echo -e " ${RED}DELETE${NC} controller container + image"
    echo -e " ${RED}DELETE${NC} Traefik container"
    echo -e " ${RED}DELETE${NC} Cloudflared container"
    echo -e " ${RED}DELETE${NC} Portainer container + volume"
    echo -e " ${RED}DELETE${NC} .felhom-infra-backup/ (DR markers on all drives)"
    echo -e " ${RED}DELETE${NC} All Docker data (docker system prune -af --volumes)"
  fi
  echo ""
  echo -e "${CYAN}Will preserve:${NC}"
  echo -e " ${GREEN}- OS and system files${NC}"
  if [[ "$LEVEL" != "nuclear" ]]; then
    echo -e " ${GREEN}- Controller container (felhom-controller)${NC}"
    echo -e " ${GREEN}- Controller image${NC}"
    echo -e " ${GREEN}- Traefik, Cloudflare Tunnel${NC}"
    echo -e " ${GREEN}- controller.yaml${NC}"
    echo -e " ${GREEN}- .felhom-infra-backup/ (DR markers on drives)${NC}"
  fi
  if [[ "$LEVEL" != "full" && "$LEVEL" != "nuclear" ]]; then
    echo -e " ${GREEN}- Storage data on drives${NC}"
  fi
  echo -e " ${GREEN}- User files (Dokumentumok, media, etc.)${NC}"
  echo ""
}
# --- Wipe Functions ---
# Soft wipe: delete the controller's state files from DATA_DIR.
# Files that do not exist are skipped silently.
# Globals: DATA_DIR (read)
# Returns: 0 always; a file that cannot be removed is reported as a warning
do_soft_wipe() {
  local f
  local state_files=("$DATA_DIR/settings.json" "$DATA_DIR/metrics.db" "$DATA_DIR/setup-state.json" "$DATA_DIR/update-state.json" "$DATA_DIR/session-data.json" "$DATA_DIR/snapshot-history.json")
  info "Soft wipe: removing controller state..."
  for f in "${state_files[@]}"; do
    [ -f "$f" ] || continue
    if rm -f -- "$f"; then
      info " Removed: $f"
    else
      warn " Failed to remove: $f"
    fi
  done
  # Explicit success: if the loop ended on a failed `[ -f ... ] && ...` test,
  # the function would return 1 whenever the last state file is absent, and
  # `set -e` would then abort the whole script at the call site.
  return 0
}
# Controller wipe: soft wipe, then remove app containers, app volumes and
# stack directories.
# Stacks named by get_protected_stacks are kept unless INCLUDE_PROTECTED is
# true.
# Globals: STACKS_DIR, INCLUDE_PROTECTED (read)
do_controller_wipe() {
  local c v sd stack_name
  local containers volumes protected_stacks
  # State-file removal is best effort; a non-zero status from it must not stop
  # the rest of the wipe under `set -e`.
  do_soft_wipe || true
  info "Controller wipe: stopping and removing app containers..."
  # Stop and remove app containers
  containers=$(list_app_containers)
  if [ -n "$containers" ]; then
    while read -r c; do
      # </dev/null keeps docker from consuming the list being iterated.
      if docker rm -f "$c" 2>/dev/null </dev/null; then
        info " Removed container: $c"
      else
        warn " Failed to remove: $c"
      fi
    done <<<"$containers"
  fi
  # Remove app volumes
  info "Removing app volumes..."
  volumes=$(list_app_volumes)
  if [ -n "$volumes" ]; then
    while read -r v; do
      if docker volume rm "$v" 2>/dev/null </dev/null; then
        info " Removed volume: $v"
      else
        warn " Failed to remove: $v"
      fi
    done <<<"$volumes"
  fi
  # Remove stack directories
  info "Removing stack directories..."
  case "$STACKS_DIR" in
    ""|/)
      # Globbing "$STACKS_DIR"/*/ here would match top-level system directories.
      warn " Refusing to remove stacks under '$STACKS_DIR'"
      ;;
    *)
      if [ -d "$STACKS_DIR" ]; then
        protected_stacks=$(get_protected_stacks)
        for sd in "$STACKS_DIR"/*/; do
          [ -d "$sd" ] || continue
          stack_name=$(basename "$sd")
          # -F: literal comparison; the stack name is not a regex.
          if grep -Fqx -- "$stack_name" <<<"$protected_stacks" && ! $INCLUDE_PROTECTED; then
            warn " Skipping protected stack: $stack_name"
            continue
          fi
          if rm -rf -- "${sd:?}"; then
            info " Removed: $sd"
          else
            warn " Failed to remove: $sd"
          fi
        done
      fi
      ;;
  esac
  # NOTE: No restart here — callers handle restart after all cleanup is done.
}
# Full wipe: controller wipe, then delete felhom-data/, appdata/ and backups/
# under every detected storage path, then restart the controller container.
# Globals: STORAGE_PATHS (read)
do_full_wipe() {
  local sp
  do_controller_wipe
  info "Full wipe: removing storage data..."
  # ${arr[@]+...}: an empty array must expand to nothing rather than raise
  # "unbound variable" under `set -u` on bash older than 4.4.
  for sp in ${STORAGE_PATHS[@]+"${STORAGE_PATHS[@]}"}; do
    # An empty entry would turn the targets below into /felhom-data etc.
    [ -n "$sp" ] || continue
    # New-style namespace
    if [ -d "$sp/felhom-data" ]; then
      rm -rf -- "${sp:?}/felhom-data" && info " Removed: $sp/felhom-data/"
    fi
    # Old-style paths
    if [ -d "$sp/appdata" ]; then
      rm -rf -- "${sp:?}/appdata" && info " Removed: $sp/appdata/ [old-style]"
    fi
    if [ -d "$sp/backups" ]; then
      rm -rf -- "${sp:?}/backups" && info " Removed: $sp/backups/ [old-style]"
    fi
  done
  # Restart controller after all cleanup is done
  info "Restarting controller..."
  docker restart felhom-controller 2>/dev/null || warn "Could not restart controller"
}
# Nuclear wipe: full wipe, then remove the infrastructure containers,
# controller.yaml, the .felhom-infra-backup/ markers on every storage path and
# all remaining Docker data, and finally print the redeploy command.
# Globals: CONTROLLER_YAML, STORAGE_PATHS (read)
do_nuclear_wipe() {
  local c sp vol
  do_full_wipe
  info "Nuclear wipe: removing all infrastructure..."
  # Stop infrastructure containers
  for c in felhom-controller traefik cloudflared portainer; do
    if docker rm -f "$c" 2>/dev/null; then
      info " Removed: $c"
    fi
  done
  # Remove controller.yaml
  if [ -f "$CONTROLLER_YAML" ] && rm -f -- "$CONTROLLER_YAML"; then
    info " Removed: controller.yaml"
  fi
  # Remove DR markers (nuclear = brand-new machine simulation)
  for sp in ${STORAGE_PATHS[@]+"${STORAGE_PATHS[@]}"}; do
    [ -n "$sp" ] || continue
    if [ -d "$sp/.felhom-infra-backup" ]; then
      rm -rf -- "${sp:?}/.felhom-infra-backup" && info " Removed: $sp/.felhom-infra-backup/"
    fi
  done
  # Remove all Docker data
  warn "Pruning all Docker data..."
  docker system prune -af --volumes 2>/dev/null || warn "Docker prune failed"
  # On current Docker releases `--volumes` prunes anonymous volumes only, so
  # named volumes (portainer_data, the controller data volume) would survive.
  # Remove whatever is still listed so the result matches the printed plan.
  while IFS= read -r vol; do
    [ -n "$vol" ] || continue
    if docker volume rm "$vol" >/dev/null 2>&1 </dev/null; then
      info " Removed volume: $vol"
    else
      warn " Failed to remove: $vol"
    fi
  done < <(docker volume ls -q 2>/dev/null || true)
  echo ""
  info "Nuclear wipe complete."
  echo -e "${CYAN}To redeploy, run:${NC}"
  echo " curl -fsSL https://gitea.dooplex.hu/admin/deploy-felhom-compose/raw/branch/main/scripts/docker-setup.sh | bash"
}
# --- Main ---
# Script entry point: parse arguments, check prerequisites, print the plan
# and, when --yes was given and the operator types YES, run the selected wipe.
# Arguments: "$@" - the script's command-line arguments
# Exits:     0 after a dry run; 1 when the confirmation is not given;
#            via die when not running as root or Docker is unavailable
main() {
  local confirm=""
  # Parse first, so --help and argument errors work without root privileges
  # or a running Docker daemon.
  parse_args "$@"
  # Must run as root
  if [ "$(id -u)" -ne 0 ]; then
    die "Must run as root (use sudo)"
  fi
  # Check Docker
  if ! docker info >/dev/null 2>&1; then
    die "Docker is not running"
  fi
  detect_paths
  detect_storage_paths
  print_plan
  if $DRY_RUN; then
    warn "Dry run — nothing deleted. Use --yes to execute."
    exit 0
  fi
  # Confirmation
  echo -e "${RED}${BOLD}This will permanently delete the data listed above.${NC}"
  # EOF on stdin (e.g. a non-interactive run) counts as "not confirmed"
  # instead of ending the script silently through `set -e`.
  read -rp "Type YES to confirm: " confirm || confirm=""
  if [ "$confirm" != "YES" ]; then
    echo "Aborted."
    exit 1
  fi
  echo ""
  case "$LEVEL" in
    soft)
      do_soft_wipe
      ;;
    controller)
      do_controller_wipe
      info "Restarting controller..."
      docker restart felhom-controller 2>/dev/null || warn "Could not restart controller"
      ;;
    full)
      do_full_wipe
      ;;
    nuclear)
      do_nuclear_wipe
      ;;
  esac
  echo ""
  info "Wipe complete (level: $LEVEL)."
}
# Run the script, forwarding every command-line argument unchanged.
main "$@"