Fix bugs from BUGHUNT.md: race conditions in restore, infra backup, DR wiring, docker-setup.sh, restore.html
This commit is contained in:
@@ -71,14 +71,46 @@ func (rp *RestorePlan) GetApps() []RestorableApp {
|
||||
func (rp *RestorePlan) Snapshot() map[string]interface{} {
|
||||
rp.mu.RLock()
|
||||
defer rp.mu.RUnlock()
|
||||
|
||||
apps := make([]RestorableApp, len(rp.Apps))
|
||||
copy(apps, rp.Apps)
|
||||
drives := make([]DriveInfo, len(rp.Drives))
|
||||
copy(drives, rp.Drives)
|
||||
|
||||
return map[string]interface{}{
|
||||
"ok": true,
|
||||
"status": rp.Status,
|
||||
"apps": rp.Apps,
|
||||
"drives": rp.Drives,
|
||||
"apps": apps,
|
||||
"drives": drives,
|
||||
}
|
||||
}
|
||||
|
||||
// TryStartRestore atomically sets status to "restoring" if not already restoring.
|
||||
// Returns false if a restore is already in progress (prevents double-restore race).
|
||||
func (rp *RestorePlan) TryStartRestore() bool {
|
||||
rp.mu.Lock()
|
||||
defer rp.mu.Unlock()
|
||||
if rp.Status == "restoring" {
|
||||
return false
|
||||
}
|
||||
rp.Status = "restoring"
|
||||
return true
|
||||
}
|
||||
|
||||
// SetStatus sets the overall plan status under lock.
|
||||
func (rp *RestorePlan) SetStatus(status string) {
|
||||
rp.mu.Lock()
|
||||
defer rp.mu.Unlock()
|
||||
rp.Status = status
|
||||
}
|
||||
|
||||
// GetStatus returns the current plan status under lock.
|
||||
func (rp *RestorePlan) GetStatus() string {
|
||||
rp.mu.RLock()
|
||||
defer rp.mu.RUnlock()
|
||||
return rp.Status
|
||||
}
|
||||
|
||||
// UpdateApp updates a single app's status in the plan.
|
||||
func (rp *RestorePlan) UpdateApp(name, status, errMsg string) {
|
||||
rp.mu.Lock()
|
||||
@@ -231,9 +263,13 @@ func dirExists(path string) bool {
|
||||
}
|
||||
|
||||
// dirIsEmpty reports whether the directory at path has no entries.
//
// Any read error (missing path, permission denied, not a directory) yields
// false: an unreadable directory is assumed to be non-empty, which is the
// safer answer for backup detection.
func dirIsEmpty(path string) bool {
	entries, err := os.ReadDir(path)
	if err != nil {
		return false
	}
	return len(entries) == 0
}
|
||||
|
||||
// hasUserData checks if the rsync backup dir has user data (not just _config/_db).
|
||||
|
||||
@@ -2,6 +2,8 @@ package report
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
@@ -43,6 +45,7 @@ func BuildInfraBackup(
|
||||
systemDataPath string,
|
||||
sett *settings.Settings,
|
||||
stackProvider backup.StackDataProvider,
|
||||
logger *log.Logger,
|
||||
) (*InfraBackup, error) {
|
||||
ib := &InfraBackup{
|
||||
CustomerID: customerID,
|
||||
@@ -51,22 +54,29 @@ func BuildInfraBackup(
|
||||
Timestamp: time.Now().UTC().Format(time.RFC3339),
|
||||
}
|
||||
|
||||
// Read and encode controller.yaml
|
||||
if data, err := os.ReadFile(controllerYAMLPath); err == nil {
|
||||
ib.ControllerConfigB64 = base64.StdEncoding.EncodeToString(data)
|
||||
// Read and encode controller.yaml (critical — fail if unreadable)
|
||||
data, err := os.ReadFile(controllerYAMLPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading controller config %s: %w", controllerYAMLPath, err)
|
||||
}
|
||||
ib.ControllerConfigB64 = base64.StdEncoding.EncodeToString(data)
|
||||
|
||||
// Read and encode settings.json
|
||||
// Read and encode settings.json (important but non-fatal)
|
||||
if data, err := os.ReadFile(settingsPath); err == nil {
|
||||
ib.SettingsJSONB64 = base64.StdEncoding.EncodeToString(data)
|
||||
} else if !os.IsNotExist(err) {
|
||||
logger.Printf("[WARN] Infra backup: could not read settings.json: %v", err)
|
||||
}
|
||||
|
||||
// Read primary restic password
|
||||
// Read primary restic password (important but non-fatal)
|
||||
if data, err := os.ReadFile(resticPasswordFile); err == nil {
|
||||
ib.ResticPassword = base64.StdEncoding.EncodeToString(data)
|
||||
} else if !os.IsNotExist(err) {
|
||||
logger.Printf("[WARN] Infra backup: could not read restic password file: %v", err)
|
||||
}
|
||||
|
||||
// Read cross-drive restic password
|
||||
// Cross-drive password is stored as plain text (not base64) because it's
|
||||
// already a string in settings, unlike ResticPassword which comes from a file.
|
||||
if pw := sett.GetCrossDriveResticPassword(); pw != "" {
|
||||
ib.CrossDrivePassword = pw
|
||||
}
|
||||
|
||||
@@ -11,21 +11,32 @@ import (
|
||||
|
||||
// restorePageHandler renders the full-page DR restore UI.
|
||||
func (s *Server) restorePageHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if s.restorePlan == nil {
|
||||
s.restoreMu.RLock()
|
||||
plan := s.restorePlan
|
||||
if plan == nil {
|
||||
s.restoreMu.RUnlock()
|
||||
http.Redirect(w, r, "/", http.StatusFound)
|
||||
return
|
||||
}
|
||||
// Snapshot all needed fields under lock before rendering
|
||||
customerID := plan.CustomerID
|
||||
timestamp := plan.Timestamp
|
||||
apps := plan.GetApps()
|
||||
drives := make([]backup.DriveInfo, len(plan.Drives))
|
||||
copy(drives, plan.Drives)
|
||||
status := plan.GetStatus()
|
||||
s.restoreMu.RUnlock()
|
||||
|
||||
data := map[string]interface{}{
|
||||
"Title": "Katasztrófa utáni visszaállítás",
|
||||
"CustomerName": s.cfg.Customer.Name,
|
||||
"Domain": s.cfg.Customer.Domain,
|
||||
"Version": s.version,
|
||||
"CustomerID": s.restorePlan.CustomerID,
|
||||
"Timestamp": s.restorePlan.Timestamp,
|
||||
"Apps": s.restorePlan.GetApps(),
|
||||
"Drives": s.restorePlan.Drives,
|
||||
"PlanStatus": s.restorePlan.Status,
|
||||
"CustomerID": customerID,
|
||||
"Timestamp": timestamp,
|
||||
"Apps": apps,
|
||||
"Drives": drives,
|
||||
"PlanStatus": status,
|
||||
}
|
||||
|
||||
s.render(w, "restore", data)
|
||||
@@ -33,27 +44,33 @@ func (s *Server) restorePageHandler(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
// apiRestoreStatus returns the current restore plan status as JSON.
|
||||
func (s *Server) apiRestoreStatus(w http.ResponseWriter, r *http.Request) {
|
||||
if s.restorePlan == nil {
|
||||
s.restoreMu.RLock()
|
||||
plan := s.restorePlan
|
||||
if plan == nil {
|
||||
s.restoreMu.RUnlock()
|
||||
jsonError(w, "not in restore mode", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
snapshot := plan.Snapshot()
|
||||
s.restoreMu.RUnlock()
|
||||
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
json.NewEncoder(w).Encode(s.restorePlan.Snapshot())
|
||||
json.NewEncoder(w).Encode(snapshot)
|
||||
}
|
||||
|
||||
// apiRestoreAll starts restoring all pending apps sequentially.
|
||||
func (s *Server) apiRestoreAll(w http.ResponseWriter, r *http.Request) {
|
||||
if s.restorePlan == nil {
|
||||
s.restoreMu.RLock()
|
||||
plan := s.restorePlan
|
||||
s.restoreMu.RUnlock()
|
||||
if plan == nil {
|
||||
jsonError(w, "not in restore mode", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if s.restorePlan.Status == "restoring" {
|
||||
if !plan.TryStartRestore() {
|
||||
jsonError(w, "restore already in progress", http.StatusConflict)
|
||||
return
|
||||
}
|
||||
|
||||
s.restorePlan.Status = "restoring"
|
||||
go s.executeAllRestores()
|
||||
|
||||
jsonResponse(w, map[string]interface{}{
|
||||
@@ -64,7 +81,10 @@ func (s *Server) apiRestoreAll(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
// apiRestoreSkip exits restore mode without restoring.
|
||||
func (s *Server) apiRestoreSkip(w http.ResponseWriter, r *http.Request) {
|
||||
if s.restorePlan == nil {
|
||||
s.restoreMu.RLock()
|
||||
plan := s.restorePlan
|
||||
s.restoreMu.RUnlock()
|
||||
if plan == nil {
|
||||
jsonError(w, "not in restore mode", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
@@ -82,13 +102,21 @@ func (s *Server) apiRestoreSkip(w http.ResponseWriter, r *http.Request) {
|
||||
func (s *Server) executeAllRestores() {
|
||||
s.logger.Println("[INFO] Starting DR restore for all apps")
|
||||
|
||||
for i := range s.restorePlan.Apps {
|
||||
app := &s.restorePlan.Apps[i]
|
||||
s.restoreMu.RLock()
|
||||
plan := s.restorePlan
|
||||
s.restoreMu.RUnlock()
|
||||
if plan == nil {
|
||||
s.logger.Println("[WARN] Restore plan cleared before execution could start")
|
||||
return
|
||||
}
|
||||
|
||||
for i := range plan.Apps {
|
||||
app := &plan.Apps[i]
|
||||
if app.Status != "pending" {
|
||||
continue
|
||||
}
|
||||
|
||||
s.restorePlan.UpdateApp(app.Name, "restoring", "")
|
||||
plan.UpdateApp(app.Name, "restoring", "")
|
||||
s.logger.Printf("[INFO] Restoring app %s (%s)", app.Name, app.DisplayName)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
||||
@@ -96,15 +124,15 @@ func (s *Server) executeAllRestores() {
|
||||
cancel()
|
||||
|
||||
if err != nil {
|
||||
s.restorePlan.UpdateApp(app.Name, "failed", err.Error())
|
||||
plan.UpdateApp(app.Name, "failed", err.Error())
|
||||
s.logger.Printf("[ERROR] Restore failed for %s: %v", app.Name, err)
|
||||
} else {
|
||||
s.restorePlan.UpdateApp(app.Name, "done", "")
|
||||
plan.UpdateApp(app.Name, "done", "")
|
||||
s.logger.Printf("[INFO] Restore completed for %s", app.Name)
|
||||
}
|
||||
}
|
||||
|
||||
s.restorePlan.Status = "done"
|
||||
plan.SetStatus("done")
|
||||
s.logger.Println("[INFO] All app restores completed")
|
||||
|
||||
// Re-scan stacks so dashboard picks up restored apps
|
||||
@@ -113,15 +141,6 @@ func (s *Server) executeAllRestores() {
|
||||
s.logger.Printf("[WARN] Post-restore stack scan failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-clear restore mode after a brief delay so user can see final status
|
||||
go func() {
|
||||
time.Sleep(5 * time.Second)
|
||||
// Only auto-clear if user hasn't already navigated away
|
||||
if s.restorePlan != nil && s.restorePlan.AllDone() {
|
||||
// Keep plan visible — user clicks "continue to dashboard" to clear
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// clearRestoreMode exits restore mode and returns to normal operation.
|
||||
|
||||
@@ -255,10 +255,15 @@
|
||||
pollStatus();
|
||||
}
|
||||
|
||||
var pollErrors = 0;
|
||||
function pollStatus() {
|
||||
fetch('/api/restore/status')
|
||||
.then(function(resp) { return resp.json(); })
|
||||
.then(function(resp) {
|
||||
if (!resp.ok) throw new Error('HTTP ' + resp.status);
|
||||
return resp.json();
|
||||
})
|
||||
.then(function(data) {
|
||||
pollErrors = 0;
|
||||
if (!data.ok) return;
|
||||
updateTable(data.apps || []);
|
||||
updateProgress(data.apps || []);
|
||||
@@ -270,23 +275,41 @@
|
||||
updateActions();
|
||||
}
|
||||
})
|
||||
.catch(function() {});
|
||||
.catch(function(err) {
|
||||
pollErrors++;
|
||||
console.error('Poll error:', err);
|
||||
if (pollErrors >= 10) {
|
||||
clearInterval(polling);
|
||||
polling = null;
|
||||
var actions = document.getElementById('dr-actions');
|
||||
if (actions) {
|
||||
actions.innerHTML = '<p style="color:var(--danger)">Kapcsolat megszakadt. <a href="/restore">Oldal frissítése</a></p>';
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// updateTable redraws the status cell of every app listed in `apps`.
// Each entry is read for `name`, `status` and an optional `error` string.
// Cells are matched on their data-app attribute and rebuilt from DOM nodes,
// so server-supplied text (status, error message) is never parsed as HTML.
function updateTable(apps) {
    // Query once and compare the attribute directly: building a selector from
    // app.name would throw on names containing quotes or brackets.
    var statusCells = document.querySelectorAll('.app-status');
    apps.forEach(function(app) {
        statusCells.forEach(function(cell) {
            if (cell.getAttribute('data-app') !== app.name) {
                return;
            }
            var span = document.createElement('span');
            span.className = 'status-' + app.status;
            if (app.status === 'restoring') {
                var spinner = document.createElement('span');
                spinner.className = 'spinner';
                span.appendChild(spinner);
                span.appendChild(document.createTextNode(' '));
            }
            span.appendChild(document.createTextNode(statusText(app.status)));
            if (app.error) {
                // Only the first 60 characters of the error are shown in the cell.
                var errSpan = document.createElement('span');
                errSpan.style.cssText = 'font-size:.8rem;color:var(--danger)';
                errSpan.textContent = ' (' + app.error.substring(0, 60) + ')';
                span.appendChild(errSpan);
            }
            cell.textContent = '';
            cell.appendChild(span);
        });
    });
}
|
||||
|
||||
Reference in New Issue
Block a user