fix: deep bug hunt II — concurrency, security & optimization (25 files)

Critical: watchdog mutex panic safety, SetGeoAppOverride nil guard,
SSD-only app DB restore fallback.

High: double deploy race (atomic Deploying flag), delete/remove during
deploy guard, ScanStacks overwrite protection, FileBrowser mount mutex,
PushEvent history, PushOnce error handling, DB dump sync+close before
rename, restic retry fresh context, encrypt failure logging, cross-backup
path traversal validation, deepCopyStack completeness.

Security: constant-time API key comparison, login rate limiting (5/min),
git credential masking in logs, storage path prefix traversal fix.

Concurrency: MigrateEncryption lock ordering, SubdomainInUse I/O outside
lock, scheduler late-registered jobs, SQLite WAL verification, metrics
shutdown context, telemetry scan error logging, asset sync lock scope.

Optimization: streaming file copy for DB dumps, restic stats dedup,
atomic infra config copy.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-25 14:21:09 +01:00
parent 72ab145b41
commit db83db383c
25 changed files with 930 additions and 626 deletions
+25 -2
View File
@@ -68,6 +68,7 @@ type DriveRepoInfo struct {
TotalSize string
TotalSizeBytes int64
SnapshotCount int
LatestSnapshot *SnapshotInfo `json:"-"` // used for aggregation, not serialized
}
// CrossDriveSummaryItem holds display data for one app's cross-drive backup.
@@ -860,11 +861,33 @@ func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
TotalSize: stats.TotalSize,
TotalSizeBytes: stats.TotalSizeBytes,
SnapshotCount: stats.SnapshotCount,
LatestSnapshot: stats.LatestSnapshot,
})
}
return infos
}
// aggregateFromDriveStats folds already-computed per-drive repository stats
// into one RepoStats, so no additional restic subprocess calls are needed.
//
// Snapshot counts and byte sizes are summed over all drives, and the
// LatestSnapshot with the most recent Time is kept. TotalSize is only filled
// in when the summed byte size is positive. The m parameter is not used by
// this function.
func aggregateFromDriveStats(drives []DriveRepoInfo, m *Manager) *RepoStats {
	result := new(RepoStats)
	for i := range drives {
		drive := &drives[i]
		result.SnapshotCount += drive.SnapshotCount
		result.TotalSizeBytes += drive.TotalSizeBytes

		latest := drive.LatestSnapshot
		if latest == nil {
			// This drive has no snapshot to compare.
			continue
		}
		if result.LatestSnapshot == nil || latest.Time.After(result.LatestSnapshot.Time) {
			result.LatestSnapshot = latest
		}
	}
	if result.TotalSizeBytes > 0 {
		result.TotalSize = humanizeBytes(result.TotalSizeBytes)
	}
	return result
}
// aggregateRepoStats combines stats from all primary restic repos.
func (m *Manager) aggregateRepoStats() *RepoStats {
drives := m.activeDrives()
@@ -1066,9 +1089,9 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
Retention: m.cfg.Backup.Retention,
}
// Expensive calls (outside lock)
status.RepoStats = m.aggregateRepoStats()
// Expensive calls (outside lock) — compute per-drive stats once, derive aggregate
status.PerDriveRepoStats = m.perDriveRepoStats()
status.RepoStats = aggregateFromDriveStats(status.PerDriveRepoStats, m)
// Scan dump files from per-drive per-stack paths
files := m.listAllDumpFiles()
+32 -12
View File
@@ -3,6 +3,7 @@ package backup
import (
"context"
"fmt"
"io"
"log"
"os"
"os/exec"
@@ -475,12 +476,8 @@ func (r *CrossDriveRunner) copyStackDBDumps(stackName, destDir string) error {
}
src := filepath.Join(dumpDir, e.Name())
dst := filepath.Join(destDir, e.Name())
data, err := os.ReadFile(src)
if err != nil {
return fmt.Errorf("reading %s: %w", e.Name(), err)
}
if err := os.WriteFile(dst, data, 0644); err != nil {
return fmt.Errorf("writing %s: %w", e.Name(), err)
if err := copyFile(src, dst); err != nil {
return fmt.Errorf("copying %s: %w", e.Name(), err)
}
copied++
}
@@ -523,14 +520,11 @@ func (r *CrossDriveRunner) syncInfraConfig(ctx context.Context) {
}
}
// Copy controller.yaml → _infra/controller.yaml
// Copy controller.yaml → _infra/controller.yaml (atomic via copyFile)
if _, err := os.Stat(r.controllerYAMLPath); err == nil {
yamlDest := filepath.Join(infraDir, "controller.yaml")
data, err := os.ReadFile(r.controllerYAMLPath)
if err != nil {
r.logger.Printf("[WARN] Cannot read controller.yaml for infra backup: %v", err)
} else if err := os.WriteFile(yamlDest, data, 0644); err != nil {
r.logger.Printf("[WARN] Cannot write controller.yaml to %s: %v", yamlDest, err)
if err := copyFile(r.controllerYAMLPath, yamlDest); err != nil {
r.logger.Printf("[WARN] Cannot copy controller.yaml to %s: %v", yamlDest, err)
}
}
@@ -628,6 +622,32 @@ func (r *CrossDriveRunner) updateStatus(stackName, status, errMsg string, durati
})
}
// copyFile copies src to dst using streaming I/O, so the file contents are
// never held in memory in full.
//
// The data is written to a sibling "<dst>.tmp" file, flushed to stable
// storage, closed, and only then renamed over dst, so dst is never observed
// half-written. On any failure the temporary file is removed and the error
// from the failing step is returned; dst itself is left untouched.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	tmp := dst + ".tmp"
	// Create with an explicit 0644 (before umask) rather than os.Create's
	// 0666, so copied files are never group- or world-writable.
	out, err := os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}

	if _, err := io.Copy(out, in); err != nil {
		out.Close()
		os.Remove(tmp)
		return err
	}
	// Flush to disk before the rename; without this a crash shortly after the
	// rename could leave dst empty or truncated.
	if err := out.Sync(); err != nil {
		out.Close()
		os.Remove(tmp)
		return err
	}
	if err := out.Close(); err != nil {
		os.Remove(tmp)
		return err
	}
	if err := os.Rename(tmp, dst); err != nil {
		// Do not leave a stray .tmp file behind when the rename fails.
		os.Remove(tmp)
		return err
	}
	return nil
}
// dirSizeBytes returns the total byte size of all files under path.
// H7: Walk errors are now propagated instead of silently swallowed.
func dirSizeBytes(path string) (int64, error) {
+14
View File
@@ -256,6 +256,20 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
return result
}
// Sync and then close tmpFile before rename to ensure data is flushed to disk (H8 fix).
if err := tmpFile.Sync(); err != nil {
os.Remove(tmpPath)
result.Error = fmt.Errorf("syncing dump file: %w", err)
result.Duration = time.Since(start)
return result
}
if err := tmpFile.Close(); err != nil {
os.Remove(tmpPath)
result.Error = fmt.Errorf("closing dump file: %w", err)
result.Duration = time.Since(start)
return result
}
// Check file size
stat, err := os.Stat(tmpPath)
if err != nil || stat.Size() == 0 {
+4 -2
View File
@@ -145,8 +145,10 @@ func (r *ResticManager) Snapshot(repoPath string, paths []string, tags []string)
if unlockErr := unlockCmd.Run(); unlockErr != nil {
r.logger.Printf("[WARN] Restic unlock failed: %v", unlockErr)
}
// Retry once
cmd = r.command(ctx, repoPath, args...)
// Retry once with a fresh context (H9 fix — original may be nearly expired).
retryCtx, retryCancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer retryCancel()
cmd = r.command(retryCtx, repoPath, args...)
out, err = cmd.Output()
if err != nil {
return nil, fmt.Errorf("restic backup failed after unlock: %v", err)
@@ -154,11 +154,22 @@ func restoreUserData(ctx context.Context, app *RestorableApp, logger *log.Logger
// restoreDBDumps copies DB dump files from cross-drive backup to the primary dump dir.
func restoreDBDumps(app *RestorableApp, logger *log.Logger) error {
if app.DBDumpPath == "" || app.HDDPath == "" {
if app.DBDumpPath == "" {
return nil
}
destDir := AppDBDumpPath(app.HDDPath, app.Name)
// Use HDDPath for apps with HDD data, fall back to DrivePath (system data path)
// for SSD-only apps whose DB dumps live under the system drive.
drivePath := app.HDDPath
if drivePath == "" {
drivePath = app.DrivePath
}
if drivePath == "" {
logger.Printf("[WARN] Cannot restore DB dumps for %s: no drive path", app.Name)
return nil
}
destDir := AppDBDumpPath(drivePath, app.Name)
if err := os.MkdirAll(destDir, 0755); err != nil {
return fmt.Errorf("creating dump dir: %w", err)
}