Files
deploy-felhom-compose/controller/internal/cloudflare/geosync.go
T
admin 8b8c04a487 fix: P0+P1 critical bug fixes across controller (24 files)
Concurrency fixes:
- Deep-copy stacks in GetStack/GetStacks to prevent shared state mutation (C04)
- Add per-state mutex to watchdog pathProbeState (C05)
- Guard MetricsCollector.Start() with sync.Once against double-start (C06)
- Hold diskJobMu across entire raw mount operation (C07)
- Add mutex to SetEncryptionKey (C08), MigrateEncryption write lock (H03)
- Use sync.Once for sync.Stop() channel close (H08)
- Set syncing=true before releasing lock in TriggerSync (H09)
- Deep-copy lastDBDump/lastBackup in GetFullStatus (H11)
- Add WaitGroup for stderr goroutine in MigrateDrive (H19)
- Add mutex to SetBackupRunningCheck (M18)

Security fixes:
- Validate Bearer token against Hub API key in CSRF middleware (H16)
- Validate backup paths start with expected prefix in RemoveStack (M12)
- Guard uuid[:8] slice with length check (H20)
- Parse fstab fields exactly for mount target matching (H21)

Bug fixes:
- Use decrypted env vars for compose deploy (C01)
- Log decrypt failures in DecryptMap instead of swallowing (C02)
- Move Deployed=false inside lock in runComposeDeploy (C03)
- Fix activeDrives() to skip disconnected drives (H02)
- Fix Snapshot() stderr extraction from exec.ExitError (H01)
- Check unlockCmd.Run() error in restic (H01)
- Buffer template rendering via bytes.Buffer (H07)
- Thread context.Context through cloudflare client (H10)
- Fix leaf-name collision detection in cross-drive backup (H15)
- Add nil check for crossDriveRunner (H17)
- Use strings.TrimSpace instead of slice on command output (H18)
- Make SaveAppConfig atomic with write-to-tmp+rename (H04)
- Pass encKey on deploy failure SaveAppConfig (H05)
- Fix IPv6 address format in TCP health probe

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 13:39:45 +01:00

255 lines
6.9 KiB
Go

package cloudflare
import (
"context"
"fmt"
"log"
"sort"
"sync"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
)
// StackLister provides deployed app hostnames (interface to break circular import).
//
// GeoSyncManager consults it while building rules: an app override whose app
// name is absent from the returned map is treated as not deployed and skipped.
type StackLister interface {
	// GetDeployedHostnames returns appName → full hostname (e.g., "nextcloud.demo-felhom.eu").
	GetDeployedHostnames() map[string]string
}
// GeoSyncManager synchronizes geo-restriction settings to Cloudflare WAF rules.
//
// At most one Sync runs at a time; the running flag (guarded by mu) enforces
// this and is exposed through IsRunning.
type GeoSyncManager struct {
	client   *Client            // Cloudflare API client used for zone, ruleset and rule calls
	settings *settings.Settings // source of the geo-restriction config; also receives the sync state
	domain   string             // domain passed to GetZoneID when no zone ID is cached
	stacks   StackLister        // supplies app name → hostname for per-app rules
	logger   *log.Logger        // destination for the "[GEO]" log lines
	mu       sync.Mutex         // guards running
	running  bool               // true while a Sync call is executing
}
// NewGeoSyncManager builds a GeoSyncManager from its collaborators.
//
// The arguments are stored as given, without validation: client talks to
// Cloudflare, sett provides the geo-restriction configuration and receives the
// sync state, domain is the zone to manage, stacks resolves app names to
// hostnames, and logger receives progress and warning lines. The returned
// manager is idle (no sync running).
func NewGeoSyncManager(client *Client, sett *settings.Settings, domain string, stacks StackLister, logger *log.Logger) *GeoSyncManager {
	mgr := new(GeoSyncManager)
	mgr.client = client
	mgr.settings = sett
	mgr.domain = domain
	mgr.stacks = stacks
	mgr.logger = logger
	return mgr
}
// IsRunning reports whether a Sync call is currently executing.
// The flag is read under the manager's mutex.
func (g *GeoSyncManager) IsRunning() bool {
	g.mu.Lock()
	active := g.running
	g.mu.Unlock()
	return active
}
// Sync reads the current geo-restriction settings and reconciles the
// felhom-managed Cloudflare WAF rules with them.
//
// Only one Sync may run at a time; a call made while another is in progress
// returns an error immediately. When geo restriction is unset or disabled the
// call delegates to deleteAllRules. Otherwise it resolves the zone and ruleset
// IDs (preferring the values cached in the settings), lists the existing
// rules, builds the desired set and applies the difference. Every failing step
// stores its error text through saveError before returning a wrapped error; a
// successful run stores an empty error text.
func (g *GeoSyncManager) Sync(ctx context.Context) error {
	// Claim the single sync slot, or bail out if another run already holds it.
	g.mu.Lock()
	busy := g.running
	g.running = true
	g.mu.Unlock()
	if busy {
		return fmt.Errorf("sync already in progress")
	}
	defer func() {
		g.mu.Lock()
		defer g.mu.Unlock()
		g.running = false
	}()

	geo := g.settings.GetGeoRestriction()

	// Feature unset or switched off: remove whatever rules we manage and stop.
	if enabled := geo != nil && geo.Enabled; !enabled {
		return g.deleteAllRules(ctx, geo)
	}

	g.logger.Printf("[GEO] Starting sync for domain %s (%d allowed countries, %d app overrides)",
		g.domain, len(geo.AllowedCountries), len(geo.AppOverrides))

	// Step 1: zone ID — cached value wins, otherwise ask Cloudflare.
	zoneID := geo.ZoneID
	if zoneID == "" {
		resolved, zoneErr := g.client.GetZoneID(ctx, g.domain)
		if zoneErr != nil {
			g.saveError(resolved, "", zoneErr.Error())
			return fmt.Errorf("resolve zone: %w", zoneErr)
		}
		zoneID = resolved
	}

	// Step 2: custom WAF ruleset — cached, looked up, or freshly created.
	rulesetID := geo.RulesetID
	if rulesetID == "" {
		found, lookupErr := g.client.GetCustomRulesetID(ctx, zoneID)
		if lookupErr != nil {
			g.saveError(zoneID, "", lookupErr.Error())
			return fmt.Errorf("get ruleset: %w", lookupErr)
		}
		rulesetID = found

		if rulesetID == "" {
			created, createErr := g.client.CreateCustomRuleset(ctx, zoneID)
			if createErr != nil {
				g.saveError(zoneID, "", createErr.Error())
				return fmt.Errorf("create ruleset: %w", createErr)
			}
			rulesetID = created
		}
	}

	// Step 3: what felhom-managed rules are currently in the ruleset?
	existing, listErr := g.client.GetFelhomRules(ctx, zoneID, rulesetID)
	if listErr != nil {
		g.saveError(zoneID, rulesetID, listErr.Error())
		return fmt.Errorf("list existing rules: %w", listErr)
	}

	// Steps 4 and 5: compute the target rule set and reconcile against it.
	desired := g.buildDesiredRules(geo)
	applyErr := g.applyDiff(ctx, zoneID, rulesetID, existing, desired)
	if applyErr != nil {
		g.saveError(zoneID, rulesetID, applyErr.Error())
		return fmt.Errorf("apply diff: %w", applyErr)
	}

	// Step 6: persist the resolved IDs with an empty error to mark success.
	g.saveError(zoneID, rulesetID, "")
	g.logger.Printf("[GEO] Sync completed successfully")
	return nil
}
// deleteAllRules removes all felhom-geo rules when the feature is disabled.
//
// geo may be nil. Without a cached zone ID and ruleset ID there is nothing to
// address at Cloudflare, so the function returns immediately.
//
// Cleanup is best-effort: failures are logged as warnings and the function
// always returns nil, so disabling the feature never fails the caller. The
// outcome is still reported accurately: the log line counts only the rules
// that were actually deleted, and a listing or deletion failure is stored in
// the sync state instead of being overwritten with an empty (success) error.
func (g *GeoSyncManager) deleteAllRules(ctx context.Context, geo *settings.GeoRestriction) error {
	if geo == nil || geo.ZoneID == "" || geo.RulesetID == "" {
		// No cached IDs — nothing to clean up
		return nil
	}
	zoneID, rulesetID := geo.ZoneID, geo.RulesetID

	existing, err := g.client.GetFelhomRules(ctx, zoneID, rulesetID)
	if err != nil {
		g.logger.Printf("[GEO] Warning: could not list rules for cleanup: %v", err)
		// Record the failure so the stored state does not claim a clean sync
		// while rules may still be enforced.
		g.saveError(zoneID, rulesetID, fmt.Sprintf("list rules for cleanup: %v", err))
		return nil
	}

	deleted := 0
	var firstErr error
	for _, r := range existing {
		if err := g.client.DeleteRule(ctx, zoneID, rulesetID, r.ID); err != nil {
			g.logger.Printf("[GEO] Warning: could not delete rule %s: %v", r.ID, err)
			if firstErr == nil {
				firstErr = err
			}
			continue
		}
		deleted++
	}
	if deleted > 0 {
		g.logger.Printf("[GEO] Deleted %d felhom-geo rules (feature disabled)", deleted)
	}

	// Empty message marks success; otherwise summarize what is left behind.
	errMsg := ""
	if firstErr != nil {
		errMsg = fmt.Sprintf("cleanup: %d of %d rules not deleted: %v",
			len(existing)-deleted, len(existing), firstErr)
	}
	g.saveError(zoneID, rulesetID, errMsg)
	return nil
}
// desiredRule is the target state of a single Cloudflare rule.
//
// description identifies the rule and is the key used to match it against the
// rules that already exist; expression is the filter expression the rule
// should carry.
type desiredRule struct {
	description, expression string
}
// buildDesiredRules builds the set of rules that should exist in Cloudflare.
//
// For every app override whose app is currently deployed (present in the map
// returned by the StackLister) it emits one per-app rule, followed by a single
// global rule whose expression excludes the hostnames of those apps. Overrides
// for apps that are not deployed are ignored.
//
// The result is deterministic: per-app rules are ordered by app name and the
// excluded hostnames are sorted, so repeated calls with the same input yield
// the same rules in the same order.
func (g *GeoSyncManager) buildDesiredRules(geo *settings.GeoRestriction) []desiredRule {
	hostnames := g.stacks.GetDeployedHostnames()

	// Map iteration order is unspecified, so collect the deployed override
	// apps and sort them before emitting rules.
	appNames := make([]string, 0, len(geo.AppOverrides))
	for appName := range geo.AppOverrides {
		if _, deployed := hostnames[appName]; deployed {
			appNames = append(appNames, appName)
		}
	}
	sort.Strings(appNames)

	rules := make([]desiredRule, 0, len(appNames)+1)

	// Hostnames with their own rule are excluded from the global rule.
	var excludeHostnames []string
	for _, appName := range appNames {
		hostname := hostnames[appName]
		override := geo.AppOverrides[appName]
		excludeHostnames = append(excludeHostnames, hostname)
		// Per-app rule
		rules = append(rules, desiredRule{
			description: AppRuleDescription(appName),
			expression:  BuildAppExpression(hostname, override.AllowedCountries),
		})
	}

	// Sort exclude hostnames for deterministic expression
	sort.Strings(excludeHostnames)

	// Global rule (excludes apps with their own rules)
	rules = append(rules, desiredRule{
		description: globalRuleDesc,
		expression:  BuildGlobalExpression(geo.AllowedCountries, excludeHostnames),
	})
	return rules
}
// applyDiff applies the difference between existing and desired rules.
//
// Rules are matched by description:
//   - a desired rule with no existing counterpart is created;
//   - a desired rule whose existing counterpart has a different expression is
//     updated in place;
//   - an existing rule whose description is not desired is deleted;
//   - when several existing rules share a desired description, the first one
//     is kept (and updated if needed) and the others are deleted, so a stale
//     duplicate cannot stay enforced.
//
// Creates and updates run before deletions. The first failing API call aborts
// the run and is returned wrapped with the affected rule's description;
// changes already applied are not rolled back.
func (g *GeoSyncManager) applyDiff(ctx context.Context, zoneID, rulesetID string, existing []GeoRule, desired []desiredRule) error {
	// Set of descriptions that should exist.
	wanted := make(map[string]struct{}, len(desired))
	for _, d := range desired {
		wanted[d.description] = struct{}{}
	}

	// Index existing rules by description; anything unwanted or duplicated is
	// queued for deletion.
	existingByDesc := make(map[string]GeoRule, len(existing))
	var obsolete []GeoRule
	for _, r := range existing {
		_, isWanted := wanted[r.Description]
		_, isDup := existingByDesc[r.Description]
		if !isWanted || isDup {
			obsolete = append(obsolete, r)
			continue
		}
		existingByDesc[r.Description] = r
	}

	// Create or update
	for _, d := range desired {
		ex, ok := existingByDesc[d.description]
		switch {
		case !ok:
			// New rule — create
			r := newBlockRule(d.description, d.expression)
			if _, err := g.client.CreateRule(ctx, zoneID, rulesetID, r); err != nil {
				return fmt.Errorf("create rule %q: %w", d.description, err)
			}
		case ex.Expression != d.expression:
			// Rule exists but its expression changed — update
			r := newBlockRule(d.description, d.expression)
			if err := g.client.UpdateRule(ctx, zoneID, rulesetID, ex.ID, r); err != nil {
				return fmt.Errorf("update rule %q: %w", d.description, err)
			}
		}
	}

	// Delete rules that are no longer desired, plus duplicates
	for _, ex := range obsolete {
		if err := g.client.DeleteRule(ctx, zoneID, rulesetID, ex.ID); err != nil {
			return fmt.Errorf("delete rule %q: %w", ex.Description, err)
		}
	}
	return nil
}
// saveError persists the sync state (zone ID, ruleset ID and error text) via
// the settings store. An empty errMsg is what callers pass after a successful
// run. A failure to persist is only logged as a warning; it is not returned.
func (g *GeoSyncManager) saveError(zoneID, rulesetID, errMsg string) {
	saveErr := g.settings.SetGeoSyncState(zoneID, rulesetID, errMsg)
	if saveErr == nil {
		return
	}
	g.logger.Printf("[GEO] Warning: failed to save sync state: %v", saveErr)
}