diff --git a/mon-system/prometheus-rules.yaml b/mon-system/prometheus-rules.yaml
index 83574d2..9270ff2 100644
--- a/mon-system/prometheus-rules.yaml
+++ b/mon-system/prometheus-rules.yaml
@@ -13,6 +13,147 @@ metadata:
   labels:
     app: prometheus
 data:
+
+  authentik-alerts.yml: |
+    groups:
+      - name: authentik-availability
+        rules:
+          # Fires when the authentik-server scrape fails (up == 0) or when no up
+          # series exists for the job at all (absent), i.e. the target is missing.
+          - alert: AuthentikServerDown
+            expr: up{job="authentik-server"} == 0 or absent(up{job="authentik-server"})
+            for: 3m
+            labels:
+              severity: critical
+              component: authentik
+            annotations:
+              summary: "Authentik server is unreachable"
+              description: "authentik-server scrape has been failing or absent for 3+ minutes. Logins will fail."
+
+          - alert: AuthentikWorkerDown  # scrape failing, or no up series for the job
+            expr: up{job="authentik-worker"} == 0 or absent(up{job="authentik-worker"})
+            for: 3m  # condition must hold this long before the alert fires
+            labels:
+              severity: critical
+              component: authentik
+            annotations:
+              summary: "Authentik worker is unreachable"
+              description: "authentik-worker scrape has been failing or absent for 3+ minutes. Background tasks are not running."
+
+          # One rule covers every outpost target; each failing outpost alerts
+          # separately. Severity is only warning: losing one outpost breaks just
+          # the apps behind it (e.g. the Arr stack via arr-outpost), not the IDP.
+          - alert: AuthentikOutpostDown
+            expr: up{job="authentik-outposts"} == 0
+            for: 5m
+            labels:
+              component: authentik
+              severity: warning
+            annotations:
+              summary: 'Authentik outpost {{ $labels.outpost }} is unreachable'
+              description: 'Outpost {{ $labels.outpost }} has been down for 5+ minutes. Apps behind this outpost cannot authenticate.'
+
+      - name: authentik-functional
+        rules:
+          # Presumably tied to a past incident (the original note calls this
+          # "the 13-days-ago alert"). Probes cannot catch the failure because the
+          # pod is technically alive, so the rule watches queue dynamics instead:
+          # more than 5 tasks in progress while the 5m task rate is below 0.01/s.
+          - alert: AuthentikTaskQueueStuck
+            expr: |
+              sum(authentik_tasks_in_progress{job="authentik-worker"}) > 5
+              and
+              sum(rate(authentik_tasks_total{job="authentik-worker"}[5m])) < 0.01
+            for: 10m
+            labels:
+              component: authentik
+              severity: critical
+            annotations:
+              summary: 'Authentik worker queue is stuck'
+              description: '{{ $value }} tasks in progress with near-zero throughput for 10+ min. Worker is alive but not draining the queue — typically resolved by restarting the deployment.'
+
+          # Milder companion signal: more than 20 tasks in progress for 15m,
+          # regardless of throughput. Possible causes: one expensive task blocking
+          # the workers, a task stuck in a retry loop, or sustained overload.
+          - alert: AuthentikTaskBacklog
+            expr: sum(authentik_tasks_in_progress{job="authentik-worker"}) > 20
+            for: 15m
+            labels:
+              component: authentik
+              severity: warning
+            annotations:
+              summary: 'Authentik task backlog >20 for 15 min'
+              description: '{{ $value }} tasks in progress for 15+ min. Likely overload or a single hanging task.'
+
+          # User-facing failure signal based on 5xx responses from the server.
+          # The 0.1/s threshold (about 6 errors per minute) is deliberately low:
+          # on a low-traffic homelab that is already noticeable to users.
+          - alert: AuthentikHighErrorRate
+            expr: |
+              sum(rate(django_http_responses_total_by_status_total{job="authentik-server",status=~"5.."}[5m])) > 0.1
+            for: 5m
+            labels:
+              component: authentik
+              severity: warning
+            annotations:
+              summary: 'Authentik is serving 5xx errors'
+              description: '{{ $value | printf "%.2f" }} 5xx responses/sec for 5+ min.'
+
+          # Guards the 95th-percentile duration of dest="core" requests. 2s is
+          # high for an auth flow; by then users are visibly waiting to log in.
+          - alert: AuthentikHighLatency
+            expr: |
+              histogram_quantile(0.95,
+                sum by (le) (rate(authentik_main_request_duration_seconds_bucket{job="authentik-server",dest="core"}[5m]))
+              ) > 2
+            for: 10m
+            labels:
+              component: authentik
+              severity: warning
+            annotations:
+              summary: 'Authentik p95 request latency > 2s'
+              description: 'p95 latency {{ $value | printf "%.2f" }}s for 10+ min. Logins are slow.'
+
+  postgresql-alerts.yml: |
+    groups:
+      - name: postgresql-availability
+        rules:
+          - alert: PostgresExporterDown  # one alert per failing cloudnativepg target
+            expr: up{job="cloudnativepg"} == 0
+            for: 2m
+            labels:
+              component: postgresql
+              severity: critical
+            annotations:
+              summary: 'CloudNativePG metrics endpoint unreachable'
+              description: 'CNPG metrics exporter on {{ $labels.pod }} has been down for 2+ min. Postgres may be down or the sidecar may have crashed.'
+
+      - name: postgresql-capacity
+        rules:
+          # max_connections (200) is a per-instance limit, so count backends per
+          # pod: 80% of 200 = 160. Update the number if max_connections changes.
+          - alert: PostgresHighConnections
+            expr: sum by (cluster, pod) (cnpg_backends_total) > 160
+            for: 5m
+            labels:
+              severity: warning
+              component: postgresql
+            annotations:
+              summary: "Postgres cluster {{ $labels.cluster }} nearing connection limit"
+              description: "{{ $value }} active connections on {{ $labels.pod }} (>80% of max_connections=200). Check for connection leaks."
+
+          # Backends blocked waiting on a lock. Short waits normally clear fast,
+          # so only a value that stays above zero for the full 5m raises the alert.
+          - alert: PostgresBackendsWaiting
+            expr: cnpg_backends_waiting_total > 0
+            for: 5m
+            labels:
+              component: postgresql
+              severity: warning
+            annotations:
+              summary: 'Postgres has queries blocked on locks'
+              description: '{{ $value }} backends waiting on locks for 5+ min. Investigate long-running transactions.'
+
   longhorn-alerts.yml: |
     groups:
       - name: longhorn-volume-alerts