A Kubernetes operator that bridges Hardware Security Module (HSM) data storage with Kubernetes Secrets, providing true secret portability th
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

deploy agents in parallel, remove hsmsecret usage in mirroring

+226 -338
+2 -2
CLAUDE.md
··· 56 56 - **HSMPool-based Agent Discovery**: API and controllers query HSMPool to find all agent instances for a device type 57 57 - **Multiple Agent Instances**: Each physical device gets its own agent pod (e.g., `hsm-agent-pico-hsm-0`, `hsm-agent-pico-hsm-1`) 58 58 - **Multi-Agent Operations**: API operations (list, write, delete) work across all agents when mirroring is enabled 59 - - **Automatic Synchronization**: HSMSyncReconciler handles conflict detection and resolution between devices 59 + - **Automatic Synchronization**: HSMMirrorReconciler handles conflict detection and resolution between devices 60 60 61 61 **gRPC Communication Architecture:** 62 62 - Protocol definition in `api/proto/hsm/v1/hsm.proto` with 10 HSM operations ··· 71 71 ├── HSMSecretReconciler - HSM to K8s Secret sync 72 72 ├── HSMPoolReconciler - Aggregates discovery reports from pod annotations 73 73 ├── HSMPoolAgentReconciler - Deploys agents when pools are ready 74 - ├── HSMSyncReconciler - Multi-device HSM synchronization and conflict resolution 74 + ├── HSMMirrorReconciler - Multi-device HSM mirroring and conflict resolution 75 75 └── DiscoveryDaemonSetReconciler - Manages discovery DaemonSet lifecycle 76 76 77 77 Discovery Controllers:
+131 -73
internal/agent/deployment.go
··· 133 133 return m 134 134 } 135 135 136 + // deviceWork represents work to be done for a specific device 137 + type deviceWork struct { 138 + device hsmv1alpha1.DiscoveredDevice 139 + agentName string 140 + agentKey string 141 + index int 142 + } 143 + 136 144 // EnsureAgent ensures HSM agents are deployed for all available devices in the pool 137 145 func (m *Manager) EnsureAgent(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice, hsmSecret *hsmv1alpha1.HSMSecret) error { 138 146 // Get the HSMPool for this device to find all aggregated devices ··· 144 152 }, &hsmPool); err != nil { 145 153 return fmt.Errorf("failed to get HSMPool %s: %w", poolName, err) 146 154 } 147 - m.mu.Lock() 148 - defer m.mu.Unlock() 149 155 150 - // Ensure agents for each available aggregated device in the pool 156 + // Pre-collect available devices to process (no mutex needed) 157 + workItems := make([]deviceWork, 0, len(hsmPool.Status.AggregatedDevices)) 151 158 for i, aggregatedDevice := range hsmPool.Status.AggregatedDevices { 152 159 if !aggregatedDevice.Available { 153 160 continue 154 161 } 162 + workItems = append(workItems, deviceWork{ 163 + device: aggregatedDevice, 164 + agentName: fmt.Sprintf("%s-%s-%d", AgentNamePrefix, hsmDevice.Name, i), 165 + agentKey: fmt.Sprintf("%s-%d", hsmDevice.Name, i), 166 + index: i, 167 + }) 168 + } 155 169 156 - // Create unique agent name for each physical device 157 - agentName := fmt.Sprintf("%s-%s-%d", AgentNamePrefix, hsmDevice.Name, i) 158 - agentKey := fmt.Sprintf("%s-%d", hsmDevice.Name, i) // Unique key for tracking 170 + if len(workItems) == 0 { 171 + return nil // No available devices to process 172 + } 159 173 160 - // Check if we already have this specific agent tracked 161 - if agentInfo, exists := m.activeAgents[agentKey]; exists { 162 - // Agent exists in tracking, verify it's still healthy 163 - if m.isAgentHealthy(ctx, agentInfo) { 164 - continue // Agent is healthy, skip 165 - } 166 - // Agent unhealthy, remove from tracking and recreate 167 - m.removeAgentFromTracking(agentKey) 168 - } 174 + // Process devices in parallel 175 + var wg sync.WaitGroup 176 + errChan := make(chan error, len(workItems)) 169 177 170 - // Check if deployment exists in Kubernetes 171 - var deployment appsv1.Deployment 172 - err := m.Get(ctx, types.NamespacedName{ 173 - Name: agentName, 174 - Namespace: hsmDevice.Namespace, 175 - }, &deployment) 178 + for _, work := range workItems { 179 + wg.Add(1) 180 + go func(w deviceWork) { 181 + defer wg.Done() 176 182 177 - if err == nil { 178 - // Agent exists, but check if it needs updating (image version, device/node configuration) 179 - needsUpdate, err := m.agentNeedsUpdate(ctx, &deployment, hsmDevice) 180 - if err != nil { 181 - return fmt.Errorf("failed to check if agent deployment %s needs update: %w", agentName, err) 183 + // Mutex-protected check and update of activeAgents 184 + m.mu.Lock() 185 + needsDeployment := false 186 + if agentInfo, exists := m.activeAgents[w.agentKey]; exists { 187 + if !m.isAgentHealthy(ctx, agentInfo) { 188 + m.removeAgentFromTracking(w.agentKey) 189 + needsDeployment = true 190 + } 191 + } else { 192 + needsDeployment = true 182 193 } 194 + m.mu.Unlock() 183 195 184 - // Also check device-specific configuration 185 - if !needsUpdate { 186 - needsUpdate = m.deploymentNeedsUpdateForDevice(&deployment, &aggregatedDevice) 196 + // Skip if agent is healthy and tracked 197 + if !needsDeployment { 198 + return 187 199 } 188 200 189 - if needsUpdate { 190 - // Delete existing deployment to trigger recreation 191 - if err := m.Delete(ctx, &deployment); err != nil { 192 - return fmt.Errorf("failed to delete outdated agent deployment %s: %w", agentName, err) 193 - } 194 - } else { 195 - // Agent exists and is correct - wait for it and track it 196 - podIPs, err := m.waitForAgentReady(ctx, agentName, hsmDevice.Namespace) 197 - if err != nil { 198 - return fmt.Errorf("failed waiting for existing agent pods %s: %w", agentName, err) 199 - } 201 + // Deploy agent for this device (Kubernetes API calls - no mutex needed) 202 + if err := m.deployAgentForDevice(ctx, w, hsmDevice); err != nil { 203 + errChan <- fmt.Errorf("failed to deploy agent %s: %w", w.agentName, err) 204 + } 205 + }(work) 206 + } 200 207 201 - // Track the existing agent 202 - agentInfo := &AgentInfo{ 203 - DeviceName: agentKey, 204 - PodIPs: podIPs, 205 - CreatedAt: time.Now(), 206 - LastHealthCheck: time.Now(), 207 - Status: AgentStatusReady, 208 - AgentName: agentName, 209 - Namespace: hsmDevice.Namespace, 210 - } 208 + // Wait for all goroutines to complete 209 + wg.Wait() 210 + close(errChan) 211 211 212 - m.activeAgents[agentKey] = agentInfo 213 - continue 214 - } 215 - } else if !errors.IsNotFound(err) { 216 - return fmt.Errorf("failed to check agent deployment %s: %w", agentName, err) 217 - } 212 + // Collect any errors 213 + deploymentErrors := make([]error, 0, len(workItems)) 214 + for err := range errChan { 215 + deploymentErrors = append(deploymentErrors, err) 216 + } 218 217 219 - // Create agent deployment for this specific device 220 - if err := m.createAgentDeployment(ctx, hsmDevice, nil, hsmDevice.Namespace, &aggregatedDevice, agentName); err != nil { 221 - return fmt.Errorf("failed to create agent deployment %s: %w", agentName, err) 222 - } 218 + if len(deploymentErrors) > 0 { 219 + return fmt.Errorf("agent deployment errors: %v", deploymentErrors) 220 + } 221 + 222 + return nil 223 + } 224 + 225 + // deployAgentForDevice handles the deployment logic for a single device 226 + func (m *Manager) deployAgentForDevice(ctx context.Context, work deviceWork, hsmDevice *hsmv1alpha1.HSMDevice) error { 227 + // Check if deployment exists in Kubernetes 228 + var deployment appsv1.Deployment 229 + err := m.Get(ctx, types.NamespacedName{ 230 + Name: work.agentName, 231 + Namespace: hsmDevice.Namespace, 232 + }, &deployment) 223 233 224 - // Wait for agent pods to be ready and get their IPs 225 - podIPs, err := m.waitForAgentReady(ctx, agentName, hsmDevice.Namespace) 234 + if err == nil { 235 + // Agent exists, but check if it needs updating (image version, device/node configuration) 236 + needsUpdate, err := m.agentNeedsUpdate(ctx, &deployment, hsmDevice) 226 237 if err != nil { 227 - return fmt.Errorf("failed waiting for agent pods %s: %w", agentName, err) 238 + return fmt.Errorf("failed to check if agent deployment %s needs update: %w", work.agentName, err) 228 239 } 229 240 230 - // Track the new agent 231 - agentInfo := &AgentInfo{ 232 - DeviceName: agentKey, 233 - PodIPs: podIPs, 234 - CreatedAt: time.Now(), 235 - LastHealthCheck: time.Now(), 236 - Status: AgentStatusReady, 237 - AgentName: agentName, 238 - Namespace: hsmDevice.Namespace, 241 + // Also check device-specific configuration 242 + if !needsUpdate { 243 + needsUpdate = m.deploymentNeedsUpdateForDevice(&deployment, &work.device) 239 244 } 240 245 241 - m.activeAgents[agentKey] = agentInfo 246 + if needsUpdate { 247 + // Delete existing deployment to trigger recreation 248 + if err := m.Delete(ctx, &deployment); err != nil { 249 + return fmt.Errorf("failed to delete outdated agent deployment %s: %w", work.agentName, err) 250 + } 251 + } else { 252 + // Agent exists and is correct - wait for it and track it 253 + podIPs, err := m.waitForAgentReady(ctx, work.agentName, hsmDevice.Namespace) 254 + if err != nil { 255 + return fmt.Errorf("failed waiting for existing agent pods %s: %w", work.agentName, err) 256 + } 257 + 258 + // Track the existing agent (mutex-protected) 259 + m.mu.Lock() 260 + agentInfo := &AgentInfo{ 261 + DeviceName: work.agentKey, 262 + PodIPs: podIPs, 263 + CreatedAt: time.Now(), 264 + LastHealthCheck: time.Now(), 265 + Status: AgentStatusReady, 266 + AgentName: work.agentName, 267 + Namespace: hsmDevice.Namespace, 268 + } 269 + m.activeAgents[work.agentKey] = agentInfo 270 + m.mu.Unlock() 271 + return nil 272 + } 273 + } else if !errors.IsNotFound(err) { 274 + return fmt.Errorf("failed to check agent deployment %s: %w", work.agentName, err) 242 275 } 276 + 277 + // Create agent deployment for this specific device 278 + if err := m.createAgentDeployment(ctx, hsmDevice, nil, hsmDevice.Namespace, &work.device, work.agentName); err != nil { 279 + return fmt.Errorf("failed to create agent deployment %s: %w", work.agentName, err) 280 + } 281 + 282 + // Wait for agent pods to be ready and get their IPs 283 + podIPs, err := m.waitForAgentReady(ctx, work.agentName, hsmDevice.Namespace) 284 + if err != nil { 285 + return fmt.Errorf("failed waiting for agent pods %s: %w", work.agentName, err) 286 + } 287 + 288 + // Track the new agent (mutex-protected) 289 + m.mu.Lock() 290 + agentInfo := &AgentInfo{ 291 + DeviceName: work.agentKey, 292 + PodIPs: podIPs, 293 + CreatedAt: time.Now(), 294 + LastHealthCheck: time.Now(), 295 + Status: AgentStatusReady, 296 + AgentName: work.agentName, 297 + Namespace: hsmDevice.Namespace, 298 + } 299 + m.activeAgents[work.agentKey] = agentInfo 300 + m.mu.Unlock() 243 301 244 302 return nil 245 303 }
-126
internal/controller/hsmmirror_controller.go
··· 1 - /* 2 - Copyright 2025. 3 - 4 - Licensed under the Apache License, Version 2.0 (the "License"); 5 - you may not use this file except in compliance with the License. 6 - You may obtain a copy of the License at 7 - 8 - http://www.apache.org/licenses/LICENSE-2.0 9 - 10 - Unless required by applicable law or agreed to in writing, software 11 - distributed under the License is distributed on an "AS IS" BASIS, 12 - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 - See the License for the specific language governing permissions and 14 - limitations under the License. 15 - */ 16 - 17 - package controller 18 - 19 - import ( 20 - "context" 21 - "time" 22 - 23 - "k8s.io/apimachinery/pkg/runtime" 24 - ctrl "sigs.k8s.io/controller-runtime" 25 - "sigs.k8s.io/controller-runtime/pkg/client" 26 - "sigs.k8s.io/controller-runtime/pkg/log" 27 - 28 - hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 29 - "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 30 - "github.com/evanjarrett/hsm-secrets-operator/internal/mirror" 31 - ) 32 - 33 - // HSMMirrorReconciler handles multi-device HSM mirroring and conflict resolution 34 - type HSMMirrorReconciler struct { 35 - client.Client 36 - Scheme *runtime.Scheme 37 - MirrorManager *mirror.MirrorManager 38 - 39 - // MirrorInterval controls how often to perform mirror checks (default: 30 seconds) 40 - MirrorInterval time.Duration 41 - } 42 - 43 - // NewHSMMirrorReconciler creates a new HSM mirror reconciler 44 - func NewHSMMirrorReconciler(k8sClient client.Client, scheme *runtime.Scheme, agentManager *agent.Manager, operatorNamespace string) *HSMMirrorReconciler { 45 - logger := ctrl.Log.WithName("hsm-mirror-controller") 46 - mirrorManager := mirror.NewMirrorManager(k8sClient, agentManager, logger, operatorNamespace) 47 - 48 - return &HSMMirrorReconciler{ 49 - Client: k8sClient, 50 - Scheme: scheme, 51 - MirrorManager: mirrorManager, 52 - MirrorInterval: 30 * time.Second, // Default mirror interval 53 - } 54 - } 55 - 56 - // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmsecrets,verbs=get;list;watch;update;patch 57 - // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmsecrets/status,verbs=get;update;patch 58 - // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmpools,verbs=get;list;watch 59 - // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmdevices,verbs=get;list;watch 60 - 61 - // Reconcile performs HSM device mirroring and conflict resolution 62 - func (r *HSMMirrorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 63 - logger := log.FromContext(ctx) 64 - 65 - // Fetch the HSMSecret instance 66 - var hsmSecret hsmv1alpha1.HSMSecret 67 - if err := r.Get(ctx, req.NamespacedName, &hsmSecret); err != nil { 68 - return ctrl.Result{}, client.IgnoreNotFound(err) 69 - } 70 - 71 - // Skip sync if AutoSync is disabled 72 - if !hsmSecret.Spec.AutoSync { 73 - logger.V(1).Info("AutoSync disabled, skipping sync", "secret", hsmSecret.Name) 74 - return ctrl.Result{}, nil 75 - } 76 - 77 - logger.Info("Starting multi-device HSM mirror", "secret", hsmSecret.Name) 78 - 79 - // Perform the mirror operation 80 - result, err := r.MirrorManager.MirrorSecret(ctx, &hsmSecret) 81 - if err != nil { 82 - logger.Error(err, "Failed to perform HSM mirror", "secret", hsmSecret.Name) 83 - 84 - // Update status with error 85 - hsmSecret.Status.SyncStatus = hsmv1alpha1.SyncStatusError 86 - hsmSecret.Status.LastError = err.Error() 87 - if updateErr := r.Status().Update(ctx, &hsmSecret); updateErr != nil { 88 - logger.Error(updateErr, "Failed to update HSMSecret status") 89 - } 90 - 91 - // Retry sooner on error 92 - return ctrl.Result{RequeueAfter: r.MirrorInterval / 2}, nil 93 - } 94 - 95 - // Update HSMSecret status with mirror results 96 - if err := r.MirrorManager.UpdateHSMSecretStatus(ctx, &hsmSecret, result); err != nil { 97 - logger.Error(err, "Failed to update HSMSecret status", "secret", hsmSecret.Name) 98 - return ctrl.Result{RequeueAfter: r.MirrorInterval / 2}, err 99 - } 100 - 101 - // Log mirror results 102 - logger.Info("Per-secret HSM mirror completed", 103 - "secret", hsmSecret.Name, 104 - "success", result.Success, 105 - "secretsProcessed", result.SecretsProcessed, 106 - "secretsUpdated", result.SecretsUpdated, 107 - "secretsCreated", result.SecretsCreated, 108 - "metadataRestored", result.MetadataRestored, 109 - "errors", len(result.Errors)) 110 - 111 - // Calculate next mirror interval based on HSMSecret spec 112 - mirrorInterval := r.MirrorInterval 113 - if hsmSecret.Spec.SyncInterval > 0 { 114 - mirrorInterval = time.Duration(hsmSecret.Spec.SyncInterval) * time.Second 115 - } 116 - 117 - return ctrl.Result{RequeueAfter: mirrorInterval}, nil 118 - } 119 - 120 - // SetupWithManager sets up the controller with the Manager. 121 - func (r *HSMMirrorReconciler) SetupWithManager(mgr ctrl.Manager) error { 122 - return ctrl.NewControllerManagedBy(mgr). 123 - For(&hsmv1alpha1.HSMSecret{}). 124 - Named("hsmmirror"). 125 - Complete(r) 126 - }
+63 -131
internal/mirror/manager.go
··· 24 24 "time" 25 25 26 26 "github.com/go-logr/logr" 27 - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 27 "sigs.k8s.io/controller-runtime/pkg/client" 29 28 30 29 hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" ··· 110 109 MirrorTypeCreate // Create missing secret 111 110 MirrorTypeRestoreMetadata // Add metadata to existing secret 112 111 ) 113 - 114 - // MirrorSecret performs per-secret mirroring across all HSM devices 115 - func (mm *MirrorManager) MirrorSecret(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret) (*MirrorResult, error) { 116 - logger := mm.logger.WithValues("secret", hsmSecret.Name, "namespace", hsmSecret.Namespace) 117 - secretPath := hsmSecret.Name 118 - 119 - // Get all available HSM devices from HSMPools in the operator namespace 120 - devices, err := mm.getAvailableDevices(ctx, mm.operatorNamespace) 121 - if err != nil { 122 - return nil, fmt.Errorf("failed to get available devices: %w", err) 123 - } 124 - 125 - if len(devices) == 0 { 126 - return &MirrorResult{ 127 - Success: false, 128 - SecretResults: make(map[string]SecretMirrorResult), 129 - Errors: []string{"no HSM devices available"}, 130 - }, fmt.Errorf("no HSM devices available") 131 - } 132 - 133 - logger.Info("Starting per-secret mirror", "devices", len(devices), "secretPath", secretPath) 134 - 135 - // Build inventory of all secrets across all devices 136 - inventory, err := mm.buildSecretInventory(ctx, []string{secretPath}, devices, mm.operatorNamespace, logger) 137 - if err != nil { 138 - return &MirrorResult{ 139 - Success: false, 140 - SecretResults: make(map[string]SecretMirrorResult), 141 - Errors: []string{fmt.Sprintf("failed to build secret inventory: %v", err)}, 142 - }, fmt.Errorf("failed to build secret inventory: %w", err) 143 - } 144 - 145 - // Create mirror plan for the single secret 146 - mirrorPlans := mm.createMirrorPlans(inventory, logger) 147 - 148 - // Execute mirror operations 149 - result := mm.executeMirrorPlans(ctx, mirrorPlans, mm.operatorNamespace, logger) 150 - 151 - logger.Info("Per-secret sync completed", 152 - "secretsProcessed", result.SecretsProcessed, 153 - "secretsUpdated", result.SecretsUpdated, 154 - "secretsCreated", result.SecretsCreated, 155 - "metadataRestored", result.MetadataRestored, 156 - "errors", len(result.Errors)) 157 - 158 - return result, nil 159 - } 160 112 161 113 // buildSecretInventory builds a comprehensive inventory of secrets across all devices 162 114 func (mm *MirrorManager) buildSecretInventory(ctx context.Context, secretPaths []string, devices []string, operatorNamespace string, logger logr.Logger) (map[string]*SecretInventory, error) { ··· 686 638 return devices, nil 687 639 } 688 640 689 - // UpdateHSMSecretStatus updates the HSMSecret status with mirror results 690 - func (mm *MirrorManager) UpdateHSMSecretStatus(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret, result *MirrorResult) error { 691 - now := metav1.NewTime(time.Now()) 641 + // MirrorAllSecrets performs device-scoped mirroring of ALL secrets across HSM devices 642 + // This discovers all secrets on any device and ensures they exist on all other devices 643 + func (mm *MirrorManager) MirrorAllSecrets(ctx context.Context) (*MirrorResult, error) { 644 + logger := mm.logger.WithValues("operation", "device-scoped-mirror") 645 + logger.Info("Starting device-scoped mirroring of all HSM secrets") 646 + 647 + // Get all available HSM devices 648 + devices, err := mm.getAvailableDevices(ctx, mm.operatorNamespace) 649 + if err != nil { 650 + return nil, fmt.Errorf("failed to get available devices: %w", err) 651 + } 692 652 693 - // Update overall status 694 - if result.Success { 695 - hsmSecret.Status.SyncStatus = hsmv1alpha1.SyncStatusInSync 696 - hsmSecret.Status.LastSyncTime = &now 697 - if len(result.Errors) == 0 { 698 - hsmSecret.Status.LastError = "" 699 - } else { 700 - // Partial success - some errors occurred 701 - hsmSecret.Status.LastError = fmt.Sprintf("Partial sync completed with %d errors", len(result.Errors)) 702 - } 703 - } else { 704 - hsmSecret.Status.SyncStatus = hsmv1alpha1.SyncStatusError 705 - if len(result.Errors) > 0 { 706 - hsmSecret.Status.LastError = result.Errors[0] // Show first error 707 - } else { 708 - hsmSecret.Status.LastError = "Failed to sync secret" 709 - } 653 + if len(devices) <= 1 { 654 + logger.Info("Skipping mirroring - need at least 2 devices", "devices", len(devices)) 655 + return &MirrorResult{Success: true, SecretsProcessed: 0}, nil 710 656 } 711 657 712 - // Update secret-level sync information 713 - secretResult, hasSecretResult := result.SecretResults[hsmSecret.Name] 714 - if hasSecretResult { 715 - // Calculate checksum from the source device if sync was successful 716 - if secretResult.Success { 717 - // Read the current data to calculate checksum 718 - if devices, err := mm.getAvailableDevices(ctx, mm.operatorNamespace); err == nil && len(devices) > 0 { 719 - if data, _, err := mm.readSecretWithMetadata(ctx, devices[0], hsmSecret.Name, mm.operatorNamespace, mm.logger); err == nil { 720 - hsmSecret.Status.SecretChecksum = mm.calculateChecksum(data) 721 - } 722 - } 723 - } 658 + logger.Info("Starting mirroring across devices", "devices", devices, "deviceCount", len(devices)) 659 + 660 + // Discover all secrets across all devices 661 + allSecretPaths := mm.discoverAllSecrets(ctx, devices, mm.operatorNamespace, logger) 724 662 725 - // Set primary device information if available 726 - if secretResult.SourceDevice != "" { 727 - hsmSecret.Status.PrimaryDevice = secretResult.SourceDevice 728 - } 663 + if len(allSecretPaths) == 0 { 664 + logger.Info("No secrets found to mirror") 665 + return &MirrorResult{Success: true, SecretsProcessed: 0}, nil 729 666 } 730 667 731 - // Update device-specific sync status based on available devices 732 - devices, err := mm.getAvailableDevices(ctx, mm.operatorNamespace) 733 - if err == nil { 734 - hsmSecret.Status.DeviceSyncStatus = make([]hsmv1alpha1.HSMDeviceSync, 0, len(devices)) 668 + logger.Info("Discovered secrets for mirroring", "secretCount", len(allSecretPaths), "secrets", allSecretPaths) 735 669 736 - for _, deviceName := range devices { 737 - deviceSync := hsmv1alpha1.HSMDeviceSync{ 738 - DeviceName: deviceName, 739 - LastSyncTime: &now, 740 - Checksum: "", 741 - Online: true, // Assume online if in available devices list 742 - Version: 0, 743 - } 670 + // Build inventory for all discovered secrets 671 + inventory, err := mm.buildSecretInventory(ctx, allSecretPaths, devices, mm.operatorNamespace, logger) 672 + if err != nil { 673 + return nil, fmt.Errorf("failed to build secret inventory: %w", err) 674 + } 744 675 745 - // Check if this device was involved in sync operations 746 - if hasSecretResult { 747 - if secretResult.SourceDevice == deviceName { 748 - deviceSync.Status = hsmv1alpha1.SyncStatusInSync 749 - deviceSync.Version = secretResult.SourceVersion 750 - } else { 751 - // Check if this device was a target 752 - isTarget := false 753 - for _, target := range secretResult.TargetDevices { 754 - if target == deviceName { 755 - isTarget = true 756 - break 757 - } 758 - } 676 + // Create mirror plans 677 + plans := mm.createMirrorPlans(inventory, logger) 678 + logger.Info("Created mirror plans", "planCount", len(plans)) 679 + 680 + // Execute mirror plans 681 + return mm.executeMirrorPlans(ctx, plans, mm.operatorNamespace, logger), nil 682 + } 683 + 684 + // discoverAllSecrets discovers all secrets present on any HSM device 685 + func (mm *MirrorManager) discoverAllSecrets(ctx context.Context, devices []string, operatorNamespace string, logger logr.Logger) []string { 686 + secretPaths := make(map[string]bool) 687 + 688 + for _, deviceName := range devices { 689 + deviceLogger := logger.WithValues("device", deviceName) 690 + 691 + hsmClient, err := mm.agentManager.CreateSingleGRPCClient(ctx, deviceName, operatorNamespace, deviceLogger) 692 + if err != nil { 693 + deviceLogger.Info("Failed to connect to device for discovery, skipping", "error", err) 694 + continue 695 + } 759 696 760 - if isTarget { 761 - if secretResult.Success { 762 - deviceSync.Status = hsmv1alpha1.SyncStatusInSync 763 - deviceSync.Version = secretResult.SourceVersion 764 - } else { 765 - deviceSync.Status = hsmv1alpha1.SyncStatusError 766 - if secretResult.Error != nil { 767 - deviceSync.LastError = secretResult.Error.Error() 768 - } 769 - } 770 - } else { 771 - // Device wasn't involved in sync - assume in sync 772 - deviceSync.Status = hsmv1alpha1.SyncStatusInSync 773 - } 774 - } 775 - } else { 776 - // No secret result - assume in sync 777 - deviceSync.Status = hsmv1alpha1.SyncStatusInSync 778 - } 697 + secrets, err := hsmClient.ListSecrets(ctx, "") 698 + if err != nil { 699 + deviceLogger.Info("Failed to list secrets on device, skipping", "error", err) 700 + continue 701 + } 779 702 780 - hsmSecret.Status.DeviceSyncStatus = append(hsmSecret.Status.DeviceSyncStatus, deviceSync) 703 + deviceLogger.Info("Discovered secrets on device", "secretCount", len(secrets)) 704 + for _, secretPath := range secrets { 705 + secretPaths[secretPath] = true 781 706 } 782 707 } 783 708 784 - return mm.client.Status().Update(ctx, hsmSecret) 709 + // Convert to sorted slice 710 + result := make([]string, 0, len(secretPaths)) 711 + for secretPath := range secretPaths { 712 + result = append(result, secretPath) 713 + } 714 + sort.Strings(result) 715 + 716 + return result 785 717 } 786 718 787 719 // calculateChecksum calculates SHA256 checksum of secret data
+30 -6
internal/modes/manager/manager.go
··· 17 17 package manager 18 18 19 19 import ( 20 + "context" 20 21 "crypto/tls" 21 22 "flag" 22 23 "os" ··· 42 43 "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 43 44 "github.com/evanjarrett/hsm-secrets-operator/internal/api" 44 45 "github.com/evanjarrett/hsm-secrets-operator/internal/controller" 46 + "github.com/evanjarrett/hsm-secrets-operator/internal/mirror" 45 47 ) 46 48 47 49 var ( ··· 292 294 return err 293 295 } 294 296 295 - // Set up HSM mirror controller for multi-device mirroring 296 - if err := controller.NewHSMMirrorReconciler(mgr.GetClient(), mgr.GetScheme(), agentManager, operatorNamespace).SetupWithManager(mgr); err != nil { 297 - setupLog.Error(err, "unable to create controller", "controller", "HSMMirror") 298 - return err 299 - } 300 - 301 297 // Set up discovery DaemonSet controller (manager-owned) 302 298 if err := (&controller.DiscoveryDaemonSetReconciler{ 303 299 Client: mgr.GetClient(), ··· 345 341 } 346 342 }() 347 343 } 344 + 345 + // Start device-scoped HSM mirroring in background 346 + mirrorManager := mirror.NewMirrorManager(mgr.GetClient(), agentManager, ctrl.Log.WithName("device-mirror"), operatorNamespace) 347 + go func() { 348 + mirrorTicker := time.NewTicker(30 * time.Second) // Mirror every 30 seconds 349 + defer mirrorTicker.Stop() 350 + 351 + setupLog.Info("starting device-scoped HSM mirroring", "interval", "30s") 352 + 353 + // Initial mirror attempt after 30 seconds to allow agents to start 354 + time.Sleep(30 * time.Second) 355 + 356 + for range mirrorTicker.C { 357 + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) 358 + result, err := mirrorManager.MirrorAllSecrets(ctx) 359 + cancel() 360 + 361 + if err != nil { 362 + setupLog.Error(err, "device-scoped mirroring failed") 363 + } else if result.SecretsProcessed > 0 { 364 + setupLog.Info("device-scoped mirroring completed", 365 + "secretsProcessed", result.SecretsProcessed, 366 + "secretsUpdated", result.SecretsUpdated, 367 + "secretsCreated", result.SecretsCreated, 368 + "errors", len(result.Errors)) 369 + } 370 + } 371 + }() 348 372 349 373 setupLog.Info("starting manager") 350 374 if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {