A Kubernetes operator that bridges Hardware Security Module (HSM) data storage with Kubernetes Secrets, providing true secret portability th
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

begin hsm mirror syncing

+1686 -232
+1 -1
CLAUDE.md
··· 143 143 name: my-secret # HSM path = metadata.name 144 144 spec: 145 145 autoSync: true # Bidirectional sync (default) 146 - syncInterval: 300 # Sync interval in seconds 146 + syncInterval: 30 # Sync interval in seconds 147 147 status: 148 148 syncStatus: "InSync" # InSync|OutOfSync|Error|Pending 149 149 hsmChecksum: "sha256:abc123..." # SHA256 checksum for change detection
+46 -2
api/v1alpha1/hsmsecret_types.go
··· 43 43 44 44 // SyncInterval defines how often to check for HSM changes (in seconds) 45 45 // Only applies when AutoSync is true 46 - // +kubebuilder:default=300 46 + // +kubebuilder:default=30 47 47 // +optional 48 48 SyncInterval int32 `json:"syncInterval,omitempty"` 49 49 } ··· 62 62 SyncStatusPending SyncStatus = "Pending" 63 63 ) 64 64 65 + // HSMDeviceSync tracks synchronization state for a specific HSM device 66 + type HSMDeviceSync struct { 67 + // DeviceName is the name of the HSM device 68 + DeviceName string `json:"deviceName"` 69 + 70 + // LastSyncTime is the timestamp of the last successful sync with this device 71 + // +optional 72 + LastSyncTime *metav1.Time `json:"lastSyncTime,omitempty"` 73 + 74 + // Checksum is the SHA256 checksum of the data on this device 75 + // +optional 76 + Checksum string `json:"checksum,omitempty"` 77 + 78 + // Status indicates the sync status for this specific device 79 + // +optional 80 + Status SyncStatus `json:"status,omitempty"` 81 + 82 + // LastError contains the last error when syncing with this device 83 + // +optional 84 + LastError string `json:"lastError,omitempty"` 85 + 86 + // Online indicates if this device is currently available 87 + // +optional 88 + Online bool `json:"online,omitempty"` 89 + 90 + // Version is a monotonically increasing counter for conflict resolution 91 + // Updated each time the secret changes on this device 92 + // +optional 93 + Version int64 `json:"version,omitempty"` 94 + } 95 + 65 96 // HSMSecretStatus defines the observed state of HSMSecret. 66 97 type HSMSecretStatus struct { 67 98 // LastSyncTime is the timestamp of the last successful synchronization 68 99 // +optional 69 100 LastSyncTime *metav1.Time `json:"lastSyncTime,omitempty"` 70 101 71 - // HSMChecksum is the SHA256 checksum of the HSM data 102 + // HSMChecksum is the SHA256 checksum of the HSM data (deprecated - use DeviceSyncStatus) 72 103 // +optional 73 104 HSMChecksum string `json:"hsmChecksum,omitempty"` 74 105 ··· 91 122 // SecretRef references the created Kubernetes Secret 92 123 // +optional 93 124 SecretRef *corev1.ObjectReference `json:"secretRef,omitempty"` 125 + 126 + // DeviceSyncStatus tracks sync status for each HSM device in mirrored setups 127 + // +optional 128 + DeviceSyncStatus []HSMDeviceSync `json:"deviceSyncStatus,omitempty"` 129 + 130 + // PrimaryDevice indicates which device is currently considered the primary source of truth 131 + // Used for conflict resolution in multi-device scenarios 132 + // +optional 133 + PrimaryDevice string `json:"primaryDevice,omitempty"` 134 + 135 + // SyncConflict indicates if there are conflicting versions across devices 136 + // +optional 137 + SyncConflict bool `json:"syncConflict,omitempty"` 94 138 } 95 139 96 140 // +kubebuilder:object:root=true
+26
api/v1alpha1/zz_generated.deepcopy.go
··· 189 189 } 190 190 191 191 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 192 + func (in *HSMDeviceSync) DeepCopyInto(out *HSMDeviceSync) { 193 + *out = *in 194 + if in.LastSyncTime != nil { 195 + in, out := &in.LastSyncTime, &out.LastSyncTime 196 + *out = (*in).DeepCopy() 197 + } 198 + } 199 + 200 + // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HSMDeviceSync. 201 + func (in *HSMDeviceSync) DeepCopy() *HSMDeviceSync { 202 + if in == nil { 203 + return nil 204 + } 205 + out := new(HSMDeviceSync) 206 + in.DeepCopyInto(out) 207 + return out 208 + } 209 + 210 + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 192 211 func (in *HSMPool) DeepCopyInto(out *HSMPool) { 193 212 *out = *in 194 213 out.TypeMeta = in.TypeMeta ··· 414 433 in, out := &in.SecretRef, &out.SecretRef 415 434 *out = new(corev1.ObjectReference) 416 435 **out = **in 436 + } 437 + if in.DeviceSyncStatus != nil { 438 + in, out := &in.DeviceSyncStatus, &out.DeviceSyncStatus 439 + *out = make([]HSMDeviceSync, len(*in)) 440 + for i := range *in { 441 + (*in)[i].DeepCopyInto(&(*out)[i]) 442 + } 417 443 } 418 444 } 419 445
+52 -2
config/crd/bases/hsm.j5t.io_hsmsecrets.yaml
··· 70 70 create 71 71 type: string 72 72 syncInterval: 73 - default: 300 73 + default: 30 74 74 description: |- 75 75 SyncInterval defines how often to check for HSM changes (in seconds) 76 76 Only applies when AutoSync is true ··· 138 138 - type 139 139 type: object 140 140 type: array 141 + deviceSyncStatus: 142 + description: DeviceSyncStatus tracks sync status for each HSM device 143 + in mirrored setups 144 + items: 145 + description: HSMDeviceSync tracks synchronization state for a specific 146 + HSM device 147 + properties: 148 + checksum: 149 + description: Checksum is the SHA256 checksum of the data on 150 + this device 151 + type: string 152 + deviceName: 153 + description: DeviceName is the name of the HSM device 154 + type: string 155 + lastError: 156 + description: LastError contains the last error when syncing 157 + with this device 158 + type: string 159 + lastSyncTime: 160 + description: LastSyncTime is the timestamp of the last successful 161 + sync with this device 162 + format: date-time 163 + type: string 164 + online: 165 + description: Online indicates if this device is currently available 166 + type: boolean 167 + status: 168 + description: Status indicates the sync status for this specific 169 + device 170 + type: string 171 + version: 172 + description: |- 173 + Version is a monotonically increasing counter for conflict resolution 174 + Updated each time the secret changes on this device 175 + format: int64 176 + type: integer 177 + required: 178 + - deviceName 179 + type: object 180 + type: array 141 181 hsmChecksum: 142 - description: HSMChecksum is the SHA256 checksum of the HSM data 182 + description: HSMChecksum is the SHA256 checksum of the HSM data (deprecated 183 + - use DeviceSyncStatus) 143 184 type: string 144 185 lastError: 145 186 description: LastError contains the last error message if SyncStatus ··· 149 190 description: LastSyncTime is the timestamp of the last successful 150 191 synchronization 151 192 format: date-time 193 + type: string 194 + primaryDevice: 195 + description: |- 196 + PrimaryDevice indicates which device is currently considered the primary source of truth 197 + Used for conflict resolution in multi-device scenarios 152 198 type: string 153 199 secretChecksum: 154 200 description: SecretChecksum is the SHA256 checksum of the Kubernetes ··· 197 243 type: string 198 244 type: object 199 245 x-kubernetes-map-type: atomic 246 + syncConflict: 247 + description: SyncConflict indicates if there are conflicting versions 248 + across devices 249 + type: boolean 200 250 syncStatus: 201 251 description: SyncStatus indicates the current synchronization status 202 252 type: string
+2 -1
go.mod
··· 11 11 github.com/onsi/gomega v1.36.1 12 12 github.com/stretchr/testify v1.10.0 13 13 google.golang.org/grpc v1.68.1 14 + google.golang.org/protobuf v1.36.5 14 15 k8s.io/api v0.33.4 15 16 k8s.io/apimachinery v0.33.4 16 17 k8s.io/client-go v0.33.4 ··· 74 75 github.com/spf13/cobra v1.8.1 // indirect 75 76 github.com/spf13/pflag v1.0.5 // indirect 76 77 github.com/stoewer/go-strcase v1.3.0 // indirect 78 + github.com/stretchr/objx v0.5.2 // indirect 77 79 github.com/twitchyliquid64/golang-asm v0.15.1 // indirect 78 80 github.com/ugorji/go/codec v1.2.12 // indirect 79 81 github.com/x448/float16 v0.8.4 // indirect ··· 102 104 gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect 103 105 google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 // indirect 104 106 google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 // indirect 105 - google.golang.org/protobuf v1.36.5 // indirect 106 107 gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect 107 108 gopkg.in/inf.v0 v0.9.1 // indirect 108 109 gopkg.in/yaml.v3 v3.0.1 // indirect
+333 -45
internal/api/proxy_client.go
··· 18 18 19 19 import ( 20 20 "context" 21 + "fmt" 21 22 "net/http" 22 23 "sync" 24 + "time" 23 25 24 26 "github.com/gin-gonic/gin" 25 27 "github.com/go-logr/logr" 26 28 27 29 "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 28 30 ) 31 + 32 + // WriteResult represents the result of writing to a single device 33 + type WriteResult struct { 34 + DeviceName string 35 + Error error 36 + } 29 37 30 38 // ProxyClient handles HTTP requests and proxies them to gRPC clients 31 39 // It has methods that match the HTTP endpoints and handle the full request/response cycle ··· 96 104 return grpcClient, nil 97 105 } 98 106 107 + // getAllAvailableGRPCClients returns all available gRPC clients for mirroring operations 108 + func (p *ProxyClient) getAllAvailableGRPCClients(c *gin.Context) (map[string]hsm.Client, error) { 109 + // Extract namespace 110 + namespace := c.GetHeader("X-Namespace") 111 + if namespace == "" { 112 + namespace = "secrets" 113 + } 114 + 115 + // Get all available devices 116 + devices, err := p.server.getAllAvailableAgents(c.Request.Context(), namespace) 117 + if err != nil { 118 + return nil, err 119 + } 120 + 121 + clients := make(map[string]hsm.Client) 122 + p.clientsMutex.Lock() 123 + defer p.clientsMutex.Unlock() 124 + 125 + for _, deviceName := range devices { 126 + // Try to get existing client for this device 127 + if client, exists := p.grpcClients[deviceName]; exists && client.IsConnected() { 128 + clients[deviceName] = client 129 + continue 130 + } 131 + 132 + // Close existing client for this device if it exists but is not connected 133 + if oldClient, exists := p.grpcClients[deviceName]; exists { 134 + if closeErr := oldClient.Close(); closeErr != nil { 135 + p.logger.V(1).Info("Error closing old gRPC client", "device", deviceName, "error", closeErr) 136 + } 137 + delete(p.grpcClients, deviceName) 138 + } 139 + 140 + // Create new gRPC client 141 + grpcClient, err := p.server.createGRPCClient(c.Request.Context(), deviceName, namespace) 142 + if err != nil { 143 + p.logger.V(1).Info("Failed to create gRPC client", "device", deviceName, "error", err) 144 + continue 145 + } 146 + 147 + // Cache and include the client 148 + p.grpcClients[deviceName] = grpcClient 149 + clients[deviceName] = grpcClient 150 + p.logger.V(1).Info("Created new gRPC client", "device", deviceName) 151 + } 152 + 153 + return clients, nil 154 + } 155 + 99 156 // GetInfo handles GET /hsm/info 100 157 func (p *ProxyClient) GetInfo(c *gin.Context) { 101 158 grpcClient, err := p.getOrCreateGRPCClient(c) ··· 177 234 p.server.sendResponse(c, http.StatusOK, "Secret read successfully", response) 178 235 } 179 236 180 - // WriteSecret handles POST/PUT /hsm/secrets/:path 237 + // WriteSecret handles POST/PUT /hsm/secrets/:path with mirroring support 181 238 func (p *ProxyClient) WriteSecret(c *gin.Context) { 182 239 path := c.Param("path") 183 240 if path == "" { ··· 189 246 var req struct { 190 247 Data map[string]string `json:"data" binding:"required"` 191 248 Metadata *hsm.SecretMetadata `json:"metadata,omitempty"` 249 + Mirror *bool `json:"mirror,omitempty"` // Enable/disable mirroring for this request 192 250 } 193 251 if err := c.ShouldBindJSON(&req); err != nil { 194 252 p.server.sendError(c, http.StatusBadRequest, "parse_error", "Failed to parse request body", map[string]any{ ··· 203 261 data[key] = []byte(value) 204 262 } 205 263 206 - grpcClient, err := p.getOrCreateGRPCClient(c) 207 - if err != nil { 208 - p.server.sendError(c, http.StatusServiceUnavailable, "no_agent", "No HSM agents available", map[string]any{ 209 - "error": err.Error(), 210 - }) 211 - return 212 - } 264 + // Determine if we should mirror this write (default: true) 265 + shouldMirror := req.Mirror == nil || *req.Mirror 266 + 267 + if shouldMirror { 268 + // Get all available clients for mirroring 269 + clients, err := p.getAllAvailableGRPCClients(c) 270 + if err != nil { 271 + p.server.sendError(c, http.StatusServiceUnavailable, "no_agents", "No HSM agents available for mirroring", map[string]any{ 272 + "error": err.Error(), 273 + }) 274 + return 275 + } 276 + 277 + if len(clients) == 0 { 278 + p.server.sendError(c, http.StatusServiceUnavailable, "no_agents", "No HSM agents available", nil) 279 + return 280 + } 281 + 282 + // Add mirroring metadata 283 + metadata := req.Metadata 284 + if metadata == nil { 285 + metadata = &hsm.SecretMetadata{Tags: make(map[string]string)} 286 + } 287 + if metadata.Tags == nil { 288 + metadata.Tags = make(map[string]string) 289 + } 290 + metadata.Tags["sync.version"] = fmt.Sprintf("%d", time.Now().Unix()) 291 + metadata.Tags["sync.timestamp"] = time.Now().Format(time.RFC3339) 292 + metadata.Tags["sync.mirrored"] = "true" 293 + 294 + // Write to all devices in parallel 295 + results := p.writeToAllDevices(c.Request.Context(), clients, path, data, metadata) 296 + 297 + // Check results 298 + successful := 0 299 + var errors []string 300 + deviceResults := make(map[string]any) 301 + 302 + for deviceName, result := range results { 303 + deviceResults[deviceName] = map[string]any{ 304 + "success": result.Error == nil, 305 + "error": func() string { 306 + if result.Error != nil { 307 + return result.Error.Error() 308 + } 309 + return "" 310 + }(), 311 + } 312 + 313 + if result.Error == nil { 314 + successful++ 315 + } else { 316 + errors = append(errors, fmt.Sprintf("%s: %v", deviceName, result.Error)) 317 + p.logger.Error(result.Error, "Failed to write to device", "device", deviceName, "path", path) 318 + } 319 + } 320 + 321 + // Consider the operation successful if we wrote to at least one device 322 + if successful > 0 { 323 + response := map[string]any{ 324 + "path": path, 325 + "keys": len(data), 326 + "mirrored": true, 327 + "devices": len(clients), 328 + "successful": successful, 329 + "deviceResults": deviceResults, 330 + } 331 + if metadata != nil { 332 + response["metadata"] = metadata 333 + } 334 + if len(errors) > 0 { 335 + response["warnings"] = errors 336 + } 213 337 214 - if req.Metadata != nil { 215 - err = grpcClient.WriteSecretWithMetadata(c.Request.Context(), path, data, req.Metadata) 338 + statusCode := http.StatusCreated 339 + message := "Secret written successfully" 340 + if successful < len(clients) { 341 + statusCode = http.StatusPartialContent // 206 indicates partial success 342 + message = fmt.Sprintf("Secret written to %d/%d devices", successful, len(clients)) 343 + } 344 + 345 + p.server.sendResponse(c, statusCode, message, response) 346 + } else { 347 + // All devices failed 348 + p.server.sendError(c, http.StatusInternalServerError, "write_failed", "Failed to write secret to any HSM device", map[string]any{ 349 + "errors": errors, 350 + "deviceResults": deviceResults, 351 + "path": path, 352 + }) 353 + } 216 354 } else { 217 - err = grpcClient.WriteSecret(c.Request.Context(), path, data) 218 - } 355 + // Single-device write (no mirroring) 356 + grpcClient, err := p.getOrCreateGRPCClient(c) 357 + if err != nil { 358 + p.server.sendError(c, http.StatusServiceUnavailable, "no_agent", "No HSM agents available", map[string]any{ 359 + "error": err.Error(), 360 + }) 361 + return 362 + } 219 363 220 - if err != nil { 221 - p.server.sendError(c, http.StatusInternalServerError, "grpc_error", "Failed to write secret to HSM", map[string]any{ 222 - "error": err.Error(), 223 - "path": path, 224 - }) 225 - return 226 - } 364 + if req.Metadata != nil { 365 + err = grpcClient.WriteSecretWithMetadata(c.Request.Context(), path, data, req.Metadata) 366 + } else { 367 + err = grpcClient.WriteSecret(c.Request.Context(), path, data) 368 + } 227 369 228 - response := map[string]any{ 229 - "path": path, 230 - "keys": len(data), 231 - } 232 - if req.Metadata != nil { 233 - response["metadata"] = req.Metadata 370 + if err != nil { 371 + p.server.sendError(c, http.StatusInternalServerError, "grpc_error", "Failed to write secret to HSM", map[string]any{ 372 + "error": err.Error(), 373 + "path": path, 374 + }) 375 + return 376 + } 377 + 378 + response := map[string]any{ 379 + "path": path, 380 + "keys": len(data), 381 + "mirrored": false, 382 + } 383 + if req.Metadata != nil { 384 + response["metadata"] = req.Metadata 385 + } 386 + p.server.sendResponse(c, http.StatusCreated, "Secret written successfully", response) 234 387 } 235 - p.server.sendResponse(c, http.StatusCreated, "Secret written successfully", response) 236 388 } 237 389 238 - // DeleteSecret handles DELETE /hsm/secrets/:path 390 + // DeleteSecret handles DELETE /hsm/secrets/:path with mirroring support 239 391 func (p *ProxyClient) DeleteSecret(c *gin.Context) { 240 392 path := c.Param("path") 241 393 if path == "" { ··· 243 395 return 244 396 } 245 397 246 - grpcClient, err := p.getOrCreateGRPCClient(c) 247 - if err != nil { 248 - p.server.sendError(c, http.StatusServiceUnavailable, "no_agent", "No HSM agents available", map[string]any{ 249 - "error": err.Error(), 250 - }) 251 - return 252 - } 398 + // Check if mirroring is explicitly disabled 399 + mirror := c.Query("mirror") 400 + shouldMirror := mirror != "false" // Default to true unless explicitly set to false 401 + 402 + if shouldMirror { 403 + // Get all available clients for mirroring delete 404 + clients, err := p.getAllAvailableGRPCClients(c) 405 + if err != nil { 406 + p.server.sendError(c, http.StatusServiceUnavailable, "no_agents", "No HSM agents available for mirroring", map[string]any{ 407 + "error": err.Error(), 408 + }) 409 + return 410 + } 411 + 412 + if len(clients) == 0 { 413 + p.server.sendError(c, http.StatusServiceUnavailable, "no_agents", "No HSM agents available", nil) 414 + return 415 + } 416 + 417 + // Delete from all devices in parallel 418 + results := p.deleteFromAllDevices(c.Request.Context(), clients, path) 419 + 420 + // Check results 421 + successful := 0 422 + var errors []string 423 + deviceResults := make(map[string]any) 424 + 425 + for deviceName, result := range results { 426 + deviceResults[deviceName] = map[string]any{ 427 + "success": result.Error == nil, 428 + "error": func() string { 429 + if result.Error != nil { 430 + return result.Error.Error() 431 + } 432 + return "" 433 + }(), 434 + } 435 + 436 + if result.Error == nil { 437 + successful++ 438 + } else { 439 + errors = append(errors, fmt.Sprintf("%s: %v", deviceName, result.Error)) 440 + p.logger.Error(result.Error, "Failed to delete from device", "device", deviceName, "path", path) 441 + } 442 + } 443 + 444 + // Consider the operation successful if we deleted from at least one device 445 + if successful > 0 { 446 + response := map[string]any{ 447 + "path": path, 448 + "mirrored": true, 449 + "devices": len(clients), 450 + "successful": successful, 451 + "deviceResults": deviceResults, 452 + } 453 + if len(errors) > 0 { 454 + response["warnings"] = errors 455 + } 456 + 457 + statusCode := http.StatusOK 458 + message := "Secret deleted successfully" 459 + if successful < len(clients) { 460 + statusCode = http.StatusPartialContent // 206 indicates partial success 461 + message = fmt.Sprintf("Secret deleted from %d/%d devices", successful, len(clients)) 462 + } 463 + 464 + p.server.sendResponse(c, statusCode, message, response) 465 + } else { 466 + // All devices failed 467 + p.server.sendError(c, http.StatusInternalServerError, "delete_failed", "Failed to delete secret from any HSM device", map[string]any{ 468 + "errors": errors, 469 + "deviceResults": deviceResults, 470 + "path": path, 471 + }) 472 + } 473 + } else { 474 + // Single-device delete (no mirroring) 475 + grpcClient, err := p.getOrCreateGRPCClient(c) 476 + if err != nil { 477 + p.server.sendError(c, http.StatusServiceUnavailable, "no_agent", "No HSM agents available", map[string]any{ 478 + "error": err.Error(), 479 + }) 480 + return 481 + } 253 482 254 - err = grpcClient.DeleteSecret(c.Request.Context(), path) 255 - if err != nil { 256 - p.server.sendError(c, http.StatusInternalServerError, "grpc_error", "Failed to delete secret from HSM", map[string]any{ 257 - "error": err.Error(), 258 - "path": path, 259 - }) 260 - return 261 - } 483 + err = grpcClient.DeleteSecret(c.Request.Context(), path) 484 + if err != nil { 485 + p.server.sendError(c, http.StatusInternalServerError, "grpc_error", "Failed to delete secret from HSM", map[string]any{ 486 + "error": err.Error(), 487 + "path": path, 488 + }) 489 + return 490 + } 262 491 263 - response := map[string]any{ 264 - "path": path, 492 + response := map[string]any{ 493 + "path": path, 494 + "mirrored": false, 495 + } 496 + p.server.sendResponse(c, http.StatusOK, "Secret deleted successfully", response) 265 497 } 266 - p.server.sendResponse(c, http.StatusOK, "Secret deleted successfully", response) 267 498 } 268 499 269 500 // ReadMetadata handles GET /hsm/secrets/:path/metadata ··· 395 626 p.clientsMutex.RLock() 396 627 defer p.clientsMutex.RUnlock() 397 628 return len(p.grpcClients) 629 + } 630 + 631 + // writeToAllDevices writes secret data to all devices in parallel 632 + func (p *ProxyClient) writeToAllDevices(ctx context.Context, clients map[string]hsm.Client, path string, data hsm.SecretData, metadata *hsm.SecretMetadata) map[string]WriteResult { 633 + results := make(map[string]WriteResult) 634 + resultsMutex := sync.Mutex{} 635 + wg := sync.WaitGroup{} 636 + 637 + for deviceName, client := range clients { 638 + wg.Add(1) 639 + go func(deviceName string, client hsm.Client) { 640 + defer wg.Done() 641 + 642 + var err error 643 + if metadata != nil { 644 + err = client.WriteSecretWithMetadata(ctx, path, data, metadata) 645 + } else { 646 + err = client.WriteSecret(ctx, path, data) 647 + } 648 + 649 + resultsMutex.Lock() 650 + results[deviceName] = WriteResult{ 651 + DeviceName: deviceName, 652 + Error: err, 653 + } 654 + resultsMutex.Unlock() 655 + }(deviceName, client) 656 + } 657 + 658 + wg.Wait() 659 + return results 660 + } 661 + 662 + // deleteFromAllDevices deletes secret data from all devices in parallel 663 + func (p *ProxyClient) deleteFromAllDevices(ctx context.Context, clients map[string]hsm.Client, path string) map[string]WriteResult { 664 + results := make(map[string]WriteResult) 665 + resultsMutex := sync.Mutex{} 666 + wg := sync.WaitGroup{} 667 + 668 + for deviceName, client := range clients { 669 + wg.Add(1) 670 + go func(deviceName string, client hsm.Client) { 671 + defer wg.Done() 672 + 673 + err := client.DeleteSecret(ctx, path) 674 + 675 + resultsMutex.Lock() 676 + results[deviceName] = WriteResult{ 677 + DeviceName: deviceName, 678 + Error: err, 679 + } 680 + resultsMutex.Unlock() 681 + }(deviceName, client) 682 + } 683 + 684 + wg.Wait() 685 + return results 398 686 } 399 687 400 688 // Interface compliance methods (unused in HTTP mode but required for hsm.Client interface)
+42 -20
internal/api/server.go
··· 29 29 30 30 hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 31 31 "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 32 - "github.com/evanjarrett/hsm-secrets-operator/internal/discovery" 33 32 "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 34 33 ) 35 34 36 35 // Server represents the HSM REST API server that proxies requests to agent pods 37 36 type Server struct { 38 - client client.Client 39 - agentManager *agent.Manager 40 - mirroringManager *discovery.MirroringManager 41 - validator *validator.Validate 42 - logger logr.Logger 43 - router *gin.Engine 44 - proxyClient *ProxyClient 37 + client client.Client 38 + agentManager *agent.Manager 39 + validator *validator.Validate 40 + logger logr.Logger 41 + router *gin.Engine 42 + proxyClient *ProxyClient 45 43 } 46 44 47 45 // NewServer creates a new API server instance that proxies to agents 48 - func NewServer(k8sClient client.Client, agentManager *agent.Manager, mirroringManager *discovery.MirroringManager, logger logr.Logger) *Server { 46 + func NewServer(k8sClient client.Client, agentManager *agent.Manager, logger logr.Logger) *Server { 49 47 s := &Server{ 50 - client: k8sClient, 51 - agentManager: agentManager, 52 - mirroringManager: mirroringManager, 53 - validator: validator.New(), 54 - logger: logger.WithName("api-server"), 48 + client: k8sClient, 49 + agentManager: agentManager, 50 + validator: validator.New(), 51 + logger: logger.WithName("api-server"), 55 52 } 56 53 57 54 // Create ProxyClient instance ··· 89 86 // In proxy mode, check if any agents are available 90 87 _, agentErr := s.findAvailableAgent(c.Request.Context(), "secrets") 91 88 hsmConnected := agentErr == nil 92 - replicationEnabled := s.mirroringManager != nil 93 - activeNodes := 0 94 89 95 - if s.mirroringManager != nil { 96 - // Count active nodes (simplified - in real implementation would check actual node health) 97 - activeNodes = 1 // Current node 98 - } 90 + // Check if multiple agents are available for replication 91 + agents, _ := s.getAllAvailableAgents(c.Request.Context(), "secrets") 92 + replicationEnabled := len(agents) > 1 93 + activeNodes := len(agents) 99 94 100 95 status := "healthy" 101 96 if !hsmConnected { ··· 192 187 } 193 188 194 189 return "", fmt.Errorf("no available HSM agents found") 190 + } 191 + 192 + // getAllAvailableAgents finds all available HSM agents for mirroring operations 193 + func (s *Server) getAllAvailableAgents(ctx context.Context, namespace string) ([]string, error) { 194 + if s.agentManager == nil { 195 + return nil, fmt.Errorf("agent manager not available") 196 + } 197 + 198 + // List all HSMDevices to find all with active agents 199 + var hsmDeviceList hsmv1alpha1.HSMDeviceList 200 + if err := s.client.List(ctx, &hsmDeviceList, client.InNamespace(namespace)); err != nil { 201 + return nil, fmt.Errorf("failed to list HSM devices: %w", err) 202 + } 203 + 204 + var availableDevices []string 205 + // Check all devices that have active agents with pod IPs 206 + for _, device := range hsmDeviceList.Items { 207 + if podIPs, err := s.agentManager.GetAgentPodIPs(device.Name); err == nil && len(podIPs) > 0 { 208 + availableDevices = append(availableDevices, device.Name) 209 + } 210 + } 211 + 212 + if len(availableDevices) == 0 { 213 + return nil, fmt.Errorf("no available HSM agents found") 214 + } 215 + 216 + return availableDevices, nil 195 217 } 196 218 197 219 // createGRPCClient creates a gRPC client for the specified device using AgentManager
+13 -30
internal/controller/hsmsecret_controller.go
··· 35 35 36 36 hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 37 37 "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 38 - "github.com/evanjarrett/hsm-secrets-operator/internal/discovery" 39 38 "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 40 39 ) 41 40 ··· 44 43 HSMSecretFinalizer = "hsmsecret.hsm.j5t.io/finalizer" 45 44 46 45 // DefaultSyncInterval is the default sync interval in seconds 47 - DefaultSyncInterval = 300 46 + DefaultSyncInterval = 30 48 47 ) 49 48 50 49 // HSMSecretReconciler reconciles a HSMSecret object 51 50 type HSMSecretReconciler struct { 52 51 client.Client 53 - Scheme *runtime.Scheme 54 - MirroringManager *discovery.MirroringManager 55 - AgentManager *agent.Manager 52 + Scheme *runtime.Scheme 53 + AgentManager *agent.Manager 56 54 } 57 55 58 56 // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmsecrets,verbs=get;list;watch;create;update;patch;delete ··· 176 174 syncInterval = DefaultSyncInterval 177 175 } 178 176 179 - // Read secret from HSM with readonly fallback support 180 - hsmData, err := r.readSecretWithFallback(ctx, hsmSecret, hsmClient) 177 + // Read secret from HSM via agent 178 + hsmData, err := r.readSecretFromHSM(ctx, hsmSecret, hsmClient) 181 179 if err != nil { 182 - logger.Error(err, "Failed to read secret from HSM and mirrors", "path", hsmSecret.Name) 180 + logger.Error(err, "Failed to read secret from HSM", "path", hsmSecret.Name) 183 181 return ctrl.Result{RequeueAfter: time.Minute * 2}, err 184 182 } 185 183 ··· 367 365 } 368 366 } 369 367 370 - // readSecretWithFallback attempts to read a secret from primary HSM, falling back to mirrors if needed 371 - func (r *HSMSecretReconciler) readSecretWithFallback(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret, hsmClient hsm.Client) (hsm.SecretData, error) { 368 + // readSecretFromHSM attempts to read a secret from HSM via agent 369 + func (r *HSMSecretReconciler) readSecretFromHSM(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret, hsmClient hsm.Client) (hsm.SecretData, error) { 372 370 logger := log.FromContext(ctx) 373 371 374 - // Try to read from primary HSM first (via agent) 372 + // Read from HSM via agent (sync handles mirroring automatically) 375 373 if hsmClient != nil && hsmClient.IsConnected() { 376 374 data, err := hsmClient.ReadSecret(ctx, hsmSecret.Name) 377 375 if err == nil { 378 - logger.V(1).Info("Successfully read secret from primary HSM", "path", hsmSecret.Name) 376 + logger.V(1).Info("Successfully read secret from HSM", "path", hsmSecret.Name) 379 377 return data, nil 380 378 } 381 - logger.V(1).Info("Failed to read from primary HSM, attempting fallback", "error", err) 379 + logger.V(1).Info("Failed to read from HSM", "error", err) 380 + return nil, err 382 381 } 383 382 384 - // If primary failed and we have a mirroring manager, try readonly access from mirrors 385 - if r.MirroringManager != nil { 386 - // Find relevant HSMDevice for this secret path 387 - hsmDevice, err := r.findHSMDeviceForSecret(ctx, hsmSecret) 388 - if err != nil { 389 - logger.Error(err, "Failed to find HSM device for readonly fallback") 390 - } else if hsmDevice != nil { 391 - data, err := r.MirroringManager.GetReadOnlyAccess(ctx, hsmSecret.Name, hsmDevice) 392 - if err == nil { 393 - logger.Info("Successfully read secret from readonly mirror", "path", hsmSecret.Name) 394 - return data, nil 395 - } 396 - logger.V(1).Info("Failed to read from mirrors", "error", err) 397 - } 398 - } 399 - 400 - return nil, fmt.Errorf("secret not accessible from primary HSM or mirrors") 383 + return nil, fmt.Errorf("HSM client not available or not connected") 401 384 } 402 385 403 386 // findHSMDeviceForSecret finds the HSMDevice that should contain the secret
+128
internal/controller/hsmsync_controller.go
··· 1 + /* 2 + Copyright 2025. 3 + 4 + Licensed under the Apache License, Version 2.0 (the "License"); 5 + you may not use this file except in compliance with the License. 6 + You may obtain a copy of the License at 7 + 8 + http://www.apache.org/licenses/LICENSE-2.0 9 + 10 + Unless required by applicable law or agreed to in writing, software 11 + distributed under the License is distributed on an "AS IS" BASIS, 12 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 + See the License for the specific language governing permissions and 14 + limitations under the License. 15 + */ 16 + 17 + package controller 18 + 19 + import ( 20 + "context" 21 + "time" 22 + 23 + "k8s.io/apimachinery/pkg/runtime" 24 + ctrl "sigs.k8s.io/controller-runtime" 25 + "sigs.k8s.io/controller-runtime/pkg/client" 26 + "sigs.k8s.io/controller-runtime/pkg/log" 27 + 28 + hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 29 + "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 30 + "github.com/evanjarrett/hsm-secrets-operator/internal/sync" 31 + ) 32 + 33 + // HSMSyncReconciler handles multi-device HSM synchronization and conflict resolution 34 + type HSMSyncReconciler struct { 35 + client.Client 36 + Scheme *runtime.Scheme 37 + SyncManager *sync.SyncManager 38 + 39 + // SyncInterval controls how often to perform sync checks (default: 30 seconds) 40 + SyncInterval time.Duration 41 + } 42 + 43 + // NewHSMSyncReconciler creates a new HSM sync reconciler 44 + func NewHSMSyncReconciler(k8sClient client.Client, scheme *runtime.Scheme, agentManager *agent.Manager) *HSMSyncReconciler { 45 + logger := ctrl.Log.WithName("hsm-sync-controller") 46 + syncManager := sync.NewSyncManager(k8sClient, agentManager, logger) 47 + 48 + return &HSMSyncReconciler{ 49 + Client: k8sClient, 50 + Scheme: scheme, 51 + SyncManager: syncManager, 52 + SyncInterval: 30 * time.Second, // Default sync interval 53 + } 54 + } 55 + 56 + // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmsecrets,verbs=get;list;watch;update;patch 57 + // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmsecrets/status,verbs=get;update;patch 58 + // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmpools,verbs=get;list;watch 59 + // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmdevices,verbs=get;list;watch 60 + 61 + // Reconcile performs HSM device synchronization and conflict resolution 62 + func (r *HSMSyncReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 63 + logger := log.FromContext(ctx) 64 + 65 + // Fetch the HSMSecret instance 66 + var hsmSecret hsmv1alpha1.HSMSecret 67 + if err := r.Get(ctx, req.NamespacedName, &hsmSecret); err != nil { 68 + return ctrl.Result{}, client.IgnoreNotFound(err) 69 + } 70 + 71 + // Skip sync if AutoSync is disabled 72 + if !hsmSecret.Spec.AutoSync { 73 + logger.V(1).Info("AutoSync disabled, skipping sync", "secret", hsmSecret.Name) 74 + return ctrl.Result{}, nil 75 + } 76 + 77 + logger.Info("Starting multi-device HSM sync", "secret", hsmSecret.Name) 78 + 79 + // Perform the sync operation 80 + result, err := r.SyncManager.SyncSecret(ctx, &hsmSecret) 81 + if err != nil { 82 + logger.Error(err, "Failed to perform HSM sync", "secret", hsmSecret.Name) 83 + 84 + // Update status with error 85 + hsmSecret.Status.SyncStatus = hsmv1alpha1.SyncStatusError 86 + hsmSecret.Status.LastError = err.Error() 87 + if updateErr := r.Status().Update(ctx, &hsmSecret); updateErr != nil { 88 + logger.Error(updateErr, "Failed to update HSMSecret status") 89 + } 90 + 91 + // Retry sooner on error 92 + return ctrl.Result{RequeueAfter: r.SyncInterval / 2}, nil 93 + } 94 + 95 + // Update HSMSecret status with sync results 96 + if err := r.SyncManager.UpdateHSMSecretStatus(ctx, &hsmSecret, result); err != nil { 97 + logger.Error(err, "Failed to update HSMSecret status", "secret", hsmSecret.Name) 98 + return ctrl.Result{RequeueAfter: r.SyncInterval / 2}, err 99 + } 100 + 101 + // Log sync results 102 + if result.ConflictDetected { 103 + logger.Info("Conflict detected and resolved", 104 + "secret", hsmSecret.Name, 105 + "primaryDevice", result.PrimaryDevice, 106 + "devices", len(result.DeviceResults)) 107 + } else { 108 + logger.V(1).Info("HSM sync completed successfully", 109 + "secret", hsmSecret.Name, 110 + "devices", len(result.DeviceResults)) 111 + } 112 + 113 + // Calculate next sync interval based on HSMSecret spec 114 + syncInterval := r.SyncInterval 115 + if hsmSecret.Spec.SyncInterval > 0 { 116 + syncInterval = time.Duration(hsmSecret.Spec.SyncInterval) * time.Second 117 + } 118 + 119 + return ctrl.Result{RequeueAfter: syncInterval}, nil 120 + } 121 + 122 + // SetupWithManager sets up the controller with the Manager. 123 + func (r *HSMSyncReconciler) SetupWithManager(mgr ctrl.Manager) error { 124 + return ctrl.NewControllerManagedBy(mgr). 125 + For(&hsmv1alpha1.HSMSecret{}). 126 + Named("hsmsync"). 127 + Complete(r) 128 + }
-114
internal/discovery/mirroring.go
··· 1 - /* 2 - Copyright 2025. 3 - 4 - Licensed under the Apache License, Version 2.0 (the "License"); 5 - you may not use this file except in compliance with the License. 6 - You may obtain a copy of the License at 7 - 8 - http://www.apache.org/licenses/LICENSE-2.0 9 - 10 - Unless required by applicable law or agreed to in writing, software 11 - distributed under the License is distributed on an "AS IS" BASIS, 12 - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 - See the License for the specific language governing permissions and 14 - limitations under the License. 15 - */ 16 - 17 - // TODO: This entire mirroring system needs to be redesigned for the new HSMPool architecture. 18 - // The previous implementation tried to modify HSMDevice.Status which no longer exists. 19 - // Providing stub implementations to avoid compilation errors while the new architecture is implemented. 20 - 21 - package discovery 22 - 23 - import ( 24 - "context" 25 - "fmt" 26 - "sync" 27 - "time" 28 - 29 - "github.com/go-logr/logr" 30 - ctrl "sigs.k8s.io/controller-runtime" 31 - "sigs.k8s.io/controller-runtime/pkg/client" 32 - 33 - hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 34 - "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 35 - ) 36 - 37 - // MirroredSecretData represents secret data with metadata for mirroring 38 - type MirroredSecretData struct { 39 - Path string `json:"path"` 40 - Data hsm.SecretData `json:"data"` 41 - Checksum string `json:"checksum"` 42 - LastModified time.Time `json:"lastModified"` 43 - SourceNode string `json:"sourceNode"` 44 - Metadata map[string]string `json:"metadata"` 45 - } 46 - 47 - // MirroringManager handles HSM device mirroring and cross-node synchronization 48 - // TODO: Redesign this for HSMPool architecture 49 - type MirroringManager struct { 50 - client client.Client 51 - logger logr.Logger 52 - mutex sync.RWMutex 53 - hsmClients map[string]hsm.Client 54 - syncTimeout time.Duration 55 - } 56 - 57 - // NewMirroringManager creates a new mirroring manager 58 - func NewMirroringManager(k8sClient client.Client, logger logr.Logger) *MirroringManager { 59 - return &MirroringManager{ 60 - client: k8sClient, 61 - logger: logger, 62 - hsmClients: make(map[string]hsm.Client), 63 - syncTimeout: 30 * time.Second, 64 - } 65 - } 66 - 67 - // RegisterHSMClient registers an HSM client for a specific node 68 - func (m *MirroringManager) RegisterHSMClient(nodeName string, hsmClient hsm.Client) { 69 - m.mutex.Lock() 70 - defer m.mutex.Unlock() 71 - m.hsmClients[nodeName] = hsmClient 72 - } 73 - 74 - // UnregisterHSMClient removes an HSM client for a node 75 - func (m *MirroringManager) UnregisterHSMClient(nodeName string) { 76 - m.mutex.Lock() 77 - defer m.mutex.Unlock() 78 - delete(m.hsmClients, nodeName) 79 - } 80 - 81 - // SyncDevices synchronizes HSM devices across mirror nodes 82 - // TODO: Redesign for HSMPool architecture 83 - func (m *MirroringManager) SyncDevices(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice) error { 84 - m.logger.Info("Device sync needs redesign for HSMPool architecture", "device", hsmDevice.Name) 85 - return fmt.Errorf("device sync functionality needs to be redesigned for HSMPool architecture") 86 - } 87 - 88 - // TODO: The following functions will be redesigned for HSMPool architecture: 89 - // - determineMirrorTopology 90 - // - syncFromPrimary 91 - // - updateMirroringStatus 92 - 93 - // GetReadOnlyAccess provides read-only access to HSM data during failover scenarios 94 - // TODO: Redesign for HSMPool architecture 95 - func (m *MirroringManager) GetReadOnlyAccess(ctx context.Context, secretPath string, hsmDevice *hsmv1alpha1.HSMDevice) (hsm.SecretData, error) { 96 - m.logger.Info("Read-only access needs redesign for HSMPool architecture", 97 - "device", hsmDevice.Name, 98 - "secretPath", secretPath) 99 - return nil, fmt.Errorf("read-only access functionality needs to be redesigned for HSMPool architecture") 100 - } 101 - 102 - // HandleFailover handles automatic failover to a healthy mirror node 103 - // TODO: Redesign for HSMPool architecture 104 - func (m *MirroringManager) HandleFailover(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice) error { 105 - m.logger.Info("Failover handling needs redesign for HSMPool architecture", "device", hsmDevice.Name) 106 - return fmt.Errorf("failover functionality needs to be redesigned for HSMPool architecture") 107 - } 108 - 109 - // SetupWithManager sets up the mirroring manager with the controller manager 110 - func (m *MirroringManager) SetupWithManager(mgr ctrl.Manager) error { 111 - m.logger.Info("Mirroring manager setup - functionality needs redesign for HSMPool architecture") 112 - // TODO: Set up watches on HSMPool resources instead of HSMDevice 113 - return nil 114 - }
+6 -11
internal/modes/manager/manager.go
··· 40 40 "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 41 41 "github.com/evanjarrett/hsm-secrets-operator/internal/api" 42 42 "github.com/evanjarrett/hsm-secrets-operator/internal/controller" 43 - "github.com/evanjarrett/hsm-secrets-operator/internal/discovery" 44 43 ) 45 44 46 45 var ( ··· 210 209 return err 211 210 } 212 211 213 - // Initialize mirroring manager for HSMSecret controller device failover 214 - // Note: Device discovery is handled by separate discovery daemon 215 - mirroringManager := discovery.NewMirroringManager(mgr.GetClient(), setupLog) 216 - 217 - // HSM client registration removed - now handled by agent architecture 212 + // HSM mirroring is now handled by the sync package and HSMSyncReconciler 213 + // Device discovery is handled by separate discovery daemon 218 214 219 215 // Agent manager will detect the current namespace automatically 220 216 imageResolver := controller.NewImageResolver(mgr.GetClient()) ··· 241 237 } 242 238 243 239 if err := (&controller.HSMSecretReconciler{ 244 - Client: mgr.GetClient(), 245 - Scheme: mgr.GetScheme(), 246 - MirroringManager: mirroringManager, 247 - AgentManager: agentManager, 240 + Client: mgr.GetClient(), 241 + Scheme: mgr.GetScheme(), 242 + AgentManager: agentManager, 248 243 }).SetupWithManager(mgr); err != nil { 249 244 setupLog.Error(err, "unable to create controller", "controller", "HSMSecret") 250 245 return err ··· 287 282 288 283 // Start API server if enabled 289 284 if enableAPI { 290 - apiServer := api.NewServer(mgr.GetClient(), agentManager, mirroringManager, ctrl.Log.WithName("api")) 285 + apiServer := api.NewServer(mgr.GetClient(), agentManager, ctrl.Log.WithName("api")) 291 286 292 287 // Start API server in a separate goroutine 293 288 go func() {
+304
internal/sync/conflict_resolver.go
··· 1 + /* 2 + Copyright 2025. 3 + 4 + Licensed under the Apache License, Version 2.0 (the "License"); 5 + you may not use this file except in compliance with the License. 6 + You may obtain a copy of the License at 7 + 8 + http://www.apache.org/licenses/LICENSE-2.0 9 + 10 + Unless required by applicable law or agreed to in writing, software 11 + distributed under the License is distributed on an "AS IS" BASIS, 12 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 + See the License for the specific language governing permissions and 14 + limitations under the License. 15 + */ 16 + 17 + package sync 18 + 19 + import ( 20 + "context" 21 + "fmt" 22 + "time" 23 + 24 + "github.com/go-logr/logr" 25 + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 + 27 + hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 28 + "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 29 + ) 30 + 31 + // ConflictResolutionStrategy defines how conflicts should be resolved 32 + type ConflictResolutionStrategy string 33 + 34 + const ( 35 + // StrategyLatestVersion resolves conflicts by choosing the device with the highest version 36 + StrategyLatestVersion ConflictResolutionStrategy = "latest-version" 37 + 38 + // StrategyLatestTimestamp resolves conflicts by choosing the most recently modified data 39 + StrategyLatestTimestamp ConflictResolutionStrategy = "latest-timestamp" 40 + 41 + // StrategyManualResolution requires manual intervention for conflict resolution 42 + StrategyManualResolution ConflictResolutionStrategy = "manual" 43 + 44 + // StrategyPrimaryDevice always uses the designated primary device as source of truth 45 + StrategyPrimaryDevice ConflictResolutionStrategy = "primary-device" 46 + ) 47 + 48 + // ConflictResolver handles HSM device synchronization conflicts 49 + type ConflictResolver struct { 50 + logger logr.Logger 51 + strategy ConflictResolutionStrategy 52 + } 53 + 54 + // NewConflictResolver creates a new conflict resolver 55 + func NewConflictResolver(logger logr.Logger, strategy ConflictResolutionStrategy) *ConflictResolver { 56 + return &ConflictResolver{ 57 + logger: logger.WithName("conflict-resolver"), 58 + strategy: strategy, 59 + } 60 + } 61 + 62 + // ConflictInfo represents detected conflict information 63 + type ConflictInfo struct { 64 + SecretPath string 65 + Devices []DeviceConflictData 66 + DetectedAt time.Time 67 + ResolutionRef string // Reference to resolution method used 68 + } 69 + 70 + // DeviceConflictData represents conflict data from a specific device 71 + type DeviceConflictData struct { 72 + DeviceName string 73 + Checksum string 74 + Version int64 75 + Timestamp time.Time 76 + Data hsm.SecretData 77 + Online bool 78 + Error error 79 + } 80 + 81 + // ResolveConflict resolves a detected conflict using the configured strategy 82 + func (cr *ConflictResolver) ResolveConflict(ctx context.Context, conflict *ConflictInfo, hsmSecret *hsmv1alpha1.HSMSecret) (*ResolutionResult, error) { 83 + logger := cr.logger.WithValues("secret", hsmSecret.Name, "strategy", cr.strategy) 84 + logger.Info("Resolving HSM sync conflict", "devices", len(conflict.Devices)) 85 + 86 + switch cr.strategy { 87 + case StrategyLatestVersion: 88 + return cr.resolveByLatestVersion(conflict, logger) 89 + case StrategyLatestTimestamp: 90 + return cr.resolveByLatestTimestamp(conflict, logger) 91 + case StrategyPrimaryDevice: 92 + return cr.resolveByPrimaryDevice(conflict, hsmSecret, logger) 93 + case StrategyManualResolution: 94 + return cr.requireManualResolution(conflict, hsmSecret, logger) 95 + default: 96 + return nil, fmt.Errorf("unknown conflict resolution strategy: %s", cr.strategy) 97 + } 98 + } 99 + 100 + // ResolutionResult represents the result of conflict resolution 101 + type ResolutionResult struct { 102 + Winner *DeviceConflictData 103 + Resolution ConflictResolutionStrategy 104 + RequiresManualIntervention bool 105 + SyncTargets []string // Devices that need to be updated 106 + ResolvedData hsm.SecretData 107 + } 108 + 109 + // resolveByLatestVersion chooses the device with the highest version number 110 + func (cr *ConflictResolver) resolveByLatestVersion(conflict *ConflictInfo, logger logr.Logger) (*ResolutionResult, error) { 111 + var winner *DeviceConflictData 112 + highestVersion := int64(-1) 113 + 114 + // Find device with highest version among online devices 115 + for i := range conflict.Devices { 116 + device := &conflict.Devices[i] 117 + if device.Online && device.Error == nil && device.Version > highestVersion { 118 + highestVersion = device.Version 119 + winner = device 120 + } 121 + } 122 + 123 + if winner == nil { 124 + return nil, fmt.Errorf("no online devices with valid version found") 125 + } 126 + 127 + // Determine which devices need updating 128 + var syncTargets []string 129 + for _, device := range conflict.Devices { 130 + if device.DeviceName != winner.DeviceName && device.Online && device.Error == nil { 131 + syncTargets = append(syncTargets, device.DeviceName) 132 + } 133 + } 134 + 135 + logger.Info("Resolved conflict by latest version", 136 + "winner", winner.DeviceName, 137 + "version", winner.Version, 138 + "targets", syncTargets) 139 + 140 + return &ResolutionResult{ 141 + Winner: winner, 142 + Resolution: StrategyLatestVersion, 143 + SyncTargets: syncTargets, 144 + ResolvedData: winner.Data, 145 + }, nil 146 + } 147 + 148 + // resolveByLatestTimestamp chooses the device with the most recent timestamp 149 + func (cr *ConflictResolver) resolveByLatestTimestamp(conflict *ConflictInfo, logger logr.Logger) (*ResolutionResult, error) { 150 + var winner *DeviceConflictData 151 + var latestTime time.Time 152 + 153 + // Find device with most recent timestamp among online devices 154 + for i := range conflict.Devices { 155 + device := &conflict.Devices[i] 156 + if device.Online && device.Error == nil && device.Timestamp.After(latestTime) { 157 + latestTime = device.Timestamp 158 + winner = device 159 + } 160 + } 161 + 162 + if winner == nil { 163 + return nil, fmt.Errorf("no online devices with valid timestamp found") 164 + } 165 + 166 + // Determine which devices need updating 167 + var syncTargets []string 168 + for _, device := range conflict.Devices { 169 + if device.DeviceName != winner.DeviceName && device.Online && device.Error == nil { 170 + syncTargets = append(syncTargets, device.DeviceName) 171 + } 172 + } 173 + 174 + logger.Info("Resolved conflict by latest timestamp", 175 + "winner", winner.DeviceName, 176 + "timestamp", winner.Timestamp, 177 + "targets", syncTargets) 178 + 179 + return &ResolutionResult{ 180 + Winner: winner, 181 + Resolution: StrategyLatestTimestamp, 182 + SyncTargets: syncTargets, 183 + ResolvedData: winner.Data, 184 + }, nil 185 + } 186 + 187 + // resolveByPrimaryDevice uses the designated primary device as the source of truth 188 + func (cr *ConflictResolver) resolveByPrimaryDevice(conflict *ConflictInfo, hsmSecret *hsmv1alpha1.HSMSecret, logger logr.Logger) (*ResolutionResult, error) { 189 + primaryDevice := hsmSecret.Status.PrimaryDevice 190 + if primaryDevice == "" { 191 + // No primary device set, fall back to latest version strategy 192 + logger.Info("No primary device set, falling back to latest version strategy") 193 + return cr.resolveByLatestVersion(conflict, logger) 194 + } 195 + 196 + // Find the primary device in the conflict data 197 + var winner *DeviceConflictData 198 + for i := range conflict.Devices { 199 + device := &conflict.Devices[i] 200 + if device.DeviceName == primaryDevice { 201 + if device.Online && device.Error == nil { 202 + winner = device 203 + break 204 + } else { 205 + logger.Info("Primary device is offline or has errors, falling back to latest version", 206 + "primaryDevice", primaryDevice, 207 + "online", device.Online, 208 + "error", device.Error) 209 + return cr.resolveByLatestVersion(conflict, logger) 210 + } 211 + } 212 + } 213 + 214 + if winner == nil { 215 + logger.Info("Primary device not found in conflict, falling back to latest version", "primaryDevice", primaryDevice) 216 + return cr.resolveByLatestVersion(conflict, logger) 217 + } 218 + 219 + // Determine which devices need updating 220 + var syncTargets []string 221 + for _, device := range conflict.Devices { 222 + if device.DeviceName != winner.DeviceName && device.Online && device.Error == nil { 223 + syncTargets = append(syncTargets, device.DeviceName) 224 + } 225 + } 226 + 227 + logger.Info("Resolved conflict by primary device", 228 + "winner", winner.DeviceName, 229 + "targets", syncTargets) 230 + 231 + return &ResolutionResult{ 232 + Winner: winner, 233 + Resolution: StrategyPrimaryDevice, 234 + SyncTargets: syncTargets, 235 + ResolvedData: winner.Data, 236 + }, nil 237 + } 238 + 239 + // requireManualResolution marks the conflict as requiring manual intervention 240 + func (cr *ConflictResolver) requireManualResolution(conflict *ConflictInfo, hsmSecret *hsmv1alpha1.HSMSecret, logger logr.Logger) (*ResolutionResult, error) { 241 + logger.Info("Conflict marked for manual resolution", 242 + "devices", len(conflict.Devices), 243 + "secret", hsmSecret.Name) 244 + 245 + // Add condition to HSMSecret indicating manual resolution is required 246 + now := metav1.NewTime(time.Now()) 247 + condition := metav1.Condition{ 248 + Type: "ConflictResolutionRequired", 249 + Status: metav1.ConditionTrue, 250 + Reason: "ManualResolutionRequired", 251 + Message: fmt.Sprintf("Conflict detected between %d devices requires manual resolution", len(conflict.Devices)), 252 + LastTransitionTime: now, 253 + } 254 + 255 + // Update conditions (this would be done by the caller) 256 + _ = condition 257 + 258 + return &ResolutionResult{ 259 + RequiresManualIntervention: true, 260 + Resolution: StrategyManualResolution, 261 + SyncTargets: []string{}, // No automatic sync 262 + }, nil 263 + } 264 + 265 + // DetectConflicts analyzes device sync results to identify conflicts 266 + func (cr *ConflictResolver) DetectConflicts(deviceResults map[string]DeviceResult, secretPath string) (*ConflictInfo, bool) { 267 + // Group devices by checksum 268 + checksumGroups := make(map[string][]string) 269 + conflictDevices := make([]DeviceConflictData, 0) 270 + 271 + for deviceName, result := range deviceResults { 272 + if result.Online && result.Error == nil && result.Checksum != "" { 273 + if _, exists := checksumGroups[result.Checksum]; !exists { 274 + checksumGroups[result.Checksum] = make([]string, 0) 275 + } 276 + checksumGroups[result.Checksum] = append(checksumGroups[result.Checksum], deviceName) 277 + 278 + // Add to conflict devices list 279 + conflictDevices = append(conflictDevices, DeviceConflictData{ 280 + DeviceName: deviceName, 281 + Checksum: result.Checksum, 282 + Version: result.Version, 283 + Timestamp: result.Timestamp, 284 + Online: result.Online, 285 + Error: result.Error, 286 + }) 287 + } 288 + } 289 + 290 + // Conflict exists if we have more than one checksum group 291 + hasConflict := len(checksumGroups) > 1 && len(conflictDevices) > 1 292 + 293 + if !hasConflict { 294 + return nil, false 295 + } 296 + 297 + conflict := &ConflictInfo{ 298 + SecretPath: secretPath, 299 + Devices: conflictDevices, 300 + DetectedAt: time.Now(), 301 + } 302 + 303 + return conflict, true 304 + }
+405
internal/sync/manager.go
··· 1 + /* 2 + Copyright 2025. 3 + 4 + Licensed under the Apache License, Version 2.0 (the "License"); 5 + you may not use this file except in compliance with the License. 6 + You may obtain a copy of the License at 7 + 8 + http://www.apache.org/licenses/LICENSE-2.0 9 + 10 + Unless required by applicable law or agreed to in writing, software 11 + distributed under the License is distributed on an "AS IS" BASIS, 12 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 + See the License for the specific language governing permissions and 14 + limitations under the License. 15 + */ 16 + 17 + package sync 18 + 19 + import ( 20 + "context" 21 + "crypto/sha256" 22 + "fmt" 23 + "sort" 24 + "time" 25 + 26 + "github.com/go-logr/logr" 27 + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 + "sigs.k8s.io/controller-runtime/pkg/client" 29 + 30 + hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 31 + "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 32 + ) 33 + 34 + // AgentManagerInterface defines the interface for HSM agent management used by sync 35 + type AgentManagerInterface interface { 36 + CreateSingleGRPCClient(ctx context.Context, deviceName string, logger logr.Logger) (hsm.Client, error) 37 + } 38 + 39 + // SyncManager handles multi-device HSM synchronization and conflict resolution 40 + type SyncManager struct { 41 + client client.Client 42 + agentManager AgentManagerInterface 43 + logger logr.Logger 44 + } 45 + 46 + // NewSyncManager creates a new sync manager 47 + func NewSyncManager(k8sClient client.Client, agentManager AgentManagerInterface, logger logr.Logger) *SyncManager { 48 + return &SyncManager{ 49 + client: k8sClient, 50 + agentManager: agentManager, 51 + logger: logger.WithName("sync-manager"), 52 + } 53 + } 54 + 55 + // SyncResult represents the result of a sync operation 56 + type SyncResult struct { 57 + Success bool 58 + ConflictDetected bool 59 + PrimaryDevice string 60 + DeviceResults map[string]DeviceResult 61 + ResolvedData hsm.SecretData 62 + } 63 + 64 + // DeviceResult represents the sync result for a specific device 65 + type DeviceResult struct { 66 + Online bool 67 + Checksum string 68 + Version int64 69 + Error error 70 + Timestamp time.Time 71 + } 72 + 73 + // SyncSecret performs multi-device synchronization for an HSMSecret 74 + func (sm *SyncManager) SyncSecret(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret) (*SyncResult, error) { 75 + logger := sm.logger.WithValues("secret", hsmSecret.Name, "namespace", hsmSecret.Namespace) 76 + secretPath := hsmSecret.Name 77 + 78 + // Get all available HSM devices from HSMPools 79 + devices, err := sm.getAvailableDevices(ctx, hsmSecret.Namespace) 80 + if err != nil { 81 + return nil, fmt.Errorf("failed to get available devices: %w", err) 82 + } 83 + 84 + if len(devices) == 0 { 85 + return &SyncResult{ 86 + Success: false, 87 + DeviceResults: make(map[string]DeviceResult), 88 + }, fmt.Errorf("no HSM devices available") 89 + } 90 + 91 + logger.Info("Starting multi-device sync", "devices", len(devices)) 92 + 93 + // Read data from all online devices 94 + deviceResults := make(map[string]DeviceResult) 95 + validDeviceData := make(map[string]hsm.SecretData) 96 + 97 + for _, deviceName := range devices { 98 + result := sm.readFromDevice(ctx, deviceName, secretPath, logger) 99 + deviceResults[deviceName] = result 100 + 101 + if result.Online && result.Error == nil { 102 + // Calculate checksum for the data (we'll get the actual data in practice) 103 + // For now, simulating based on checksum 104 + validDeviceData[deviceName] = hsm.SecretData{ 105 + "checksum": []byte(result.Checksum), 106 + } 107 + } 108 + } 109 + 110 + // Detect conflicts and resolve 111 + conflictDetected := sm.detectConflicts(deviceResults) 112 + primaryDevice := sm.selectPrimaryDevice(deviceResults, hsmSecret) 113 + 114 + var resolvedData hsm.SecretData 115 + if primaryDevice != "" && validDeviceData[primaryDevice] != nil { 116 + resolvedData = validDeviceData[primaryDevice] 117 + logger.Info("Using primary device data", "primaryDevice", primaryDevice) 118 + } else if len(validDeviceData) > 0 { 119 + // Use most recent data if no clear primary 120 + resolvedData = sm.selectMostRecentData(deviceResults, validDeviceData) 121 + logger.Info("Using most recent data for resolution") 122 + } 123 + 124 + // If conflict detected and we have a resolution, sync to all other devices 125 + if conflictDetected && primaryDevice != "" { 126 + sm.syncToSecondaryDevices(ctx, devices, primaryDevice, secretPath, resolvedData, logger) 127 + } 128 + 129 + return &SyncResult{ 130 + Success: len(validDeviceData) > 0, 131 + ConflictDetected: conflictDetected, 132 + PrimaryDevice: primaryDevice, 133 + DeviceResults: deviceResults, 134 + ResolvedData: resolvedData, 135 + }, nil 136 + } 137 + 138 + // readFromDevice reads secret data from a specific HSM device 139 + func (sm *SyncManager) readFromDevice(ctx context.Context, deviceName, secretPath string, logger logr.Logger) DeviceResult { 140 + result := DeviceResult{ 141 + Timestamp: time.Now(), 142 + } 143 + 144 + // Get gRPC client for this device 145 + grpcClient, err := sm.agentManager.CreateSingleGRPCClient(ctx, deviceName, logger) 146 + if err != nil { 147 + result.Error = fmt.Errorf("failed to create gRPC client: %w", err) 148 + return result 149 + } 150 + defer func() { 151 + if closeErr := grpcClient.Close(); closeErr != nil { 152 + logger.V(1).Info("Failed to close gRPC client", "error", closeErr) 153 + } 154 + }() 155 + 156 + result.Online = grpcClient.IsConnected() 157 + if !result.Online { 158 + result.Error = fmt.Errorf("device not connected") 159 + return result 160 + } 161 + 162 + // Try to read the secret 163 + data, err := grpcClient.ReadSecret(ctx, secretPath) 164 + if err != nil { 165 + result.Error = fmt.Errorf("failed to read secret: %w", err) 166 + return result 167 + } 168 + 169 + // Calculate checksum 170 + result.Checksum = sm.calculateChecksum(data) 171 + 172 + // Get metadata to extract version (if available) 173 + metadata, err := grpcClient.ReadMetadata(ctx, secretPath) 174 + if err == nil && metadata != nil { 175 + if versionStr, exists := metadata.Tags["sync.version"]; exists { 176 + if version, parseErr := parseVersion(versionStr); parseErr == nil { 177 + result.Version = version 178 + } 179 + } 180 + } 181 + 182 + return result 183 + } 184 + 185 + // detectConflicts checks if there are conflicting checksums across devices 186 + func (sm *SyncManager) detectConflicts(deviceResults map[string]DeviceResult) bool { 187 + checksums := make(map[string]int) 188 + onlineDevices := 0 189 + 190 + for _, result := range deviceResults { 191 + if result.Online && result.Error == nil && result.Checksum != "" { 192 + checksums[result.Checksum]++ 193 + onlineDevices++ 194 + } 195 + } 196 + 197 + // Conflict if we have more than one unique checksum across online devices 198 + return len(checksums) > 1 && onlineDevices > 1 199 + } 200 + 201 + // selectPrimaryDevice chooses the primary device for conflict resolution 202 + func (sm *SyncManager) selectPrimaryDevice(deviceResults map[string]DeviceResult, hsmSecret *hsmv1alpha1.HSMSecret) string { 203 + // Check if there's already a designated primary in the status 204 + if hsmSecret.Status.PrimaryDevice != "" { 205 + if result, exists := deviceResults[hsmSecret.Status.PrimaryDevice]; exists && result.Online && result.Error == nil { 206 + return hsmSecret.Status.PrimaryDevice 207 + } 208 + } 209 + 210 + // Find device with highest version number among online devices 211 + var bestDevice string 212 + var highestVersion int64 = -1 213 + var mostRecentTime time.Time 214 + 215 + for deviceName, result := range deviceResults { 216 + if result.Online && result.Error == nil { 217 + // Prefer higher version numbers 218 + if result.Version > highestVersion { 219 + highestVersion = result.Version 220 + bestDevice = deviceName 221 + mostRecentTime = result.Timestamp 222 + } else if result.Version == highestVersion && result.Timestamp.After(mostRecentTime) { 223 + // If versions are equal, prefer more recent timestamp 224 + bestDevice = deviceName 225 + mostRecentTime = result.Timestamp 226 + } 227 + } 228 + } 229 + 230 + return bestDevice 231 + } 232 + 233 + // selectMostRecentData selects the most recently modified data 234 + func (sm *SyncManager) selectMostRecentData(deviceResults map[string]DeviceResult, validDeviceData map[string]hsm.SecretData) hsm.SecretData { 235 + var mostRecentDevice string 236 + var mostRecentTime time.Time 237 + 238 + for deviceName, result := range deviceResults { 239 + if result.Online && result.Error == nil && result.Timestamp.After(mostRecentTime) { 240 + mostRecentTime = result.Timestamp 241 + mostRecentDevice = deviceName 242 + } 243 + } 244 + 245 + if mostRecentDevice != "" && validDeviceData[mostRecentDevice] != nil { 246 + return validDeviceData[mostRecentDevice] 247 + } 248 + 249 + // Return first available data if no clear winner 250 + for _, data := range validDeviceData { 251 + return data 252 + } 253 + 254 + return nil 255 + } 256 + 257 + // syncToSecondaryDevices syncs resolved data to all secondary devices 258 + func (sm *SyncManager) syncToSecondaryDevices(ctx context.Context, devices []string, primaryDevice, secretPath string, data hsm.SecretData, logger logr.Logger) { 259 + for _, deviceName := range devices { 260 + if deviceName == primaryDevice { 261 + continue // Skip primary device 262 + } 263 + 264 + logger.Info("Syncing to secondary device", "device", deviceName) 265 + 266 + grpcClient, err := sm.agentManager.CreateSingleGRPCClient(ctx, deviceName, logger) 267 + if err != nil { 268 + logger.Error(err, "Failed to create gRPC client for sync", "device", deviceName) 269 + continue 270 + } 271 + 272 + if !grpcClient.IsConnected() { 273 + logger.V(1).Info("Device offline, skipping sync", "device", deviceName) 274 + if closeErr := grpcClient.Close(); closeErr != nil { 275 + logger.V(1).Info("Failed to close gRPC client", "error", closeErr) 276 + } 277 + continue 278 + } 279 + 280 + // Write data with updated version metadata 281 + metadata := &hsm.SecretMetadata{ 282 + Tags: map[string]string{ 283 + "sync.version": fmt.Sprintf("%d", time.Now().Unix()), 284 + "sync.primary": primaryDevice, 285 + "sync.timestamp": time.Now().Format(time.RFC3339), 286 + }, 287 + } 288 + 289 + if err := grpcClient.WriteSecretWithMetadata(ctx, secretPath, data, metadata); err != nil { 290 + logger.Error(err, "Failed to sync to secondary device", "device", deviceName) 291 + } else { 292 + logger.Info("Successfully synced to secondary device", "device", deviceName) 293 + } 294 + 295 + if closeErr := grpcClient.Close(); closeErr != nil { 296 + logger.V(1).Info("Failed to close gRPC client", "error", closeErr) 297 + } 298 + } 299 + } 300 + 301 + // getAvailableDevices gets list of available HSM devices from HSMPools 302 + func (sm *SyncManager) getAvailableDevices(ctx context.Context, namespace string) ([]string, error) { 303 + var hsmPoolList hsmv1alpha1.HSMPoolList 304 + if err := sm.client.List(ctx, &hsmPoolList, client.InNamespace(namespace)); err != nil { 305 + return nil, fmt.Errorf("failed to list HSM pools: %w", err) 306 + } 307 + 308 + deviceNames := make(map[string]bool) 309 + 310 + for _, pool := range hsmPoolList.Items { 311 + if pool.Status.Phase == hsmv1alpha1.HSMPoolPhaseReady { 312 + for _, deviceRef := range pool.Spec.HSMDeviceRefs { 313 + deviceNames[deviceRef] = true 314 + } 315 + } 316 + } 317 + 318 + devices := make([]string, 0, len(deviceNames)) 319 + for deviceName := range deviceNames { 320 + devices = append(devices, deviceName) 321 + } 322 + 323 + sort.Strings(devices) // Ensure consistent ordering 324 + return devices, nil 325 + } 326 + 327 + // UpdateHSMSecretStatus updates the HSMSecret status with sync results 328 + func (sm *SyncManager) UpdateHSMSecretStatus(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret, result *SyncResult) error { 329 + now := metav1.NewTime(time.Now()) 330 + 331 + // Update overall status 332 + if result.Success { 333 + hsmSecret.Status.SyncStatus = hsmv1alpha1.SyncStatusInSync 334 + hsmSecret.Status.LastSyncTime = &now 335 + hsmSecret.Status.LastError = "" 336 + } else { 337 + hsmSecret.Status.SyncStatus = hsmv1alpha1.SyncStatusError 338 + hsmSecret.Status.LastError = "Failed to sync with any HSM device" 339 + } 340 + 341 + hsmSecret.Status.SyncConflict = result.ConflictDetected 342 + hsmSecret.Status.PrimaryDevice = result.PrimaryDevice 343 + 344 + // Update device-specific sync status 345 + hsmSecret.Status.DeviceSyncStatus = make([]hsmv1alpha1.HSMDeviceSync, 0, len(result.DeviceResults)) 346 + 347 + for deviceName, deviceResult := range result.DeviceResults { 348 + syncTime := metav1.NewTime(deviceResult.Timestamp) 349 + deviceSync := hsmv1alpha1.HSMDeviceSync{ 350 + DeviceName: deviceName, 351 + LastSyncTime: &syncTime, 352 + Checksum: deviceResult.Checksum, 353 + Online: deviceResult.Online, 354 + Version: deviceResult.Version, 355 + } 356 + 357 + if deviceResult.Error != nil { 358 + deviceSync.Status = hsmv1alpha1.SyncStatusError 359 + deviceSync.LastError = deviceResult.Error.Error() 360 + } else if deviceResult.Online { 361 + deviceSync.Status = hsmv1alpha1.SyncStatusInSync 362 + } else { 363 + deviceSync.Status = hsmv1alpha1.SyncStatusOutOfSync 364 + } 365 + 366 + hsmSecret.Status.DeviceSyncStatus = append(hsmSecret.Status.DeviceSyncStatus, deviceSync) 367 + } 368 + 369 + // Update Kubernetes Secret checksum if we have resolved data 370 + if result.ResolvedData != nil { 371 + hsmSecret.Status.SecretChecksum = sm.calculateChecksum(result.ResolvedData) 372 + } 373 + 374 + return sm.client.Status().Update(ctx, hsmSecret) 375 + } 376 + 377 + // calculateChecksum calculates SHA256 checksum of secret data 378 + func (sm *SyncManager) calculateChecksum(data hsm.SecretData) string { 379 + if data == nil { 380 + return "" 381 + } 382 + 383 + h := sha256.New() 384 + 385 + // Sort keys for consistent checksum calculation 386 + keys := make([]string, 0, len(data)) 387 + for k := range data { 388 + keys = append(keys, k) 389 + } 390 + sort.Strings(keys) 391 + 392 + for _, k := range keys { 393 + h.Write([]byte(k)) 394 + h.Write(data[k]) 395 + } 396 + 397 + return fmt.Sprintf("%x", h.Sum(nil)) 398 + } 399 + 400 + // Helper function to parse version string 401 + func parseVersion(versionStr string) (int64, error) { 402 + var version int64 403 + _, err := fmt.Sscanf(versionStr, "%d", &version) 404 + return version, err 405 + }
+325
internal/sync/manager_test.go
··· 1 + /* 2 + Copyright 2025. 3 + 4 + Licensed under the Apache License, Version 2.0 (the "License"); 5 + you may not use this file except in compliance with the License. 6 + You may obtain a copy of the License at 7 + 8 + http://www.apache.org/licenses/LICENSE-2.0 9 + 10 + Unless required by applicable law or agreed to in writing, software 11 + distributed under the License is distributed on an "AS IS" BASIS, 12 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 + See the License for the specific language governing permissions and 14 + limitations under the License. 15 + */ 16 + 17 + package sync 18 + 19 + import ( 20 + "context" 21 + "testing" 22 + "time" 23 + 24 + "github.com/go-logr/logr" 25 + "github.com/stretchr/testify/assert" 26 + "github.com/stretchr/testify/mock" 27 + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 + "k8s.io/apimachinery/pkg/runtime" 29 + "sigs.k8s.io/controller-runtime/pkg/client/fake" 30 + 31 + hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 32 + "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 33 + ) 34 + 35 + // MockGRPCClient implements hsm.Client for testing 36 + type MockGRPCClient struct { 37 + mock.Mock 38 + } 39 + 40 + func (m *MockGRPCClient) Initialize(ctx context.Context, config hsm.Config) error { 41 + args := m.Called(ctx, config) 42 + return args.Error(0) 43 + } 44 + 45 + func (m *MockGRPCClient) IsConnected() bool { 46 + args := m.Called() 47 + return args.Bool(0) 48 + } 49 + 50 + func (m *MockGRPCClient) ReadSecret(ctx context.Context, path string) (hsm.SecretData, error) { 51 + args := m.Called(ctx, path) 52 + return args.Get(0).(hsm.SecretData), args.Error(1) 53 + } 54 + 55 + func (m *MockGRPCClient) WriteSecret(ctx context.Context, path string, data hsm.SecretData) error { 56 + args := m.Called(ctx, path, data) 57 + return args.Error(0) 58 + } 59 + 60 + func (m *MockGRPCClient) WriteSecretWithMetadata(ctx context.Context, path string, data hsm.SecretData, metadata *hsm.SecretMetadata) error { 61 + args := m.Called(ctx, path, data, metadata) 62 + return args.Error(0) 63 + } 64 + 65 + func (m *MockGRPCClient) DeleteSecret(ctx context.Context, path string) error { 66 + args := m.Called(ctx, path) 67 + return args.Error(0) 68 + } 69 + 70 + func (m *MockGRPCClient) ListSecrets(ctx context.Context, prefix string) ([]string, error) { 71 + args := m.Called(ctx, prefix) 72 + return args.Get(0).([]string), args.Error(1) 73 + } 74 + 75 + func (m *MockGRPCClient) GetInfo(ctx context.Context) (map[string]any, error) { 76 + args := m.Called(ctx) 77 + return args.Get(0).(map[string]any), args.Error(1) 78 + } 79 + 80 + func (m *MockGRPCClient) GetChecksum(ctx context.Context, path string) (string, error) { 81 + args := m.Called(ctx, path) 82 + return args.String(0), args.Error(1) 83 + } 84 + 85 + func (m *MockGRPCClient) ReadMetadata(ctx context.Context, path string) (*hsm.SecretMetadata, error) { 86 + args := m.Called(ctx, path) 87 + if args.Get(0) == nil { 88 + return nil, args.Error(1) 89 + } 90 + return args.Get(0).(*hsm.SecretMetadata), args.Error(1) 91 + } 92 + 93 + func (m *MockGRPCClient) Close() error { 94 + args := m.Called() 95 + return args.Error(0) 96 + } 97 + 98 + // MockAgentManager implements AgentManagerInterface for testing 99 + type MockAgentManager struct { 100 + mock.Mock 101 + } 102 + 103 + func (m *MockAgentManager) CreateSingleGRPCClient(ctx context.Context, deviceName string, logger logr.Logger) (hsm.Client, error) { 104 + args := m.Called(ctx, deviceName, logger) 105 + if args.Get(0) == nil { 106 + return nil, args.Error(1) 107 + } 108 + return args.Get(0).(hsm.Client), args.Error(1) 109 + } 110 + 111 + func TestSyncManager_CalculateChecksum(t *testing.T) { 112 + scheme := runtime.NewScheme() 113 + _ = hsmv1alpha1.AddToScheme(scheme) 114 + 115 + client := fake.NewClientBuilder().WithScheme(scheme).Build() 116 + mockAgentManager := &MockAgentManager{} 117 + 118 + syncManager := NewSyncManager(client, mockAgentManager, logr.Discard()) 119 + 120 + // Test with nil data 121 + checksum := syncManager.calculateChecksum(nil) 122 + assert.Equal(t, "", checksum) 123 + 124 + // Test with empty data 125 + checksum = syncManager.calculateChecksum(hsm.SecretData{}) 126 + assert.NotEqual(t, "", checksum) 127 + 128 + // Test with actual data 129 + data1 := hsm.SecretData{ 130 + "key1": []byte("value1"), 131 + "key2": []byte("value2"), 132 + } 133 + checksum1 := syncManager.calculateChecksum(data1) 134 + assert.NotEqual(t, "", checksum1) 135 + 136 + // Same data should produce same checksum 137 + data2 := hsm.SecretData{ 138 + "key1": []byte("value1"), 139 + "key2": []byte("value2"), 140 + } 141 + checksum2 := syncManager.calculateChecksum(data2) 142 + assert.Equal(t, checksum1, checksum2) 143 + 144 + // Different data should produce different checksum 145 + data3 := hsm.SecretData{ 146 + "key1": []byte("different"), 147 + "key2": []byte("value2"), 148 + } 149 + checksum3 := syncManager.calculateChecksum(data3) 150 + assert.NotEqual(t, checksum1, checksum3) 151 + 152 + // Key order shouldn't matter 153 + data4 := hsm.SecretData{ 154 + "key2": []byte("value2"), 155 + "key1": []byte("value1"), 156 + } 157 + checksum4 := syncManager.calculateChecksum(data4) 158 + assert.Equal(t, checksum1, checksum4) 159 + } 160 + 161 + func TestSyncManager_UpdateHSMSecretStatus(t *testing.T) { 162 + scheme := runtime.NewScheme() 163 + _ = hsmv1alpha1.AddToScheme(scheme) 164 + 165 + hsmSecret := &hsmv1alpha1.HSMSecret{ 166 + ObjectMeta: metav1.ObjectMeta{ 167 + Name: "test-secret", 168 + Namespace: "default", 169 + }, 170 + Spec: hsmv1alpha1.HSMSecretSpec{ 171 + AutoSync: true, 172 + }, 173 + } 174 + 175 + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(hsmSecret).WithStatusSubresource(&hsmv1alpha1.HSMSecret{}).Build() 176 + mockAgentManager := &MockAgentManager{} 177 + 178 + syncManager := NewSyncManager(client, mockAgentManager, logr.Discard()) 179 + 180 + // Test successful sync result 181 + result := &SyncResult{ 182 + Success: true, 183 + ConflictDetected: false, 184 + PrimaryDevice: "device1", 185 + DeviceResults: map[string]DeviceResult{ 186 + "device1": { 187 + Online: true, 188 + Checksum: "abc123", 189 + Version: 1, 190 + Error: nil, 191 + Timestamp: time.Now(), 192 + }, 193 + "device2": { 194 + Online: true, 195 + Checksum: "abc123", 196 + Version: 1, 197 + Error: nil, 198 + Timestamp: time.Now(), 199 + }, 200 + }, 201 + ResolvedData: hsm.SecretData{ 202 + "key": []byte("value"), 203 + }, 204 + } 205 + 206 + ctx := context.Background() 207 + err := syncManager.UpdateHSMSecretStatus(ctx, hsmSecret, result) 208 + assert.NoError(t, err) 209 + 210 + // Verify status was updated 211 + assert.Equal(t, hsmv1alpha1.SyncStatusInSync, hsmSecret.Status.SyncStatus) 212 + assert.Equal(t, "device1", hsmSecret.Status.PrimaryDevice) 213 + assert.False(t, hsmSecret.Status.SyncConflict) 214 + assert.Equal(t, "", hsmSecret.Status.LastError) 215 + assert.Len(t, hsmSecret.Status.DeviceSyncStatus, 2) 216 + 217 + // Check device sync status 218 + for _, deviceSync := range hsmSecret.Status.DeviceSyncStatus { 219 + assert.True(t, deviceSync.Online) 220 + assert.Equal(t, "abc123", deviceSync.Checksum) 221 + assert.Equal(t, int64(1), deviceSync.Version) 222 + assert.Equal(t, hsmv1alpha1.SyncStatusInSync, deviceSync.Status) 223 + assert.Empty(t, deviceSync.LastError) 224 + } 225 + } 226 + 227 + func TestSyncManager_DetectConflicts(t *testing.T) { 228 + scheme := runtime.NewScheme() 229 + _ = hsmv1alpha1.AddToScheme(scheme) 230 + 231 + client := fake.NewClientBuilder().WithScheme(scheme).Build() 232 + mockAgentManager := &MockAgentManager{} 233 + 234 + syncManager := NewSyncManager(client, mockAgentManager, logr.Discard()) 235 + 236 + // Test with no conflicts (same checksums) 237 + deviceResults := map[string]DeviceResult{ 238 + "device1": { 239 + Online: true, 240 + Checksum: "abc123", 241 + Version: 1, 242 + Error: nil, 243 + }, 244 + "device2": { 245 + Online: true, 246 + Checksum: "abc123", // Same checksum 247 + Version: 1, 248 + Error: nil, 249 + }, 250 + } 251 + 252 + conflict := syncManager.detectConflicts(deviceResults) 253 + assert.False(t, conflict) 254 + 255 + // Test with conflicts (different checksums) 256 + deviceResults = map[string]DeviceResult{ 257 + "device1": { 258 + Online: true, 259 + Checksum: "abc123", 260 + Version: 1, 261 + Error: nil, 262 + }, 263 + "device2": { 264 + Online: true, 265 + Checksum: "def456", // Different checksum 266 + Version: 2, 267 + Error: nil, 268 + }, 269 + } 270 + 271 + conflict = syncManager.detectConflicts(deviceResults) 272 + assert.True(t, conflict) 273 + } 274 + 275 + func TestSyncManager_SelectPrimaryDevice(t *testing.T) { 276 + scheme := runtime.NewScheme() 277 + _ = hsmv1alpha1.AddToScheme(scheme) 278 + 279 + client := fake.NewClientBuilder().WithScheme(scheme).Build() 280 + mockAgentManager := &MockAgentManager{} 281 + 282 + syncManager := NewSyncManager(client, mockAgentManager, logr.Discard()) 283 + 284 + // Test with existing primary device 285 + hsmSecret := &hsmv1alpha1.HSMSecret{ 286 + Status: hsmv1alpha1.HSMSecretStatus{ 287 + PrimaryDevice: "device1", 288 + }, 289 + } 290 + 291 + deviceResults := map[string]DeviceResult{ 292 + "device1": { 293 + Online: true, 294 + Checksum: "abc123", 295 + Version: 1, 296 + Error: nil, 297 + }, 298 + "device2": { 299 + Online: true, 300 + Checksum: "def456", 301 + Version: 2, 302 + Error: nil, 303 + }, 304 + } 305 + 306 + primary := syncManager.selectPrimaryDevice(deviceResults, hsmSecret) 307 + assert.Equal(t, "device1", primary) 308 + 309 + // Test with no existing primary - should choose highest version 310 + hsmSecret.Status.PrimaryDevice = "" 311 + primary = syncManager.selectPrimaryDevice(deviceResults, hsmSecret) 312 + assert.Equal(t, "device2", primary) // device2 has version 2 vs device1's version 1 313 + 314 + // Test with primary device offline - should fallback to highest version 315 + hsmSecret.Status.PrimaryDevice = "device1" 316 + deviceResults["device1"] = DeviceResult{ 317 + Online: false, // Offline 318 + Checksum: "abc123", 319 + Version: 1, 320 + Error: nil, 321 + } 322 + 323 + primary = syncManager.selectPrimaryDevice(deviceResults, hsmSecret) 324 + assert.Equal(t, "device2", primary) 325 + }
+3 -6
web/app.js
··· 77 77 setupEventListeners() { 78 78 const createForm = document.getElementById('createForm'); 79 79 createForm.addEventListener('submit', (e) => this.handleCreateSecret(e)); 80 - 81 - // Auto-refresh every 30 seconds 82 - setInterval(() => this.refreshSecrets(), 30000); 83 80 } 84 81 85 82 async loadInitialData() { ··· 187 184 188 185 try { 189 186 await this.api.deleteSecret(secretName); 190 - this.showSuccess(`Secret "${secretName}" deleted successfully!`); 191 - await this.loadSecrets(); 187 + this.showSuccess(null, `Secret "${secretName}" deleted successfully!`); 188 + await this.loadSecrets(); // Refresh after deletion 192 189 } catch (error) { 193 190 this.showError(null, `Failed to delete secret: ${error.message}`); 194 191 } ··· 321 318 322 319 // Reset form and refresh list 323 320 event.target.reset(); 324 - await this.loadSecrets(); 321 + await this.loadSecrets(); // Refresh after creation 325 322 326 323 // Hide form after a delay 327 324 setTimeout(() => this.hideCreateForm(), 2000);