A Kubernetes operator that bridges Hardware Security Module (HSM) data storage with Kubernetes Secrets, providing true secret portability th
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 1063 lines 32 kB view raw
1/* 2Copyright 2025. 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17package api 18 19import ( 20 "context" 21 "fmt" 22 "net/http" 23 "sync" 24 "time" 25 26 "github.com/gin-gonic/gin" 27 "github.com/go-logr/logr" 28 29 "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 30) 31 32// WriteResult represents the result of writing to a single device 33type WriteResult struct { 34 DeviceName string 35 Error error 36} 37 38// checksumResult represents the result of getting checksum from a single device 39type checksumResult struct { 40 deviceName string 41 checksum string 42 err error 43} 44 45// secretResult represents the result of reading secret from a single device 46type secretResult struct { 47 deviceName string 48 data hsm.SecretData 49 metadata *hsm.SecretMetadata 50 err error 51} 52 53// metadataResult represents the result of reading metadata from a single device 54type metadataResult struct { 55 deviceName string 56 metadata *hsm.SecretMetadata 57 err error 58} 59 60// ProxyClient handles HTTP requests and proxies them to gRPC clients 61// It has methods that match the HTTP endpoints and handle the full request/response cycle 62type ProxyClient struct { 63 server *Server 64 logger logr.Logger 65 grpcClients map[string]hsm.Client // serialNumber -> gRPC client 66 clientsMutex sync.RWMutex 67} 68 69// NewProxyClient creates a new ProxyClient that handles HTTP routing 70func NewProxyClient(server *Server, logger logr.Logger) *ProxyClient { 71 return &ProxyClient{ 72 server: server, 73 logger: logger.WithName("proxy-client"), 74 grpcClients: make(map[string]hsm.Client), 75 } 76} 77 78// Helper function to parse timestamp from metadata 79func parseTimestampFromMetadata(metadata *hsm.SecretMetadata) int64 { 80 if metadata == nil || metadata.Labels == nil { 81 return 0 82 } 83 84 // Try RFC3339 timestamp first 85 if syncTimestamp, exists := metadata.Labels["sync.timestamp"]; exists { 86 if parsedTime, err := time.Parse(time.RFC3339, syncTimestamp); err == nil { 87 return parsedTime.Unix() 88 } 89 } 90 91 // Fall back to Unix timestamp in sync.version 92 if syncVersion, exists := metadata.Labels["sync.version"]; exists { 93 var timestamp int64 94 if n, err := fmt.Sscanf(syncVersion, "%d", &timestamp); n == 1 && err == nil { 95 return timestamp 96 } 97 } 98 99 return 0 100} 101 102// isSecretDeleted checks if a secret is marked as deleted via tombstone metadata 103func isSecretDeleted(metadata *hsm.SecretMetadata) bool { 104 if metadata == nil || metadata.Labels == nil { 105 return false 106 } 107 return metadata.Labels["sync.deleted"] == "true" 108} 109 110// findConsensusChecksum finds the most common checksum among results and logs inconsistencies 111func (p *ProxyClient) findConsensusChecksum(results []checksumResult, path string) (string, error) { 112 if len(results) == 0 { 113 return "", fmt.Errorf("checksum not found on any HSM device") 114 } 115 116 // Count occurrences of each checksum 117 checksumCounts := make(map[string]int) 118 for _, result := range results { 119 checksumCounts[result.checksum]++ 120 } 121 122 // Find the most common checksum (consensus) 123 // In case of ties, prefer the first occurrence for deterministic behavior 124 var consensusChecksum string 125 var maxCount int 126 for _, result := range results { 127 count := checksumCounts[result.checksum] 128 if count > maxCount { 129 consensusChecksum = result.checksum 130 maxCount = count 131 } 132 } 133 134 // Log checksum inconsistencies 135 if len(checksumCounts) > 1 { 136 inconsistentDevices := make([]string, 0) 137 for _, result := range results { 138 if result.checksum != consensusChecksum { 139 inconsistentDevices = append(inconsistentDevices, result.deviceName) 140 } 141 } 142 p.logger.Info("Checksum inconsistency detected, using consensus", 143 "path", path, 144 "consensus", consensusChecksum, 145 "consensus_count", maxCount, 146 "total_devices", len(results), 147 "inconsistent_devices", inconsistentDevices) 148 } 149 150 return consensusChecksum, nil 151} 152 153// logMultiDeviceOperation logs when operations are performed across multiple devices with sync information 154func (p *ProxyClient) logMultiDeviceOperation(deviceNames []string, selectedDevice, operationName, path, syncDetails string) { 155 p.logger.Info(fmt.Sprintf("%s found on multiple devices, using most recent version", operationName), 156 "path", path, 157 "devices", deviceNames, 158 "selected", selectedDevice, 159 "syncDetails", syncDetails) 160} 161 162// findMostRecentSecretResult finds the most recent secret result based on metadata timestamps 163func (p *ProxyClient) findMostRecentSecretResult(results []secretResult, path string) (hsm.SecretData, error) { 164 if len(results) == 0 { 165 return nil, fmt.Errorf("secret not found on any HSM device") 166 } 167 168 // Find most recent version based on metadata timestamps 169 bestResult := results[0] 170 bestTimestamp := parseTimestampFromMetadata(bestResult.metadata) 171 172 for _, result := range results[1:] { 173 timestamp := parseTimestampFromMetadata(result.metadata) 174 if timestamp > bestTimestamp { 175 bestResult = result 176 bestTimestamp = timestamp 177 } 178 } 179 180 // Log sync issues when multiple devices have different versions 181 if len(results) > 1 { 182 deviceNames := make([]string, len(results)) 183 for i, result := range results { 184 deviceNames[i] = result.deviceName 185 } 186 p.logMultiDeviceOperation(deviceNames, bestResult.deviceName, "Secret", path, fmt.Sprintf("timestamp: %d", bestTimestamp)) 187 } 188 189 // Check if the most recent result is a tombstone (deleted secret) 190 if isSecretDeleted(bestResult.metadata) { 191 return nil, fmt.Errorf("secret not found on any HSM device") 192 } 193 194 return bestResult.data, nil 195} 196 197// findMostRecentMetadataResult finds the most recent metadata result based on timestamps 198func (p *ProxyClient) findMostRecentMetadataResult(results []metadataResult, path string) (*hsm.SecretMetadata, error) { 199 if len(results) == 0 { 200 return nil, fmt.Errorf("metadata not found on any HSM device") 201 } 202 203 // Find most recent version based on metadata timestamps 204 bestResult := results[0] 205 bestTimestamp := parseTimestampFromMetadata(bestResult.metadata) 206 207 for _, result := range results[1:] { 208 timestamp := parseTimestampFromMetadata(result.metadata) 209 if timestamp > bestTimestamp { 210 bestResult = result 211 bestTimestamp = timestamp 212 } 213 } 214 215 // Log sync issues when multiple devices have different versions 216 if len(results) > 1 { 217 deviceNames := make([]string, len(results)) 218 for i, result := range results { 219 deviceNames[i] = result.deviceName 220 } 221 p.logMultiDeviceOperation(deviceNames, bestResult.deviceName, "Metadata", path, fmt.Sprintf("timestamp: %d", bestTimestamp)) 222 } 223 224 // Return the most recent metadata, even if it's a tombstone 225 // This allows callers to see deletion information when needed 226 return bestResult.metadata, nil 227} 228 229// validatePathParam validates the path parameter and sends error if missing 230// Returns (path, true) on success, or ("", false) if path is missing (error already sent to client) 231func (p *ProxyClient) validatePathParam(c *gin.Context) (string, bool) { 232 path := c.Param("path") 233 if path == "" { 234 p.server.sendError(c, http.StatusBadRequest, "missing_path", "Secret path is required", nil) 235 return "", false 236 } 237 return path, true 238} 239 240// getAllAvailableGRPCClients returns all available gRPC clients for mirroring operations 241// Returns (clients, true) on success, or (nil, false) if no agents available (error already sent to client) 242func (p *ProxyClient) getAllAvailableGRPCClients(c *gin.Context) (map[string]hsm.Client, bool) { 243 // Use operator namespace where HSMPools and agents are located 244 namespace := p.server.operatorNamespace 245 246 // Get all available devices 247 devices, err := p.server.getAllAvailableAgents(c.Request.Context(), namespace) 248 if err != nil { 249 p.server.sendError(c, http.StatusServiceUnavailable, "no_agents", "No HSM agents available", map[string]any{ 250 "error": err.Error(), 251 }) 252 return nil, false 253 } 254 255 clients := make(map[string]hsm.Client) 256 p.clientsMutex.Lock() 257 defer p.clientsMutex.Unlock() 258 259 for _, device := range devices { 260 // Try to get existing client for this device 261 if client, exists := p.grpcClients[device.SerialNumber]; exists && client.IsConnected() { 262 clients[device.SerialNumber] = client 263 continue 264 } 265 266 // Close existing client for this device if it exists but is not connected 267 if oldClient, exists := p.grpcClients[device.SerialNumber]; exists { 268 if closeErr := oldClient.Close(); closeErr != nil { 269 p.logger.V(1).Info("Error closing old gRPC client", "node", device.NodeName, "serialNumber", device.SerialNumber, "error", closeErr) 270 } 271 delete(p.grpcClients, device.SerialNumber) 272 } 273 274 // Create new gRPC client 275 grpcClient, err := p.server.createGRPCClient(c.Request.Context(), device) 276 if err != nil { 277 p.logger.V(1).Info("Failed to create gRPC client", "node", device.NodeName, "serialNumber", device.SerialNumber, "error", err) 278 continue 279 } 280 281 // Cache and include the client 282 p.grpcClients[device.SerialNumber] = grpcClient 283 clients[device.SerialNumber] = grpcClient 284 p.logger.V(1).Info("Created new gRPC client", "node", device.NodeName, "serialNumber", device.SerialNumber) 285 } 286 287 if len(clients) == 0 { 288 p.server.sendError(c, http.StatusServiceUnavailable, "no_agents", "No HSM agents available", nil) 289 return nil, false 290 } 291 292 return clients, true 293} 294 295// GetInfo handles GET /hsm/info 296func (p *ProxyClient) GetInfo(c *gin.Context) { 297 clients, ok := p.getAllAvailableGRPCClients(c) 298 if !ok { 299 return 300 } 301 302 type infoResult struct { 303 deviceName string 304 info *hsm.HSMInfo 305 err error 306 } 307 308 resultsChan := make(chan infoResult, len(clients)) 309 for deviceName, grpcClient := range clients { 310 go func(deviceName string, grpcClient hsm.Client) { 311 info, err := grpcClient.GetInfo(c) 312 resultsChan <- infoResult{deviceName, info, err} 313 }(deviceName, grpcClient) 314 } 315 316 // Collect successful results 317 deviceInfos := make(map[string]*hsm.HSMInfo, len(clients)) 318 for i := 0; i < len(clients); i++ { 319 result := <-resultsChan 320 if result.err == nil { 321 deviceInfos[result.deviceName] = result.info 322 } else { 323 p.logger.V(1).Info("Failed to get info from device", "device", result.deviceName, "error", result.err) 324 } 325 } 326 327 if len(deviceInfos) == 0 { 328 p.server.sendError(c, http.StatusInternalServerError, "info_failed", "Failed to get info from any HSM device", nil) 329 return 330 } 331 332 response := GetInfoResponse{DeviceInfos: deviceInfos} 333 p.server.sendResponse(c, http.StatusOK, "HSM info retrieved successfully", response) 334} 335 336// ListSecrets handles GET /hsm/secrets 337func (p *ProxyClient) ListSecrets(c *gin.Context) { 338 prefix := c.Query("prefix") 339 340 clients, ok := p.getAllAvailableGRPCClients(c) 341 if !ok { 342 return 343 } 344 type secretsResult struct { 345 deviceName string 346 secrets []string 347 err error 348 } 349 350 resultsChan := make(chan secretsResult, len(clients)) 351 for deviceName, grpcClient := range clients { 352 go func(deviceName string, grpcClient hsm.Client) { 353 secrets, err := grpcClient.ListSecrets(c, prefix) 354 resultsChan <- secretsResult{deviceName, secrets, err} 355 }(deviceName, grpcClient) 356 } 357 358 // Collect results and deduplicate 359 secretsSet := make(map[string]bool) 360 for i := 0; i < len(clients); i++ { 361 result := <-resultsChan 362 if result.err != nil { 363 p.logger.V(1).Info("Failed to list secrets from device", "device", result.deviceName, "error", result.err) 364 continue 365 } 366 367 // Add all secrets from this device to the union set 368 for _, secretPath := range result.secrets { 369 secretsSet[secretPath] = true 370 } 371 } 372 373 // Convert set to slice 374 allSecrets := make([]string, 0, len(secretsSet)) 375 for secretPath := range secretsSet { 376 allSecrets = append(allSecrets, secretPath) 377 } 378 379 response := ListSecretsResponse{ 380 Secrets: allSecrets, 381 Count: len(allSecrets), 382 Prefix: prefix, 383 } 384 p.server.sendResponse(c, http.StatusOK, "Secrets listed successfully", response) 385} 386 387// ReadSecret handles GET /hsm/secrets/:path 388func (p *ProxyClient) ReadSecret(c *gin.Context) { 389 path, ok := p.validatePathParam(c) 390 if !ok { 391 return 392 } 393 394 clients, ok := p.getAllAvailableGRPCClients(c) 395 if !ok { 396 return 397 } 398 399 // Read from all devices in parallel 400 resultsChan := make(chan secretResult, len(clients)) 401 for deviceName, grpcClient := range clients { 402 go func(deviceName string, grpcClient hsm.Client) { 403 // Read secret data 404 data, err := grpcClient.ReadSecret(c.Request.Context(), path) 405 if err != nil { 406 resultsChan <- secretResult{deviceName: deviceName, err: err} 407 return 408 } 409 410 // Read metadata for timestamp comparison 411 metadata, metaErr := grpcClient.ReadMetadata(c.Request.Context(), path) 412 if metaErr != nil { 413 p.logger.V(1).Info("Failed to read metadata for version comparison", "device", deviceName, "path", path, "error", metaErr) 414 } 415 416 resultsChan <- secretResult{ 417 deviceName: deviceName, 418 data: data, 419 metadata: metadata, 420 } 421 }(deviceName, grpcClient) 422 } 423 424 // Collect successful results 425 var successfulResults []secretResult 426 for i := 0; i < len(clients); i++ { 427 result := <-resultsChan 428 if result.err != nil { 429 p.logger.V(1).Info("Failed to read secret from device", "device", result.deviceName, "path", path, "error", result.err) 430 continue 431 } 432 successfulResults = append(successfulResults, result) 433 } 434 435 // Use helper to find most recent result based on timestamps 436 data, err := p.findMostRecentSecretResult(successfulResults, path) 437 if err != nil { 438 p.server.sendError(c, http.StatusNotFound, "secret_not_found", err.Error(), nil) 439 return 440 } 441 442 // Get checksum for the secret to include in response 443 checksum := "" 444 if len(successfulResults) > 0 { 445 // Get checksum from any available client (they should be consistent after consensus) 446 for _, grpcClient := range clients { 447 if checksumResult, checksumErr := grpcClient.GetChecksum(c.Request.Context(), path); checksumErr == nil { 448 checksum = checksumResult 449 break 450 } 451 } 452 } 453 454 response := ReadSecretResponse{ 455 Path: path, 456 Data: data, 457 Checksum: checksum, 458 DeviceCount: len(successfulResults), 459 } 460 p.server.sendResponse(c, http.StatusOK, "Secret read successfully", response) 461} 462 463// WriteSecret handles POST/PUT /hsm/secrets/:path with mirroring support 464func (p *ProxyClient) WriteSecret(c *gin.Context) { 465 path, ok := p.validatePathParam(c) 466 if !ok { 467 return 468 } 469 470 // Parse request body 471 var req struct { 472 Data map[string]string `json:"data" binding:"required"` 473 Metadata *hsm.SecretMetadata `json:"metadata,omitempty"` 474 Mirror *bool `json:"mirror,omitempty"` // Enable/disable mirroring for this request 475 } 476 if err := c.ShouldBindJSON(&req); err != nil { 477 p.server.sendError(c, http.StatusBadRequest, "parse_error", "Failed to parse request body", map[string]any{ 478 "error": err.Error(), 479 }) 480 return 481 } 482 483 // Convert string data to byte data 484 data := make(hsm.SecretData) 485 for key, value := range req.Data { 486 data[key] = []byte(value) 487 } 488 489 // Get all available clients for mirroring 490 clients, ok := p.getAllAvailableGRPCClients(c) 491 if !ok { 492 return 493 } 494 495 // Add mirroring metadata 496 metadata := req.Metadata 497 if metadata == nil { 498 metadata = &hsm.SecretMetadata{Labels: make(map[string]string)} 499 } 500 if metadata.Labels == nil { 501 metadata.Labels = make(map[string]string) 502 } 503 metadata.Labels["sync.version"] = fmt.Sprintf("%d", time.Now().Unix()) 504 metadata.Labels["sync.timestamp"] = time.Now().Format(time.RFC3339) 505 506 // Write to all devices in parallel 507 results := p.writeToAllDevices(c.Request.Context(), clients, path, data, metadata) 508 509 // Check results - log failures but succeed if at least one device works 510 successful := 0 511 for deviceName, result := range results { 512 if result.Error == nil { 513 successful++ 514 } else { 515 p.logger.Error(result.Error, "Failed to write to device", "device", deviceName, "path", path) 516 } 517 } 518 519 // Succeed if we wrote to at least one device 520 if successful > 0 { 521 if successful < len(clients) { 522 p.logger.Info("Secret written to subset of devices", 523 "path", path, 524 "successful", successful, 525 "total", len(clients)) 526 } 527 528 response := WriteSecretResponse{ 529 Path: path, 530 Keys: len(data), 531 } 532 533 p.server.sendResponse(c, http.StatusCreated, "Secret written successfully", response) 534 } else { 535 // All devices failed 536 p.server.sendError(c, http.StatusInternalServerError, "write_failed", "Failed to write secret to any HSM device", nil) 537 } 538} 539 540// DeleteSecret handles DELETE /hsm/secrets/:path with mirroring support 541func (p *ProxyClient) DeleteSecret(c *gin.Context) { 542 path, ok := p.validatePathParam(c) 543 if !ok { 544 return 545 } 546 547 // Get all available clients for mirroring delete 548 clients, ok := p.getAllAvailableGRPCClients(c) 549 if !ok { 550 return 551 } 552 553 // Perform tombstone deletion from all devices in parallel 554 results := p.tombstoneDeleteFromAllDevices(c.Request.Context(), clients, path) 555 556 // Check results 557 successful := 0 558 var errors []string 559 deviceResults := make(map[string]any) 560 561 for deviceName, result := range results { 562 deviceResults[deviceName] = map[string]any{ 563 "success": result.Error == nil, 564 "error": func() string { 565 if result.Error != nil { 566 return result.Error.Error() 567 } 568 return "" 569 }(), 570 } 571 572 if result.Error == nil { 573 successful++ 574 } else { 575 errors = append(errors, fmt.Sprintf("%s: %v", deviceName, result.Error)) 576 p.logger.Error(result.Error, "Failed to delete from device", "device", deviceName, "path", path) 577 } 578 } 579 580 // Consider the operation successful if we deleted from at least one device 581 if successful > 0 { 582 response := DeleteSecretResponse{ 583 Path: path, 584 Devices: len(clients), 585 DeviceResults: deviceResults, 586 } 587 if len(errors) > 0 { 588 response.Warnings = errors 589 } 590 591 statusCode := http.StatusOK 592 message := "Secret deleted successfully" 593 if successful < len(clients) { 594 statusCode = http.StatusPartialContent // 206 indicates partial success 595 message = fmt.Sprintf("Secret deleted from %d/%d devices", successful, len(clients)) 596 } 597 598 p.server.sendResponse(c, statusCode, message, response) 599 } else { 600 // All devices failed 601 p.server.sendError(c, http.StatusInternalServerError, "delete_failed", "Failed to delete secret from any HSM device", map[string]any{ 602 "errors": errors, 603 "deviceResults": deviceResults, 604 "path": path, 605 }) 606 } 607} 608 609// DeleteSecretKey handles DELETE /hsm/secrets/:path/:key with mirroring support 610func (p *ProxyClient) DeleteSecretKey(c *gin.Context) { 611 path, ok := p.validatePathParam(c) 612 if !ok { 613 return 614 } 615 616 key := c.Param("key") 617 if key == "" { 618 p.server.sendError(c, http.StatusBadRequest, "missing_key", "Key parameter is required", nil) 619 return 620 } 621 622 // Get all available clients for mirroring delete 623 clients, ok := p.getAllAvailableGRPCClients(c) 624 if !ok { 625 return 626 } 627 628 // Delete key from all devices in parallel 629 results := p.deleteKeyFromAllDevices(c.Request.Context(), clients, path, key) 630 631 // Check results 632 successful := 0 633 var errors []string 634 deviceResults := make(map[string]any) 635 636 for deviceName, result := range results { 637 deviceResults[deviceName] = map[string]any{ 638 "success": result.Error == nil, 639 "error": func() string { 640 if result.Error != nil { 641 return result.Error.Error() 642 } 643 return "" 644 }(), 645 } 646 647 if result.Error == nil { 648 successful++ 649 } else { 650 errors = append(errors, fmt.Sprintf("%s: %v", deviceName, result.Error)) 651 p.logger.Error(result.Error, "Failed to delete key from device", "device", deviceName, "path", path, "key", key) 652 } 653 } 654 655 // Consider the operation successful if we deleted from at least one device 656 if successful > 0 { 657 response := map[string]any{ 658 "path": path, 659 "key": key, 660 "devices": len(clients), 661 "deviceResults": deviceResults, 662 } 663 if len(errors) > 0 { 664 response["warnings"] = errors 665 } 666 667 statusCode := http.StatusOK 668 message := "Key deleted successfully" 669 if successful < len(clients) { 670 statusCode = http.StatusPartialContent 671 message = fmt.Sprintf("Key deleted from %d/%d devices", successful, len(clients)) 672 } 673 674 p.server.sendResponse(c, statusCode, message, response) 675 } else { 676 // All devices failed 677 p.server.sendError(c, http.StatusInternalServerError, "delete_key_failed", "Failed to delete key from any HSM device", map[string]any{ 678 "errors": errors, 679 "deviceResults": deviceResults, 680 "path": path, 681 "key": key, 682 }) 683 } 684} 685 686// deleteKeyFromAllDevices deletes a specific key from all devices in parallel 687func (p *ProxyClient) deleteKeyFromAllDevices(ctx context.Context, clients map[string]hsm.Client, path, key string) map[string]WriteResult { 688 results := make(map[string]WriteResult) 689 resultsMutex := sync.Mutex{} 690 wg := sync.WaitGroup{} 691 692 for deviceName, client := range clients { 693 wg.Add(1) 694 go func(deviceName string, client hsm.Client) { 695 defer wg.Done() 696 697 err := client.DeleteSecretKey(ctx, path, key) 698 699 resultsMutex.Lock() 700 results[deviceName] = WriteResult{ 701 DeviceName: deviceName, 702 Error: err, 703 } 704 resultsMutex.Unlock() 705 }(deviceName, client) 706 } 707 708 wg.Wait() 709 return results 710} 711 712// ReadMetadata handles GET /hsm/secrets/:path/metadata 713func (p *ProxyClient) ReadMetadata(c *gin.Context) { 714 path, ok := p.validatePathParam(c) 715 if !ok { 716 return 717 } 718 719 clients, ok := p.getAllAvailableGRPCClients(c) 720 if !ok { 721 return 722 } 723 724 // Read metadata from all devices in parallel 725 resultsChan := make(chan metadataResult, len(clients)) 726 for deviceName, grpcClient := range clients { 727 go func(deviceName string, grpcClient hsm.Client) { 728 metadata, err := grpcClient.ReadMetadata(c.Request.Context(), path) 729 resultsChan <- metadataResult{ 730 deviceName: deviceName, 731 metadata: metadata, 732 err: err, 733 } 734 }(deviceName, grpcClient) 735 } 736 737 // Collect successful results 738 var successfulResults []metadataResult 739 for i := 0; i < len(clients); i++ { 740 result := <-resultsChan 741 if result.err != nil { 742 p.logger.V(1).Info("Failed to read metadata from device", "device", result.deviceName, "path", path, "error", result.err) 743 continue 744 } 745 successfulResults = append(successfulResults, result) 746 } 747 748 // Use helper to find most recent result based on timestamps 749 metadata, err := p.findMostRecentMetadataResult(successfulResults, path) 750 if err != nil { 751 p.server.sendError(c, http.StatusNotFound, "metadata_not_found", err.Error(), nil) 752 return 753 } 754 755 response := ReadMetadataResponse{Path: path, Metadata: metadata} 756 p.server.sendResponse(c, http.StatusOK, "Metadata read successfully", response) 757} 758 759// GetChecksum handles GET /hsm/secrets/:path/checksum 760func (p *ProxyClient) GetChecksum(c *gin.Context) { 761 path, ok := p.validatePathParam(c) 762 if !ok { 763 return 764 } 765 766 clients, ok := p.getAllAvailableGRPCClients(c) 767 if !ok { 768 return 769 } 770 771 // Get checksums from all devices in parallel 772 resultsChan := make(chan checksumResult, len(clients)) 773 for deviceName, grpcClient := range clients { 774 go func(deviceName string, grpcClient hsm.Client) { 775 checksum, err := grpcClient.GetChecksum(c.Request.Context(), path) 776 resultsChan <- checksumResult{ 777 deviceName: deviceName, 778 checksum: checksum, 779 err: err, 780 } 781 }(deviceName, grpcClient) 782 } 783 784 // Collect successful results 785 var successfulResults []checksumResult 786 for i := 0; i < len(clients); i++ { 787 result := <-resultsChan 788 if result.err != nil { 789 p.logger.V(1).Info("Failed to get checksum from device", "device", result.deviceName, "path", path, "error", result.err) 790 continue 791 } 792 successfulResults = append(successfulResults, result) 793 } 794 795 // Use helper to find consensus checksum 796 consensusChecksum, err := p.findConsensusChecksum(successfulResults, path) 797 if err != nil { 798 p.server.sendError(c, http.StatusNotFound, "checksum_not_found", err.Error(), nil) 799 return 800 } 801 802 response := GetChecksumResponse{Path: path, Checksum: consensusChecksum} 803 p.server.sendResponse(c, http.StatusOK, "Checksum retrieved successfully", response) 804} 805 806// IsConnected handles GET /hsm/status 807func (p *ProxyClient) IsConnected(c *gin.Context) { 808 clients, ok := p.getAllAvailableGRPCClients(c) 809 if !ok { 810 return 811 } 812 813 devices := make(map[string]bool, len(clients)) 814 connectedCount := 0 815 816 for deviceName, grpcClient := range clients { 817 connected := grpcClient.IsConnected() 818 devices[deviceName] = connected 819 820 if connected { 821 connectedCount++ 822 } 823 } 824 825 // Log connectivity issues for operational visibility 826 if connectedCount == 0 { 827 p.logger.Info("No HSM devices are connected", "totalDevices", len(clients)) 828 } else if connectedCount < len(clients) { 829 p.logger.Info("Partial HSM connectivity detected", 830 "connectedDevices", connectedCount, 831 "totalDevices", len(clients)) 832 } 833 834 response := IsConnectedResponse{ 835 Devices: devices, 836 TotalDevices: len(clients), 837 } 838 839 p.server.sendResponse(c, http.StatusOK, "HSM connection status retrieved", response) 840} 841 842// Close closes all cached gRPC clients 843func (p *ProxyClient) Close() error { 844 p.clientsMutex.Lock() 845 defer p.clientsMutex.Unlock() 846 847 var lastErr error 848 for deviceName, client := range p.grpcClients { 849 if err := client.Close(); err != nil { 850 p.logger.Error(err, "Failed to close gRPC client", "device", deviceName) 851 lastErr = err 852 } 853 } 854 855 // Clear the map 856 p.grpcClients = make(map[string]hsm.Client) 857 return lastErr 858} 859 860// CleanupDisconnectedClients removes disconnected clients from the cache 861func (p *ProxyClient) CleanupDisconnectedClients() { 862 p.clientsMutex.Lock() 863 defer p.clientsMutex.Unlock() 864 865 for deviceName, client := range p.grpcClients { 866 if !client.IsConnected() { 867 p.logger.V(1).Info("Removing disconnected gRPC client", "device", deviceName) 868 if closeErr := client.Close(); closeErr != nil { 869 p.logger.V(1).Info("Error closing disconnected gRPC client", "device", deviceName, "error", closeErr) 870 } 871 delete(p.grpcClients, deviceName) 872 } 873 } 874} 875 876// GetClientCount returns the number of cached gRPC clients 877func (p *ProxyClient) GetClientCount() int { 878 p.clientsMutex.RLock() 879 defer p.clientsMutex.RUnlock() 880 return len(p.grpcClients) 881} 882 883// writeToAllDevices writes secret data to all devices in parallel 884func (p *ProxyClient) writeToAllDevices(ctx context.Context, clients map[string]hsm.Client, path string, data hsm.SecretData, metadata *hsm.SecretMetadata) map[string]WriteResult { 885 results := make(map[string]WriteResult) 886 resultsMutex := sync.Mutex{} 887 wg := sync.WaitGroup{} 888 889 for deviceName, client := range clients { 890 wg.Add(1) 891 go func(deviceName string, client hsm.Client) { 892 defer wg.Done() 893 894 var err error 895 if metadata != nil { 896 err = client.WriteSecret(ctx, path, data, metadata) 897 } 898 899 resultsMutex.Lock() 900 results[deviceName] = WriteResult{ 901 DeviceName: deviceName, 902 Error: err, 903 } 904 resultsMutex.Unlock() 905 }(deviceName, client) 906 } 907 908 wg.Wait() 909 return results 910} 911 912// tombstoneDeleteFromAllDevices performs tombstone deletion on all devices in parallel 913// Deletes secret data but leaves tombstone metadata to prevent resurrection 914func (p *ProxyClient) tombstoneDeleteFromAllDevices(ctx context.Context, clients map[string]hsm.Client, path string) map[string]WriteResult { 915 results := make(map[string]WriteResult) 916 resultsMutex := sync.Mutex{} 917 wg := sync.WaitGroup{} 918 919 // Create tombstone metadata 920 tombstoneMetadata := &hsm.SecretMetadata{ 921 Labels: map[string]string{ 922 "sync.deleted": "true", 923 "sync.timestamp": time.Now().Format(time.RFC3339), 924 "sync.version": "0", 925 }, 926 } 927 928 for deviceName, client := range clients { 929 wg.Add(1) 930 go func(deviceName string, client hsm.Client) { 931 defer wg.Done() 932 933 var err error 934 935 // First, delete the secret data 936 if deleteErr := client.DeleteSecret(ctx, path); deleteErr != nil { 937 // If delete fails, still try to write tombstone metadata 938 p.logger.V(1).Info("Failed to delete secret data, will still create tombstone", 939 "device", deviceName, "path", path, "error", deleteErr) 940 } 941 942 // Write tombstone metadata (empty data with deletion markers) 943 emptyData := make(hsm.SecretData) 944 err = client.WriteSecret(ctx, path, emptyData, tombstoneMetadata) 945 946 resultsMutex.Lock() 947 results[deviceName] = WriteResult{ 948 DeviceName: deviceName, 949 Error: err, 950 } 951 resultsMutex.Unlock() 952 }(deviceName, client) 953 } 954 955 wg.Wait() 956 return results 957} 958 959// ChangePIN handles PIN rotation requests by proxying to all available HSM agents 960func (p *ProxyClient) ChangePIN(c *gin.Context) { 961 ctx := c.Request.Context() 962 963 // Parse request body 964 var req ChangePINRequest 965 if err := c.ShouldBindJSON(&req); err != nil { 966 p.server.sendError(c, http.StatusBadRequest, "invalid_request", "Invalid request format", map[string]any{"error": err.Error()}) 967 return 968 } 969 970 // Validate request 971 if req.OldPIN == "" { 972 p.server.sendError(c, http.StatusBadRequest, "missing_old_pin", "Missing old PIN", map[string]any{"error": "old_pin is required"}) 973 return 974 } 975 if req.NewPIN == "" { 976 p.server.sendError(c, http.StatusBadRequest, "missing_new_pin", "Missing new PIN", map[string]any{"error": "new_pin is required"}) 977 return 978 } 979 if req.OldPIN == req.NewPIN { 980 p.server.sendError(c, http.StatusBadRequest, "invalid_pin_change", "Invalid PIN change", map[string]any{"error": "new PIN must be different from old PIN"}) 981 return 982 } 983 984 // Get all available HSM clients for multi-device PIN change 985 clients, ok := p.getAllAvailableGRPCClients(c) 986 if !ok { 987 return // Error already sent to client 988 } 989 990 p.logger.Info("Changing PIN on all HSM devices", "deviceCount", len(clients)) 991 992 // Perform PIN change on all devices in parallel (atomic operation) 993 results := p.changePINOnAllDevices(ctx, clients, req.OldPIN, req.NewPIN) 994 995 // Analyze results 996 var errors []string 997 successCount := 0 998 for deviceName, result := range results { 999 if result.Error != nil { 1000 errors = append(errors, fmt.Sprintf("%s: %v", deviceName, result.Error)) 1001 p.logger.Error(result.Error, "PIN change failed", "device", deviceName) 1002 } else { 1003 successCount++ 1004 } 1005 } 1006 1007 // If any device failed, report as partial failure 1008 if len(errors) > 0 { 1009 if successCount == 0 { 1010 // All devices failed 1011 p.server.sendError(c, http.StatusInternalServerError, "pin_change_failed", "PIN change failed on all devices", map[string]any{"errors": errors}) 1012 } else { 1013 // Some devices succeeded, some failed 1014 response := map[string]any{ 1015 "success_count": successCount, 1016 "total_count": len(clients), 1017 "errors": errors, 1018 "message": "PIN changed successfully on some devices, but failed on others. Manual intervention may be required.", 1019 } 1020 p.server.sendResponse(c, http.StatusPartialContent, "Partial PIN change success", response) 1021 } 1022 return 1023 } 1024 1025 // All devices succeeded 1026 response := map[string]any{ 1027 "success_count": successCount, 1028 "total_count": len(clients), 1029 "message": "PIN changed successfully on all HSM devices", 1030 } 1031 1032 p.logger.Info("PIN change completed successfully on all devices", "deviceCount", successCount) 1033 p.server.sendResponse(c, http.StatusOK, "PIN changed successfully", response) 1034} 1035 1036// changePINOnAllDevices performs PIN change on all devices in parallel 1037func (p *ProxyClient) changePINOnAllDevices(ctx context.Context, clients map[string]hsm.Client, oldPIN, newPIN string) map[string]WriteResult { 1038 results := make(map[string]WriteResult) 1039 resultsMutex := sync.Mutex{} 1040 wg := sync.WaitGroup{} 1041 1042 for deviceName, client := range clients { 1043 wg.Add(1) 1044 go func(deviceName string, client hsm.Client) { 1045 defer wg.Done() 1046 1047 err := client.ChangePIN(ctx, oldPIN, newPIN) 1048 1049 resultsMutex.Lock() 1050 results[deviceName] = WriteResult{ 1051 DeviceName: deviceName, 1052 Error: err, 1053 } 1054 resultsMutex.Unlock() 1055 }(deviceName, client) 1056 } 1057 1058 wg.Wait() 1059 return results 1060} 1061 1062// Interface compliance methods (unused in HTTP mode but required for hsm.Client interface) 1063func (p *ProxyClient) Initialize(ctx context.Context, config hsm.Config) error { return nil }