A Kubernetes operator that bridges Hardware Security Module (HSM) data storage with Kubernetes Secrets, providing true secret portability th
1/*
2Copyright 2025.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package api
18
19import (
20 "context"
21 "fmt"
22 "net/http"
23 "sync"
24 "time"
25
26 "github.com/gin-gonic/gin"
27 "github.com/go-logr/logr"
28
29 "github.com/evanjarrett/hsm-secrets-operator/internal/hsm"
30)
31
// WriteResult represents the result of writing to a single device.
// The per-device fan-out helpers in this file (write, key delete, tombstone
// delete, PIN change) all report their outcome with this type.
type WriteResult struct {
	// DeviceName is the key under which the device's client was supplied.
	DeviceName string
	// Error is the error returned by the device operation; nil means success.
	Error error
}
37
// checksumResult represents the result of getting checksum from a single device.
type checksumResult struct {
	// deviceName is the key of the device's client in the clients map.
	deviceName string
	// checksum is the value returned by the device's GetChecksum call.
	checksum string
	// err is the error from the GetChecksum call; nil on success.
	err error
}
44
// secretResult represents the result of reading secret from a single device.
type secretResult struct {
	// deviceName is the key of the device's client in the clients map.
	deviceName string
	// data is the secret payload returned by the device's ReadSecret call.
	data hsm.SecretData
	// metadata is read after the data and is used for timestamp comparison.
	// ReadSecret still reports a result when the metadata read fails, so
	// consumers must tolerate a missing value.
	metadata *hsm.SecretMetadata
	// err is the error from the ReadSecret call; nil on success.
	err error
}
52
// metadataResult represents the result of reading metadata from a single device.
type metadataResult struct {
	// deviceName is the key of the device's client in the clients map.
	deviceName string
	// metadata is the value returned by the device's ReadMetadata call.
	metadata *hsm.SecretMetadata
	// err is the error from the ReadMetadata call; nil on success.
	err error
}
59
// ProxyClient handles HTTP requests and proxies them to gRPC clients.
// It has methods that match the HTTP endpoints and handle the full request/response cycle.
type ProxyClient struct {
	// server is used to discover agents, create gRPC clients and send
	// HTTP responses/errors.
	server *Server
	// logger is the logger named "proxy-client" by NewProxyClient.
	logger logr.Logger
	// grpcClients caches one gRPC client per device.
	grpcClients map[string]hsm.Client // serialNumber -> gRPC client
	// clientsMutex guards grpcClients.
	clientsMutex sync.RWMutex
}
68
69// NewProxyClient creates a new ProxyClient that handles HTTP routing
70func NewProxyClient(server *Server, logger logr.Logger) *ProxyClient {
71 return &ProxyClient{
72 server: server,
73 logger: logger.WithName("proxy-client"),
74 grpcClients: make(map[string]hsm.Client),
75 }
76}
77
78// Helper function to parse timestamp from metadata
79func parseTimestampFromMetadata(metadata *hsm.SecretMetadata) int64 {
80 if metadata == nil || metadata.Labels == nil {
81 return 0
82 }
83
84 // Try RFC3339 timestamp first
85 if syncTimestamp, exists := metadata.Labels["sync.timestamp"]; exists {
86 if parsedTime, err := time.Parse(time.RFC3339, syncTimestamp); err == nil {
87 return parsedTime.Unix()
88 }
89 }
90
91 // Fall back to Unix timestamp in sync.version
92 if syncVersion, exists := metadata.Labels["sync.version"]; exists {
93 var timestamp int64
94 if n, err := fmt.Sscanf(syncVersion, "%d", ×tamp); n == 1 && err == nil {
95 return timestamp
96 }
97 }
98
99 return 0
100}
101
102// isSecretDeleted checks if a secret is marked as deleted via tombstone metadata
103func isSecretDeleted(metadata *hsm.SecretMetadata) bool {
104 if metadata == nil || metadata.Labels == nil {
105 return false
106 }
107 return metadata.Labels["sync.deleted"] == "true"
108}
109
110// findConsensusChecksum finds the most common checksum among results and logs inconsistencies
111func (p *ProxyClient) findConsensusChecksum(results []checksumResult, path string) (string, error) {
112 if len(results) == 0 {
113 return "", fmt.Errorf("checksum not found on any HSM device")
114 }
115
116 // Count occurrences of each checksum
117 checksumCounts := make(map[string]int)
118 for _, result := range results {
119 checksumCounts[result.checksum]++
120 }
121
122 // Find the most common checksum (consensus)
123 // In case of ties, prefer the first occurrence for deterministic behavior
124 var consensusChecksum string
125 var maxCount int
126 for _, result := range results {
127 count := checksumCounts[result.checksum]
128 if count > maxCount {
129 consensusChecksum = result.checksum
130 maxCount = count
131 }
132 }
133
134 // Log checksum inconsistencies
135 if len(checksumCounts) > 1 {
136 inconsistentDevices := make([]string, 0)
137 for _, result := range results {
138 if result.checksum != consensusChecksum {
139 inconsistentDevices = append(inconsistentDevices, result.deviceName)
140 }
141 }
142 p.logger.Info("Checksum inconsistency detected, using consensus",
143 "path", path,
144 "consensus", consensusChecksum,
145 "consensus_count", maxCount,
146 "total_devices", len(results),
147 "inconsistent_devices", inconsistentDevices)
148 }
149
150 return consensusChecksum, nil
151}
152
153// logMultiDeviceOperation logs when operations are performed across multiple devices with sync information
154func (p *ProxyClient) logMultiDeviceOperation(deviceNames []string, selectedDevice, operationName, path, syncDetails string) {
155 p.logger.Info(fmt.Sprintf("%s found on multiple devices, using most recent version", operationName),
156 "path", path,
157 "devices", deviceNames,
158 "selected", selectedDevice,
159 "syncDetails", syncDetails)
160}
161
162// findMostRecentSecretResult finds the most recent secret result based on metadata timestamps
163func (p *ProxyClient) findMostRecentSecretResult(results []secretResult, path string) (hsm.SecretData, error) {
164 if len(results) == 0 {
165 return nil, fmt.Errorf("secret not found on any HSM device")
166 }
167
168 // Find most recent version based on metadata timestamps
169 bestResult := results[0]
170 bestTimestamp := parseTimestampFromMetadata(bestResult.metadata)
171
172 for _, result := range results[1:] {
173 timestamp := parseTimestampFromMetadata(result.metadata)
174 if timestamp > bestTimestamp {
175 bestResult = result
176 bestTimestamp = timestamp
177 }
178 }
179
180 // Log sync issues when multiple devices have different versions
181 if len(results) > 1 {
182 deviceNames := make([]string, len(results))
183 for i, result := range results {
184 deviceNames[i] = result.deviceName
185 }
186 p.logMultiDeviceOperation(deviceNames, bestResult.deviceName, "Secret", path, fmt.Sprintf("timestamp: %d", bestTimestamp))
187 }
188
189 // Check if the most recent result is a tombstone (deleted secret)
190 if isSecretDeleted(bestResult.metadata) {
191 return nil, fmt.Errorf("secret not found on any HSM device")
192 }
193
194 return bestResult.data, nil
195}
196
197// findMostRecentMetadataResult finds the most recent metadata result based on timestamps
198func (p *ProxyClient) findMostRecentMetadataResult(results []metadataResult, path string) (*hsm.SecretMetadata, error) {
199 if len(results) == 0 {
200 return nil, fmt.Errorf("metadata not found on any HSM device")
201 }
202
203 // Find most recent version based on metadata timestamps
204 bestResult := results[0]
205 bestTimestamp := parseTimestampFromMetadata(bestResult.metadata)
206
207 for _, result := range results[1:] {
208 timestamp := parseTimestampFromMetadata(result.metadata)
209 if timestamp > bestTimestamp {
210 bestResult = result
211 bestTimestamp = timestamp
212 }
213 }
214
215 // Log sync issues when multiple devices have different versions
216 if len(results) > 1 {
217 deviceNames := make([]string, len(results))
218 for i, result := range results {
219 deviceNames[i] = result.deviceName
220 }
221 p.logMultiDeviceOperation(deviceNames, bestResult.deviceName, "Metadata", path, fmt.Sprintf("timestamp: %d", bestTimestamp))
222 }
223
224 // Return the most recent metadata, even if it's a tombstone
225 // This allows callers to see deletion information when needed
226 return bestResult.metadata, nil
227}
228
229// validatePathParam validates the path parameter and sends error if missing
230// Returns (path, true) on success, or ("", false) if path is missing (error already sent to client)
231func (p *ProxyClient) validatePathParam(c *gin.Context) (string, bool) {
232 path := c.Param("path")
233 if path == "" {
234 p.server.sendError(c, http.StatusBadRequest, "missing_path", "Secret path is required", nil)
235 return "", false
236 }
237 return path, true
238}
239
240// getAllAvailableGRPCClients returns all available gRPC clients for mirroring operations
241// Returns (clients, true) on success, or (nil, false) if no agents available (error already sent to client)
242func (p *ProxyClient) getAllAvailableGRPCClients(c *gin.Context) (map[string]hsm.Client, bool) {
243 // Use operator namespace where HSMPools and agents are located
244 namespace := p.server.operatorNamespace
245
246 // Get all available devices
247 devices, err := p.server.getAllAvailableAgents(c.Request.Context(), namespace)
248 if err != nil {
249 p.server.sendError(c, http.StatusServiceUnavailable, "no_agents", "No HSM agents available", map[string]any{
250 "error": err.Error(),
251 })
252 return nil, false
253 }
254
255 clients := make(map[string]hsm.Client)
256 p.clientsMutex.Lock()
257 defer p.clientsMutex.Unlock()
258
259 for _, device := range devices {
260 // Try to get existing client for this device
261 if client, exists := p.grpcClients[device.SerialNumber]; exists && client.IsConnected() {
262 clients[device.SerialNumber] = client
263 continue
264 }
265
266 // Close existing client for this device if it exists but is not connected
267 if oldClient, exists := p.grpcClients[device.SerialNumber]; exists {
268 if closeErr := oldClient.Close(); closeErr != nil {
269 p.logger.V(1).Info("Error closing old gRPC client", "node", device.NodeName, "serialNumber", device.SerialNumber, "error", closeErr)
270 }
271 delete(p.grpcClients, device.SerialNumber)
272 }
273
274 // Create new gRPC client
275 grpcClient, err := p.server.createGRPCClient(c.Request.Context(), device)
276 if err != nil {
277 p.logger.V(1).Info("Failed to create gRPC client", "node", device.NodeName, "serialNumber", device.SerialNumber, "error", err)
278 continue
279 }
280
281 // Cache and include the client
282 p.grpcClients[device.SerialNumber] = grpcClient
283 clients[device.SerialNumber] = grpcClient
284 p.logger.V(1).Info("Created new gRPC client", "node", device.NodeName, "serialNumber", device.SerialNumber)
285 }
286
287 if len(clients) == 0 {
288 p.server.sendError(c, http.StatusServiceUnavailable, "no_agents", "No HSM agents available", nil)
289 return nil, false
290 }
291
292 return clients, true
293}
294
295// GetInfo handles GET /hsm/info
296func (p *ProxyClient) GetInfo(c *gin.Context) {
297 clients, ok := p.getAllAvailableGRPCClients(c)
298 if !ok {
299 return
300 }
301
302 type infoResult struct {
303 deviceName string
304 info *hsm.HSMInfo
305 err error
306 }
307
308 resultsChan := make(chan infoResult, len(clients))
309 for deviceName, grpcClient := range clients {
310 go func(deviceName string, grpcClient hsm.Client) {
311 info, err := grpcClient.GetInfo(c)
312 resultsChan <- infoResult{deviceName, info, err}
313 }(deviceName, grpcClient)
314 }
315
316 // Collect successful results
317 deviceInfos := make(map[string]*hsm.HSMInfo, len(clients))
318 for i := 0; i < len(clients); i++ {
319 result := <-resultsChan
320 if result.err == nil {
321 deviceInfos[result.deviceName] = result.info
322 } else {
323 p.logger.V(1).Info("Failed to get info from device", "device", result.deviceName, "error", result.err)
324 }
325 }
326
327 if len(deviceInfos) == 0 {
328 p.server.sendError(c, http.StatusInternalServerError, "info_failed", "Failed to get info from any HSM device", nil)
329 return
330 }
331
332 response := GetInfoResponse{DeviceInfos: deviceInfos}
333 p.server.sendResponse(c, http.StatusOK, "HSM info retrieved successfully", response)
334}
335
336// ListSecrets handles GET /hsm/secrets
337func (p *ProxyClient) ListSecrets(c *gin.Context) {
338 prefix := c.Query("prefix")
339
340 clients, ok := p.getAllAvailableGRPCClients(c)
341 if !ok {
342 return
343 }
344 type secretsResult struct {
345 deviceName string
346 secrets []string
347 err error
348 }
349
350 resultsChan := make(chan secretsResult, len(clients))
351 for deviceName, grpcClient := range clients {
352 go func(deviceName string, grpcClient hsm.Client) {
353 secrets, err := grpcClient.ListSecrets(c, prefix)
354 resultsChan <- secretsResult{deviceName, secrets, err}
355 }(deviceName, grpcClient)
356 }
357
358 // Collect results and deduplicate
359 secretsSet := make(map[string]bool)
360 for i := 0; i < len(clients); i++ {
361 result := <-resultsChan
362 if result.err != nil {
363 p.logger.V(1).Info("Failed to list secrets from device", "device", result.deviceName, "error", result.err)
364 continue
365 }
366
367 // Add all secrets from this device to the union set
368 for _, secretPath := range result.secrets {
369 secretsSet[secretPath] = true
370 }
371 }
372
373 // Convert set to slice
374 allSecrets := make([]string, 0, len(secretsSet))
375 for secretPath := range secretsSet {
376 allSecrets = append(allSecrets, secretPath)
377 }
378
379 response := ListSecretsResponse{
380 Secrets: allSecrets,
381 Count: len(allSecrets),
382 Prefix: prefix,
383 }
384 p.server.sendResponse(c, http.StatusOK, "Secrets listed successfully", response)
385}
386
387// ReadSecret handles GET /hsm/secrets/:path
388func (p *ProxyClient) ReadSecret(c *gin.Context) {
389 path, ok := p.validatePathParam(c)
390 if !ok {
391 return
392 }
393
394 clients, ok := p.getAllAvailableGRPCClients(c)
395 if !ok {
396 return
397 }
398
399 // Read from all devices in parallel
400 resultsChan := make(chan secretResult, len(clients))
401 for deviceName, grpcClient := range clients {
402 go func(deviceName string, grpcClient hsm.Client) {
403 // Read secret data
404 data, err := grpcClient.ReadSecret(c.Request.Context(), path)
405 if err != nil {
406 resultsChan <- secretResult{deviceName: deviceName, err: err}
407 return
408 }
409
410 // Read metadata for timestamp comparison
411 metadata, metaErr := grpcClient.ReadMetadata(c.Request.Context(), path)
412 if metaErr != nil {
413 p.logger.V(1).Info("Failed to read metadata for version comparison", "device", deviceName, "path", path, "error", metaErr)
414 }
415
416 resultsChan <- secretResult{
417 deviceName: deviceName,
418 data: data,
419 metadata: metadata,
420 }
421 }(deviceName, grpcClient)
422 }
423
424 // Collect successful results
425 var successfulResults []secretResult
426 for i := 0; i < len(clients); i++ {
427 result := <-resultsChan
428 if result.err != nil {
429 p.logger.V(1).Info("Failed to read secret from device", "device", result.deviceName, "path", path, "error", result.err)
430 continue
431 }
432 successfulResults = append(successfulResults, result)
433 }
434
435 // Use helper to find most recent result based on timestamps
436 data, err := p.findMostRecentSecretResult(successfulResults, path)
437 if err != nil {
438 p.server.sendError(c, http.StatusNotFound, "secret_not_found", err.Error(), nil)
439 return
440 }
441
442 // Get checksum for the secret to include in response
443 checksum := ""
444 if len(successfulResults) > 0 {
445 // Get checksum from any available client (they should be consistent after consensus)
446 for _, grpcClient := range clients {
447 if checksumResult, checksumErr := grpcClient.GetChecksum(c.Request.Context(), path); checksumErr == nil {
448 checksum = checksumResult
449 break
450 }
451 }
452 }
453
454 response := ReadSecretResponse{
455 Path: path,
456 Data: data,
457 Checksum: checksum,
458 DeviceCount: len(successfulResults),
459 }
460 p.server.sendResponse(c, http.StatusOK, "Secret read successfully", response)
461}
462
463// WriteSecret handles POST/PUT /hsm/secrets/:path with mirroring support
464func (p *ProxyClient) WriteSecret(c *gin.Context) {
465 path, ok := p.validatePathParam(c)
466 if !ok {
467 return
468 }
469
470 // Parse request body
471 var req struct {
472 Data map[string]string `json:"data" binding:"required"`
473 Metadata *hsm.SecretMetadata `json:"metadata,omitempty"`
474 Mirror *bool `json:"mirror,omitempty"` // Enable/disable mirroring for this request
475 }
476 if err := c.ShouldBindJSON(&req); err != nil {
477 p.server.sendError(c, http.StatusBadRequest, "parse_error", "Failed to parse request body", map[string]any{
478 "error": err.Error(),
479 })
480 return
481 }
482
483 // Convert string data to byte data
484 data := make(hsm.SecretData)
485 for key, value := range req.Data {
486 data[key] = []byte(value)
487 }
488
489 // Get all available clients for mirroring
490 clients, ok := p.getAllAvailableGRPCClients(c)
491 if !ok {
492 return
493 }
494
495 // Add mirroring metadata
496 metadata := req.Metadata
497 if metadata == nil {
498 metadata = &hsm.SecretMetadata{Labels: make(map[string]string)}
499 }
500 if metadata.Labels == nil {
501 metadata.Labels = make(map[string]string)
502 }
503 metadata.Labels["sync.version"] = fmt.Sprintf("%d", time.Now().Unix())
504 metadata.Labels["sync.timestamp"] = time.Now().Format(time.RFC3339)
505
506 // Write to all devices in parallel
507 results := p.writeToAllDevices(c.Request.Context(), clients, path, data, metadata)
508
509 // Check results - log failures but succeed if at least one device works
510 successful := 0
511 for deviceName, result := range results {
512 if result.Error == nil {
513 successful++
514 } else {
515 p.logger.Error(result.Error, "Failed to write to device", "device", deviceName, "path", path)
516 }
517 }
518
519 // Succeed if we wrote to at least one device
520 if successful > 0 {
521 if successful < len(clients) {
522 p.logger.Info("Secret written to subset of devices",
523 "path", path,
524 "successful", successful,
525 "total", len(clients))
526 }
527
528 response := WriteSecretResponse{
529 Path: path,
530 Keys: len(data),
531 }
532
533 p.server.sendResponse(c, http.StatusCreated, "Secret written successfully", response)
534 } else {
535 // All devices failed
536 p.server.sendError(c, http.StatusInternalServerError, "write_failed", "Failed to write secret to any HSM device", nil)
537 }
538}
539
540// DeleteSecret handles DELETE /hsm/secrets/:path with mirroring support
541func (p *ProxyClient) DeleteSecret(c *gin.Context) {
542 path, ok := p.validatePathParam(c)
543 if !ok {
544 return
545 }
546
547 // Get all available clients for mirroring delete
548 clients, ok := p.getAllAvailableGRPCClients(c)
549 if !ok {
550 return
551 }
552
553 // Perform tombstone deletion from all devices in parallel
554 results := p.tombstoneDeleteFromAllDevices(c.Request.Context(), clients, path)
555
556 // Check results
557 successful := 0
558 var errors []string
559 deviceResults := make(map[string]any)
560
561 for deviceName, result := range results {
562 deviceResults[deviceName] = map[string]any{
563 "success": result.Error == nil,
564 "error": func() string {
565 if result.Error != nil {
566 return result.Error.Error()
567 }
568 return ""
569 }(),
570 }
571
572 if result.Error == nil {
573 successful++
574 } else {
575 errors = append(errors, fmt.Sprintf("%s: %v", deviceName, result.Error))
576 p.logger.Error(result.Error, "Failed to delete from device", "device", deviceName, "path", path)
577 }
578 }
579
580 // Consider the operation successful if we deleted from at least one device
581 if successful > 0 {
582 response := DeleteSecretResponse{
583 Path: path,
584 Devices: len(clients),
585 DeviceResults: deviceResults,
586 }
587 if len(errors) > 0 {
588 response.Warnings = errors
589 }
590
591 statusCode := http.StatusOK
592 message := "Secret deleted successfully"
593 if successful < len(clients) {
594 statusCode = http.StatusPartialContent // 206 indicates partial success
595 message = fmt.Sprintf("Secret deleted from %d/%d devices", successful, len(clients))
596 }
597
598 p.server.sendResponse(c, statusCode, message, response)
599 } else {
600 // All devices failed
601 p.server.sendError(c, http.StatusInternalServerError, "delete_failed", "Failed to delete secret from any HSM device", map[string]any{
602 "errors": errors,
603 "deviceResults": deviceResults,
604 "path": path,
605 })
606 }
607}
608
609// DeleteSecretKey handles DELETE /hsm/secrets/:path/:key with mirroring support
610func (p *ProxyClient) DeleteSecretKey(c *gin.Context) {
611 path, ok := p.validatePathParam(c)
612 if !ok {
613 return
614 }
615
616 key := c.Param("key")
617 if key == "" {
618 p.server.sendError(c, http.StatusBadRequest, "missing_key", "Key parameter is required", nil)
619 return
620 }
621
622 // Get all available clients for mirroring delete
623 clients, ok := p.getAllAvailableGRPCClients(c)
624 if !ok {
625 return
626 }
627
628 // Delete key from all devices in parallel
629 results := p.deleteKeyFromAllDevices(c.Request.Context(), clients, path, key)
630
631 // Check results
632 successful := 0
633 var errors []string
634 deviceResults := make(map[string]any)
635
636 for deviceName, result := range results {
637 deviceResults[deviceName] = map[string]any{
638 "success": result.Error == nil,
639 "error": func() string {
640 if result.Error != nil {
641 return result.Error.Error()
642 }
643 return ""
644 }(),
645 }
646
647 if result.Error == nil {
648 successful++
649 } else {
650 errors = append(errors, fmt.Sprintf("%s: %v", deviceName, result.Error))
651 p.logger.Error(result.Error, "Failed to delete key from device", "device", deviceName, "path", path, "key", key)
652 }
653 }
654
655 // Consider the operation successful if we deleted from at least one device
656 if successful > 0 {
657 response := map[string]any{
658 "path": path,
659 "key": key,
660 "devices": len(clients),
661 "deviceResults": deviceResults,
662 }
663 if len(errors) > 0 {
664 response["warnings"] = errors
665 }
666
667 statusCode := http.StatusOK
668 message := "Key deleted successfully"
669 if successful < len(clients) {
670 statusCode = http.StatusPartialContent
671 message = fmt.Sprintf("Key deleted from %d/%d devices", successful, len(clients))
672 }
673
674 p.server.sendResponse(c, statusCode, message, response)
675 } else {
676 // All devices failed
677 p.server.sendError(c, http.StatusInternalServerError, "delete_key_failed", "Failed to delete key from any HSM device", map[string]any{
678 "errors": errors,
679 "deviceResults": deviceResults,
680 "path": path,
681 "key": key,
682 })
683 }
684}
685
686// deleteKeyFromAllDevices deletes a specific key from all devices in parallel
687func (p *ProxyClient) deleteKeyFromAllDevices(ctx context.Context, clients map[string]hsm.Client, path, key string) map[string]WriteResult {
688 results := make(map[string]WriteResult)
689 resultsMutex := sync.Mutex{}
690 wg := sync.WaitGroup{}
691
692 for deviceName, client := range clients {
693 wg.Add(1)
694 go func(deviceName string, client hsm.Client) {
695 defer wg.Done()
696
697 err := client.DeleteSecretKey(ctx, path, key)
698
699 resultsMutex.Lock()
700 results[deviceName] = WriteResult{
701 DeviceName: deviceName,
702 Error: err,
703 }
704 resultsMutex.Unlock()
705 }(deviceName, client)
706 }
707
708 wg.Wait()
709 return results
710}
711
712// ReadMetadata handles GET /hsm/secrets/:path/metadata
713func (p *ProxyClient) ReadMetadata(c *gin.Context) {
714 path, ok := p.validatePathParam(c)
715 if !ok {
716 return
717 }
718
719 clients, ok := p.getAllAvailableGRPCClients(c)
720 if !ok {
721 return
722 }
723
724 // Read metadata from all devices in parallel
725 resultsChan := make(chan metadataResult, len(clients))
726 for deviceName, grpcClient := range clients {
727 go func(deviceName string, grpcClient hsm.Client) {
728 metadata, err := grpcClient.ReadMetadata(c.Request.Context(), path)
729 resultsChan <- metadataResult{
730 deviceName: deviceName,
731 metadata: metadata,
732 err: err,
733 }
734 }(deviceName, grpcClient)
735 }
736
737 // Collect successful results
738 var successfulResults []metadataResult
739 for i := 0; i < len(clients); i++ {
740 result := <-resultsChan
741 if result.err != nil {
742 p.logger.V(1).Info("Failed to read metadata from device", "device", result.deviceName, "path", path, "error", result.err)
743 continue
744 }
745 successfulResults = append(successfulResults, result)
746 }
747
748 // Use helper to find most recent result based on timestamps
749 metadata, err := p.findMostRecentMetadataResult(successfulResults, path)
750 if err != nil {
751 p.server.sendError(c, http.StatusNotFound, "metadata_not_found", err.Error(), nil)
752 return
753 }
754
755 response := ReadMetadataResponse{Path: path, Metadata: metadata}
756 p.server.sendResponse(c, http.StatusOK, "Metadata read successfully", response)
757}
758
759// GetChecksum handles GET /hsm/secrets/:path/checksum
760func (p *ProxyClient) GetChecksum(c *gin.Context) {
761 path, ok := p.validatePathParam(c)
762 if !ok {
763 return
764 }
765
766 clients, ok := p.getAllAvailableGRPCClients(c)
767 if !ok {
768 return
769 }
770
771 // Get checksums from all devices in parallel
772 resultsChan := make(chan checksumResult, len(clients))
773 for deviceName, grpcClient := range clients {
774 go func(deviceName string, grpcClient hsm.Client) {
775 checksum, err := grpcClient.GetChecksum(c.Request.Context(), path)
776 resultsChan <- checksumResult{
777 deviceName: deviceName,
778 checksum: checksum,
779 err: err,
780 }
781 }(deviceName, grpcClient)
782 }
783
784 // Collect successful results
785 var successfulResults []checksumResult
786 for i := 0; i < len(clients); i++ {
787 result := <-resultsChan
788 if result.err != nil {
789 p.logger.V(1).Info("Failed to get checksum from device", "device", result.deviceName, "path", path, "error", result.err)
790 continue
791 }
792 successfulResults = append(successfulResults, result)
793 }
794
795 // Use helper to find consensus checksum
796 consensusChecksum, err := p.findConsensusChecksum(successfulResults, path)
797 if err != nil {
798 p.server.sendError(c, http.StatusNotFound, "checksum_not_found", err.Error(), nil)
799 return
800 }
801
802 response := GetChecksumResponse{Path: path, Checksum: consensusChecksum}
803 p.server.sendResponse(c, http.StatusOK, "Checksum retrieved successfully", response)
804}
805
806// IsConnected handles GET /hsm/status
807func (p *ProxyClient) IsConnected(c *gin.Context) {
808 clients, ok := p.getAllAvailableGRPCClients(c)
809 if !ok {
810 return
811 }
812
813 devices := make(map[string]bool, len(clients))
814 connectedCount := 0
815
816 for deviceName, grpcClient := range clients {
817 connected := grpcClient.IsConnected()
818 devices[deviceName] = connected
819
820 if connected {
821 connectedCount++
822 }
823 }
824
825 // Log connectivity issues for operational visibility
826 if connectedCount == 0 {
827 p.logger.Info("No HSM devices are connected", "totalDevices", len(clients))
828 } else if connectedCount < len(clients) {
829 p.logger.Info("Partial HSM connectivity detected",
830 "connectedDevices", connectedCount,
831 "totalDevices", len(clients))
832 }
833
834 response := IsConnectedResponse{
835 Devices: devices,
836 TotalDevices: len(clients),
837 }
838
839 p.server.sendResponse(c, http.StatusOK, "HSM connection status retrieved", response)
840}
841
842// Close closes all cached gRPC clients
843func (p *ProxyClient) Close() error {
844 p.clientsMutex.Lock()
845 defer p.clientsMutex.Unlock()
846
847 var lastErr error
848 for deviceName, client := range p.grpcClients {
849 if err := client.Close(); err != nil {
850 p.logger.Error(err, "Failed to close gRPC client", "device", deviceName)
851 lastErr = err
852 }
853 }
854
855 // Clear the map
856 p.grpcClients = make(map[string]hsm.Client)
857 return lastErr
858}
859
860// CleanupDisconnectedClients removes disconnected clients from the cache
861func (p *ProxyClient) CleanupDisconnectedClients() {
862 p.clientsMutex.Lock()
863 defer p.clientsMutex.Unlock()
864
865 for deviceName, client := range p.grpcClients {
866 if !client.IsConnected() {
867 p.logger.V(1).Info("Removing disconnected gRPC client", "device", deviceName)
868 if closeErr := client.Close(); closeErr != nil {
869 p.logger.V(1).Info("Error closing disconnected gRPC client", "device", deviceName, "error", closeErr)
870 }
871 delete(p.grpcClients, deviceName)
872 }
873 }
874}
875
876// GetClientCount returns the number of cached gRPC clients
877func (p *ProxyClient) GetClientCount() int {
878 p.clientsMutex.RLock()
879 defer p.clientsMutex.RUnlock()
880 return len(p.grpcClients)
881}
882
883// writeToAllDevices writes secret data to all devices in parallel
884func (p *ProxyClient) writeToAllDevices(ctx context.Context, clients map[string]hsm.Client, path string, data hsm.SecretData, metadata *hsm.SecretMetadata) map[string]WriteResult {
885 results := make(map[string]WriteResult)
886 resultsMutex := sync.Mutex{}
887 wg := sync.WaitGroup{}
888
889 for deviceName, client := range clients {
890 wg.Add(1)
891 go func(deviceName string, client hsm.Client) {
892 defer wg.Done()
893
894 var err error
895 if metadata != nil {
896 err = client.WriteSecret(ctx, path, data, metadata)
897 }
898
899 resultsMutex.Lock()
900 results[deviceName] = WriteResult{
901 DeviceName: deviceName,
902 Error: err,
903 }
904 resultsMutex.Unlock()
905 }(deviceName, client)
906 }
907
908 wg.Wait()
909 return results
910}
911
912// tombstoneDeleteFromAllDevices performs tombstone deletion on all devices in parallel
913// Deletes secret data but leaves tombstone metadata to prevent resurrection
914func (p *ProxyClient) tombstoneDeleteFromAllDevices(ctx context.Context, clients map[string]hsm.Client, path string) map[string]WriteResult {
915 results := make(map[string]WriteResult)
916 resultsMutex := sync.Mutex{}
917 wg := sync.WaitGroup{}
918
919 // Create tombstone metadata
920 tombstoneMetadata := &hsm.SecretMetadata{
921 Labels: map[string]string{
922 "sync.deleted": "true",
923 "sync.timestamp": time.Now().Format(time.RFC3339),
924 "sync.version": "0",
925 },
926 }
927
928 for deviceName, client := range clients {
929 wg.Add(1)
930 go func(deviceName string, client hsm.Client) {
931 defer wg.Done()
932
933 var err error
934
935 // First, delete the secret data
936 if deleteErr := client.DeleteSecret(ctx, path); deleteErr != nil {
937 // If delete fails, still try to write tombstone metadata
938 p.logger.V(1).Info("Failed to delete secret data, will still create tombstone",
939 "device", deviceName, "path", path, "error", deleteErr)
940 }
941
942 // Write tombstone metadata (empty data with deletion markers)
943 emptyData := make(hsm.SecretData)
944 err = client.WriteSecret(ctx, path, emptyData, tombstoneMetadata)
945
946 resultsMutex.Lock()
947 results[deviceName] = WriteResult{
948 DeviceName: deviceName,
949 Error: err,
950 }
951 resultsMutex.Unlock()
952 }(deviceName, client)
953 }
954
955 wg.Wait()
956 return results
957}
958
959// ChangePIN handles PIN rotation requests by proxying to all available HSM agents
960func (p *ProxyClient) ChangePIN(c *gin.Context) {
961 ctx := c.Request.Context()
962
963 // Parse request body
964 var req ChangePINRequest
965 if err := c.ShouldBindJSON(&req); err != nil {
966 p.server.sendError(c, http.StatusBadRequest, "invalid_request", "Invalid request format", map[string]any{"error": err.Error()})
967 return
968 }
969
970 // Validate request
971 if req.OldPIN == "" {
972 p.server.sendError(c, http.StatusBadRequest, "missing_old_pin", "Missing old PIN", map[string]any{"error": "old_pin is required"})
973 return
974 }
975 if req.NewPIN == "" {
976 p.server.sendError(c, http.StatusBadRequest, "missing_new_pin", "Missing new PIN", map[string]any{"error": "new_pin is required"})
977 return
978 }
979 if req.OldPIN == req.NewPIN {
980 p.server.sendError(c, http.StatusBadRequest, "invalid_pin_change", "Invalid PIN change", map[string]any{"error": "new PIN must be different from old PIN"})
981 return
982 }
983
984 // Get all available HSM clients for multi-device PIN change
985 clients, ok := p.getAllAvailableGRPCClients(c)
986 if !ok {
987 return // Error already sent to client
988 }
989
990 p.logger.Info("Changing PIN on all HSM devices", "deviceCount", len(clients))
991
992 // Perform PIN change on all devices in parallel (atomic operation)
993 results := p.changePINOnAllDevices(ctx, clients, req.OldPIN, req.NewPIN)
994
995 // Analyze results
996 var errors []string
997 successCount := 0
998 for deviceName, result := range results {
999 if result.Error != nil {
1000 errors = append(errors, fmt.Sprintf("%s: %v", deviceName, result.Error))
1001 p.logger.Error(result.Error, "PIN change failed", "device", deviceName)
1002 } else {
1003 successCount++
1004 }
1005 }
1006
1007 // If any device failed, report as partial failure
1008 if len(errors) > 0 {
1009 if successCount == 0 {
1010 // All devices failed
1011 p.server.sendError(c, http.StatusInternalServerError, "pin_change_failed", "PIN change failed on all devices", map[string]any{"errors": errors})
1012 } else {
1013 // Some devices succeeded, some failed
1014 response := map[string]any{
1015 "success_count": successCount,
1016 "total_count": len(clients),
1017 "errors": errors,
1018 "message": "PIN changed successfully on some devices, but failed on others. Manual intervention may be required.",
1019 }
1020 p.server.sendResponse(c, http.StatusPartialContent, "Partial PIN change success", response)
1021 }
1022 return
1023 }
1024
1025 // All devices succeeded
1026 response := map[string]any{
1027 "success_count": successCount,
1028 "total_count": len(clients),
1029 "message": "PIN changed successfully on all HSM devices",
1030 }
1031
1032 p.logger.Info("PIN change completed successfully on all devices", "deviceCount", successCount)
1033 p.server.sendResponse(c, http.StatusOK, "PIN changed successfully", response)
1034}
1035
1036// changePINOnAllDevices performs PIN change on all devices in parallel
1037func (p *ProxyClient) changePINOnAllDevices(ctx context.Context, clients map[string]hsm.Client, oldPIN, newPIN string) map[string]WriteResult {
1038 results := make(map[string]WriteResult)
1039 resultsMutex := sync.Mutex{}
1040 wg := sync.WaitGroup{}
1041
1042 for deviceName, client := range clients {
1043 wg.Add(1)
1044 go func(deviceName string, client hsm.Client) {
1045 defer wg.Done()
1046
1047 err := client.ChangePIN(ctx, oldPIN, newPIN)
1048
1049 resultsMutex.Lock()
1050 results[deviceName] = WriteResult{
1051 DeviceName: deviceName,
1052 Error: err,
1053 }
1054 resultsMutex.Unlock()
1055 }(deviceName, client)
1056 }
1057
1058 wg.Wait()
1059 return results
1060}
1061
// Interface compliance methods (unused in HTTP mode but required for hsm.Client interface)

// Initialize is a no-op: it ignores ctx and config and always returns nil.
func (p *ProxyClient) Initialize(ctx context.Context, config hsm.Config) error { return nil }