A Kubernetes operator that bridges Hardware Security Module (HSM) data storage with Kubernetes Secrets, providing true secret portability th
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

implement agent pods to do the execution

+1949 -48
+4 -2
Dockerfile
··· 1 - # Build the manager binary only 1 + # Build the manager and agent binaries 2 2 FROM golang:1.24-alpine AS builder 3 3 ARG TARGETOS 4 4 ARG TARGETARCH ··· 11 11 # and so that source changes don't invalidate our downloaded layer 12 12 RUN go mod download 13 13 14 - COPY cmd/manager cmd/manager 14 + COPY cmd/ cmd/ 15 15 COPY api/ api/ 16 16 COPY internal/ internal/ 17 17 ··· 20 20 # the docker BUILDPLATFORM arg will be linux/arm64 when for Apple x86 it will be linux/amd64. Therefore, 21 21 # by leaving it empty we can ensure that the container and binary shipped on it will have the same platform. 22 22 RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/manager/main.go 23 + RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o agent cmd/agent/main.go 23 24 24 25 FROM alpine:3.22 AS base 25 26 ··· 55 56 56 57 WORKDIR / 57 58 COPY --from=builder /workspace/manager . 59 + COPY --from=builder /workspace/agent . 58 60 USER 65532:65532 59 61 60 62 ENTRYPOINT ["/manager"]
+2 -2
Dockerfile.discovery
··· 52 52 COPY --from=base /usr/local/ /usr/local/ 53 53 54 54 WORKDIR / 55 - COPY --from=builder /workspace/manager . 55 + COPY --from=builder /workspace/discovery . 56 56 USER 65532:65532 57 57 58 - ENTRYPOINT ["/manager"] 58 + ENTRYPOINT ["/discovery"]
+2 -1
Makefile
··· 167 167 ##@ Build 168 168 169 169 .PHONY: build 170 - build: manifests generate fmt vet ## Build manager and discovery binaries. 170 + build: manifests generate fmt vet ## Build manager, discovery, and agent binaries. 171 171 go build -o bin/manager cmd/manager/main.go 172 172 go build -o bin/discovery cmd/discovery/main.go 173 + go build -o bin/agent cmd/agent/main.go 173 174 174 175 .PHONY: run 175 176 run: manifests generate fmt vet ## Run a controller from your host.
+162
cmd/agent/main.go
··· 1 + /* 2 + Copyright 2025. 3 + 4 + Licensed under the Apache License, Version 2.0 (the "License"); 5 + you may not use this file except in compliance with the License. 6 + You may obtain a copy of the License at 7 + 8 + http://www.apache.org/licenses/LICENSE-2.0 9 + 10 + Unless required by applicable law or agreed to in writing, software 11 + distributed under the License is distributed on an "AS IS" BASIS, 12 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 + See the License for the specific language governing permissions and 14 + limitations under the License. 15 + */ 16 + 17 + package main 18 + 19 + import ( 20 + "context" 21 + "flag" 22 + "fmt" 23 + "os" 24 + "os/signal" 25 + "syscall" 26 + "time" 27 + 28 + ctrl "sigs.k8s.io/controller-runtime" 29 + "sigs.k8s.io/controller-runtime/pkg/log/zap" 30 + 31 + "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 32 + "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 33 + ) 34 + 35 + var ( 36 + setupLog = ctrl.Log.WithName("agent") 37 + ) 38 + 39 + func main() { 40 + var deviceName string 41 + var port int 42 + var healthPort int 43 + var pkcs11LibraryPath string 44 + var slotID int 45 + var tokenLabel string 46 + var pin string 47 + 48 + flag.StringVar(&deviceName, "device-name", "", "Name of the HSM device this agent serves") 49 + flag.IntVar(&port, "port", 8092, "Port for the HSM agent API") 50 + flag.IntVar(&healthPort, "health-port", 8093, "Port for health checks") 51 + flag.StringVar(&pkcs11LibraryPath, "pkcs11-library", "", "Path to PKCS#11 library") 52 + flag.IntVar(&slotID, "slot-id", 0, "PKCS#11 slot ID") 53 + flag.StringVar(&tokenLabel, "token-label", "", "PKCS#11 token label") 54 + flag.StringVar(&pin, "pin", "", "PKCS#11 PIN (use environment variable HSM_PIN for security)") 55 + 56 + opts := zap.Options{ 57 + Development: true, 58 + } 59 + opts.BindFlags(flag.CommandLine) 60 + flag.Parse() 61 + 62 + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) 63 + 64 
+ // Validate required parameters 65 + if deviceName == "" { 66 + deviceName = os.Getenv("HSM_DEVICE_NAME") 67 + if deviceName == "" { 68 + setupLog.Error(fmt.Errorf("device name required"), "Device name must be provided via --device-name or HSM_DEVICE_NAME environment variable") 69 + os.Exit(1) 70 + } 71 + } 72 + 73 + // Get configuration from environment variables if not provided via flags 74 + if pkcs11LibraryPath == "" { 75 + pkcs11LibraryPath = os.Getenv("PKCS11_LIBRARY_PATH") 76 + } 77 + if tokenLabel == "" { 78 + tokenLabel = os.Getenv("PKCS11_TOKEN_LABEL") 79 + } 80 + if pin == "" { 81 + pin = os.Getenv("PKCS11_PIN") 82 + } 83 + 84 + setupLog.Info("Starting HSM agent", 85 + "device", deviceName, 86 + "port", port, 87 + "health-port", healthPort, 88 + "pkcs11-library", pkcs11LibraryPath, 89 + "slot-id", slotID, 90 + "token-label", tokenLabel, 91 + ) 92 + 93 + // Create HSM client 94 + var hsmClient hsm.Client 95 + 96 + if pkcs11LibraryPath != "" { 97 + // Create PKCS#11 client for production use 98 + config := hsm.Config{ 99 + PKCS11LibraryPath: pkcs11LibraryPath, 100 + SlotID: uint(slotID), 101 + PIN: pin, 102 + TokenLabel: tokenLabel, 103 + ConnectionTimeout: 30 * time.Second, 104 + RetryAttempts: 3, 105 + RetryDelay: 2 * time.Second, 106 + } 107 + 108 + hsmClient = hsm.NewPKCS11Client() 109 + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 110 + defer cancel() 111 + 112 + if err := hsmClient.Initialize(ctx, config); err != nil { 113 + setupLog.Error(err, "Failed to initialize PKCS#11 client") 114 + os.Exit(1) 115 + } 116 + } else { 117 + // Use mock client for testing 118 + setupLog.Info("No PKCS#11 library specified, using mock client") 119 + hsmClient = hsm.NewMockClient() 120 + 121 + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 122 + defer cancel() 123 + 124 + if err := hsmClient.Initialize(ctx, hsm.DefaultConfig()); err != nil { 125 + setupLog.Error(err, "Failed to initialize mock client") 126 + 
os.Exit(1) 127 + } 128 + } 129 + 130 + // Create agent server 131 + server := agent.NewServer(hsmClient, deviceName, port, healthPort, setupLog) 132 + 133 + // Setup graceful shutdown 134 + ctx, cancel := context.WithCancel(context.Background()) 135 + defer cancel() 136 + 137 + // Handle shutdown signals 138 + sigChan := make(chan os.Signal, 1) 139 + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) 140 + 141 + go func() { 142 + sig := <-sigChan 143 + setupLog.Info("Received signal, shutting down", "signal", sig) 144 + cancel() 145 + }() 146 + 147 + // Start server 148 + setupLog.Info("HSM agent ready", "device", deviceName) 149 + 150 + if err := server.Start(ctx); err != nil { 151 + setupLog.Error(err, "Server failed") 152 + os.Exit(1) 153 + } 154 + 155 + // Cleanup 156 + setupLog.Info("Closing HSM client") 157 + if err := hsmClient.Close(); err != nil { 158 + setupLog.Error(err, "Failed to close HSM client") 159 + } 160 + 161 + setupLog.Info("HSM agent shutdown complete") 162 + }
+9
cmd/manager/main.go
··· 39 39 "sigs.k8s.io/controller-runtime/pkg/webhook" 40 40 41 41 hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 42 + "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 42 43 "github.com/evanjarrett/hsm-secrets-operator/internal/api" 43 44 "github.com/evanjarrett/hsm-secrets-operator/internal/controller" 44 45 "github.com/evanjarrett/hsm-secrets-operator/internal/discovery" ··· 240 241 // Register the HSM client with the mirroring manager for this node 241 242 mirroringManager.RegisterHSMClient(nodeName, hsmClient) 242 243 244 + // Create agent manager 245 + agentImage := os.Getenv("AGENT_IMAGE") 246 + if agentImage == "" { 247 + agentImage = "hsm-secrets-operator:latest" // Default to same image as manager 248 + } 249 + agentManager := agent.NewManager(mgr.GetClient(), agentImage, "hsm-secrets-operator-system") 250 + 243 251 if err := (&controller.HSMSecretReconciler{ 244 252 Client: mgr.GetClient(), 245 253 Scheme: mgr.GetScheme(), 246 254 HSMClient: hsmClient, 247 255 MirroringManager: mirroringManager, 256 + AgentManager: agentManager, 248 257 }).SetupWithManager(mgr); err != nil { 249 258 setupLog.Error(err, "unable to create controller", "controller", "HSMSecret") 250 259 os.Exit(1)
+13
config/rbac/role.yaml
··· 15 15 - "" 16 16 resources: 17 17 - secrets 18 + - services 19 + verbs: 20 + - create 21 + - delete 22 + - get 23 + - list 24 + - patch 25 + - update 26 + - watch 27 + - apiGroups: 28 + - apps 29 + resources: 30 + - deployments 18 31 verbs: 19 32 - create 20 33 - delete
+58 -30
config/samples/hsm_v1alpha1_hsmdevice.yaml
··· 10 10 # Device type - automatically uses well-known USB specs 11 11 deviceType: PicoHSM 12 12 13 - # Auto-discovery will use built-in USB specifications 14 - # For Pico HSM: vendorId: "20a0", productId: "4230" 15 - 16 - # Optional: Override USB specification 17 - # usb: 18 - # vendorId: "20a0" 19 - # productId: "4230" 20 - # serialNumber: "PICO123456" # Optional: target specific device 21 - 22 - # Optional: Path-based discovery instead of USB 23 - # devicePath: 24 - # path: "/dev/ttyUSB*" 25 - # permissions: "rw" 13 + # Discovery configuration (choose one method) 14 + discovery: 15 + # Option 1: USB discovery (recommended for Pico HSM) 16 + usb: 17 + vendorId: "20a0" 18 + productId: "4230" 19 + # serialNumber: "PICO123456" # Optional: target specific device 20 + 21 + # Option 2: Auto-discovery based on device type 22 + # autoDiscovery: true 23 + 24 + # Option 3: Device path discovery 25 + # devicePath: 26 + # path: "/dev/ttyUSB*" 27 + # permissions: "0666" 26 28 27 - # Optional: Node selector to limit discovery to specific nodes 28 - # nodeSelector: 29 - # kubernetes.io/hostname: "worker-node-1" 29 + # PKCS#11 configuration per device 30 + pkcs11: 31 + libraryPath: "/usr/local/lib/libsc-hsm-pkcs11.so" 32 + slotId: 0 33 + pinSecret: 34 + name: "pico-hsm-pin" 35 + key: "pin" 36 + namespace: "default" # Optional: cross-namespace secret 37 + tokenLabel: "PicoHSM" # Optional: specific token 30 38 31 - # Optional: PKCS#11 library path 32 - pkcs11LibraryPath: "/usr/local/lib/libsc-hsm-pkcs11.so" 39 + # Optional: target specific nodes 40 + nodeSelector: 41 + hsm-type: "pico" 33 42 34 - # Maximum number of devices to discover (default: 10) 35 - maxDevices: 5 43 + # Maximum number of devices to discover (default: 1) 44 + maxDevices: 1 36 45 --- 37 46 apiVersion: hsm.j5t.io/v1alpha1 38 47 kind: HSMDevice ··· 42 51 spec: 43 52 deviceType: SmartCardHSM 44 53 45 - # Use custom USB specification 46 - usb: 47 - vendorId: "04e6" 48 - productId: "5816" 54 + # Discovery 
configuration 55 + discovery: 56 + usb: 57 + vendorId: "04e6" 58 + productId: "5816" 49 59 50 - pkcs11LibraryPath: "/usr/lib/opensc-pkcs11.so" 60 + # PKCS#11 configuration 61 + pkcs11: 62 + libraryPath: "/usr/lib/opensc-pkcs11.so" 63 + slotId: 0 64 + pinSecret: 65 + name: "smartcard-hsm-pin" 66 + key: "pin" 67 + tokenLabel: "SmartCard-HSM" 68 + 51 69 maxDevices: 3 52 70 --- 53 71 apiVersion: hsm.j5t.io/v1alpha1 ··· 58 76 spec: 59 77 deviceType: Generic 60 78 61 - # Use path-based discovery 62 - devicePath: 63 - path: "/dev/sc-hsm*" 64 - permissions: "rw" 79 + # Discovery configuration 80 + discovery: 81 + # Device path discovery method 82 + devicePath: 83 + path: "/dev/sc-hsm*" 84 + permissions: "0666" 65 85 66 - pkcs11LibraryPath: "/usr/lib/generic-pkcs11.so" 86 + # PKCS#11 configuration 87 + pkcs11: 88 + libraryPath: "/usr/lib/generic-pkcs11.so" 89 + slotId: 0 90 + pinSecret: 91 + name: "generic-hsm-pin" 92 + key: "pin" 93 + tokenLabel: "GenericHSM" 94 + 67 95 maxDevices: 10
+82
examples/agent-deployment/README.md
# HSM Agent Pod Architecture

This directory contains examples of how the HSM agent pod system works for distributed HSM operations.

## Architecture Overview

The HSM Secrets Operator now uses a **3-binary architecture**:

1. **Manager** (`/manager`) - Main operator that orchestrates everything
2. **Discovery** (`/discovery`) - Lightweight USB device discovery (DaemonSet)
3. **Agent** (`/agent`) - HSM operation execution pods (deployed on-demand)

## How It Works

```mermaid
graph TB
    subgraph "Manager Pod (Any Node)"
        M[Manager Controller]
    end

    subgraph "Node with HSM Hardware"
        D[Discovery Pod<br/>DaemonSet]
        A[Agent Pod<br/>On-demand]
        H[HSM Hardware]
    end

    subgraph "Other Nodes"
        D2[Discovery Pod<br/>DaemonSet]
    end

    M -->|1. Find HSMDevice| D
    M -->|2. Deploy Agent| A
    M -->|3. HTTP API calls| A
    A -->|4. PKCS#11| H
    D -->|USB discovery| H
```

## Process Flow

1. **HSMSecret Created**: The user creates an HSMSecret resource
2. **Device Discovery**: The manager finds an available HSMDevice (discovered by the DaemonSet)
3. **Agent Deployment**: The manager deploys an HSM agent pod on the node with the hardware
4. **Node Affinity**: The agent pod is pinned to that specific node via `kubernetes.io/hostname`
5. **HTTP Communication**: The manager makes HTTP calls to the agent for HSM operations
6. **Hardware Access**: The agent executes PKCS#11 operations locally on the HSM

## Key Benefits

- ✅ **Remote Execution**: Manager can be anywhere, agents run where hardware exists
- ✅ **Resource Efficiency**: Agents are only deployed when HSMSecrets exist
- ✅ **Auto-cleanup**: Agents are removed when no longer needed
- ✅ **Node Targeting**: Perfect placement via HSMDevice discovery
- ✅ **Clean Architecture**: Each component has a single responsibility

## Example Deployment

See `agent-example.yaml` for a complete example of:

- HSMDevice discovery configuration
- HSMSecret that triggers agent deployment
- Secret with PIN configuration

## Agent Pod Configuration

Agent pods are automatically configured with:

- **Environment Variables**: PKCS#11 library path, slot ID, token label
- **Secrets**: PIN from Kubernetes Secret reference
- **Node Affinity**: Pinned to node with actual hardware
- **Security**: Non-privileged execution with proper security contexts
- **Health Checks**: Liveness and readiness probes
- **Resources**: CPU/memory limits and requests

## API Endpoints

Each agent exposes:

- `GET /api/v1/hsm/info` - HSM device information
- `GET /api/v1/hsm/secrets/{path}` - Read secret
- `POST /api/v1/hsm/secrets/{path}` - Write secret
- `DELETE /api/v1/hsm/secrets/{path}` - Delete secret
- `GET /api/v1/hsm/secrets` - List secrets
- `GET /api/v1/hsm/checksum/{path}` - Get checksum
- `GET /healthz` - Health check
- `GET /readyz` - Readiness check
+141
examples/agent-deployment/agent-example.yaml
··· 1 + # Complete example showing HSM agent pod deployment 2 + # This demonstrates the full flow from HSMDevice discovery to agent-based secret operations 3 + 4 + --- 5 + # Step 1: Create PIN secret for HSM authentication 6 + apiVersion: v1 7 + kind: Secret 8 + metadata: 9 + name: pico-hsm-pin 10 + namespace: default 11 + type: Opaque 12 + data: 13 + pin: MTIzNDU2 # base64 encoded "123456" 14 + 15 + --- 16 + # Step 2: Configure HSMDevice for discovery and agent deployment 17 + apiVersion: hsm.j5t.io/v1alpha1 18 + kind: HSMDevice 19 + metadata: 20 + name: pico-hsm-main 21 + namespace: default 22 + labels: 23 + hsm-type: pico 24 + environment: production 25 + spec: 26 + # Device type - uses well-known specifications 27 + deviceType: PicoHSM 28 + 29 + # Discovery configuration 30 + discovery: 31 + usb: 32 + vendorId: "20a0" 33 + productId: "4230" 34 + # Optional: target specific device by serial number 35 + # serialNumber: "PICO123456" 36 + 37 + # PKCS#11 configuration for agent pods 38 + pkcs11: 39 + libraryPath: "/usr/local/lib/libsc-hsm-pkcs11.so" 40 + slotId: 0 41 + pinSecret: 42 + name: "pico-hsm-pin" 43 + key: "pin" 44 + namespace: "default" 45 + tokenLabel: "PicoHSM-Production" 46 + 47 + # Optional: restrict to specific nodes 48 + nodeSelector: 49 + hsm-enabled: "true" 50 + node-type: "worker" 51 + 52 + # Maximum devices to discover (usually 1 for production) 53 + maxDevices: 1 54 + 55 + --- 56 + # Step 3: Create HSMSecret that will trigger agent deployment 57 + apiVersion: hsm.j5t.io/v1alpha1 58 + kind: HSMSecret 59 + metadata: 60 + name: database-credentials 61 + namespace: default 62 + labels: 63 + app: myapp 64 + tier: database 65 + spec: 66 + # Path on HSM where secret is stored 67 + hsmPath: "secrets/production/database/credentials" 68 + 69 + # Name of Kubernetes Secret to create/sync 70 + secretName: "database-credentials" 71 + 72 + # Enable automatic synchronization 73 + autoSync: true 74 + 75 + # Sync every 5 minutes (300 seconds) 76 + syncInterval: 
300 77 + 78 + # Secret type (default: Opaque) 79 + secretType: "Opaque" 80 + 81 + --- 82 + # Step 4: Example application using the synced secret 83 + apiVersion: apps/v1 84 + kind: Deployment 85 + metadata: 86 + name: database-app 87 + namespace: default 88 + spec: 89 + replicas: 1 90 + selector: 91 + matchLabels: 92 + app: database-app 93 + template: 94 + metadata: 95 + labels: 96 + app: database-app 97 + spec: 98 + containers: 99 + - name: app 100 + image: nginx:1.21 101 + env: 102 + - name: DB_USERNAME 103 + valueFrom: 104 + secretKeyRef: 105 + name: database-credentials # Synced from HSM 106 + key: username 107 + - name: DB_PASSWORD 108 + valueFrom: 109 + secretKeyRef: 110 + name: database-credentials # Synced from HSM 111 + key: password 112 + - name: DB_HOST 113 + valueFrom: 114 + secretKeyRef: 115 + name: database-credentials # Synced from HSM 116 + key: host 117 + ports: 118 + - containerPort: 80 119 + resources: 120 + requests: 121 + cpu: 100m 122 + memory: 128Mi 123 + limits: 124 + cpu: 200m 125 + memory: 256Mi 126 + 127 + --- 128 + # Optional: Service to expose the application 129 + apiVersion: v1 130 + kind: Service 131 + metadata: 132 + name: database-app-service 133 + namespace: default 134 + spec: 135 + selector: 136 + app: database-app 137 + ports: 138 + - name: http 139 + port: 80 140 + targetPort: 80 141 + type: ClusterIP
+2 -2
helm/hsm-secrets-operator/Chart.yaml
··· 2 2 name: hsm-secrets-operator 3 3 description: A Kubernetes operator that bridges Pico HSM binary data storage with Kubernetes Secrets 4 4 type: application 5 - version: 0.2.10 6 - appVersion: v0.2.10 5 + version: 0.2.11 6 + appVersion: v0.2.11 7 7 icon: https://raw.githubusercontent.com/cncf/artwork/master/projects/kubernetes/icon/color/kubernetes-icon-color.svg 8 8 home: https://github.com/evanjarrett/hsm-secrets-operator 9 9 sources:
+8
helm/hsm-secrets-operator/templates/_helpers.tpl
··· 92 92 {{- end }} 93 93 94 94 {{/* 95 + Create the agent image reference 96 + */}} 97 + {{- define "hsm-secrets-operator.agentImage" -}} 98 + {{- $tag := .Values.agentImage.tag | default .Chart.AppVersion }} 99 + {{- printf "%s:%s" .Values.agentImage.repository $tag }} 100 + {{- end }} 101 + 102 + {{/* 95 103 Create system namespace name 96 104 */}} 97 105 {{- define "hsm-secrets-operator.systemNamespace" -}}
+3
helm/hsm-secrets-operator/templates/deployment.yaml
··· 63 63 value: {{ .Values.config.defaultSyncInterval | quote }} 64 64 - name: DEFAULT_SECRET_TYPE 65 65 value: {{ .Values.config.defaultSecretType | quote }} 66 + # Agent image configuration for dynamic agent pod deployment 67 + - name: AGENT_IMAGE 68 + value: {{ include "hsm-secrets-operator.agentImage" . }} 66 69 ports: 67 70 {{- if .Values.metrics.enabled }} 68 71 - name: metrics
+24
helm/hsm-secrets-operator/templates/rbac/role.yaml
··· 27 27 - update 28 28 - watch 29 29 - apiGroups: 30 + - "" 31 + resources: 32 + - services 33 + verbs: 34 + - create 35 + - delete 36 + - get 37 + - list 38 + - patch 39 + - update 40 + - watch 41 + - apiGroups: 42 + - apps 43 + resources: 44 + - deployments 45 + verbs: 46 + - create 47 + - delete 48 + - get 49 + - list 50 + - patch 51 + - update 52 + - watch 53 + - apiGroups: 30 54 - hsm.j5t.io 31 55 resources: 32 56 - hsmdevices
+6
helm/hsm-secrets-operator/values.yaml
··· 14 14 pullPolicy: IfNotPresent 15 15 tag: "" # Defaults to the chart appVersion 16 16 17 + # Agent image configuration (for HSM agent pods) 18 + agentImage: 19 + repository: ghcr.io/evanjarrett/hsm-secrets-operator 20 + pullPolicy: IfNotPresent 21 + tag: "" # Defaults to the chart appVersion (uses same image as manager) 22 + 17 23 imagePullSecrets: [] 18 24 nameOverride: "" 19 25 fullnameOverride: ""
+391
internal/agent/client.go
··· 1 + /* 2 + Copyright 2025. 3 + 4 + Licensed under the Apache License, Version 2.0 (the "License"); 5 + you may not use this file except in compliance with the License. 6 + You may obtain a copy of the License at 7 + 8 + http://www.apache.org/licenses/LICENSE-2.0 9 + 10 + Unless required by applicable law or agreed to in writing, software 11 + distributed under the License is distributed on an "AS IS" BASIS, 12 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 + See the License for the specific language governing permissions and 14 + limitations under the License. 15 + */ 16 + 17 + package agent 18 + 19 + import ( 20 + "bytes" 21 + "context" 22 + "encoding/json" 23 + "fmt" 24 + "io" 25 + "net/http" 26 + "strings" 27 + "time" 28 + 29 + "github.com/go-logr/logr" 30 + 31 + "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 32 + ) 33 + 34 + // Client implements the HSM client interface by communicating with HSM agents 35 + type Client struct { 36 + httpClient *http.Client 37 + baseURL string 38 + logger logr.Logger 39 + deviceName string 40 + timeout time.Duration 41 + retryAttempts int 42 + retryDelay time.Duration 43 + } 44 + 45 + // NewClient creates a new agent client 46 + func NewClient(baseURL, deviceName string, logger logr.Logger) *Client { 47 + return &Client{ 48 + httpClient: &http.Client{ 49 + Timeout: 30 * time.Second, 50 + }, 51 + baseURL: strings.TrimSuffix(baseURL, "/"), 52 + logger: logger.WithName("agent-client"), 53 + deviceName: deviceName, 54 + timeout: 30 * time.Second, 55 + retryAttempts: 3, 56 + retryDelay: 2 * time.Second, 57 + } 58 + } 59 + 60 + // Initialize establishes connection to the HSM agent 61 + func (c *Client) Initialize(ctx context.Context, config hsm.Config) error { 62 + // The agent handles HSM initialization, we just need to verify connectivity 63 + info, err := c.GetInfo(ctx) 64 + if err != nil { 65 + return fmt.Errorf("failed to initialize agent client: %w", err) 66 + } 67 + 68 + 
c.logger.Info("Agent client initialized", "device", c.deviceName, "hsm_label", info.Label)
	return nil
}

// Close terminates the HSM connection.
//
// It is a no-op: the HTTP client needs no explicit teardown and the agent is
// responsible for cleaning up the HSM session on its side.
func (c *Client) Close() error {
	return nil
}

// agentResponseError converts an unsuccessful AgentResponse into an error.
//
// The Error field is optional in the wire format, so it is checked for nil
// before use; the top-level Message is used as a fallback.
func agentResponseError(response *AgentResponse) error {
	if response.Error != nil {
		return fmt.Errorf("agent error: %s", response.Error.Message)
	}
	if response.Message != "" {
		return fmt.Errorf("agent error: %s", response.Message)
	}
	return fmt.Errorf("agent error: request was not successful")
}

// GetInfo returns information about the HSM device as reported by the agent.
//
// Fields missing from the response (or not encoded as strings) are left at
// their zero value.
func (c *Client) GetInfo(ctx context.Context) (*hsm.HSMInfo, error) {
	var response AgentResponse
	if err := c.doRequest(ctx, "GET", "/api/v1/hsm/info", nil, &response); err != nil {
		return nil, fmt.Errorf("failed to get HSM info: %w", err)
	}

	if !response.Success {
		return nil, agentResponseError(&response)
	}

	// Convert response data to HSMInfo
	data, ok := response.Data.(map[string]interface{})
	if !ok {
		return nil, fmt.Errorf("invalid response data format")
	}

	info := &hsm.HSMInfo{}
	if label, ok := data["label"].(string); ok {
		info.Label = label
	}
	if manufacturer, ok := data["manufacturer"].(string); ok {
		info.Manufacturer = manufacturer
	}
	if model, ok := data["model"].(string); ok {
		info.Model = model
	}
	if serialNumber, ok := data["serialNumber"].(string); ok {
		info.SerialNumber = serialNumber
	}
	if firmwareVersion, ok := data["firmwareVersion"].(string); ok {
		info.FirmwareVersion = firmwareVersion
	}

	return info, nil
}

// decodeSecretValue converts one decoded JSON value into secret bytes.
//
// Strings are used verbatim, JSON arrays are treated as arrays of byte values
// (each element must be an integer in [0, 255]), and any other value falls
// back to its fmt "%v" rendering.
func decodeSecretValue(value interface{}) ([]byte, error) {
	switch v := value.(type) {
	case string:
		return []byte(v), nil
	case []byte:
		return v, nil
	case []interface{}:
		out := make([]byte, len(v))
		for i, elem := range v {
			num, ok := elem.(float64)
			// Reject non-numbers, out-of-range values and fractions instead of
			// silently storing a wrong byte.
			if !ok || num < 0 || num > 255 || num != float64(byte(num)) {
				return nil, fmt.Errorf("element %d is not a byte value", i)
			}
			out[i] = byte(num)
		}
		return out, nil
	default:
		// Convert to string as fallback
		return []byte(fmt.Sprintf("%v", v)), nil
	}
}

// ReadSecret reads secret data from the specified HSM path.
func (c *Client) ReadSecret(ctx context.Context, path string) (hsm.SecretData, error) {
	endpoint := fmt.Sprintf("/api/v1/hsm/secrets/%s", c.escapePath(path))

	var response AgentResponse
	if err := c.doRequest(ctx, "GET", endpoint, nil, &response); err != nil {
		return nil, fmt.Errorf("failed to read secret: %w", err)
	}

	if !response.Success {
		return nil, agentResponseError(&response)
	}

	// Convert response data to SecretData
	responseData, ok := response.Data.(map[string]interface{})
	if !ok {
		return nil, fmt.Errorf("invalid response data format")
	}

	secretDataRaw, ok := responseData["data"].(map[string]interface{})
	if !ok {
		return nil, fmt.Errorf("invalid secret data format")
	}

	secretData := make(hsm.SecretData, len(secretDataRaw))
	for key, value := range secretDataRaw {
		decoded, err := decodeSecretValue(value)
		if err != nil {
			return nil, fmt.Errorf("invalid secret data for key %q: %w", key, err)
		}
		secretData[key] = decoded
	}

	return secretData, nil
}

// WriteSecret writes secret data to the specified HSM path.
//
// Values are sent as JSON strings, so they are expected to be valid UTF-8.
func (c *Client) WriteSecret(ctx context.Context, path string, data hsm.SecretData) error {
	endpoint := fmt.Sprintf("/api/v1/hsm/secrets/%s", c.escapePath(path))

	// Convert SecretData to request format
	requestData := make(map[string]interface{}, len(data))
	for key, value := range data {
		requestData[key] = string(value)
	}

	request := AgentRequest{
		Path: path,
		Data: requestData,
	}

	var response AgentResponse
	if err := c.doRequest(ctx, "POST", endpoint, &request, &response); err != nil {
		return fmt.Errorf("failed to write secret: %w", err)
	}

	if !response.Success {
		return agentResponseError(&response)
	}

	return nil
}

// DeleteSecret removes secret data from the specified HSM path.
func (c *Client) DeleteSecret(ctx context.Context, path string) error {
	endpoint := fmt.Sprintf("/api/v1/hsm/secrets/%s", c.escapePath(path))

	var response AgentResponse
	if err := c.doRequest(ctx, "DELETE", endpoint, nil, &response); err != nil {
		return fmt.Errorf("failed to delete secret: %w", err)
	}

	if !response.Success {
		return agentResponseError(&response)
	}

	return nil
}

// ListSecrets returns the secret paths known to the agent, optionally
// restricted to those starting with prefix. An empty result is returned as an
// empty, non-nil slice.
func (c *Client) ListSecrets(ctx context.Context, prefix string) ([]string, error) {
	endpoint := "/api/v1/hsm/secrets"
	if prefix != "" {
		// The prefix is caller-supplied; encode it so characters such as
		// '&', '#' or '%' cannot alter the query string.
		endpoint += "?prefix=" + percentEncode(prefix)
	}

	var response AgentResponse
	if err := c.doRequest(ctx, "GET", endpoint, nil, &response); err != nil {
		return nil, fmt.Errorf("failed to list secrets: %w", err)
	}

	if !response.Success {
		return nil, agentResponseError(&response)
	}

	// Convert response data to string slice
	responseData, ok := response.Data.(map[string]interface{})
	if !ok {
		return nil, fmt.Errorf("invalid response data format")
	}

	// A nil Go slice is encoded as JSON null, so a missing or null "paths"
	// means "no secrets", not a malformed response.
	rawValue, present := responseData["paths"]
	if !present || rawValue == nil {
		return []string{}, nil
	}

	pathsRaw, ok := rawValue.([]interface{})
	if !ok {
		return nil, fmt.Errorf("invalid paths data format")
	}

	paths := make([]string, len(pathsRaw))
	for i, pathRaw := range pathsRaw {
		path, ok := pathRaw.(string)
		if !ok {
			return nil, fmt.Errorf("invalid paths data format: entry %d is not a string", i)
		}
		paths[i] = path
	}

	return paths, nil
}

// GetChecksum returns the checksum the agent reports for the secret data at
// the given path.
func (c *Client) GetChecksum(ctx context.Context, path string) (string, error) {
	endpoint := fmt.Sprintf("/api/v1/hsm/checksum/%s", c.escapePath(path))

	var response AgentResponse
	if err := c.doRequest(ctx, "GET", endpoint, nil, &response); err != nil {
		return "", fmt.Errorf("failed to get checksum: %w", err)
	}

	if !response.Success {
		return "", agentResponseError(&response)
	}

	// Extract checksum from response
	responseData, ok := response.Data.(map[string]interface{})
	if !ok {
		return "", fmt.Errorf("invalid response data format")
	}

	checksum, ok := responseData["checksum"].(string)
	if !ok {
		return "", fmt.Errorf("invalid checksum format")
	}

	return checksum, nil
}

// IsConnected reports whether the HSM agent answers an info request within
// five seconds.
func (c *Client) IsConnected() bool {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	_, err := c.GetInfo(ctx)
	return err == nil
}

// doRequest performs an HTTP request against the agent with retry logic.
//
// requestBody, when non-nil, is marshalled to JSON once and re-sent on every
// attempt. responseBody, when non-nil, receives the decoded JSON response.
// Transport failures and 5xx responses are retried up to c.retryAttempts
// additional times with c.retryDelay between attempts; 4xx responses and
// request-construction errors are not retried. The returned error reports the
// number of attempts actually made. If ctx is cancelled while waiting to
// retry, ctx.Err() is returned.
func (c *Client) doRequest(ctx context.Context, method, endpoint string, requestBody interface{}, responseBody interface{}) error {
	url := c.baseURL + endpoint

	var jsonBody []byte
	if requestBody != nil {
		encoded, err := json.Marshal(requestBody)
		if err != nil {
			return fmt.Errorf("failed to marshal request body: %w", err)
		}
		jsonBody = encoded
	}

	// A negative retry count still allows the initial attempt.
	retries := c.retryAttempts
	if retries < 0 {
		retries = 0
	}

	var lastErr error
	attempts := 0
	for attempt := 0; attempt <= retries; attempt++ {
		if attempt > 0 {
			c.logger.V(1).Info("Retrying request", "attempt", attempt, "url", url, "method", method)

			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-time.After(c.retryDelay):
				// Continue with retry
			}
		}

		attempts++
		retryable, err := c.doAttempt(ctx, method, url, jsonBody, responseBody)
		if err == nil {
			return nil
		}
		lastErr = err
		if !retryable {
			break
		}
	}

	return fmt.Errorf("request failed after %d attempts: %w", attempts, lastErr)
}

// doAttempt performs a single HTTP round trip for doRequest.
//
// It lives in its own function so the response body is closed at the end of
// every attempt rather than when the whole retry loop finishes. The boolean
// result reports whether the failure is worth retrying.
func (c *Client) doAttempt(ctx context.Context, method, url string, jsonBody []byte, responseBody interface{}) (bool, error) {
	// Leave the reader as a nil interface when there is no body so that
	// net/http sends no request body at all.
	var reqBodyReader io.Reader
	if jsonBody != nil {
		reqBodyReader = bytes.NewReader(jsonBody)
	}

	req, err := http.NewRequestWithContext(ctx, method, url, reqBodyReader)
	if err != nil {
		// A malformed method or URL will not fix itself on retry.
		return false, fmt.Errorf("failed to create request: %w", err)
	}

	if jsonBody != nil {
		req.Header.Set("Content-Type", "application/json")
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return true, fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	// Read response body
	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return true, fmt.Errorf("failed to read response body: %w", err)
	}

	// Check for HTTP errors
	if resp.StatusCode >= 400 {
		// Don't retry client errors (4xx)
		retryable := resp.StatusCode >= 500

		// Try to parse error response
		var errorResp AgentResponse
		if json.Unmarshal(bodyBytes, &errorResp) == nil && errorResp.Error != nil {
			return retryable, fmt.Errorf("agent error (status %d): %s", resp.StatusCode, errorResp.Error.Message)
		}
		return retryable, fmt.Errorf("HTTP error (status %d): %s", resp.StatusCode, string(bodyBytes))
	}

	// Parse successful response
	if responseBody != nil {
		if err := json.Unmarshal(bodyBytes, responseBody); err != nil {
			return true, fmt.Errorf("failed to unmarshal response: %w", err)
		}
	}

	return false, nil
}

// percentEncode percent-encodes every byte of s that is not an RFC 3986
// "unreserved" character (ALPHA / DIGIT / "-" / "." / "_" / "~"). The result
// is safe to embed as a single URL path segment or as a query value.
func percentEncode(s string) string {
	const upperHex = "0123456789ABCDEF"

	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); i++ {
		ch := s[i]
		switch {
		case ch >= 'a' && ch <= 'z',
			ch >= 'A' && ch <= 'Z',
			ch >= '0' && ch <= '9',
			ch == '-', ch == '.', ch == '_', ch == '~':
			b.WriteByte(ch)
		default:
			b.WriteByte('%')
			b.WriteByte(upperHex[ch>>4])
			b.WriteByte(upperHex[ch&0x0F])
		}
	}
	return b.String()
}

// escapePath escapes a secret path so it can be used as a single URL path
// segment. Slashes and spaces are encoded as before ("%2F", "%20"); other
// reserved characters such as '%', '?' and '#' are now encoded as well so
// they cannot be misread as URL syntax.
func (c *Client) escapePath(path string) string {
	return percentEncode(path)
}

// SetRetryPolicy configures how many times a failed request is retried and
// how long to wait between attempts.
func (c *Client) SetRetryPolicy(attempts int, delay time.Duration) {
	c.retryAttempts = attempts
	c.retryDelay = delay
}

// SetTimeout configures the per-request timeout on the client and on its
// underlying HTTP client.
func (c *Client) SetTimeout(timeout time.Duration) {
	c.timeout = timeout
	c.httpClient.Timeout = timeout
}
+518
internal/agent/deployment.go
··· 1 + /* 2 + Copyright 2025. 3 + 4 + Licensed under the Apache License, Version 2.0 (the "License"); 5 + you may not use this file except in compliance with the License. 6 + You may obtain a copy of the License at 7 + 8 + http://www.apache.org/licenses/LICENSE-2.0 9 + 10 + Unless required by applicable law or agreed to in writing, software 11 + distributed under the License is distributed on an "AS IS" BASIS, 12 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 + See the License for the specific language governing permissions and 14 + limitations under the License. 15 + */ 16 + 17 + package agent 18 + 19 + import ( 20 + "context" 21 + "fmt" 22 + 23 + appsv1 "k8s.io/api/apps/v1" 24 + corev1 "k8s.io/api/core/v1" 25 + "k8s.io/apimachinery/pkg/api/errors" 26 + "k8s.io/apimachinery/pkg/api/resource" 27 + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 + "k8s.io/apimachinery/pkg/types" 29 + "k8s.io/apimachinery/pkg/util/intstr" 30 + ctrl "sigs.k8s.io/controller-runtime" 31 + "sigs.k8s.io/controller-runtime/pkg/client" 32 + 33 + hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 34 + ) 35 + 36 + const ( 37 + // AgentNamePrefix is the prefix for HSM agent deployment names 38 + AgentNamePrefix = "hsm-agent" 39 + 40 + // AgentImage is the container image for HSM agents 41 + AgentImage = "hsm-secrets-operator:latest" 42 + 43 + // AgentPort is the port the HSM agent serves on 44 + AgentPort = 8092 45 + 46 + // AgentHealthPort is the port for health checks 47 + AgentHealthPort = 8093 48 + ) 49 + 50 + // Manager handles HSM agent pod lifecycle 51 + type Manager struct { 52 + client.Client 53 + AgentImage string 54 + AgentNamespace string 55 + } 56 + 57 + // NewManager creates a new agent manager 58 + func NewManager(client client.Client, agentImage, namespace string) *Manager { 59 + if agentImage == "" { 60 + agentImage = AgentImage 61 + } 62 + if namespace == "" { 63 + namespace = "hsm-secrets-operator-system" 64 + } 65 + 66 + 
return &Manager{ 67 + Client: client, 68 + AgentImage: agentImage, 69 + AgentNamespace: namespace, 70 + } 71 + } 72 + 73 + // EnsureAgent ensures an HSM agent pod exists for the given HSM device 74 + func (m *Manager) EnsureAgent(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice, hsmSecret *hsmv1alpha1.HSMSecret) (string, error) { 75 + agentName := m.generateAgentName(hsmDevice) 76 + 77 + // Check if deployment exists 78 + var deployment appsv1.Deployment 79 + err := m.Get(ctx, types.NamespacedName{ 80 + Name: agentName, 81 + Namespace: m.AgentNamespace, 82 + }, &deployment) 83 + 84 + if err == nil { 85 + // Agent exists, ensure it's running and return endpoint 86 + return m.getAgentEndpoint(agentName), nil 87 + } 88 + 89 + if !errors.IsNotFound(err) { 90 + return "", fmt.Errorf("failed to check agent deployment: %w", err) 91 + } 92 + 93 + // Create agent deployment 94 + if err := m.createAgentDeployment(ctx, hsmDevice, hsmSecret); err != nil { 95 + return "", fmt.Errorf("failed to create agent deployment: %w", err) 96 + } 97 + 98 + // Create agent service 99 + if err := m.createAgentService(ctx, hsmDevice); err != nil { 100 + return "", fmt.Errorf("failed to create agent service: %w", err) 101 + } 102 + 103 + return m.getAgentEndpoint(agentName), nil 104 + } 105 + 106 + // CleanupAgent removes the HSM agent for the given device when no longer needed 107 + func (m *Manager) CleanupAgent(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice) error { 108 + agentName := m.generateAgentName(hsmDevice) 109 + 110 + // Check if any HSMSecrets still reference this device 111 + var hsmSecretList hsmv1alpha1.HSMSecretList 112 + if err := m.List(ctx, &hsmSecretList); err != nil { 113 + return fmt.Errorf("failed to list HSMSecrets: %w", err) 114 + } 115 + 116 + // Count references to this device 117 + references := 0 118 + for _, secret := range hsmSecretList.Items { 119 + if m.secretReferencesDevice(&secret, hsmDevice) { 120 + references++ 121 + } 122 + } 123 + 124 + // 
If there are still references, don't cleanup 125 + if references > 0 { 126 + return nil 127 + } 128 + 129 + // Delete service 130 + service := &corev1.Service{ 131 + ObjectMeta: metav1.ObjectMeta{ 132 + Name: agentName, 133 + Namespace: m.AgentNamespace, 134 + }, 135 + } 136 + if err := m.Delete(ctx, service); err != nil && !errors.IsNotFound(err) { 137 + return fmt.Errorf("failed to delete agent service: %w", err) 138 + } 139 + 140 + // Delete deployment 141 + deployment := &appsv1.Deployment{ 142 + ObjectMeta: metav1.ObjectMeta{ 143 + Name: agentName, 144 + Namespace: m.AgentNamespace, 145 + }, 146 + } 147 + if err := m.Delete(ctx, deployment); err != nil && !errors.IsNotFound(err) { 148 + return fmt.Errorf("failed to delete agent deployment: %w", err) 149 + } 150 + 151 + return nil 152 + } 153 + 154 + // generateAgentName creates a consistent agent name for an HSM device 155 + func (m *Manager) generateAgentName(hsmDevice *hsmv1alpha1.HSMDevice) string { 156 + return fmt.Sprintf("%s-%s", AgentNamePrefix, hsmDevice.Name) 157 + } 158 + 159 + // getAgentEndpoint returns the HTTP endpoint for the agent 160 + func (m *Manager) getAgentEndpoint(agentName string) string { 161 + return fmt.Sprintf("http://%s.%s.svc.cluster.local:%d", agentName, m.AgentNamespace, AgentPort) 162 + } 163 + 164 + // createAgentDeployment creates the HSM agent deployment 165 + func (m *Manager) createAgentDeployment(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice, hsmSecret *hsmv1alpha1.HSMSecret) error { 166 + agentName := m.generateAgentName(hsmDevice) 167 + 168 + // Find the node where the HSM device is located 169 + targetNode := m.findTargetNode(hsmDevice) 170 + if targetNode == "" { 171 + return fmt.Errorf("no target node found for HSM device %s", hsmDevice.Name) 172 + } 173 + 174 + deployment := &appsv1.Deployment{ 175 + ObjectMeta: metav1.ObjectMeta{ 176 + Name: agentName, 177 + Namespace: m.AgentNamespace, 178 + Labels: map[string]string{ 179 + "app": agentName, 180 + 
"app.kubernetes.io/component": "hsm-agent", 181 + "app.kubernetes.io/instance": agentName, 182 + "app.kubernetes.io/name": "hsm-agent", 183 + "app.kubernetes.io/part-of": "hsm-secrets-operator", 184 + "hsm.j5t.io/device": hsmDevice.Name, 185 + "hsm.j5t.io/device-type": string(hsmDevice.Spec.DeviceType), 186 + }, 187 + }, 188 + Spec: appsv1.DeploymentSpec{ 189 + Replicas: int32Ptr(1), 190 + Selector: &metav1.LabelSelector{ 191 + MatchLabels: map[string]string{ 192 + "app": agentName, 193 + }, 194 + }, 195 + Template: corev1.PodTemplateSpec{ 196 + ObjectMeta: metav1.ObjectMeta{ 197 + Labels: map[string]string{ 198 + "app": agentName, 199 + "app.kubernetes.io/component": "hsm-agent", 200 + "app.kubernetes.io/instance": agentName, 201 + "app.kubernetes.io/name": "hsm-agent", 202 + "app.kubernetes.io/part-of": "hsm-secrets-operator", 203 + "hsm.j5t.io/device": hsmDevice.Name, 204 + "hsm.j5t.io/device-type": string(hsmDevice.Spec.DeviceType), 205 + }, 206 + }, 207 + Spec: corev1.PodSpec{ 208 + // Pin to the specific node with the HSM device 209 + NodeSelector: map[string]string{ 210 + "kubernetes.io/hostname": targetNode, 211 + }, 212 + // Affinity for better scheduling 213 + Affinity: &corev1.Affinity{ 214 + NodeAffinity: &corev1.NodeAffinity{ 215 + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ 216 + NodeSelectorTerms: []corev1.NodeSelectorTerm{ 217 + { 218 + MatchExpressions: []corev1.NodeSelectorRequirement{ 219 + { 220 + Key: "kubernetes.io/hostname", 221 + Operator: corev1.NodeSelectorOpIn, 222 + Values: []string{targetNode}, 223 + }, 224 + }, 225 + }, 226 + }, 227 + }, 228 + }, 229 + }, 230 + SecurityContext: &corev1.PodSecurityContext{ 231 + RunAsNonRoot: boolPtr(true), 232 + SeccompProfile: &corev1.SeccompProfile{ 233 + Type: corev1.SeccompProfileTypeRuntimeDefault, 234 + }, 235 + }, 236 + ServiceAccountName: "hsm-secrets-operator-controller-manager", 237 + Containers: []corev1.Container{ 238 + { 239 + Name: "agent", 240 + Image: 
m.AgentImage, 241 + Command: []string{ 242 + "/agent", 243 + }, 244 + Args: []string{ 245 + "--agent-mode", 246 + "--device-name=" + hsmDevice.Name, 247 + "--port=" + fmt.Sprintf("%d", AgentPort), 248 + "--health-port=" + fmt.Sprintf("%d", AgentHealthPort), 249 + }, 250 + Env: m.buildAgentEnv(hsmDevice), 251 + Ports: []corev1.ContainerPort{ 252 + { 253 + Name: "http", 254 + ContainerPort: AgentPort, 255 + Protocol: corev1.ProtocolTCP, 256 + }, 257 + { 258 + Name: "health", 259 + ContainerPort: AgentHealthPort, 260 + Protocol: corev1.ProtocolTCP, 261 + }, 262 + }, 263 + LivenessProbe: &corev1.Probe{ 264 + ProbeHandler: corev1.ProbeHandler{ 265 + HTTPGet: &corev1.HTTPGetAction{ 266 + Path: "/healthz", 267 + Port: intstr.FromInt(AgentHealthPort), 268 + }, 269 + }, 270 + InitialDelaySeconds: 15, 271 + PeriodSeconds: 20, 272 + }, 273 + ReadinessProbe: &corev1.Probe{ 274 + ProbeHandler: corev1.ProbeHandler{ 275 + HTTPGet: &corev1.HTTPGetAction{ 276 + Path: "/readyz", 277 + Port: intstr.FromInt(AgentHealthPort), 278 + }, 279 + }, 280 + InitialDelaySeconds: 5, 281 + PeriodSeconds: 10, 282 + }, 283 + Resources: corev1.ResourceRequirements{ 284 + Requests: corev1.ResourceList{ 285 + corev1.ResourceCPU: resourceQuantity("100m"), 286 + corev1.ResourceMemory: resourceQuantity("128Mi"), 287 + }, 288 + Limits: corev1.ResourceList{ 289 + corev1.ResourceCPU: resourceQuantity("500m"), 290 + corev1.ResourceMemory: resourceQuantity("256Mi"), 291 + }, 292 + }, 293 + SecurityContext: &corev1.SecurityContext{ 294 + AllowPrivilegeEscalation: boolPtr(false), 295 + Capabilities: &corev1.Capabilities{ 296 + Drop: []corev1.Capability{"ALL"}, 297 + }, 298 + ReadOnlyRootFilesystem: boolPtr(true), 299 + RunAsNonRoot: boolPtr(true), 300 + RunAsUser: int64Ptr(65532), 301 + }, 302 + VolumeMounts: m.buildAgentVolumeMounts(hsmDevice), 303 + }, 304 + }, 305 + Volumes: m.buildAgentVolumes(hsmDevice), 306 + }, 307 + }, 308 + }, 309 + } 310 + 311 + // Set HSMSecret as owner if provided (for cleanup) 312 
+ if hsmSecret != nil { 313 + if err := ctrl.SetControllerReference(hsmSecret, deployment, m.Scheme()); err != nil { 314 + return fmt.Errorf("failed to set controller reference: %w", err) 315 + } 316 + } 317 + 318 + return m.Create(ctx, deployment) 319 + } 320 + 321 + // createAgentService creates the service for the HSM agent 322 + func (m *Manager) createAgentService(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice) error { 323 + agentName := m.generateAgentName(hsmDevice) 324 + 325 + service := &corev1.Service{ 326 + ObjectMeta: metav1.ObjectMeta{ 327 + Name: agentName, 328 + Namespace: m.AgentNamespace, 329 + Labels: map[string]string{ 330 + "app": agentName, 331 + "app.kubernetes.io/component": "hsm-agent", 332 + "app.kubernetes.io/instance": agentName, 333 + "app.kubernetes.io/name": "hsm-agent", 334 + "app.kubernetes.io/part-of": "hsm-secrets-operator", 335 + "hsm.j5t.io/device": hsmDevice.Name, 336 + "hsm.j5t.io/device-type": string(hsmDevice.Spec.DeviceType), 337 + }, 338 + }, 339 + Spec: corev1.ServiceSpec{ 340 + Selector: map[string]string{ 341 + "app": agentName, 342 + }, 343 + Ports: []corev1.ServicePort{ 344 + { 345 + Name: "http", 346 + Port: AgentPort, 347 + TargetPort: intstr.FromInt(AgentPort), 348 + Protocol: corev1.ProtocolTCP, 349 + }, 350 + { 351 + Name: "health", 352 + Port: AgentHealthPort, 353 + TargetPort: intstr.FromInt(AgentHealthPort), 354 + Protocol: corev1.ProtocolTCP, 355 + }, 356 + }, 357 + Type: corev1.ServiceTypeClusterIP, 358 + }, 359 + } 360 + 361 + return m.Create(ctx, service) 362 + } 363 + 364 + // findTargetNode finds the node where the HSM device is located 365 + func (m *Manager) findTargetNode(hsmDevice *hsmv1alpha1.HSMDevice) string { 366 + // Look for discovered devices in the status 367 + for _, device := range hsmDevice.Status.DiscoveredDevices { 368 + if device.Available && device.NodeName != "" { 369 + return device.NodeName 370 + } 371 + } 372 + 373 + // Fallback: if no specific node found, use node selector 
if present 374 + if hsmDevice.Spec.NodeSelector != nil { 375 + // This would need more sophisticated logic to map selectors to actual nodes 376 + // For now, return empty to indicate no target found 377 + } 378 + 379 + return "" 380 + } 381 + 382 + // secretReferencesDevice checks if an HSMSecret references the given device 383 + func (m *Manager) secretReferencesDevice(hsmSecret *hsmv1alpha1.HSMSecret, hsmDevice *hsmv1alpha1.HSMDevice) bool { 384 + // This is a simplified check - in practice, you might want more sophisticated logic 385 + // to determine which device an HSMSecret should use based on path, device type, etc. 386 + 387 + // For now, assume any HSMSecret could use any available device of the right type 388 + // A more sophisticated implementation might check: 389 + // - HSMSecret annotations for device preferences 390 + // - Path-based device mapping 391 + // - Device type compatibility 392 + 393 + return true // Simplified for initial implementation 394 + } 395 + 396 + // buildAgentEnv builds environment variables for the HSM agent 397 + func (m *Manager) buildAgentEnv(hsmDevice *hsmv1alpha1.HSMDevice) []corev1.EnvVar { 398 + env := []corev1.EnvVar{ 399 + { 400 + Name: "HSM_DEVICE_NAME", 401 + Value: hsmDevice.Name, 402 + }, 403 + { 404 + Name: "HSM_DEVICE_TYPE", 405 + Value: string(hsmDevice.Spec.DeviceType), 406 + }, 407 + } 408 + 409 + // Add PKCS#11 configuration if available 410 + if hsmDevice.Spec.PKCS11 != nil { 411 + env = append(env, []corev1.EnvVar{ 412 + { 413 + Name: "PKCS11_LIBRARY_PATH", 414 + Value: hsmDevice.Spec.PKCS11.LibraryPath, 415 + }, 416 + { 417 + Name: "PKCS11_SLOT_ID", 418 + Value: fmt.Sprintf("%d", hsmDevice.Spec.PKCS11.SlotId), 419 + }, 420 + { 421 + Name: "PKCS11_TOKEN_LABEL", 422 + Value: hsmDevice.Spec.PKCS11.TokenLabel, 423 + }, 424 + }...) 
425 + 426 + // Add PIN from secret if configured 427 + if hsmDevice.Spec.PKCS11.PinSecret != nil { 428 + env = append(env, corev1.EnvVar{ 429 + Name: "PKCS11_PIN", 430 + ValueFrom: &corev1.EnvVarSource{ 431 + SecretKeyRef: &corev1.SecretKeySelector{ 432 + LocalObjectReference: corev1.LocalObjectReference{ 433 + Name: hsmDevice.Spec.PKCS11.PinSecret.Name, 434 + }, 435 + Key: hsmDevice.Spec.PKCS11.PinSecret.Key, 436 + }, 437 + }, 438 + }) 439 + } 440 + } 441 + 442 + return env 443 + } 444 + 445 + // buildAgentVolumeMounts builds volume mounts for the HSM agent 446 + func (m *Manager) buildAgentVolumeMounts(hsmDevice *hsmv1alpha1.HSMDevice) []corev1.VolumeMount { 447 + mounts := []corev1.VolumeMount{ 448 + { 449 + Name: "tmp", 450 + MountPath: "/tmp", 451 + }, 452 + } 453 + 454 + // Add device mounts if needed 455 + for _, device := range hsmDevice.Status.DiscoveredDevices { 456 + if device.DevicePath != "" { 457 + mounts = append(mounts, corev1.VolumeMount{ 458 + Name: "hsm-device", 459 + MountPath: "/dev/hsm", 460 + }) 461 + break // Only need one mount point 462 + } 463 + } 464 + 465 + return mounts 466 + } 467 + 468 + // buildAgentVolumes builds volumes for the HSM agent 469 + func (m *Manager) buildAgentVolumes(hsmDevice *hsmv1alpha1.HSMDevice) []corev1.Volume { 470 + volumes := []corev1.Volume{ 471 + { 472 + Name: "tmp", 473 + VolumeSource: corev1.VolumeSource{ 474 + EmptyDir: &corev1.EmptyDirVolumeSource{}, 475 + }, 476 + }, 477 + } 478 + 479 + // Add device volumes if needed 480 + for _, device := range hsmDevice.Status.DiscoveredDevices { 481 + if device.DevicePath != "" { 482 + volumes = append(volumes, corev1.Volume{ 483 + Name: "hsm-device", 484 + VolumeSource: corev1.VolumeSource{ 485 + HostPath: &corev1.HostPathVolumeSource{ 486 + Path: device.DevicePath, 487 + Type: hostPathTypePtr(corev1.HostPathCharDev), 488 + }, 489 + }, 490 + }) 491 + break // Only need one volume 492 + } 493 + } 494 + 495 + return volumes 496 + } 497 + 498 + // Helper functions 499 
+ func int32Ptr(i int32) *int32 { 500 + return &i 501 + } 502 + 503 + func int64Ptr(i int64) *int64 { 504 + return &i 505 + } 506 + 507 + func boolPtr(b bool) *bool { 508 + return &b 509 + } 510 + 511 + func hostPathTypePtr(t corev1.HostPathType) *corev1.HostPathType { 512 + return &t 513 + } 514 + 515 + func resourceQuantity(s string) resource.Quantity { 516 + q, _ := resource.ParseQuantity(s) 517 + return q 518 + }
+470
internal/agent/server.go
··· 1 + /* 2 + Copyright 2025. 3 + 4 + Licensed under the Apache License, Version 2.0 (the "License"); 5 + you may not use this file except in compliance with the License. 6 + You may obtain a copy of the License at 7 + 8 + http://www.apache.org/licenses/LICENSE-2.0 9 + 10 + Unless required by applicable law or agreed to in writing, software 11 + distributed under the License is distributed on an "AS IS" BASIS, 12 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 + See the License for the specific language governing permissions and 14 + limitations under the License. 15 + */ 16 + 17 + package agent 18 + 19 + import ( 20 + "context" 21 + "fmt" 22 + "net/http" 23 + "time" 24 + 25 + "github.com/gin-gonic/gin" 26 + "github.com/go-logr/logr" 27 + "github.com/go-playground/validator/v10" 28 + 29 + "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 30 + ) 31 + 32 + // Server represents the HSM agent HTTP server 33 + type Server struct { 34 + hsmClient hsm.Client 35 + validator *validator.Validate 36 + logger logr.Logger 37 + router *gin.Engine 38 + deviceName string 39 + port int 40 + healthPort int 41 + } 42 + 43 + // AgentRequest represents a generic HSM operation request 44 + type AgentRequest struct { 45 + Path string `json:"path" validate:"required"` 46 + Data map[string]interface{} `json:"data,omitempty"` 47 + } 48 + 49 + // AgentResponse represents a generic HSM operation response 50 + type AgentResponse struct { 51 + Success bool `json:"success"` 52 + Message string `json:"message,omitempty"` 53 + Data interface{} `json:"data,omitempty"` 54 + Error *AgentError `json:"error,omitempty"` 55 + } 56 + 57 + // AgentError represents an error response 58 + type AgentError struct { 59 + Code string `json:"code"` 60 + Message string `json:"message"` 61 + Details map[string]interface{} `json:"details,omitempty"` 62 + } 63 + 64 + // HealthStatus represents the health status of the agent 65 + type HealthStatus struct { 66 + Status string 
`json:"status"` 67 + DeviceName string `json:"deviceName"` 68 + HSMConnected bool `json:"hsmConnected"` 69 + Timestamp time.Time `json:"timestamp"` 70 + Uptime string `json:"uptime"` 71 + } 72 + 73 + // NewServer creates a new HSM agent server 74 + func NewServer(hsmClient hsm.Client, deviceName string, port, healthPort int, logger logr.Logger) *Server { 75 + s := &Server{ 76 + hsmClient: hsmClient, 77 + validator: validator.New(), 78 + logger: logger.WithName("agent-server"), 79 + deviceName: deviceName, 80 + port: port, 81 + healthPort: healthPort, 82 + } 83 + 84 + s.setupRouter() 85 + return s 86 + } 87 + 88 + // setupRouter configures the HTTP routes 89 + func (s *Server) setupRouter() { 90 + gin.SetMode(gin.ReleaseMode) 91 + s.router = gin.New() 92 + 93 + // Add middleware 94 + s.router.Use(gin.Recovery()) 95 + s.router.Use(s.loggingMiddleware()) 96 + s.router.Use(s.authMiddleware()) 97 + 98 + // API v1 routes 99 + v1 := s.router.Group("/api/v1") 100 + { 101 + // HSM operations 102 + hsm := v1.Group("/hsm") 103 + { 104 + hsm.GET("/info", s.handleGetInfo) 105 + hsm.GET("/secrets/:path", s.handleReadSecret) 106 + hsm.POST("/secrets/:path", s.handleWriteSecret) 107 + hsm.PUT("/secrets/:path", s.handleWriteSecret) 108 + hsm.DELETE("/secrets/:path", s.handleDeleteSecret) 109 + hsm.GET("/secrets", s.handleListSecrets) 110 + hsm.GET("/checksum/:path", s.handleGetChecksum) 111 + } 112 + } 113 + 114 + // Health endpoints (separate router for different port) 115 + s.setupHealthRouter() 116 + } 117 + 118 + // setupHealthRouter sets up health check routes 119 + func (s *Server) setupHealthRouter() { 120 + // Health checks will be handled by a separate handler function 121 + // This is called during Start() 122 + } 123 + 124 + // Start starts both the main API server and health server 125 + func (s *Server) Start(ctx context.Context) error { 126 + // Start health server in background 127 + healthMux := http.NewServeMux() 128 + healthMux.HandleFunc("/healthz", 
s.handleHealthz) 129 + healthMux.HandleFunc("/readyz", s.handleReadyz) 130 + 131 + healthServer := &http.Server{ 132 + Addr: fmt.Sprintf(":%d", s.healthPort), 133 + Handler: healthMux, 134 + } 135 + 136 + go func() { 137 + s.logger.Info("Starting health server", "port", s.healthPort) 138 + if err := healthServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { 139 + s.logger.Error(err, "Health server failed") 140 + } 141 + }() 142 + 143 + // Start main API server 144 + server := &http.Server{ 145 + Addr: fmt.Sprintf(":%d", s.port), 146 + Handler: s.router, 147 + } 148 + 149 + // Graceful shutdown 150 + go func() { 151 + <-ctx.Done() 152 + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 153 + defer cancel() 154 + 155 + s.logger.Info("Shutting down servers") 156 + healthServer.Shutdown(shutdownCtx) 157 + server.Shutdown(shutdownCtx) 158 + }() 159 + 160 + s.logger.Info("Starting HSM agent server", "port", s.port, "device", s.deviceName) 161 + return server.ListenAndServe() 162 + } 163 + 164 + // handleGetInfo handles HSM info requests 165 + func (s *Server) handleGetInfo(c *gin.Context) { 166 + ctx := c.Request.Context() 167 + 168 + if s.hsmClient == nil || !s.hsmClient.IsConnected() { 169 + s.sendError(c, http.StatusServiceUnavailable, "hsm_unavailable", "HSM client not connected", nil) 170 + return 171 + } 172 + 173 + info, err := s.hsmClient.GetInfo(ctx) 174 + if err != nil { 175 + s.logger.Error(err, "Failed to get HSM info") 176 + s.sendError(c, http.StatusInternalServerError, "hsm_error", "Failed to get HSM info", map[string]interface{}{ 177 + "error": err.Error(), 178 + }) 179 + return 180 + } 181 + 182 + s.sendResponse(c, http.StatusOK, "HSM info retrieved", info) 183 + } 184 + 185 + // handleReadSecret handles secret read requests 186 + func (s *Server) handleReadSecret(c *gin.Context) { 187 + ctx := c.Request.Context() 188 + path := c.Param("path") 189 + 190 + if path == "" { 191 + s.sendError(c, 
http.StatusBadRequest, "invalid_path", "Path parameter is required", nil) 192 + return 193 + } 194 + 195 + if s.hsmClient == nil || !s.hsmClient.IsConnected() { 196 + s.sendError(c, http.StatusServiceUnavailable, "hsm_unavailable", "HSM client not connected", nil) 197 + return 198 + } 199 + 200 + data, err := s.hsmClient.ReadSecret(ctx, path) 201 + if err != nil { 202 + s.logger.Error(err, "Failed to read secret", "path", path) 203 + s.sendError(c, http.StatusInternalServerError, "read_error", "Failed to read secret", map[string]interface{}{ 204 + "path": path, 205 + "error": err.Error(), 206 + }) 207 + return 208 + } 209 + 210 + // Calculate checksum 211 + checksum := hsm.CalculateChecksum(data) 212 + 213 + response := map[string]interface{}{ 214 + "path": path, 215 + "data": data, 216 + "checksum": checksum, 217 + } 218 + 219 + s.sendResponse(c, http.StatusOK, "Secret read successfully", response) 220 + } 221 + 222 + // handleWriteSecret handles secret write requests 223 + func (s *Server) handleWriteSecret(c *gin.Context) { 224 + ctx := c.Request.Context() 225 + path := c.Param("path") 226 + 227 + if path == "" { 228 + s.sendError(c, http.StatusBadRequest, "invalid_path", "Path parameter is required", nil) 229 + return 230 + } 231 + 232 + var req AgentRequest 233 + if err := c.ShouldBindJSON(&req); err != nil { 234 + s.sendError(c, http.StatusBadRequest, "invalid_request", "Invalid JSON payload", map[string]interface{}{ 235 + "error": err.Error(), 236 + }) 237 + return 238 + } 239 + 240 + // Use path from URL parameter, not request body 241 + req.Path = path 242 + 243 + if err := s.validator.Struct(&req); err != nil { 244 + s.sendError(c, http.StatusBadRequest, "validation_failed", "Request validation failed", map[string]interface{}{ 245 + "error": err.Error(), 246 + }) 247 + return 248 + } 249 + 250 + if s.hsmClient == nil || !s.hsmClient.IsConnected() { 251 + s.sendError(c, http.StatusServiceUnavailable, "hsm_unavailable", "HSM client not connected", nil) 252 
+ return 253 + } 254 + 255 + // Convert request data to HSM format 256 + hsmData := make(hsm.SecretData) 257 + for key, value := range req.Data { 258 + switch v := value.(type) { 259 + case string: 260 + hsmData[key] = []byte(v) 261 + case []byte: 262 + hsmData[key] = v 263 + default: 264 + // Convert to string as fallback 265 + hsmData[key] = []byte(fmt.Sprintf("%v", v)) 266 + } 267 + } 268 + 269 + if err := s.hsmClient.WriteSecret(ctx, req.Path, hsmData); err != nil { 270 + s.logger.Error(err, "Failed to write secret", "path", req.Path) 271 + s.sendError(c, http.StatusInternalServerError, "write_error", "Failed to write secret", map[string]interface{}{ 272 + "path": req.Path, 273 + "error": err.Error(), 274 + }) 275 + return 276 + } 277 + 278 + // Calculate checksum for response 279 + checksum := hsm.CalculateChecksum(hsmData) 280 + 281 + response := map[string]interface{}{ 282 + "path": req.Path, 283 + "checksum": checksum, 284 + } 285 + 286 + s.sendResponse(c, http.StatusOK, "Secret written successfully", response) 287 + } 288 + 289 + // handleDeleteSecret handles secret deletion requests 290 + func (s *Server) handleDeleteSecret(c *gin.Context) { 291 + ctx := c.Request.Context() 292 + path := c.Param("path") 293 + 294 + if path == "" { 295 + s.sendError(c, http.StatusBadRequest, "invalid_path", "Path parameter is required", nil) 296 + return 297 + } 298 + 299 + if s.hsmClient == nil || !s.hsmClient.IsConnected() { 300 + s.sendError(c, http.StatusServiceUnavailable, "hsm_unavailable", "HSM client not connected", nil) 301 + return 302 + } 303 + 304 + if err := s.hsmClient.DeleteSecret(ctx, path); err != nil { 305 + s.logger.Error(err, "Failed to delete secret", "path", path) 306 + s.sendError(c, http.StatusInternalServerError, "delete_error", "Failed to delete secret", map[string]interface{}{ 307 + "path": path, 308 + "error": err.Error(), 309 + }) 310 + return 311 + } 312 + 313 + response := map[string]interface{}{ 314 + "path": path, 315 + } 316 + 317 + 
s.sendResponse(c, http.StatusOK, "Secret deleted successfully", response) 318 + } 319 + 320 + // handleListSecrets handles secret listing requests 321 + func (s *Server) handleListSecrets(c *gin.Context) { 322 + ctx := c.Request.Context() 323 + prefix := c.Query("prefix") 324 + 325 + if s.hsmClient == nil || !s.hsmClient.IsConnected() { 326 + s.sendError(c, http.StatusServiceUnavailable, "hsm_unavailable", "HSM client not connected", nil) 327 + return 328 + } 329 + 330 + paths, err := s.hsmClient.ListSecrets(ctx, prefix) 331 + if err != nil { 332 + s.logger.Error(err, "Failed to list secrets", "prefix", prefix) 333 + s.sendError(c, http.StatusInternalServerError, "list_error", "Failed to list secrets", map[string]interface{}{ 334 + "prefix": prefix, 335 + "error": err.Error(), 336 + }) 337 + return 338 + } 339 + 340 + response := map[string]interface{}{ 341 + "prefix": prefix, 342 + "paths": paths, 343 + "count": len(paths), 344 + } 345 + 346 + s.sendResponse(c, http.StatusOK, "Secrets listed successfully", response) 347 + } 348 + 349 + // handleGetChecksum handles checksum requests 350 + func (s *Server) handleGetChecksum(c *gin.Context) { 351 + ctx := c.Request.Context() 352 + path := c.Param("path") 353 + 354 + if path == "" { 355 + s.sendError(c, http.StatusBadRequest, "invalid_path", "Path parameter is required", nil) 356 + return 357 + } 358 + 359 + if s.hsmClient == nil || !s.hsmClient.IsConnected() { 360 + s.sendError(c, http.StatusServiceUnavailable, "hsm_unavailable", "HSM client not connected", nil) 361 + return 362 + } 363 + 364 + checksum, err := s.hsmClient.GetChecksum(ctx, path) 365 + if err != nil { 366 + s.logger.Error(err, "Failed to get checksum", "path", path) 367 + s.sendError(c, http.StatusInternalServerError, "checksum_error", "Failed to get checksum", map[string]interface{}{ 368 + "path": path, 369 + "error": err.Error(), 370 + }) 371 + return 372 + } 373 + 374 + response := map[string]interface{}{ 375 + "path": path, 376 + "checksum": 
checksum, 377 + } 378 + 379 + s.sendResponse(c, http.StatusOK, "Checksum retrieved successfully", response) 380 + } 381 + 382 + // handleHealthz handles liveness probe requests 383 + func (s *Server) handleHealthz(w http.ResponseWriter, r *http.Request) { 384 + status := HealthStatus{ 385 + Status: "healthy", 386 + DeviceName: s.deviceName, 387 + HSMConnected: s.hsmClient != nil && s.hsmClient.IsConnected(), 388 + Timestamp: time.Now(), 389 + Uptime: "unknown", // Could track actual uptime 390 + } 391 + 392 + if !status.HSMConnected { 393 + status.Status = "degraded" 394 + w.WriteHeader(http.StatusServiceUnavailable) 395 + } else { 396 + w.WriteHeader(http.StatusOK) 397 + } 398 + 399 + w.Header().Set("Content-Type", "application/json") 400 + // Simple JSON encoding without external dependencies 401 + fmt.Fprintf(w, `{"status":"%s","deviceName":"%s","hsmConnected":%t,"timestamp":"%s"}`, 402 + status.Status, status.DeviceName, status.HSMConnected, status.Timestamp.Format(time.RFC3339)) 403 + } 404 + 405 + // handleReadyz handles readiness probe requests 406 + func (s *Server) handleReadyz(w http.ResponseWriter, r *http.Request) { 407 + // Agent is ready if HSM client is connected 408 + if s.hsmClient == nil || !s.hsmClient.IsConnected() { 409 + w.WriteHeader(http.StatusServiceUnavailable) 410 + w.Header().Set("Content-Type", "application/json") 411 + fmt.Fprintf(w, `{"status":"not_ready","reason":"hsm_not_connected"}`) 412 + return 413 + } 414 + 415 + w.WriteHeader(http.StatusOK) 416 + w.Header().Set("Content-Type", "application/json") 417 + fmt.Fprintf(w, `{"status":"ready"}`) 418 + } 419 + 420 + // sendResponse sends a successful API response 421 + func (s *Server) sendResponse(c *gin.Context, statusCode int, message string, data interface{}) { 422 + response := AgentResponse{ 423 + Success: true, 424 + Message: message, 425 + Data: data, 426 + } 427 + c.JSON(statusCode, response) 428 + } 429 + 430 + // sendError sends an error API response 431 + func (s *Server) 
sendError(c *gin.Context, statusCode int, code, message string, details map[string]interface{}) { 432 + response := AgentResponse{ 433 + Success: false, 434 + Error: &AgentError{ 435 + Code: code, 436 + Message: message, 437 + Details: details, 438 + }, 439 + } 440 + c.JSON(statusCode, response) 441 + } 442 + 443 + // loggingMiddleware provides request logging 444 + func (s *Server) loggingMiddleware() gin.HandlerFunc { 445 + return gin.LoggerWithFormatter(func(param gin.LogFormatterParams) string { 446 + s.logger.Info("HTTP request", 447 + "method", param.Method, 448 + "path", param.Path, 449 + "status", param.StatusCode, 450 + "latency", param.Latency, 451 + "ip", param.ClientIP, 452 + ) 453 + return "" 454 + }) 455 + } 456 + 457 + // authMiddleware provides basic authentication/authorization 458 + func (s *Server) authMiddleware() gin.HandlerFunc { 459 + return func(c *gin.Context) { 460 + // For now, no authentication - this runs in a secure cluster 461 + // In production, you might want to add: 462 + // - Service account token validation 463 + // - mTLS client certificate validation 464 + // - Custom authentication headers 465 + 466 + // Add request context 467 + c.Set("device_name", s.deviceName) 468 + c.Next() 469 + } 470 + }
+54 -11
internal/controller/hsmsecret_controller.go
··· 34 34 "sigs.k8s.io/controller-runtime/pkg/log" 35 35 36 36 hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 37 + "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 37 38 "github.com/evanjarrett/hsm-secrets-operator/internal/discovery" 38 39 "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 39 40 ) ··· 52 53 Scheme *runtime.Scheme 53 54 HSMClient hsm.Client 54 55 MirroringManager *discovery.MirroringManager 56 + AgentManager *agent.Manager 55 57 } 56 58 57 59 // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmsecrets,verbs=get;list;watch;create;update;patch;delete ··· 60 62 // +kubebuilder:rbac:groups=hsm.j5t.io,resources=hsmdevices,verbs=get;list;watch 61 63 // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete 62 64 // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch 65 + // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete 66 + // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete 63 67 64 68 // Reconcile handles HSMSecret reconciliation 65 69 func (r *HSMSecretReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { ··· 76 80 return ctrl.Result{}, err 77 81 } 78 82 79 - // Check if HSM client is available 80 - if r.HSMClient == nil || !r.HSMClient.IsConnected() { 81 - logger.Error(fmt.Errorf("HSM client not available"), "HSM client not connected") 83 + // Find target HSM device and ensure agent is running 84 + hsmDevice, agentClient, err := r.ensureHSMAgent(ctx, &hsmSecret) 85 + if err != nil { 86 + logger.Error(err, "Failed to ensure HSM agent") 82 87 return ctrl.Result{RequeueAfter: time.Minute * 5}, nil 83 88 } 84 89 90 + // Use agent client instead of direct HSM client 91 + if agentClient == nil || !agentClient.IsConnected() { 92 + logger.Error(fmt.Errorf("HSM agent not available"), "HSM agent not connected", "device", hsmDevice.Name) 93 
+ return ctrl.Result{RequeueAfter: time.Minute * 2}, nil 94 + } 95 + 85 96 // Handle deletion 86 97 if hsmSecret.DeletionTimestamp != nil { 87 98 return r.reconcileDelete(ctx, &hsmSecret) ··· 97 108 return ctrl.Result{Requeue: true}, nil 98 109 } 99 110 100 - // Reconcile the HSMSecret 101 - result, err := r.reconcileNormal(ctx, &hsmSecret) 111 + // Reconcile the HSMSecret using the agent client 112 + result, err := r.reconcileNormal(ctx, &hsmSecret, agentClient) 102 113 if err != nil { 103 114 logger.Error(err, "Failed to reconcile HSMSecret") 104 115 r.updateStatus(ctx, &hsmSecret, hsmv1alpha1.SyncStatusError, err.Error()) ··· 107 118 return result, err 108 119 } 109 120 121 + // ensureHSMAgent finds an HSM device for the secret and ensures an agent is running 122 + func (r *HSMSecretReconciler) ensureHSMAgent(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret) (*hsmv1alpha1.HSMDevice, hsm.Client, error) { 123 + logger := log.FromContext(ctx) 124 + 125 + // Find the appropriate HSM device 126 + hsmDevice, err := r.findHSMDeviceForSecret(ctx, hsmSecret) 127 + if err != nil { 128 + return nil, nil, fmt.Errorf("failed to find HSM device for secret: %w", err) 129 + } 130 + 131 + // Ensure agent pod is running for this device 132 + if r.AgentManager == nil { 133 + return nil, nil, fmt.Errorf("agent manager not configured") 134 + } 135 + 136 + agentEndpoint, err := r.AgentManager.EnsureAgent(ctx, hsmDevice, hsmSecret) 137 + if err != nil { 138 + return nil, nil, fmt.Errorf("failed to ensure HSM agent: %w", err) 139 + } 140 + 141 + // Create agent client 142 + agentClient := agent.NewClient(agentEndpoint, hsmDevice.Name, logger) 143 + 144 + // Wait a moment for agent to start if just created 145 + if !agentClient.IsConnected() { 146 + logger.Info("Waiting for HSM agent to start", "device", hsmDevice.Name, "endpoint", agentEndpoint) 147 + time.Sleep(5 * time.Second) 148 + } 149 + 150 + return hsmDevice, agentClient, nil 151 + } 152 + 110 153 // reconcileNormal handles 
normal reconciliation logic 111 - func (r *HSMSecretReconciler) reconcileNormal(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret) (ctrl.Result, error) { 154 + func (r *HSMSecretReconciler) reconcileNormal(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret, hsmClient hsm.Client) (ctrl.Result, error) { 112 155 logger := log.FromContext(ctx) 113 156 114 157 // Set default values ··· 123 166 } 124 167 125 168 // Read secret from HSM with readonly fallback support 126 - hsmData, err := r.readSecretWithFallback(ctx, hsmSecret) 169 + hsmData, err := r.readSecretWithFallback(ctx, hsmSecret, hsmClient) 127 170 if err != nil { 128 171 logger.Error(err, "Failed to read secret from HSM and mirrors", "path", hsmSecret.Spec.HSMPath) 129 172 return ctrl.Result{RequeueAfter: time.Minute * 2}, err ··· 314 357 } 315 358 316 359 // readSecretWithFallback attempts to read a secret from primary HSM, falling back to mirrors if needed 317 - func (r *HSMSecretReconciler) readSecretWithFallback(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret) (hsm.SecretData, error) { 360 + func (r *HSMSecretReconciler) readSecretWithFallback(ctx context.Context, hsmSecret *hsmv1alpha1.HSMSecret, hsmClient hsm.Client) (hsm.SecretData, error) { 318 361 logger := log.FromContext(ctx) 319 362 320 - // Try to read from primary HSM first 321 - if r.HSMClient != nil && r.HSMClient.IsConnected() { 322 - data, err := r.HSMClient.ReadSecret(ctx, hsmSecret.Spec.HSMPath) 363 + // Try to read from primary HSM first (via agent) 364 + if hsmClient != nil && hsmClient.IsConnected() { 365 + data, err := hsmClient.ReadSecret(ctx, hsmSecret.Spec.HSMPath) 323 366 if err == nil { 324 367 logger.V(1).Info("Successfully read secret from primary HSM", "path", hsmSecret.Spec.HSMPath) 325 368 return data, nil