A Kubernetes operator that bridges Hardware Security Module (HSM) data storage with Kubernetes Secrets, providing true secret portability th
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

add unit tests

+162 -1407
+2 -2
helm/hsm-secrets-operator/Chart.yaml
··· 2 2 name: hsm-secrets-operator 3 3 description: A Kubernetes operator that bridges Pico HSM binary data storage with Kubernetes Secrets 4 4 type: application 5 - version: 0.5.21 6 - appVersion: v0.5.21 5 + version: 0.5.22 6 + appVersion: v0.5.22 7 7 icon: https://raw.githubusercontent.com/cncf/artwork/master/projects/kubernetes/icon/color/kubernetes-icon-color.svg 8 8 home: https://github.com/evanjarrett/hsm-secrets-operator 9 9 sources:
-1029
internal/agent/deployment.go
··· 1 - /* 2 - Copyright 2025. 3 - 4 - Licensed under the Apache License, Version 2.0 (the "License"); 5 - you may not use this file except in compliance with the License. 6 - You may obtain a copy of the License at 7 - 8 - http://www.apache.org/licenses/LICENSE-2.0 9 - 10 - Unless required by applicable law or agreed to in writing, software 11 - distributed under the License is distributed on an "AS IS" BASIS, 12 - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 - See the License for the specific language governing permissions and 14 - limitations under the License. 15 - */ 16 - 17 - package agent 18 - 19 - import ( 20 - "context" 21 - "fmt" 22 - "slices" 23 - "strings" 24 - "sync" 25 - "time" 26 - 27 - "github.com/go-logr/logr" 28 - appsv1 "k8s.io/api/apps/v1" 29 - corev1 "k8s.io/api/core/v1" 30 - "k8s.io/apimachinery/pkg/api/errors" 31 - "k8s.io/apimachinery/pkg/api/resource" 32 - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 - "k8s.io/apimachinery/pkg/types" 34 - "k8s.io/apimachinery/pkg/util/intstr" 35 - "sigs.k8s.io/controller-runtime/pkg/client" 36 - 37 - hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 38 - "github.com/evanjarrett/hsm-secrets-operator/internal/hsm" 39 - ) 40 - 41 - // ManagerInterface defines the interface for HSM agent management 42 - // This allows for easier testing with mocks 43 - type ManagerInterface interface { 44 - EnsureAgent(ctx context.Context, hsmPool *hsmv1alpha1.HSMPool) error 45 - CleanupAgent(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice) error 46 - } 47 - 48 - // AgentStatus represents the current status of an agent 49 - type AgentStatus string 50 - 51 - const ( 52 - AgentStatusCreating AgentStatus = "Creating" 53 - AgentStatusReady AgentStatus = "Ready" 54 - AgentStatusFailed AgentStatus = "Failed" 55 - ) 56 - 57 - // AgentInfo tracks agent state and connections 58 - type AgentInfo struct { 59 - PodIPs []string 60 - CreatedAt time.Time 61 - LastHealthCheck time.Time 62 - Status AgentStatus 63 - AgentName string 64 - Namespace string 65 - } 66 - 67 - const ( 68 - // AgentNamePrefix is the prefix for HSM agent deployment names 69 - AgentNamePrefix = "hsm-agent" 70 - 71 - // AgentPort is the port the HSM agent serves on (now gRPC) 72 - AgentPort = 9090 73 - 74 - // AgentHealthPort is the port for health checks (HTTP for simplicity) 75 - AgentHealthPort = 8093 76 - ) 77 - 78 - // Manager handles HSM agent pod lifecycle 79 - type Manager struct { 80 - client.Client 81 - AgentImage string 82 - AgentNamespace string 83 - ImageResolver ImageResolver 84 - 85 - // Internal tracking 86 - activeAgents map[string]*AgentInfo // deviceName -> AgentInfo 87 - mu sync.RWMutex 88 - 89 - // Test configuration 90 - TestMode bool // Enable test mode for faster operations 91 - WaitTimeout time.Duration // Timeout for waiting operations (default: 60s) 92 - WaitPollInterval time.Duration // Polling interval for waiting operations (default: 2s) 93 - } 94 - 95 - // ImageResolver interface for dependency injection 96 - type ImageResolver interface { 97 - GetImage(ctx context.Context, defaultImage string) string 98 - } 99 - 100 - // deviceWork represents work to be done for a specific device 101 - type deviceWork struct { 102 - device hsmv1alpha1.DiscoveredDevice 103 - agentName string 104 - agentKey string 105 - index int 106 - } 107 - 108 - // NewManager creates a new agent manager 109 - func NewManager(k8sClient client.Client, namespace string, imageResolver ImageResolver) *Manager { 110 - 111 - m := &Manager{ 112 - Client: k8sClient, 113 - AgentNamespace: namespace, 114 - ImageResolver: imageResolver, 115 - activeAgents: make(map[string]*AgentInfo), 116 - // Default production timeouts 117 - WaitTimeout: 60 * time.Second, 118 - WaitPollInterval: 2 * time.Second, 119 - } 120 - 121 - // If no namespace provided, agents will be deployed in the same namespace as their HSMDevice 122 - // AgentNamespace is only used as a fallback now 123 - 124 - return m 125 - } 126 - 127 - // NewTestManager creates a new agent manager optimized for testing 128 - func NewTestManager(k8sClient client.Client, namespace string, imageResolver ImageResolver) *Manager { 129 - m := &Manager{ 130 - Client: k8sClient, 131 - AgentNamespace: namespace, 132 - ImageResolver: imageResolver, 133 - activeAgents: make(map[string]*AgentInfo), 134 - // Fast test timeouts 135 - TestMode: true, 136 - WaitTimeout: 5 * time.Second, 137 - WaitPollInterval: 100 * time.Millisecond, 138 - } 139 - return m 140 - } 141 - 142 - // generateAgentName creates a consistent agent name for an HSM device 143 - func (m *Manager) generateAgentName(hsmPool *hsmv1alpha1.HSMPool) string { 144 - return fmt.Sprintf("%s-%s", AgentNamePrefix, hsmPool.OwnerReferences[0].Name) 145 - } 146 - 147 - // EnsureAgent ensures HSM agents are deployed for all available devices in the pool 148 - func (m *Manager) EnsureAgent(ctx context.Context, hsmPool *hsmv1alpha1.HSMPool) error { 149 - 150 - // Pre-collect available devices to process (no mutex needed) 151 - workItems := make([]deviceWork, 0, len(hsmPool.Status.AggregatedDevices)) 152 - for i, aggregatedDevice := range hsmPool.Status.AggregatedDevices { 153 - if !aggregatedDevice.Available { 154 - continue 155 - } 156 - workItems = append(workItems, deviceWork{ 157 - device: aggregatedDevice, 158 - agentName: fmt.Sprintf("%s-%d", m.generateAgentName(hsmPool), i), 159 - agentKey: aggregatedDevice.SerialNumber, 160 - index: i, 161 - }) 162 - } 163 - 164 - if len(workItems) == 0 { 165 - return nil // No available devices to process 166 - } 167 - 168 - // Process devices in parallel 169 - var wg sync.WaitGroup 170 - errChan := make(chan error, len(workItems)) 171 - 172 - for _, work := range workItems { 173 - wg.Add(1) 174 - go func(w deviceWork) { 175 - defer wg.Done() 176 - 177 - // Mutex-protected check and update of activeAgents 178 - m.mu.Lock() 179 - needsDeployment := false 180 - if agentInfo, exists := m.activeAgents[w.agentKey]; exists { 181 - if !m.isAgentHealthy(ctx, agentInfo) { 182 - m.removeAgentFromTracking(w.agentKey) 183 - needsDeployment = true 184 - } 185 - } else { 186 - needsDeployment = true 187 - } 188 - m.mu.Unlock() 189 - 190 - // Skip if agent is healthy and tracked 191 - if !needsDeployment { 192 - return 193 - } 194 - 195 - // Deploy agent for this device (Kubernetes API calls - no mutex needed) 196 - if err := m.deployAgentForDevice(ctx, w, hsmPool); err != nil { 197 - errChan <- fmt.Errorf("failed to deploy agent %s: %w", w.agentName, err) 198 - } 199 - }(work) 200 - } 201 - 202 - // Wait for all goroutines to complete 203 - wg.Wait() 204 - close(errChan) 205 - 206 - // Collect any errors 207 - deploymentErrors := make([]error, 0, len(workItems)) 208 - for err := range errChan { 209 - deploymentErrors = append(deploymentErrors, err) 210 - } 211 - 212 - if len(deploymentErrors) > 0 { 213 - return fmt.Errorf("agent deployment errors: %v", deploymentErrors) 214 - } 215 - 216 - return nil 217 - } 218 - 219 - // deployAgentForDevice handles the deployment logic for a single device 220 - func (m *Manager) deployAgentForDevice(ctx context.Context, work deviceWork, hsmPool *hsmv1alpha1.HSMPool) error { 221 - // Check if deployment exists in Kubernetes 222 - var deployment appsv1.Deployment 223 - err := m.Get(ctx, types.NamespacedName{ 224 - Name: work.agentName, 225 - Namespace: hsmPool.Namespace, 226 - }, &deployment) 227 - 228 - if err == nil { 229 - // Agent exists, but check if it needs updating (image version, device/node configuration) 230 - needsUpdate, err := m.agentNeedsUpdate(ctx, &deployment, hsmPool) 231 - if err != nil { 232 - return fmt.Errorf("failed to check if agent deployment %s needs update: %w", work.agentName, err) 233 - } 234 - 235 - // Also check device-specific configuration 236 - if !needsUpdate { 237 - needsUpdate = m.deploymentNeedsUpdateForDevice(&deployment, &work.device) 238 - } 239 - 240 - if needsUpdate { 241 - // Delete existing deployment to trigger recreation 242 - if err := m.Delete(ctx, &deployment); err != nil && !errors.IsNotFound(err) { 243 - return fmt.Errorf("failed to delete outdated agent deployment %s: %w", work.agentName, err) 244 - } 245 - } else { 246 - // Agent exists and is correct - wait for it and track it 247 - podIPs, err := m.waitForAgentReady(ctx, work.agentName, hsmPool.Namespace) 248 - if err != nil { 249 - return fmt.Errorf("failed waiting for existing agent pods %s: %w", work.agentName, err) 250 - } 251 - 252 - // Track the existing agent (mutex-protected) 253 - m.mu.Lock() 254 - agentInfo := &AgentInfo{ 255 - PodIPs: podIPs, 256 - CreatedAt: time.Now(), 257 - LastHealthCheck: time.Now(), 258 - Status: AgentStatusReady, 259 - AgentName: work.agentName, 260 - Namespace: hsmPool.Namespace, 261 - } 262 - m.activeAgents[work.agentKey] = agentInfo 263 - m.mu.Unlock() 264 - return nil 265 - } 266 - } else if !errors.IsNotFound(err) { 267 - return fmt.Errorf("failed to check agent deployment %s: %w", work.agentName, err) 268 - } 269 - 270 - // Create agent deployment for this specific device 271 - if err := m.createAgentDeployment(ctx, hsmPool, &work.device, work.agentName); err != nil { 272 - return fmt.Errorf("failed to create agent deployment %s: %w", work.agentName, err) 273 - } 274 - 275 - // Wait for agent pods to be ready and get their IPs 276 - podIPs, err := m.waitForAgentReady(ctx, work.agentName, hsmPool.Namespace) 277 - if err != nil { 278 - return fmt.Errorf("failed waiting for agent pods %s: %w", work.agentName, err) 279 - } 280 - 281 - // Track the new agent (mutex-protected) 282 - m.mu.Lock() 283 - agentInfo := &AgentInfo{ 284 - PodIPs: podIPs, 285 - CreatedAt: time.Now(), 286 - LastHealthCheck: time.Now(), 287 - Status: AgentStatusReady, 288 - AgentName: work.agentName, 289 - Namespace: hsmPool.Namespace, 290 - } 291 - m.activeAgents[work.agentKey] = agentInfo 292 - m.mu.Unlock() 293 - 294 - return nil 295 - } 296 - 297 - // CleanupAgent removes all HSM agents for the given device when no longer needed 298 - func (m *Manager) CleanupAgent(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice) error { 299 - m.mu.Lock() 300 - defer m.mu.Unlock() 301 - 302 - // Check if any HSMSecrets still reference this device 303 - var hsmSecretList hsmv1alpha1.HSMSecretList 304 - if err := m.List(ctx, &hsmSecretList); err != nil { 305 - return fmt.Errorf("failed to list HSMSecrets: %w", err) 306 - } 307 - 308 - // In the HSMPool architecture, cleanup should be based on device availability in pool 309 - // rather than individual secret references, since all secrets can use any available device 310 - // Check if there are any active HSMSecrets - if so, keep the agents running 311 - if len(hsmSecretList.Items) > 0 { 312 - return nil 313 - } 314 - 315 - // Get the HSMPool to find all agent deployments to clean up 316 - poolName := hsmDevice.Name + "-pool" 317 - var hsmPool hsmv1alpha1.HSMPool 318 - if err := m.Get(ctx, types.NamespacedName{ 319 - Name: poolName, 320 - Namespace: hsmDevice.Namespace, 321 - }, &hsmPool); err != nil { 322 - // If pool doesn't exist, try to clean up any remaining tracked agents 323 - return m.cleanupTrackedAgents(ctx, hsmDevice) 324 - } 325 - 326 - // Clean up all agent deployments (one per aggregated device) 327 - for i, aggregatedDevice := range hsmPool.Status.AggregatedDevices { 328 - agentName := fmt.Sprintf("%s-%s-%d", AgentNamePrefix, hsmDevice.Name, i) 329 - agentKey := fmt.Sprintf("%s-%s", hsmDevice.Name, aggregatedDevice.SerialNumber) 330 - 331 - // Remove from internal tracking 332 - m.removeAgentFromTracking(agentKey) 333 - 334 - // Delete deployment 335 - deployment := &appsv1.Deployment{ 336 - ObjectMeta: metav1.ObjectMeta{ 337 - Name: agentName, 338 - Namespace: hsmDevice.Namespace, 339 - }, 340 - } 341 - if err := m.Delete(ctx, deployment); err != nil && !errors.IsNotFound(err) { 342 - return fmt.Errorf("failed to delete agent deployment %s: %w", agentName, err) 343 - } 344 - } 345 - 346 - return nil 347 - } 348 - 349 - // cleanupTrackedAgents cleans up any remaining tracked agents when HSMPool is not available 350 - func (m *Manager) cleanupTrackedAgents(ctx context.Context, hsmDevice *hsmv1alpha1.HSMDevice) error { 351 - // Find all tracked agents for this device 352 - var agentsToCleanup []string 353 - devicePrefix := hsmDevice.Name + "-" 354 - 355 - for agentKey := range m.activeAgents { 356 - if strings.HasPrefix(agentKey, devicePrefix) { 357 - agentsToCleanup = append(agentsToCleanup, agentKey) 358 - } 359 - } 360 - 361 - // Clean up each tracked agent 362 - for _, agentKey := range agentsToCleanup { 363 - agentInfo := m.activeAgents[agentKey] 364 - 365 - // Remove from tracking 366 - m.removeAgentFromTracking(agentKey) 367 - 368 - // Delete deployment 369 - deployment := &appsv1.Deployment{ 370 - ObjectMeta: metav1.ObjectMeta{ 371 - Name: agentInfo.AgentName, 372 - Namespace: agentInfo.Namespace, 373 - }, 374 - } 375 - if err := m.Delete(ctx, deployment); err != nil && !errors.IsNotFound(err) { 376 - return fmt.Errorf("failed to delete agent deployment %s: %w", agentInfo.AgentName, err) 377 - } 378 - } 379 - 380 - return nil 381 - } 382 - 383 - // createAgentDeployment creates the HSM agent deployment for a specific device 384 - func (m *Manager) createAgentDeployment(ctx context.Context, hsmPool *hsmv1alpha1.HSMPool, specificDevice *hsmv1alpha1.DiscoveredDevice, customAgentName string) error { 385 - if specificDevice == nil { 386 - return fmt.Errorf("specificDevice is required") 387 - } 388 - 389 - var agentName string 390 - if customAgentName != "" { 391 - agentName = customAgentName 392 - } else { 393 - agentName = m.generateAgentName(hsmPool) 394 - } 395 - 396 - targetNode := specificDevice.NodeName 397 - devicePath := specificDevice.DevicePath 398 - deviceName := hsmPool.OwnerReferences[0].Name 399 - 400 - // Get discovery image from environment, manager image, or use default 401 - agentImage := m.ImageResolver.GetImage(ctx, "AGENT_IMAGE") 402 - 403 - deployment := &appsv1.Deployment{ 404 - ObjectMeta: metav1.ObjectMeta{ 405 - Name: agentName, 406 - Namespace: hsmPool.Namespace, 407 - Labels: map[string]string{ 408 - "app": agentName, 409 - "app.kubernetes.io/component": "hsm-agent", 410 - "app.kubernetes.io/instance": agentName, 411 - "app.kubernetes.io/name": "hsm-agent", 412 - "app.kubernetes.io/part-of": "hsm-secrets-operator", 413 - "hsm.j5t.io/device": deviceName, 414 - "hsm.j5t.io/serial-number": specificDevice.SerialNumber, 415 - "hsm.j5t.io/device-path": sanitizeLabelValue(specificDevice.DevicePath), 416 - }, 417 - }, 418 - Spec: appsv1.DeploymentSpec{ 419 - Replicas: int32Ptr(1), 420 - Selector: &metav1.LabelSelector{ 421 - MatchLabels: map[string]string{ 422 - "app": agentName, 423 - }, 424 - }, 425 - Template: corev1.PodTemplateSpec{ 426 - ObjectMeta: metav1.ObjectMeta{ 427 - Labels: map[string]string{ 428 - "app": agentName, 429 - "app.kubernetes.io/component": "hsm-agent", 430 - "app.kubernetes.io/instance": agentName, 431 - "app.kubernetes.io/name": "hsm-agent", 432 - "app.kubernetes.io/part-of": "hsm-secrets-operator", 433 - "hsm.j5t.io/device": deviceName, 434 - "hsm.j5t.io/serial-number": specificDevice.SerialNumber, 435 - "hsm.j5t.io/device-path": sanitizeLabelValue(specificDevice.DevicePath), 436 - }, 437 - }, 438 - Spec: corev1.PodSpec{ 439 - // Pin to the specific node with the HSM device 440 - NodeSelector: map[string]string{ 441 - "kubernetes.io/hostname": targetNode, 442 - }, 443 - // Affinity for better scheduling 444 - Affinity: &corev1.Affinity{ 445 - NodeAffinity: &corev1.NodeAffinity{ 446 - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ 447 - NodeSelectorTerms: []corev1.NodeSelectorTerm{ 448 - { 449 - MatchExpressions: []corev1.NodeSelectorRequirement{ 450 - { 451 - Key: "kubernetes.io/hostname", 452 - Operator: corev1.NodeSelectorOpIn, 453 - Values: []string{targetNode}, 454 - }, 455 - }, 456 - }, 457 - }, 458 - }, 459 - }, 460 - }, 461 - SecurityContext: &corev1.PodSecurityContext{ 462 - RunAsUser: int64Ptr(0), 463 - RunAsGroup: int64Ptr(0), 464 - RunAsNonRoot: boolPtr(false), 465 - }, 466 - ServiceAccountName: "hsm-secrets-operator", 467 - Containers: []corev1.Container{ 468 - { 469 - Name: "agent", 470 - Image: agentImage, 471 - Command: []string{ 472 - "/entrypoint.sh", 473 - "agent", 474 - }, 475 - Args: []string{ 476 - "--device-name=" + deviceName, 477 - "--port=" + fmt.Sprintf("%d", AgentPort), 478 - "--health-port=" + fmt.Sprintf("%d", AgentHealthPort), 479 - }, 480 - Env: func() []corev1.EnvVar { 481 - env, err := m.buildAgentEnv(ctx, hsmPool) 482 - if err != nil { 483 - // Log error but continue with empty env to avoid breaking deployment creation 484 - return []corev1.EnvVar{} 485 - } 486 - return env 487 - }(), 488 - Ports: []corev1.ContainerPort{ 489 - { 490 - Name: "grpc", 491 - ContainerPort: AgentPort, 492 - Protocol: corev1.ProtocolTCP, 493 - }, 494 - { 495 - Name: "health", 496 - ContainerPort: AgentHealthPort, 497 - Protocol: corev1.ProtocolTCP, 498 - }, 499 - }, 500 - LivenessProbe: &corev1.Probe{ 501 - ProbeHandler: corev1.ProbeHandler{ 502 - HTTPGet: &corev1.HTTPGetAction{ 503 - Path: "/healthz", 504 - Port: intstr.FromInt(AgentHealthPort), 505 - }, 506 - }, 507 - InitialDelaySeconds: 15, 508 - PeriodSeconds: 20, 509 - }, 510 - ReadinessProbe: &corev1.Probe{ 511 - ProbeHandler: corev1.ProbeHandler{ 512 - HTTPGet: &corev1.HTTPGetAction{ 513 - Path: "/readyz", 514 - Port: intstr.FromInt(AgentHealthPort), 515 - }, 516 - }, 517 - InitialDelaySeconds: 5, 518 - PeriodSeconds: 10, 519 - }, 520 - Resources: corev1.ResourceRequirements{ 521 - Requests: corev1.ResourceList{ 522 - corev1.ResourceCPU: resourceQuantity("100m"), 523 - corev1.ResourceMemory: resourceQuantity("128Mi"), 524 - }, 525 - Limits: corev1.ResourceList{ 526 - corev1.ResourceCPU: resourceQuantity("500m"), 527 - corev1.ResourceMemory: resourceQuantity("256Mi"), 528 - }, 529 - }, 530 - SecurityContext: &corev1.SecurityContext{ 531 - Privileged: boolPtr(true), 532 - AllowPrivilegeEscalation: boolPtr(true), 533 - Capabilities: &corev1.Capabilities{ 534 - Drop: []corev1.Capability{}, 535 - Add: []corev1.Capability{ 536 - "SYS_ADMIN", 537 - }, 538 - }, 539 - ReadOnlyRootFilesystem: boolPtr(false), 540 - RunAsNonRoot: boolPtr(false), 541 - RunAsUser: int64Ptr(0), 542 - }, 543 - VolumeMounts: m.buildAgentVolumeMounts(), 544 - }, 545 - }, 546 - Volumes: m.buildAgentVolumes(devicePath), 547 - }, 548 - }, 549 - }, 550 - } 551 - 552 - return m.Create(ctx, deployment) 553 - } 554 - 555 - // buildAgentEnv builds environment variables for the HSM agent 556 - func (m *Manager) buildAgentEnv(ctx context.Context, hsmPool *hsmv1alpha1.HSMPool) ([]corev1.EnvVar, error) { 557 - // Get HSMDevice from owner reference 558 - deviceName := hsmPool.OwnerReferences[0].Name 559 - var hsmDevice hsmv1alpha1.HSMDevice 560 - if err := m.Get(ctx, types.NamespacedName{ 561 - Name: deviceName, 562 - Namespace: hsmPool.Namespace, 563 - }, &hsmDevice); err != nil { 564 - return nil, fmt.Errorf("failed to get HSMDevice %s: %w", deviceName, err) 565 - } 566 - env := []corev1.EnvVar{ 567 - { 568 - Name: "HSM_DEVICE_NAME", 569 - Value: hsmDevice.Name, 570 - }, 571 - { 572 - Name: "HSM_DEVICE_TYPE", 573 - Value: string(hsmDevice.Spec.DeviceType), 574 - }, 575 - } 576 - 577 - // Add PKCS#11 configuration if available 578 - if hsmDevice.Spec.PKCS11 != nil { 579 - env = append(env, []corev1.EnvVar{ 580 - { 581 - Name: "PKCS11_LIBRARY_PATH", 582 - Value: hsmDevice.Spec.PKCS11.LibraryPath, 583 - }, 584 - { 585 - Name: "PKCS11_SLOT_ID", 586 - Value: fmt.Sprintf("%d", hsmDevice.Spec.PKCS11.SlotId), 587 - }, 588 - { 589 - Name: "PKCS11_TOKEN_LABEL", 590 - Value: hsmDevice.Spec.PKCS11.TokenLabel, 591 - }, 592 - }...) 593 - 594 - // Add PIN from secret if configured 595 - if hsmDevice.Spec.PKCS11.PinSecret != nil { 596 - env = append(env, corev1.EnvVar{ 597 - Name: "PKCS11_PIN", 598 - ValueFrom: &corev1.EnvVarSource{ 599 - SecretKeyRef: &corev1.SecretKeySelector{ 600 - LocalObjectReference: corev1.LocalObjectReference{ 601 - Name: hsmDevice.Spec.PKCS11.PinSecret.Name, 602 - }, 603 - Key: hsmDevice.Spec.PKCS11.PinSecret.Key, 604 - }, 605 - }, 606 - }) 607 - } 608 - } 609 - 610 - return env, nil 611 - } 612 - 613 - // buildAgentVolumeMounts builds volume mounts for the HSM agent 614 - func (m *Manager) buildAgentVolumeMounts() []corev1.VolumeMount { 615 - return []corev1.VolumeMount{ 616 - { 617 - Name: "tmp", 618 - MountPath: "/tmp", 619 - }, 620 - { 621 - Name: "hsm-device", 622 - MountPath: "/dev/hsm", 623 - }, 624 - } 625 - } 626 - 627 - // buildAgentVolumes builds volumes for the HSM agent 628 - func (m *Manager) buildAgentVolumes(devicePath string) []corev1.Volume { 629 - return []corev1.Volume{ 630 - { 631 - Name: "tmp", 632 - VolumeSource: corev1.VolumeSource{ 633 - EmptyDir: &corev1.EmptyDirVolumeSource{}, 634 - }, 635 - }, 636 - { 637 - Name: "hsm-device", 638 - VolumeSource: corev1.VolumeSource{ 639 - HostPath: &corev1.HostPathVolumeSource{ 640 - Path: devicePath, 641 - Type: hostPathTypePtr(corev1.HostPathCharDev), 642 - }, 643 - }, 644 - }, 645 - } 646 - } 647 - 648 - // agentNeedsUpdate checks if the agent deployment needs to be updated due to device path or image changes 649 - func (m *Manager) agentNeedsUpdate(ctx context.Context, deployment *appsv1.Deployment, hsmPool *hsmv1alpha1.HSMPool) (bool, error) { 650 - if hsmPool == nil { 651 - return false, nil // No pool available, no update needed 652 - } 653 - // Check if container image needs updating 654 - if len(deployment.Spec.Template.Spec.Containers) == 0 { 655 - return false, fmt.Errorf("deployment has no containers") 656 - } 657 - 658 - container := deployment.Spec.Template.Spec.Containers[0] 659 - currentImage := container.Image 660 - 661 - // Check if image has changed (only if ImageResolver is available) 662 - if m.ImageResolver != nil { 663 - expectedImage := m.ImageResolver.GetImage(ctx, "AGENT_IMAGE") 664 - if currentImage != expectedImage { 665 - // Image has changed, need to update 666 - return true, nil 667 - } 668 - } 669 - 670 - // Extract current volume mounts from deployment 671 - currentDeviceMounts := make(map[string]string) // mount name -> device path 672 - 673 - for _, mount := range container.VolumeMounts { 674 - if mount.Name == "hsm-device" { 675 - // Find corresponding volume 676 - for _, vol := range deployment.Spec.Template.Spec.Volumes { 677 - if vol.Name == mount.Name && vol.HostPath != nil { 678 - currentDeviceMounts[mount.Name] = vol.HostPath.Path 679 - break 680 - } 681 - } 682 - } 683 - } 684 - 685 - // Check if any device paths in the pool differ from current mounts 686 - for _, device := range hsmPool.Status.AggregatedDevices { 687 - if device.DevicePath != "" && device.Available { 688 - // Check if this device path is already mounted 689 - found := false 690 - for _, path := range currentDeviceMounts { 691 - if path == device.DevicePath { 692 - found = true 693 - break 694 - } 695 - } 696 - if !found { 697 - // New device path found that's not in current deployment 698 - return true, nil 699 - } 700 - } 701 - } 702 - 703 - // Check for stale device paths (mounted paths that are no longer in aggregated devices) 704 - for _, currentPath := range currentDeviceMounts { 705 - found := false 706 - for _, device := range hsmPool.Status.AggregatedDevices { 707 - if device.DevicePath == currentPath && device.Available { 708 - found = true 709 - break 710 - } 711 - } 712 - if !found { 713 - // Current mount points to a device path that's no longer available 714 - return true, nil 715 - } 716 - } 717 - 718 - return false, nil 719 - } 720 - 721 - // deploymentNeedsUpdateForDevice checks if a deployment needs to be updated for a specific device 722 - // This is a simplified check that only validates device-specific configuration 723 - func (m *Manager) deploymentNeedsUpdateForDevice(deployment *appsv1.Deployment, aggregatedDevice *hsmv1alpha1.DiscoveredDevice) bool { 724 - // Check node affinity - ensure agent is pinned to the correct node 725 - if deployment.Spec.Template.Spec.Affinity == nil || 726 - deployment.Spec.Template.Spec.Affinity.NodeAffinity == nil || 727 - deployment.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { 728 - return true // Missing required node affinity 729 - } 730 - 731 - // Check if the node name matches the aggregated device's node 732 - nodeSelector := deployment.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution 733 - if len(nodeSelector.NodeSelectorTerms) == 0 { 734 - return true 735 - } 736 - 737 - // Check if hostname requirement matches the device's node 738 - nodeMatches := false 739 - for _, term := range nodeSelector.NodeSelectorTerms { 740 - for _, expr := range term.MatchExpressions { 741 - if expr.Key == "kubernetes.io/hostname" && expr.Operator == corev1.NodeSelectorOpIn { 742 - if slices.Contains(expr.Values, aggregatedDevice.NodeName) { 743 - nodeMatches = true 744 - } 745 - } 746 - } 747 - } 748 - 749 - if !nodeMatches { 750 - return true // Node doesn't match 751 - } 752 - 753 - // Check device path in volume mounts 754 - for _, vol := range deployment.Spec.Template.Spec.Volumes { 755 - if vol.Name == "hsm-device" && vol.HostPath != nil { 756 - if vol.HostPath.Path != aggregatedDevice.DevicePath { 757 - return true // Device path changed 758 - } 759 - } 760 - } 761 - 762 - return false 763 - } 764 - 765 - // Helper functions 766 - // waitForAgentReady waits for agent pods to be ready and returns their IPs 767 - func (m *Manager) waitForAgentReady(ctx context.Context, agentName, namespace string) ([]string, error) { 768 - // In test mode, simulate immediate readiness for faster tests 769 - if m.TestMode { 770 - return []string{"127.0.0.1"}, nil 771 - } 772 - 773 - timeout := time.After(m.WaitTimeout) 774 - ticker := time.NewTicker(m.WaitPollInterval) 775 - defer ticker.Stop() 776 - 777 - for { 778 - select { 779 - case <-timeout: 780 - return nil, fmt.Errorf("timeout waiting for agent pods to be ready after %v", m.WaitTimeout) 781 - case <-ticker.C: 782 - pods := &corev1.PodList{} 783 - err := m.List(ctx, pods, 784 - client.InNamespace(namespace), 785 - client.MatchingLabels{"app": agentName}, 786 - ) 787 - if err != nil { 788 - continue 789 - } 790 - 791 - var readyPodIPs []string 792 - for _, pod := range pods.Items { 793 - if pod.Status.Phase == corev1.PodRunning && 794 - len(pod.Status.PodIP) > 0 { 795 - // Check if all containers are ready 796 - allReady := true 797 - for _, condition := range pod.Status.Conditions { 798 - if condition.Type == corev1.PodReady { 799 - allReady = condition.Status == corev1.ConditionTrue 800 - break 801 - } 802 - } 803 - if allReady { 804 - readyPodIPs = append(readyPodIPs, pod.Status.PodIP) 805 - } 806 - } 807 - } 808 - 809 - if len(readyPodIPs) > 0 { 810 - return readyPodIPs, nil 811 - } 812 - } 813 - } 814 - } 815 - 816 - // GetAgentInfo returns the AgentInfo for a device 817 - func (m *Manager) GetAgentInfo(deviceName string) (*AgentInfo, bool) { 818 - m.mu.RLock() 819 - defer m.mu.RUnlock() 820 - 821 - agentInfo, exists := m.activeAgents[deviceName] 822 - return agentInfo, exists 823 - } 824 - 825 - // removeAgentFromTracking removes an agent from internal tracking 826 - func (m *Manager) removeAgentFromTracking(deviceName string) { 827 - delete(m.activeAgents, deviceName) 828 - } 829 - 830 - // RemoveAgentFromTracking removes an agent from tracking (public method for testing) 831 - func (m *Manager) RemoveAgentFromTracking(deviceName string) { 832 - m.mu.Lock() 833 - defer m.mu.Unlock() 834 - m.removeAgentFromTracking(deviceName) 835 - } 836 - 837 - // SetAgentInfo sets agent information for testing 838 - func (m *Manager) SetAgentInfo(deviceName string, info *AgentInfo) { 839 - m.mu.Lock() 840 - defer m.mu.Unlock() 841 - 842 - m.activeAgents[deviceName] = info 843 - } 844 - 845 - // isAgentHealthy checks if an agent is healthy by verifying pod IPs 846 - func (m *Manager) isAgentHealthy(ctx context.Context, agentInfo *AgentInfo) bool { 847 - // Simple health check: ensure pod IPs are still valid 848 - // In the future, we can add gRPC health checks here 849 - if len(agentInfo.PodIPs) == 0 { 850 - return false 851 - } 852 - 853 - // Check if pods still exist and are running 854 - pods := &corev1.PodList{} 855 - err := m.List(ctx, pods, 856 - client.InNamespace(agentInfo.Namespace), 857 - client.MatchingLabels{"app": agentInfo.AgentName}, 858 - ) 859 - if err != nil { 860 - return false 861 - } 862 - 863 - runningPods := 0 864 - for _, pod := range pods.Items { 865 - if pod.Status.Phase == corev1.PodRunning { 866 - runningPods++ 867 - } 868 - } 869 - 870 - return runningPods > 0 871 - } 872 - 873 - // GetAgentPodIPs returns all agent pod IPs for a device type from HSMPool 874 - func (m *Manager) GetAgentPodIPs(ctx context.Context, deviceName, namespace string) ([]string, error) { 875 - // Get HSMPool for this device 876 - poolName := deviceName + "-pool" 877 - var hsmPool hsmv1alpha1.HSMPool 878 - if err := m.Get(ctx, types.NamespacedName{ 879 - Name: poolName, 880 - Namespace: namespace, 881 - }, &hsmPool); err != nil { 882 - return nil, fmt.Errorf("failed to get HSMPool %s: %w", poolName, err) 883 - } 884 - 885 - m.mu.RLock() 886 - defer m.mu.RUnlock() 887 - 888 - var allPodIPs []string 889 - 890 - // Collect pod IPs from all agent instances for this device 891 - for _, aggregatedDevice := range hsmPool.Status.AggregatedDevices { 892 - agentKey := fmt.Sprintf("%s-%s", deviceName, aggregatedDevice.SerialNumber) 893 - if agentInfo, exists := m.activeAgents[agentKey]; exists && len(agentInfo.PodIPs) > 0 { 894 - allPodIPs = append(allPodIPs, agentInfo.PodIPs...) 895 - } 896 - } 897 - 898 - if len(allPodIPs) == 0 { 899 - return nil, fmt.Errorf("no active agents found for device %s in pool %s", deviceName, poolName) 900 - } 901 - 902 - return allPodIPs, nil 903 - } 904 - 905 - // sanitizeLabelValue sanitizes a string to be a valid Kubernetes label value 906 - // Kubernetes labels must be alphanumeric, '-', '_', or '.' and start/end with alphanumeric 907 - func sanitizeLabelValue(value string) string { 908 - if len(value) == 0 { 909 - return value 910 - } 911 - 912 - // Replace invalid characters with dashes 913 - sanitized := strings.Map(func(r rune) rune { 914 - if (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' || r == '.' { 915 - return r 916 - } 917 - return '-' 918 - }, value) 919 - 920 - // Ensure starts and ends with alphanumeric 921 - sanitized = strings.TrimFunc(sanitized, func(r rune) bool { 922 - return (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') && (r < '0' || r > '9') 923 - }) 924 - 925 - // Kubernetes label values have a 63 character limit 926 - if len(sanitized) > 63 { 927 - sanitized = sanitized[:63] 928 - // Re-trim end if we cut off at a non-alphanumeric 929 - sanitized = strings.TrimFunc(sanitized, func(r rune) bool { 930 - return (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') && (r < '0' || r > '9') 931 - }) 932 - } 933 - 934 - return sanitized 935 - } 936 - 937 - // GetGRPCEndpoints returns gRPC endpoints for all agent pods of a device 938 - func (m *Manager) GetGRPCEndpoints(ctx context.Context, deviceName, namespace string) ([]string, error) { 939 - podIPs, err := m.GetAgentPodIPs(ctx, deviceName, namespace) 940 - if err != nil { 941 - return nil, err 942 - } 943 - 944 - endpoints := make([]string, 0, len(podIPs)) 945 - for _, ip := range podIPs { 946 - endpoints = append(endpoints, fmt.Sprintf("%s:%d", ip, AgentPort)) 947 - } 948 - 949 - return endpoints, nil 950 - } 951 - 952 - // CreateGRPCClient creates a gRPC client for the first available agent pod of a device 953 - func (m *Manager) CreateGRPCClient(ctx context.Context, deviceName, namespace string, logger logr.Logger) (hsm.Client, error) { 954 - endpoints, err := m.GetGRPCEndpoints(ctx, deviceName, namespace) 955 - if err != nil { 956 - return nil, err 957 - } 958 - 959 - if len(endpoints) == 0 { 960 - return nil, fmt.Errorf("no agent endpoints available for device %s", deviceName) 961 - } 962 - 963 - // Use the first endpoint for single client 964 - grpcClient, err := NewGRPCClient(endpoints[0], deviceName, logger) 965 - if err != nil { 966 - return nil, fmt.Errorf("failed to create gRPC client for %s: %w", endpoints[0], err) 967 - } 968 - 969 - // Test the connection 970 - if err := grpcClient.Initialize(ctx, hsm.Config{}); err != nil { 971 - if err := grpcClient.Close(); err != nil { 972 - logger.Error(err, "Failed to close gRPC client after failed initialization") 973 - } 974 - return nil, fmt.Errorf("failed to initialize gRPC client for %s: %w", endpoints[0], err) 975 - } 976 - 977 - return grpcClient, nil 978 - } 979 - 980 - // GetAvailableDevices finds all devices with ready HSMPools and active agents 981 - func (m *Manager) GetAvailableDevices(ctx context.Context, namespace string) ([]string, error) { 982 - // List all HSMPools to find all with active agents 983 - var hsmPoolList hsmv1alpha1.HSMPoolList 984 - if err := m.List(ctx, &hsmPoolList, client.InNamespace(namespace)); err != nil { 985 - return nil, fmt.Errorf("failed to list HSM pools: %w", err) 986 - } 987 - 988 - var availableDevices []string 989 - // Check all pools that have active agents 990 - for _, pool := range hsmPoolList.Items { 991 - if pool.Status.Phase != hsmv1alpha1.HSMPoolPhaseReady { 992 - continue 993 - } 994 - 995 - // Extract device name from pool name (remove "-pool" suffix) 996 - deviceName := strings.TrimSuffix(pool.Name, "-pool") 997 - 998 - if podIPs, err := m.GetAgentPodIPs(ctx, deviceName, namespace); err == nil && len(podIPs) > 0 { 999 - availableDevices = append(availableDevices, deviceName) 1000 - } 1001 - } 1002 - 1003 - if len(availableDevices) == 0 { 1004 - return nil, fmt.Errorf("no available HSM agents found") 1005 - } 1006 - 1007 - return availableDevices, nil 1008 - } 1009 - 1010 - func int32Ptr(i int32) *int32 { 1011 - return &i 1012 - } 1013 - 1014 - func int64Ptr(i int64) *int64 { 1015 - return &i 1016 - } 1017 - 1018 - func boolPtr(b bool) *bool { 1019 - return &b 1020 - } 1021 - 1022 - func hostPathTypePtr(t corev1.HostPathType) *corev1.HostPathType { 1023 - return &t 1024 - } 1025 - 1026 - func resourceQuantity(s string) resource.Quantity { 1027 - q, _ := resource.ParseQuantity(s) 1028 - return q 1029 - }
-376
internal/agent/deployment_test.go
··· 1 - /* 2 - Copyright 2025. 3 - 4 - Licensed under the Apache License, Version 2.0 (the "License"); 5 - you may not use this file except in compliance with the License. 6 - You may obtain a copy of the License at 7 - 8 - http://www.apache.org/licenses/LICENSE-2.0 9 - 10 - Unless required by applicable law or agreed to in writing, software 11 - distributed under the License is distributed on an "AS IS" BASIS, 12 - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 - See the License for the specific language governing permissions and 14 - limitations under the License. 15 - */ 16 - 17 - package agent 18 - 19 - import ( 20 - "context" 21 - "testing" 22 - 23 - "github.com/stretchr/testify/assert" 24 - "github.com/stretchr/testify/require" 25 - appsv1 "k8s.io/api/apps/v1" 26 - corev1 "k8s.io/api/core/v1" 27 - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 - "k8s.io/apimachinery/pkg/runtime" 29 - "sigs.k8s.io/controller-runtime/pkg/client/fake" 30 - 31 - hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 32 - ) 33 - 34 - func TestAgentNeedsUpdate(t *testing.T) { 35 - scheme := runtime.NewScheme() 36 - require.NoError(t, hsmv1alpha1.AddToScheme(scheme)) 37 - require.NoError(t, appsv1.AddToScheme(scheme)) 38 - require.NoError(t, corev1.AddToScheme(scheme)) 39 - 40 - tests := []struct { 41 - name string 42 - deployment *appsv1.Deployment 43 - hsmDevice *hsmv1alpha1.HSMDevice 44 - hsmPool *hsmv1alpha1.HSMPool 45 - expectedUpdate bool 46 - expectError bool 47 - }{ 48 - { 49 - name: "no update needed - same device path", 50 - deployment: &appsv1.Deployment{ 51 - ObjectMeta: metav1.ObjectMeta{ 52 - Name: "test-agent", 53 - Namespace: "default", 54 - }, 55 - Spec: appsv1.DeploymentSpec{ 56 - Template: corev1.PodTemplateSpec{ 57 - Spec: corev1.PodSpec{ 58 - Containers: []corev1.Container{ 59 - { 60 - Name: "agent", 61 - VolumeMounts: []corev1.VolumeMount{ 62 - { 63 - Name: "hsm-device", 64 - MountPath: "/dev/hsm", 65 - }, 66 - }, 67 - }, 68 - }, 69 - Volumes: []corev1.Volume{ 70 - { 71 - Name: "hsm-device", 72 - VolumeSource: corev1.VolumeSource{ 73 - HostPath: &corev1.HostPathVolumeSource{ 74 - Path: "/dev/bus/usb/001/015", 75 - }, 76 - }, 77 - }, 78 - }, 79 - }, 80 - }, 81 - }, 82 - }, 83 - hsmDevice: &hsmv1alpha1.HSMDevice{ 84 - ObjectMeta: metav1.ObjectMeta{ 85 - Name: "test-device", 86 - Namespace: "default", 87 - }, 88 - }, 89 - hsmPool: &hsmv1alpha1.HSMPool{ 90 - ObjectMeta: metav1.ObjectMeta{ 91 - Name: "test-device-pool", 92 - Namespace: "default", 93 - }, 94 - Status: hsmv1alpha1.HSMPoolStatus{ 95 - AggregatedDevices: []hsmv1alpha1.DiscoveredDevice{ 96 - { 97 - DevicePath: "/dev/bus/usb/001/015", 98 - Available: true, 99 - }, 100 - }, 101 - }, 102 - }, 103 - expectedUpdate: false, 104 - expectError: false, 105 - }, 106 - { 107 - name: "update needed - device path changed", 108 - deployment: &appsv1.Deployment{ 109 - ObjectMeta: metav1.ObjectMeta{ 110 - Name: "test-agent", 111 - Namespace: "default", 112 - }, 113 - Spec: appsv1.DeploymentSpec{ 114 - Template: corev1.PodTemplateSpec{ 115 - Spec: corev1.PodSpec{ 116 - Containers: []corev1.Container{ 117 - { 118 - Name: "agent", 119 - VolumeMounts: []corev1.VolumeMount{ 120 - { 121 - Name: "hsm-device", 122 - MountPath: "/dev/hsm", 123 - }, 124 - }, 125 - }, 126 - }, 127 - Volumes: []corev1.Volume{ 128 - { 129 - Name: "hsm-device", 130 - VolumeSource: corev1.VolumeSource{ 131 - HostPath: &corev1.HostPathVolumeSource{ 132 - Path: "/dev/bus/usb/001/015", // Old path 133 - }, 134 - }, 135 - }, 136 - }, 137 - }, 138 - }, 139 - }, 140 - }, 141 - hsmDevice: &hsmv1alpha1.HSMDevice{ 142 - ObjectMeta: metav1.ObjectMeta{ 143 - Name: "test-device", 144 - Namespace: "default", 145 - }, 146 - }, 147 - hsmPool: &hsmv1alpha1.HSMPool{ 148 - ObjectMeta: metav1.ObjectMeta{ 149 - Name: "test-device-pool", 150 - Namespace: "default", 151 - }, 152 - Status: hsmv1alpha1.HSMPoolStatus{ 153 - AggregatedDevices: []hsmv1alpha1.DiscoveredDevice{ 154 - { 155 - DevicePath: "/dev/bus/usb/001/016", // New path 156 - Available: true, 157 - }, 158 - }, 159 - }, 160 - }, 161 - expectedUpdate: true, 162 - expectError: false, 163 - }, 164 - { 165 - name: "no update needed - pool not found", 166 - deployment: &appsv1.Deployment{ 167 - ObjectMeta: metav1.ObjectMeta{ 168 - Name: "test-agent", 169 - Namespace: "default", 170 - }, 171 - Spec: appsv1.DeploymentSpec{ 172 - Template: corev1.PodTemplateSpec{ 173 - Spec: corev1.PodSpec{ 174 - Containers: []corev1.Container{ 175 - { 176 - Name: "agent", 177 - }, 178 - }, 179 - }, 180 - }, 181 - }, 182 - }, 183 - hsmDevice: &hsmv1alpha1.HSMDevice{ 184 - ObjectMeta: metav1.ObjectMeta{ 185 - Name: "test-device", 186 - Namespace: "default", 187 - }, 188 - }, 189 - // No HSMPool object created 190 - expectedUpdate: false, 191 - expectError: false, 192 - }, 193 - } 194 - 195 - for _, tt := range tests { 196 - t.Run(tt.name, func(t *testing.T) { 197 - ctx := context.Background() 198 - 199 - // Create fake client with objects 200 - objs := []runtime.Object{tt.hsmDevice} 201 - if tt.hsmPool != nil { 202 - objs = append(objs, tt.hsmPool) 203 - } 204 - 205 - fakeClient := fake.NewClientBuilder(). 206 - WithScheme(scheme). 207 - WithRuntimeObjects(objs...). 208 - Build() 209 - 210 - manager := &Manager{ 211 - Client: fakeClient, 212 - AgentImage: "test-image", 213 - } 214 - 215 - needsUpdate, err := manager.agentNeedsUpdate(ctx, tt.deployment, tt.hsmPool) 216 - 217 - if tt.expectError { 218 - assert.Error(t, err) 219 - } else { 220 - assert.NoError(t, err) 221 - assert.Equal(t, tt.expectedUpdate, needsUpdate) 222 - } 223 - }) 224 - } 225 - } 226 - 227 - func TestAgentTracking(t *testing.T) { 228 - scheme := runtime.NewScheme() 229 - require.NoError(t, hsmv1alpha1.AddToScheme(scheme)) 230 - require.NoError(t, appsv1.AddToScheme(scheme)) 231 - require.NoError(t, corev1.AddToScheme(scheme)) 232 - 233 - t.Run("GetAgentInfo - agent exists", func(t *testing.T) { 234 - fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() 235 - manager := NewManager(fakeClient, "test-namespace", nil) 236 - 237 - // Add agent to tracking 238 - agentInfo := &AgentInfo{ 239 - PodIPs: []string{"10.1.1.5", "10.1.1.6"}, 240 - Status: AgentStatusReady, 241 - AgentName: "hsm-agent-test-device", 242 - Namespace: "default", 243 - } 244 - manager.activeAgents["test-device"] = agentInfo 245 - 246 - // Test retrieval 247 - retrieved, exists := manager.GetAgentInfo("test-device") 248 - assert.True(t, exists) 249 - assert.Equal(t, agentInfo, retrieved) 250 - }) 251 - 252 - t.Run("GetAgentInfo - agent does not exist", func(t *testing.T) { 253 - fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() 254 - manager := NewManager(fakeClient, "test-namespace", nil) 255 - 256 - retrieved, exists := manager.GetAgentInfo("nonexistent-device") 257 - assert.False(t, exists) 258 - assert.Nil(t, retrieved) 259 - }) 260 - 261 - // GetAgentPodIPs tests removed - function now uses HSMPool-based lookup 262 - 263 - // GetGRPCEndpoints tests removed - function now uses HSMPool-based lookup 264 - 265 - t.Run("removeAgentFromTracking", func(t *testing.T) { 266 - fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() 267 - manager := NewManager(fakeClient, "test-namespace", nil) 268 - 269 - // Add agent to tracking 270 - agentInfo := &AgentInfo{ 271 - PodIPs: []string{"10.1.1.5"}, 272 - Status: AgentStatusReady, 273 - } 274 - manager.activeAgents["test-device"] = agentInfo 275 - 276 - // Verify it exists 277 - _, exists := manager.GetAgentInfo("test-device") 278 - assert.True(t, exists) 279 - 280 - // Remove it 281 - manager.removeAgentFromTracking("test-device") 282 - 283 - // Verify it's gone 284 - _, exists = manager.GetAgentInfo("test-device") 285 - assert.False(t, exists) 286 - }) 287 - } 288 - 289 - func TestIsAgentHealthy(t *testing.T) { 290 - scheme := runtime.NewScheme() 291 - require.NoError(t, hsmv1alpha1.AddToScheme(scheme)) 292 - require.NoError(t, appsv1.AddToScheme(scheme)) 293 - require.NoError(t, corev1.AddToScheme(scheme)) 294 - 295 - t.Run("healthy agent with running pods", func(t *testing.T) { 296 - ctx := context.Background() 297 - 298 - // Create running pods 299 - pod1 := &corev1.Pod{ 300 - ObjectMeta: metav1.ObjectMeta{ 301 - Name: "agent-pod-1", 302 - Namespace: "default", 303 - Labels: map[string]string{"app": "hsm-agent-test-device"}, 304 - }, 305 - Status: corev1.PodStatus{ 306 - Phase: corev1.PodRunning, 307 - }, 308 - } 309 - 310 - fakeClient := fake.NewClientBuilder(). 311 - WithScheme(scheme). 312 - WithRuntimeObjects(pod1). 313 - Build() 314 - 315 - manager := NewManager(fakeClient, "test-namespace", nil) 316 - 317 - agentInfo := &AgentInfo{ 318 - PodIPs: []string{"10.1.1.5"}, 319 - Status: AgentStatusReady, 320 - AgentName: "hsm-agent-test-device", 321 - Namespace: "default", 322 - } 323 - 324 - healthy := manager.isAgentHealthy(ctx, agentInfo) 325 - assert.True(t, healthy) 326 - }) 327 - 328 - t.Run("unhealthy agent with no pod IPs", func(t *testing.T) { 329 - ctx := context.Background() 330 - fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() 331 - manager := NewManager(fakeClient, "test-namespace", nil) 332 - 333 - agentInfo := &AgentInfo{ 334 - PodIPs: []string{}, // No pod IPs 335 - Status: AgentStatusReady, 336 - AgentName: "hsm-agent-test-device", 337 - Namespace: "default", 338 - } 339 - 340 - healthy := manager.isAgentHealthy(ctx, agentInfo) 341 - assert.False(t, healthy) 342 - }) 343 - 344 - t.Run("unhealthy agent with no running pods", func(t *testing.T) { 345 - ctx := context.Background() 346 - 347 - // Create non-running pod 348 - pod1 := &corev1.Pod{ 349 - ObjectMeta: metav1.ObjectMeta{ 350 - Name: "agent-pod-1", 351 - Namespace: "default", 352 - Labels: map[string]string{"app": "hsm-agent-test-device"}, 353 - }, 354 - Status: corev1.PodStatus{ 355 - Phase: corev1.PodFailed, 356 - }, 357 - } 358 - 359 - fakeClient := fake.NewClientBuilder(). 360 - WithScheme(scheme). 361 - WithRuntimeObjects(pod1). 362 - Build() 363 - 364 - manager := NewManager(fakeClient, "test-namespace", nil) 365 - 366 - agentInfo := &AgentInfo{ 367 - PodIPs: []string{"10.1.1.5"}, 368 - Status: AgentStatusReady, 369 - AgentName: "hsm-agent-test-device", 370 - Namespace: "default", 371 - } 372 - 373 - healthy := manager.isAgentHealthy(ctx, agentInfo) 374 - assert.False(t, healthy) 375 - }) 376 - }
+160
internal/controller/hsmsecret_controller_test.go
··· 18 18 19 19 import ( 20 20 "context" 21 + "testing" 21 22 22 23 . "github.com/onsi/ginkgo/v2" 23 24 . "github.com/onsi/gomega" 25 + "github.com/stretchr/testify/assert" 26 + "github.com/stretchr/testify/require" 24 27 "k8s.io/apimachinery/pkg/api/errors" 28 + "k8s.io/apimachinery/pkg/runtime" 25 29 "k8s.io/apimachinery/pkg/types" 30 + "sigs.k8s.io/controller-runtime/pkg/client/fake" 26 31 "sigs.k8s.io/controller-runtime/pkg/reconcile" 27 32 28 33 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 34 30 35 hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1" 36 + "github.com/evanjarrett/hsm-secrets-operator/internal/agent" 31 37 ) 32 38 33 39 var _ = Describe("HSMSecret Controller", func() { ··· 82 88 }) 83 89 }) 84 90 }) 91 + 92 + func testStringPtr(s string) *string { 93 + return &s 94 + } 95 + 96 + // Unit tests for the refactored HSMSecretReconciler using standard Go testing 97 + func TestHSMSecretReconciler_Reconcile(t *testing.T) { 98 + scheme := runtime.NewScheme() 99 + require.NoError(t, hsmv1alpha1.AddToScheme(scheme)) 100 + 101 + tests := []struct { 102 + name string 103 + hsmSecret *hsmv1alpha1.HSMSecret 104 + agentManager *agent.Manager 105 + expectRequeue bool 106 + expectError bool 107 + expectedErrorMsg string 108 + }{ 109 + { 110 + name: "HSMSecret not found - should not error", 111 + hsmSecret: nil, // No HSMSecret in fake client 112 + agentManager: func() *agent.Manager { 113 + fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() 114 + return agent.NewManager(fakeClient, "test-namespace", nil) 115 + }(), 116 + expectRequeue: false, 117 + expectError: false, 118 + }, 119 + { 120 + name: "No available devices - should requeue after 2 minutes", 121 + hsmSecret: &hsmv1alpha1.HSMSecret{ 122 + ObjectMeta: metav1.ObjectMeta{ 123 + Name: "test-secret", 124 + Namespace: "default", 125 + Finalizers: []string{"hsmsecret.hsm.j5t.io/finalizer"}, 126 + }, 127 + Spec: hsmv1alpha1.HSMSecretSpec{ 128 + AutoSync: true, 129 + ParentRef: &hsmv1alpha1.ParentReference{ 130 + Name: "test-operator", 131 + Namespace: testStringPtr("test-namespace"), 132 + }, 133 + }, 134 + }, 135 + agentManager: func() *agent.Manager { 136 + fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() 137 + return agent.NewManager(fakeClient, "test-namespace", nil) 138 + }(), 139 + expectRequeue: true, 140 + expectError: false, 141 + }, 142 + } 143 + 144 + for _, tt := range tests { 145 + t.Run(tt.name, func(t *testing.T) { 146 + ctx := context.Background() 147 + 148 + // Create fake client with HSMSecret if provided 149 + var objs []runtime.Object 150 + if tt.hsmSecret != nil { 151 + objs = append(objs, tt.hsmSecret) 152 + } 153 + 154 + fakeClient := fake.NewClientBuilder(). 155 + WithScheme(scheme). 156 + WithRuntimeObjects(objs...). 157 + Build() 158 + 159 + reconciler := &HSMSecretReconciler{ 160 + Client: fakeClient, 161 + Scheme: scheme, 162 + AgentManager: tt.agentManager, 163 + OperatorNamespace: "test-namespace", 164 + OperatorName: "test-operator", 165 + } 166 + 167 + req := reconcile.Request{ 168 + NamespacedName: types.NamespacedName{ 169 + Name: "test-secret", 170 + Namespace: "default", 171 + }, 172 + } 173 + 174 + result, err := reconciler.Reconcile(ctx, req) 175 + 176 + if tt.expectError { 177 + assert.Error(t, err) 178 + if tt.expectedErrorMsg != "" { 179 + assert.Contains(t, err.Error(), tt.expectedErrorMsg) 180 + } 181 + } else { 182 + assert.NoError(t, err) 183 + } 184 + 185 + if tt.expectRequeue { 186 + assert.True(t, result.RequeueAfter > 0, "Expected RequeueAfter but got %+v", result) 187 + } else if !tt.expectError { 188 + assert.Equal(t, int64(0), result.RequeueAfter.Nanoseconds(), "Did not expect RequeueAfter but got %+v", result) 189 + } 190 + }) 191 + } 192 + } 193 + 194 + func TestHSMSecretReconciler_ReconcileWithAgentManager(t *testing.T) { 195 + scheme := runtime.NewScheme() 196 + require.NoError(t, hsmv1alpha1.AddToScheme(scheme)) 197 + 198 + t.Run("reconciler uses agent manager for device discovery", func(t *testing.T) { 199 + ctx := context.Background() 200 + 201 + hsmSecret := &hsmv1alpha1.HSMSecret{ 202 + ObjectMeta: metav1.ObjectMeta{ 203 + Name: "test-secret", 204 + Namespace: "default", 205 + Finalizers: []string{"hsmsecret.hsm.j5t.io/finalizer"}, 206 + }, 207 + Spec: hsmv1alpha1.HSMSecretSpec{ 208 + AutoSync: true, 209 + ParentRef: &hsmv1alpha1.ParentReference{ 210 + Name: "test-operator", 211 + Namespace: testStringPtr("test-namespace"), 212 + }, 213 + }, 214 + } 215 + 216 + fakeClient := fake.NewClientBuilder(). 217 + WithScheme(scheme). 218 + WithRuntimeObjects(hsmSecret). 219 + Build() 220 + 221 + agentManager := agent.NewManager(fakeClient, "test-namespace", nil) 222 + 223 + reconciler := &HSMSecretReconciler{ 224 + Client: fakeClient, 225 + Scheme: scheme, 226 + AgentManager: agentManager, 227 + OperatorNamespace: "test-namespace", 228 + OperatorName: "test-operator", 229 + } 230 + 231 + req := reconcile.Request{ 232 + NamespacedName: types.NamespacedName{ 233 + Name: "test-secret", 234 + Namespace: "default", 235 + }, 236 + } 237 + 238 + result, err := reconciler.Reconcile(ctx, req) 239 + 240 + // Should not error, but should requeue since no devices available 241 + assert.NoError(t, err) 242 + assert.True(t, result.RequeueAfter > 0, "Expected RequeueAfter due to no available devices") 243 + }) 244 + }