···22222323// HSMPoolSpec defines the desired state of HSMPool
2424type HSMPoolSpec struct {
2525- // HSMDeviceRefs references the HSMDevice specifications this pool aggregates
2626- HSMDeviceRefs []string `json:"hsmDeviceRefs"`
2727-2825 // GracePeriod defines how long to wait before considering a pod's report stale
2926 // +kubebuilder:default="5m"
3027 // +optional
···115112// +kubebuilder:object:root=true
116113// +kubebuilder:subresource:status
117114// +kubebuilder:resource:shortName=hsmpool
118118-// +kubebuilder:printcolumn:name="HSMDevices",type=string,JSONPath=`.spec.hsmDeviceRefs`
119115// +kubebuilder:printcolumn:name="Total",type=integer,JSONPath=`.status.totalDevices`
120116// +kubebuilder:printcolumn:name="Available",type=integer,JSONPath=`.status.availableDevices`
121117// +kubebuilder:printcolumn:name="Reporting",type=string,JSONPath=`.status.reportingPods[*].podName`
···269269// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
270270func (in *HSMPoolSpec) DeepCopyInto(out *HSMPoolSpec) {
271271 *out = *in
272272- if in.HSMDeviceRefs != nil {
273273- in, out := &in.HSMDeviceRefs, &out.HSMDeviceRefs
274274- *out = make([]string, len(*in))
275275- copy(*out, *in)
276276- }
277272 if in.GracePeriod != nil {
278273 in, out := &in.GracePeriod, &out.GracePeriod
279274 *out = new(v1.Duration)
-11
config/crd/bases/hsm.j5t.io_hsmpools.yaml
···1717 scope: Namespaced
1818 versions:
1919 - additionalPrinterColumns:
2020- - jsonPath: .spec.hsmDeviceRefs
2121- name: HSMDevices
2222- type: string
2320 - jsonPath: .status.totalDevices
2421 name: Total
2522 type: integer
···6865 description: GracePeriod defines how long to wait before considering
6966 a pod's report stale
7067 type: string
7171- hsmDeviceRefs:
7272- description: HSMDeviceRefs references the HSMDevice specifications
7373- this pool aggregates
7474- items:
7575- type: string
7676- type: array
7768 mirroring:
7869 description: Mirroring defines device mirroring configuration for
7970 this pool
···10596 type: string
10697 type: array
10798 type: object
108108- required:
109109- - hsmDeviceRefs
11099 type: object
111100 status:
112101 description: HSMPoolStatus defines the observed state of HSMPool
···9292 },
9393 },
9494 Spec: hsmv1alpha1.HSMPoolSpec{
9595- HSMDeviceRefs: []string{hsmDevice.Name},
9696- GracePeriod: &metav1.Duration{Duration: 5 * time.Minute}, // Default grace period
9595+ GracePeriod: &metav1.Duration{Duration: 5 * time.Minute}, // Default grace period
9796 },
9897 }
9998···119118120119 // Update existing HSMPool if needed
121120 needsUpdate := false
122122-123123- // Check if device reference needs updating
124124- found := false
125125- for _, deviceRef := range existing.Spec.HSMDeviceRefs {
126126- if deviceRef == hsmDevice.Name {
127127- found = true
128128- break
129129- }
130130- }
131131- if !found {
132132- existing.Spec.HSMDeviceRefs = append(existing.Spec.HSMDeviceRefs, hsmDevice.Name)
133133- needsUpdate = true
134134- }
135121136122 // Update grace period if it's nil
137123 if existing.Spec.GracePeriod == nil {
+76-67
internal/controller/hsmpool_agent_controller.go
···64646565 // Only deploy agents for ready pools with discovered hardware
6666 if hsmPool.Status.Phase == hsmv1alpha1.HSMPoolPhaseReady && len(hsmPool.Status.AggregatedDevices) > 0 {
6767- // For each HSMDevice referenced by this pool, ensure agents exist for all aggregated devices
6868- for _, deviceRef := range hsmPool.Spec.HSMDeviceRefs {
6969- // Get the HSMDevice to pass to agent manager
7070- var hsmDevice hsmv1alpha1.HSMDevice
7171- if err := r.Get(ctx, client.ObjectKey{
7272- Name: deviceRef,
7373- Namespace: hsmPool.Namespace,
7474- }, &hsmDevice); err != nil {
7575- logger.Error(err, "Failed to get referenced HSMDevice", "device", deviceRef)
7676- continue
7777- }
6767+ // Ensure owner reference exists and get the HSMDevice
6868+ if len(hsmPool.OwnerReferences) == 0 {
6969+ logger.Error(fmt.Errorf("no owner references"), "HSMPool has no owner references", "pool", hsmPool.Name)
7070+ return ctrl.Result{}, nil
7171+ }
78727979- if r.AgentManager != nil {
8080- if err := r.AgentManager.EnsureAgent(ctx, &hsmDevice, nil); err != nil {
8181- logger.Error(err, "Failed to ensure HSM agents for pool", "device", deviceRef)
8282- }
8383- } else {
8484- logger.Error(fmt.Errorf("agent manager not configured"), "Cannot ensure agents without agent manager")
7373+ deviceRef := hsmPool.OwnerReferences[0].Name
7474+ // Get the HSMDevice to pass to agent manager
7575+ var hsmDevice hsmv1alpha1.HSMDevice
7676+ if err := r.Get(ctx, client.ObjectKey{
7777+ Name: deviceRef,
7878+ Namespace: hsmPool.Namespace,
7979+ }, &hsmDevice); err != nil {
8080+ logger.Error(err, "Failed to get referenced HSMDevice", "device", deviceRef)
8181+ // Don't return error - this allows graceful handling of missing devices
8282+ return ctrl.Result{}, nil
8383+ }
8484+8585+ if r.AgentManager != nil {
8686+ if err := r.AgentManager.EnsureAgent(ctx, &hsmPool); err != nil {
8787+ logger.Error(err, "Failed to ensure HSM agents for pool", "device", deviceRef)
8588 }
8989+ } else {
9090+ logger.Error(fmt.Errorf("agent manager not configured"), "Cannot ensure agents without agent manager")
8691 }
8792 } else {
8893 logger.V(1).Info("HSMPool not ready for agent deployment",
···114119 absenceTimeout = 2 * gracePeriod // Default to 2x grace period
115120 }
116121117117- // For each HSMDevice referenced by this pool, check if it should be cleaned up
118118- for _, deviceRef := range hsmPool.Spec.HSMDeviceRefs {
119119- // Get the HSMDevice
120120- var hsmDevice hsmv1alpha1.HSMDevice
121121- if err := r.Get(ctx, client.ObjectKey{
122122- Name: deviceRef,
123123- Namespace: hsmPool.Namespace,
124124- }, &hsmDevice); err != nil {
125125- logger.V(1).Info("HSMDevice not found, skipping cleanup check", "device", deviceRef)
126126- continue
127127- }
122122+ // Check if the HSMDevice referenced by this pool should be cleaned up (from ownerReferences)
123123+ if len(hsmPool.OwnerReferences) == 0 {
124124+ logger.V(1).Info("HSMPool has no owner references, skipping cleanup")
125125+ return nil
126126+ }
127127+128128+ deviceRef := hsmPool.OwnerReferences[0].Name
129129+ // Get the HSMDevice
130130+ var hsmDevice hsmv1alpha1.HSMDevice
131131+ if err := r.Get(ctx, client.ObjectKey{
132132+ Name: deviceRef,
133133+ Namespace: hsmPool.Namespace,
134134+ }, &hsmDevice); err != nil {
135135+ logger.V(1).Info("HSMDevice not found, skipping cleanup check", "device", deviceRef)
136136+ return nil
137137+ }
128138129129- // Check if this device has available aggregated devices in the pool
130130- deviceAvailable := false
131131- var lastSeenTime time.Time
139139+ // Check if this device has available aggregated devices in the pool
140140+ deviceAvailable := false
141141+ var lastSeenTime time.Time
132142133133- for _, aggregatedDevice := range hsmPool.Status.AggregatedDevices {
134134- if aggregatedDevice.Available {
135135- deviceAvailable = true
136136- break
137137- }
138138- // Track the most recent LastSeen time for unavailable devices
139139- if aggregatedDevice.LastSeen.After(lastSeenTime) {
140140- lastSeenTime = aggregatedDevice.LastSeen.Time
141141- }
143143+ for _, aggregatedDevice := range hsmPool.Status.AggregatedDevices {
144144+ if aggregatedDevice.Available {
145145+ deviceAvailable = true
146146+ break
142147 }
148148+ // Track the most recent LastSeen time for unavailable devices
149149+ if aggregatedDevice.LastSeen.After(lastSeenTime) {
150150+ lastSeenTime = aggregatedDevice.LastSeen.Time
151151+ }
152152+ }
143153144144- // If device is not available and hasn't been seen for longer than absence timeout
145145- if !deviceAvailable {
146146- timeSinceLastSeen := time.Since(lastSeenTime)
154154+ // If device is not available and hasn't been seen for longer than absence timeout
155155+ if !deviceAvailable {
156156+ timeSinceLastSeen := time.Since(lastSeenTime)
147157148148- if lastSeenTime.IsZero() {
149149- // No devices have ever been seen - check if pool has been around long enough
150150- poolAge := time.Since(hsmPool.CreationTimestamp.Time)
151151- if poolAge > absenceTimeout {
152152- logger.Info("Cleaning up agent for device with no discovered instances",
153153- "device", deviceRef,
154154- "poolAge", poolAge,
155155- "absenceTimeout", absenceTimeout)
156156-157157- if err := r.cleanupAgentForDevice(ctx, &hsmDevice); err != nil {
158158- logger.Error(err, "Failed to cleanup agent for device with no instances", "device", deviceRef)
159159- }
160160- }
161161- } else if timeSinceLastSeen > absenceTimeout {
162162- logger.Info("Cleaning up agent for device absent too long",
158158+ if lastSeenTime.IsZero() {
159159+ // No devices have ever been seen - check if pool has been around long enough
160160+ poolAge := time.Since(hsmPool.CreationTimestamp.Time)
161161+ if poolAge > absenceTimeout {
162162+ logger.Info("Cleaning up agent for device with no discovered instances",
163163 "device", deviceRef,
164164- "timeSinceLastSeen", timeSinceLastSeen,
165165- "absenceTimeout", absenceTimeout,
166166- "lastSeen", lastSeenTime)
164164+ "poolAge", poolAge,
165165+ "absenceTimeout", absenceTimeout)
167166168167 if err := r.cleanupAgentForDevice(ctx, &hsmDevice); err != nil {
169169- logger.Error(err, "Failed to cleanup agent for absent device", "device", deviceRef)
168168+ logger.Error(err, "Failed to cleanup agent for device with no instances", "device", deviceRef)
170169 }
171171- } else {
172172- logger.V(1).Info("Device unavailable but within tolerance",
173173- "device", deviceRef,
174174- "timeSinceLastSeen", timeSinceLastSeen,
175175- "absenceTimeout", absenceTimeout)
170170+ }
171171+ } else if timeSinceLastSeen > absenceTimeout {
172172+ logger.Info("Cleaning up agent for device absent too long",
173173+ "device", deviceRef,
174174+ "timeSinceLastSeen", timeSinceLastSeen,
175175+ "absenceTimeout", absenceTimeout,
176176+ "lastSeen", lastSeenTime)
177177+178178+ if err := r.cleanupAgentForDevice(ctx, &hsmDevice); err != nil {
179179+ logger.Error(err, "Failed to cleanup agent for absent device", "device", deviceRef)
176180 }
181181+ } else {
182182+ logger.V(1).Info("Device unavailable but within tolerance",
183183+ "device", deviceRef,
184184+ "timeSinceLastSeen", timeSinceLastSeen,
185185+ "absenceTimeout", absenceTimeout)
177186 }
178187 }
179188
···192192193193 // Ensure agent pods are running for all devices and create clients
194194 for _, hsmDevice := range hsmDevices {
195195+ // Get the HSMPool for this device
196196+ poolName := hsmDevice.Name + "-pool"
197197+ var hsmPool hsmv1alpha1.HSMPool
198198+ if err := r.Get(ctx, types.NamespacedName{
199199+ Name: poolName,
200200+ Namespace: hsmDevice.Namespace,
201201+ }, &hsmPool); err != nil {
202202+ // Clean up any successful connections before returning error
203203+ if err := deviceClients.Close(); err != nil {
204204+ logger.Error(err, "Failed to close device clients during cleanup")
205205+ }
206206+ return nil, fmt.Errorf("failed to get HSMPool %s for device %s: %w", poolName, hsmDevice.Name, err)
207207+ }
208208+195209 // EnsureAgent ensures agents for all devices in the pool
196196- err = r.AgentManager.EnsureAgent(ctx, hsmDevice, hsmSecret)
210210+ err = r.AgentManager.EnsureAgent(ctx, &hsmPool)
197211 if err != nil {
198212 // Clean up any successful connections before returning error
199213 if err := deviceClients.Close(); err != nil {