···301301# Get agent pod
302302AGENT_POD=$(kubectl get pods -l app.kubernetes.io/name=hsm-agent -o jsonpath='{.items[0].metadata.name}')
303303304304-# List all secrets (requires PIN authentication)
305305-kubectl exec $AGENT_POD -- pkcs11-tool --module="/usr/lib/opensc-pkcs11.so" --login --pin="$PKCS11_PIN" --list-objects --type=data
304304+# Get PIN from the HSM PIN secret
305305+HSM_PIN=$(kubectl get secret hsm-pin -o jsonpath='{.data.pin}' | base64 -d)
306306+307307+# List all secrets (requires PIN authentication)
308308+kubectl exec $AGENT_POD -- pkcs11-tool --module="/usr/lib/opensc-pkcs11.so" --login --pin="$HSM_PIN" --list-objects --type=data
306309307310# Read specific secret component
308308-kubectl exec $AGENT_POD -- pkcs11-tool --module="/usr/lib/opensc-pkcs11.so" --login --pin="$PKCS11_PIN" --read-object --type=data --label="my-secret/api_key"
311311+kubectl exec $AGENT_POD -- pkcs11-tool --module="/usr/lib/opensc-pkcs11.so" --login --pin="$HSM_PIN" --read-object --type=data --label="my-secret/api_key"
309312310313# HSM device info
311314kubectl exec $AGENT_POD -- pkcs11-tool --module="/usr/lib/opensc-pkcs11.so" -I
···3463493. Implement in `internal/agent/grpc_server.go`
3473504. Update client calls in controller or agent code
348351349349-This operator provides secure, hardware-backed secret management that integrates seamlessly with Kubernetes while maintaining the security benefits of HSM-based storage.352352+This operator provides secure, hardware-backed secret management that integrates seamlessly with Kubernetes while maintaining the security benefits of HSM-based storage.
353353+- Do not try to deploy to the Kubernetes cluster.
+4-1
Makefile
···33# To re-generate a bundle for another specific version without changing the standard setup, you can:
44# - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2)
55# - use environment variables to overwrite this value (e.g export VERSION=0.0.2)
66-VERSION ?= 0.5.30
66+VERSION ?= 0.5.41
7788# CHANNELS define the bundle channels used in the bundle.
99# Add a new line here if you would like to change its default config. (E.g CHANNELS = "candidate,fast,stable")
···144144 @echo "Syncing CRDs from config/crd/bases/ to helm/hsm-secrets-operator/crds/"
145145 cp config/crd/bases/*.yaml helm/hsm-secrets-operator/crds/
146146 @echo "✅ CRDs synced successfully"
147147+ @echo "⚠️ RBAC sync: Please manually verify helm/hsm-secrets-operator/templates/rbac/role.yaml matches config/rbac/role.yaml"
147148148149.PHONY: generate
149150generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
···180181 echo "Creating Kind cluster '$(KIND_CLUSTER)'..."; \
181182 $(KIND) create cluster --name $(KIND_CLUSTER) ;; \
182183 esac
184184+ @echo "Setting kubectl context to kind-$(KIND_CLUSTER)"
185185+ @kubectl config use-context kind-$(KIND_CLUSTER)
183186184187.PHONY: test-e2e
185188test-e2e: setup-test-e2e manifests generate fmt vet ## Run the e2e tests. Expected an isolated environment using Kind.
···88service HSMAgent {
99 // GetInfo returns information about the HSM device
1010 rpc GetInfo(GetInfoRequest) returns (GetInfoResponse);
1111-1111+1212 // ReadSecret reads secret data from the specified HSM path
1313 rpc ReadSecret(ReadSecretRequest) returns (ReadSecretResponse);
1414-1414+1515 // WriteSecret writes secret data and metadata to the specified HSM path
1616 rpc WriteSecret(WriteSecretRequest) returns (WriteSecretResponse);
1717-1717+1818 // ReadMetadata reads metadata for a secret at the given path
1919 rpc ReadMetadata(ReadMetadataRequest) returns (ReadMetadataResponse);
2020-2020+2121 // DeleteSecret removes secret data from the specified HSM path
2222 rpc DeleteSecret(DeleteSecretRequest) returns (DeleteSecretResponse);
2323-2323+2424 // ListSecrets returns a list of secret paths
2525 rpc ListSecrets(ListSecretsRequest) returns (ListSecretsResponse);
2626-2626+2727 // GetChecksum returns the SHA256 checksum of the secret data at the given path
2828 rpc GetChecksum(GetChecksumRequest) returns (GetChecksumResponse);
2929-2929+3030 // IsConnected returns true if the HSM is connected and responsive
3131 rpc IsConnected(IsConnectedRequest) returns (IsConnectedResponse);
3232-3232+3333 // Health check for gRPC health protocol
3434 rpc Health(HealthRequest) returns (HealthResponse);
3535+3636+ // ChangePIN changes the HSM device PIN from old to new PIN
3737+ rpc ChangePIN(ChangePINRequest) returns (ChangePINResponse);
3538}
36393740// Common types
···120123message HealthResponse {
121124 string status = 1;
122125 string message = 2;
123123-}126126+}
// ChangePINRequest is the request message of the HSMAgent.ChangePIN RPC,
// which changes the HSM device PIN from the old PIN to the new PIN.
127127+128128+message ChangePINRequest {
// old_pin is the PIN to change from.
129129+ string old_pin = 1;
// new_pin is the PIN to change to.
130130+ string new_pin = 2;
131131+}
// ChangePINResponse is the response message of the HSMAgent.ChangePIN RPC.
// It declares no fields.
132132+133133+message ChangePINResponse {}
+40
api/proto/hsm/v1/hsm_grpc.pb.go
···2828 HSMAgent_GetChecksum_FullMethodName = "/hsm.v1.HSMAgent/GetChecksum"
2929 HSMAgent_IsConnected_FullMethodName = "/hsm.v1.HSMAgent/IsConnected"
3030 HSMAgent_Health_FullMethodName = "/hsm.v1.HSMAgent/Health"
3131+ HSMAgent_ChangePIN_FullMethodName = "/hsm.v1.HSMAgent/ChangePIN"
3132)
32333334// HSMAgentClient is the client API for HSMAgent service.
···5455 IsConnected(ctx context.Context, in *IsConnectedRequest, opts ...grpc.CallOption) (*IsConnectedResponse, error)
5556 // Health check for gRPC health protocol
5657 Health(ctx context.Context, in *HealthRequest, opts ...grpc.CallOption) (*HealthResponse, error)
5858+ // ChangePIN changes the HSM device PIN from old to new PIN
5959+ ChangePIN(ctx context.Context, in *ChangePINRequest, opts ...grpc.CallOption) (*ChangePINResponse, error)
5760}
58615962type hSMAgentClient struct {
···154157 return out, nil
155158}
156159160160+func (c *hSMAgentClient) ChangePIN(ctx context.Context, in *ChangePINRequest, opts ...grpc.CallOption) (*ChangePINResponse, error) {
161161+ cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
162162+ out := new(ChangePINResponse)
163163+ err := c.cc.Invoke(ctx, HSMAgent_ChangePIN_FullMethodName, in, out, cOpts...)
164164+ if err != nil {
165165+ return nil, err
166166+ }
167167+ return out, nil
168168+}
169169+157170// HSMAgentServer is the server API for HSMAgent service.
158171// All implementations must embed UnimplementedHSMAgentServer
159172// for forward compatibility.
···178191 IsConnected(context.Context, *IsConnectedRequest) (*IsConnectedResponse, error)
179192 // Health check for gRPC health protocol
180193 Health(context.Context, *HealthRequest) (*HealthResponse, error)
194194+ // ChangePIN changes the HSM device PIN from old to new PIN
195195+ ChangePIN(context.Context, *ChangePINRequest) (*ChangePINResponse, error)
181196 mustEmbedUnimplementedHSMAgentServer()
182197}
183198···214229}
215230func (UnimplementedHSMAgentServer) Health(context.Context, *HealthRequest) (*HealthResponse, error) {
216231 return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
232232+}
233233+func (UnimplementedHSMAgentServer) ChangePIN(context.Context, *ChangePINRequest) (*ChangePINResponse, error) {
234234+ return nil, status.Errorf(codes.Unimplemented, "method ChangePIN not implemented")
217235}
218236func (UnimplementedHSMAgentServer) mustEmbedUnimplementedHSMAgentServer() {}
219237func (UnimplementedHSMAgentServer) testEmbeddedByValue() {}
···398416 return interceptor(ctx, in, info, handler)
399417}
400418419419+func _HSMAgent_ChangePIN_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
420420+ in := new(ChangePINRequest)
421421+ if err := dec(in); err != nil {
422422+ return nil, err
423423+ }
424424+ if interceptor == nil {
425425+ return srv.(HSMAgentServer).ChangePIN(ctx, in)
426426+ }
427427+ info := &grpc.UnaryServerInfo{
428428+ Server: srv,
429429+ FullMethod: HSMAgent_ChangePIN_FullMethodName,
430430+ }
431431+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
432432+ return srv.(HSMAgentServer).ChangePIN(ctx, req.(*ChangePINRequest))
433433+ }
434434+ return interceptor(ctx, in, info, handler)
435435+}
436436+401437// HSMAgent_ServiceDesc is the grpc.ServiceDesc for HSMAgent service.
402438// It's only intended for direct use with grpc.RegisterService,
403439// and not to be introspected or modified (even as a copy)
···440476 {
441477 MethodName: "Health",
442478 Handler: _HSMAgent_Health_Handler,
479479+ },
480480+ {
481481+ MethodName: "ChangePIN",
482482+ Handler: _HSMAgent_ChangePIN_Handler,
443483 },
444484 },
445485 Streams: []grpc.StreamDesc{},
-5
api/v1alpha1/hsmdevice_types.go
···171171 // Mirroring configures cross-node device mirroring for high availability
172172 // +optional
173173 Mirroring *MirroringSpec `json:"mirroring,omitempty"`
174174-175175- // PKCS11LibraryPath is the path to the PKCS#11 library for this device
176176- // +optional
177177- // Deprecated: Use PKCS11.LibraryPath instead
178178- PKCS11LibraryPath string `json:"pkcs11LibraryPath,omitempty"`
179174}
180175181176// DeviceRole defines the role of a device in a mirrored setup
-5
config/crd/bases/hsm.j5t.io_hsmdevices.yaml
···178178 match
179179 type: string
180180 type: object
181181- pkcs11LibraryPath:
182182- description: |-
183183- PKCS11LibraryPath is the path to the PKCS#11 library for this device
184184- Deprecated: Use PKCS11.LibraryPath instead
185185- type: string
186181 required:
187182 - deviceType
188183 type: object
···11+/*
22+Copyright 2025.
33+44+Licensed under the Apache License, Version 2.0 (the "License");
55+you may not use this file except in compliance with the License.
66+You may obtain a copy of the License at
77+88+ http://www.apache.org/licenses/LICENSE-2.0
99+1010+Unless required by applicable law or agreed to in writing, software
1111+distributed under the License is distributed on an "AS IS" BASIS,
1212+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313+See the License for the specific language governing permissions and
1414+limitations under the License.
1515+*/
1616+1717+package config
1818+1919+import (
2020+ "fmt"
2121+ "os"
2222+ "strings"
2323+)
// GetCurrentNamespace returns the namespace the operator is running in.
//
// Sources are consulted in this order:
//  1. the POD_NAMESPACE environment variable;
//  2. the service account namespace file at
//     /var/run/secrets/kubernetes.io/serviceaccount/namespace.
//
// Each value is trimmed of surrounding whitespace, and a source that is empty
// after trimming is skipped instead of being returned as "". An error is
// returned when neither source yields a namespace; there is deliberately no
// fallback to the "default" namespace.
func GetCurrentNamespace() (string, error) {
	// Trim before the emptiness check so a whitespace-only value does not
	// produce an empty namespace with a nil error.
	if ns := strings.TrimSpace(os.Getenv("POD_NAMESPACE")); ns != "" {
		return ns, nil
	}

	data, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
	if err != nil {
		return "", fmt.Errorf("unable to determine current namespace: service account namespace file not found: %w", err)
	}

	if ns := strings.TrimSpace(string(data)); ns != "" {
		return ns, nil
	}

	return "", fmt.Errorf("unable to determine current namespace: service account namespace file is empty")
}
···11+/*
22+Copyright 2025.
33+44+Licensed under the Apache License, Version 2.0 (the "License");
55+you may not use this file except in compliance with the License.
66+You may obtain a copy of the License at
77+88+ http://www.apache.org/licenses/LICENSE-2.0
99+1010+Unless required by applicable law or agreed to in writing, software
1111+distributed under the License is distributed on an "AS IS" BASIS,
1212+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313+See the License for the specific language governing permissions and
1414+limitations under the License.
1515+*/
1616+1717+package commands
1818+1919+import (
2020+ "context"
2121+ "encoding/base64"
2222+ "fmt"
2323+ "syscall"
2424+2525+ "github.com/spf13/cobra"
2626+ "golang.org/x/term"
2727+)
2828+2929+// NewRotatePinCmd creates the rotate-pin command
3030+func NewRotatePinCmd() *cobra.Command {
3131+ var (
3232+ oldPin string
3333+ newPin string
3434+ dryRun bool
3535+ namespace string
3636+ verbose bool
3737+ )
3838+3939+ cmd := &cobra.Command{
4040+ Use: "rotate-pin",
4141+ Short: "Rotate HSM PIN on all connected devices",
4242+ Long: `Rotate the PIN for all connected HSM devices in the cluster.
4343+4444+This command will:
4545+1. Validate the old PIN against the current Kubernetes Secret
4646+2. Change the PIN on all HSM devices atomically
4747+3. Provide a kubectl patch command to update the Kubernetes Secret
4848+4949+Examples:
5050+ # Interactive PIN rotation (recommended for security)
5151+ kubectl hsm rotate-pin
5252+5353+ # Specify PINs via flags (less secure due to shell history)
5454+ kubectl hsm rotate-pin --old-pin=123456 --new-pin=654321
5555+5656+ # Dry run to see what would happen
5757+ kubectl hsm rotate-pin --dry-run
5858+5959+ # Rotate PIN in specific namespace
6060+ kubectl hsm rotate-pin --namespace=production`,
6161+ RunE: func(cmd *cobra.Command, args []string) error {
6262+ return runRotatePin(cmd.Context(), oldPin, newPin, dryRun, namespace, verbose)
6363+ },
6464+ }
6565+6666+ cmd.Flags().StringVar(&oldPin, "old-pin", "", "Current PIN (will prompt if not provided)")
6767+ cmd.Flags().StringVar(&newPin, "new-pin", "", "New PIN (will prompt if not provided)")
6868+ cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Show what would be done without making changes")
6969+ cmd.Flags().StringVarP(&namespace, "namespace", "n", "", "Namespace to use (default: current context namespace)")
7070+ cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Enable verbose output")
7171+7272+ return cmd
7373+}
7474+7575+func runRotatePin(ctx context.Context, oldPin, newPin string, dryRun bool, namespace string, verbose bool) error {
7676+ // Create client manager
7777+ cm, err := NewClientManager(namespace, verbose)
7878+ if err != nil {
7979+ return fmt.Errorf("failed to initialize client manager: %w", err)
8080+ }
8181+ defer cm.Close()
8282+8383+ // Get current namespace for display
8484+ currentNamespace := cm.GetCurrentNamespace()
8585+ if namespace == "" {
8686+ namespace = currentNamespace
8787+ }
8888+8989+ // Get HSM client
9090+ hsmClient, err := cm.GetClient(ctx)
9191+ if err != nil {
9292+ return fmt.Errorf("failed to connect to HSM operator: %w", err)
9393+ }
9494+9595+ // Get device status to show what devices will be affected
9696+ deviceStatus, err := hsmClient.GetDeviceStatus(ctx)
9797+ if err != nil {
9898+ return fmt.Errorf("failed to get device status: %w", err)
9999+ }
100100+101101+ if len(deviceStatus.Devices) == 0 {
102102+ return fmt.Errorf("no HSM devices found - ensure HSM agents are running and devices are connected")
103103+ }
104104+105105+ // Show device information
106106+ fmt.Printf("Found %d HSM device(s) in namespace '%s':\n", len(deviceStatus.Devices), namespace)
107107+ connectedCount := 0
108108+ for deviceName, connected := range deviceStatus.Devices {
109109+ status := "disconnected"
110110+ if connected {
111111+ status = "connected"
112112+ connectedCount++
113113+ }
114114+ fmt.Printf(" - %s: %s\n", deviceName, status)
115115+ }
116116+117117+ if connectedCount == 0 {
118118+ return fmt.Errorf("no HSM devices are currently connected")
119119+ }
120120+121121+ fmt.Printf("\nPIN rotation will affect %d connected device(s).\n\n", connectedCount)
122122+123123+ // Get PINs interactively if not provided
124124+ if oldPin == "" {
125125+ oldPin, err = readPIN("Enter current PIN: ")
126126+ if err != nil {
127127+ return fmt.Errorf("failed to read current PIN: %w", err)
128128+ }
129129+ }
130130+131131+ if newPin == "" {
132132+ newPin, err = readPIN("Enter new PIN: ")
133133+ if err != nil {
134134+ return fmt.Errorf("failed to read new PIN: %w", err)
135135+ }
136136+137137+ // Confirm new PIN
138138+ confirmPin, err := readPIN("Confirm new PIN: ")
139139+ if err != nil {
140140+ return fmt.Errorf("failed to read PIN confirmation: %w", err)
141141+ }
142142+143143+ if newPin != confirmPin {
144144+ return fmt.Errorf("new PIN and confirmation do not match")
145145+ }
146146+ }
147147+148148+ // Validate PINs
149149+ if oldPin == "" {
150150+ return fmt.Errorf("old PIN cannot be empty")
151151+ }
152152+ if newPin == "" {
153153+ return fmt.Errorf("new PIN cannot be empty")
154154+ }
155155+ if oldPin == newPin {
156156+ return fmt.Errorf("new PIN must be different from old PIN")
157157+ }
158158+159159+ // TODO: Validate old PIN against Kubernetes Secret
160160+ // This would require reading the HSM PIN secret and comparing
161161+162162+ if dryRun {
163163+ fmt.Println("DRY RUN: PIN rotation plan")
164164+ fmt.Println("================================")
165165+ fmt.Printf("Current PIN: %s (masked)\n", maskPIN(oldPin))
166166+ fmt.Printf("New PIN: %s (masked)\n", maskPIN(newPin))
167167+ fmt.Printf("Devices to update: %d\n", connectedCount)
168168+ for deviceName, connected := range deviceStatus.Devices {
169169+ if connected {
170170+ fmt.Printf(" - %s\n", deviceName)
171171+ }
172172+ }
173173+ fmt.Println("\nKubectl command to update PIN secret after rotation:")
174174+ fmt.Printf("kubectl patch secret hsm-pin -n %s --type='json' -p='[{\"op\":\"replace\",\"path\":\"/data/pin\",\"value\":\"%s\"}]'\n",
175175+ namespace, encodePINForSecret(newPin))
176176+ fmt.Println("\nNo changes made (dry run).")
177177+ return nil
178178+ }
179179+180180+ // Confirm operation
181181+ fmt.Printf("About to rotate PIN on %d HSM device(s). This operation cannot be undone.\n", connectedCount)
182182+ if !confirmOperation("Continue with PIN rotation? (y/N): ") {
183183+ fmt.Println("PIN rotation cancelled.")
184184+ return nil
185185+ }
186186+187187+ // Perform PIN rotation
188188+ fmt.Println("Rotating PIN on HSM devices...")
189189+190190+ response, err := hsmClient.ChangePIN(ctx, oldPin, newPin)
191191+ if err != nil {
192192+ return fmt.Errorf("PIN rotation failed: %w", err)
193193+ }
194194+195195+ // Check if operation was completely successful
196196+ if len(response.Errors) > 0 {
197197+ fmt.Printf("⚠ PIN rotation completed with warnings:\n")
198198+ fmt.Printf(" Successful devices: %d/%d\n", response.SuccessCount, response.TotalCount)
199199+ fmt.Printf(" Errors:\n")
200200+ for _, errMsg := range response.Errors {
201201+ fmt.Printf(" - %s\n", errMsg)
202202+ }
203203+ fmt.Printf(" Message: %s\n", response.Message)
204204+ } else {
205205+ fmt.Printf("✓ PIN rotation completed successfully on all %d device(s)!\n", response.SuccessCount)
206206+ }
207207+208208+ fmt.Println()
209209+ fmt.Println("IMPORTANT: Update the Kubernetes Secret with the new PIN:")
210210+ fmt.Printf("kubectl patch secret hsm-pin -n %s --type='json' -p='[{\"op\":\"replace\",\"path\":\"/data/pin\",\"value\":\"%s\"}]'\n",
211211+ namespace, encodePINForSecret(newPin))
212212+ fmt.Println()
213213+ fmt.Println("After updating the secret, HSM agents will automatically use the new PIN.")
214214+215215+ return nil
216216+}
217217+218218+// readPIN securely reads a PIN from user input
219219+func readPIN(prompt string) (string, error) {
220220+ fmt.Print(prompt)
221221+222222+ // Read password without echoing
223223+ bytePin, err := term.ReadPassword(int(syscall.Stdin))
224224+ fmt.Println() // Add newline after hidden input
225225+226226+ if err != nil {
227227+ return "", fmt.Errorf("failed to read PIN: %w", err)
228228+ }
229229+230230+ return string(bytePin), nil
231231+}
// maskPIN returns a masked version of the PIN for display.
//
// PINs of up to two characters are replaced entirely by "***". Longer PINs
// keep their first and last character with "***" in between. Length and
// slicing are measured in characters (runes), not bytes, so a PIN containing
// multi-byte UTF-8 characters is never cut in the middle of a character.
func maskPIN(pin string) string {
	chars := []rune(pin)
	if len(chars) <= 2 {
		return "***"
	}
	return string(chars[0]) + "***" + string(chars[len(chars)-1])
}
// encodePINForSecret returns pin in standard base64, the encoding used for
// values under a Kubernetes Secret's data field.
func encodePINForSecret(pin string) string {
	raw := []byte(pin)
	encoded := make([]byte, base64.StdEncoding.EncodedLen(len(raw)))
	base64.StdEncoding.Encode(encoded, raw)
	return string(encoded)
}
// confirmOperation prints prompt, reads one line from standard input and
// reports whether the user agreed.
//
// Only a line consisting of exactly "y", "Y", "yes", "Yes" or "YES" counts as
// agreement. Anything else is treated as a refusal, including an empty line,
// end of input, a read error, or extra words after the answer. Refusing on
// ambiguous input is the safe choice because this guards an irreversible
// operation.
func confirmOperation(prompt string) bool {
	fmt.Print(prompt)

	var response string
	if _, err := fmt.Scanln(&response); err != nil {
		// Scanln also fails on a bare newline and on trailing tokens.
		return false
	}

	switch response {
	case "y", "Y", "yes", "Yes", "YES":
		return true
	default:
		return false
	}
}
+45
kubectl-hsm/pkg/util/kubectl.go
···3131 "k8s.io/client-go/tools/clientcmd"
3232 "k8s.io/client-go/tools/portforward"
3333 "k8s.io/client-go/transport/spdy"
3434+3535+ "github.com/evanjarrett/hsm-secrets-operator/kubectl-hsm/pkg/client"
3436)
35373638const (
···263265264266 return namespace, nil
265267}
268268+269269+// CreateClient creates an HSM API client with automatic authentication
270270+func CreateClient() (*client.Client, error) {
271271+ // For direct API access, try to detect if the API is available locally
272272+ // If not, set up port forwarding automatically
273273+274274+ baseURL := "http://localhost:8090"
275275+276276+ // Test if API is directly accessible
277277+ testClient := client.NewClient(baseURL)
278278+ ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
279279+ defer cancel()
280280+281281+ if _, err := testClient.GetHealth(ctx); err == nil {
282282+ // API is directly accessible
283283+ return testClient, nil
284284+ }
285285+286286+ // API not directly accessible, try to set up port forwarding
287287+ kubectlUtil, err := NewKubectlUtil("")
288288+ if err != nil {
289289+ return nil, fmt.Errorf("failed to create kubectl util for port forwarding: %w", err)
290290+ }
291291+292292+ // Try to find the operator service
293293+ ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
294294+ defer cancel2()
295295+296296+ if err := kubectlUtil.FindOperatorService(ctx2); err != nil {
297297+ return nil, fmt.Errorf("HSM operator not found: %w", err)
298298+ }
299299+300300+ // Create port forward (this might fail silently if port is already in use)
301301+ pf, err := kubectlUtil.CreatePortForward(ctx2, 8090, false)
302302+ if err == nil {
303303+ // Port forward successful, defer cleanup is handled by the calling command
304304+ _ = pf // Use the port forward
305305+ }
306306+307307+ // Return client regardless of port forward success
308308+ // The client will handle authentication errors gracefully
309309+ return client.NewClient(baseURL), nil
310310+}
+85-3
test/e2e/e2e_test.go
···8686 cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectImage))
8787 _, err = utils.Run(cmd)
8888 Expect(err).NotTo(HaveOccurred(), "Failed to deploy the controller-manager")
8989+9090+ By("waiting for deployment to be ready")
9191+ cmd = exec.Command("kubectl", "rollout", "status",
9292+ "deployment/hsm-secrets-operator-controller-manager", "-n", namespace, "--timeout=60s")
9393+ _, err = utils.Run(cmd)
9494+ Expect(err).NotTo(HaveOccurred(), "Failed to wait for manager deployment to be ready")
9595+9696+ By("deploying test HSM devices to trigger discovery and agent deployment")
9797+ cmd = exec.Command("kubectl", "apply", "-f", "test/e2e/test-hsm-device.yaml")
9898+ _, err = utils.Run(cmd)
9999+ Expect(err).NotTo(HaveOccurred(), "Failed to deploy test HSM device")
89100 })
9010191102 // Note: Main cleanup is handled by DeferCleanup in BeforeAll
···278289279290 By("getting the metrics by checking curl-metrics logs")
280291 metricsOutput := getMetricsOutput()
281281- Expect(metricsOutput).To(ContainSubstring(
282282- "controller_runtime_reconcile_total",
283283- ))
292292+ // Check for metrics that should always be present
293293+ Expect(metricsOutput).To(ContainSubstring("go_goroutines"))
284294 })
285295286296 // +kubebuilder:scaffold:e2e-webhooks-checks
297297+298298+ It("should have API server start after cache is ready", func() {
299299+ By("verifying that the API server starts properly after manager cache")
300300+ verifyAPIServerStartup := func(g Gomega) {
301301+ cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace)
302302+ output, err := utils.Run(cmd)
303303+ g.Expect(err).NotTo(HaveOccurred())
304304+305305+ // The API server should start after the manager cache is ready
306306+ // Look for manager startup and API server startup in the correct order
307307+ g.Expect(output).To(ContainSubstring("Starting API server"),
308308+ "API server should have started")
309309+310310+ // Should not see the cache error that indicates race condition
311311+ g.Expect(output).NotTo(ContainSubstring("the cache is not started, can not read objects"),
312312+ "API server should not attempt to read from cache before it's started")
313313+ }
314314+ Eventually(verifyAPIServerStartup).Should(Succeed())
315315+ })
316316+317317+ It("should serve API requests without 'no_agents' error immediately after startup", func() {
318318+ By("creating a test pod to call the API server health endpoint")
319319+ token, err := serviceAccountToken()
320320+ Expect(err).NotTo(HaveOccurred())
321321+ Expect(token).NotTo(BeEmpty())
322322+323323+ // Create a pod that will test the API server
324324+ apiURL := fmt.Sprintf("http://hsm-secrets-operator-hsm-secrets-operator-api.%s.svc.cluster.local:8090/api/v1/health",
325325+ namespace)
326326+ curlCmd := fmt.Sprintf("curl -v -H 'Authorization: Bearer %s' %s", token, apiURL)
327327+ overrides := fmt.Sprintf(`{"spec":{"containers":[{"name":"curl","image":"curlimages/curl:latest",`+
328328+ `"command":["/bin/sh","-c"],"args":["%s"],`+
329329+ `"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},`+
330330+ `"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}}}],`+
331331+ `"serviceAccount":"%s"}}`, curlCmd, serviceAccountName)
332332+ cmd := exec.Command("kubectl", "run", "api-test", "--restart=Never",
333333+ "--namespace", namespace,
334334+ "--image=curlimages/curl:latest",
335335+ "--overrides", overrides)
336336+ _, err = utils.Run(cmd)
337337+ Expect(err).NotTo(HaveOccurred(), "Failed to create api-test pod")
338338+339339+ // Clean up the test pod
340340+ DeferCleanup(func() {
341341+ cmd := exec.Command("kubectl", "delete", "pod", "api-test", "-n", namespace, "--ignore-not-found=true")
342342+ _, _ = utils.Run(cmd)
343343+ })
344344+345345+ By("waiting for the api-test pod to complete")
346346+ verifyAPITestComplete := func(g Gomega) {
347347+ cmd := exec.Command("kubectl", "get", "pods", "api-test",
348348+ "-o", "jsonpath={.status.phase}",
349349+ "-n", namespace)
350350+ output, err := utils.Run(cmd)
351351+ g.Expect(err).NotTo(HaveOccurred())
352352+ g.Expect(output).To(Equal("Succeeded"), "api-test pod should complete successfully")
353353+ }
354354+ Eventually(verifyAPITestComplete, 2*time.Minute).Should(Succeed())
355355+356356+ By("checking the API response in the test pod logs")
357357+ cmd = exec.Command("kubectl", "logs", "api-test", "-n", namespace)
358358+ apiOutput, err := utils.Run(cmd)
359359+ Expect(err).NotTo(HaveOccurred(), "Failed to retrieve logs from api-test pod")
360360+361361+ // Should get a successful response, not a 'no_agents' error
362362+ Expect(apiOutput).To(ContainSubstring("< HTTP/1.1 200 OK"),
363363+ "API health endpoint should return 200 OK")
364364+ Expect(apiOutput).To(ContainSubstring("\"success\":true"),
365365+ "API should return successful response")
366366+ Expect(apiOutput).NotTo(ContainSubstring("no_agents"),
367367+ "API should not return 'no_agents' error after cache is ready")
368368+ })
287369288370 // TODO: Customize the e2e test suite with scenarios specific to your project.
289371 // Consider applying sample/CR(s) and check their status and/or verifying