From 589045da28427ba2953dda9903960d25df1293d8 Mon Sep 17 00:00:00 2001
From: Rafael Benevides <rafabene@gmail.com>
Date: Tue, 13 Jan 2026 16:25:50 -0300
Subject: [PATCH 1/2] HYPERFLEET-454 - feat: implement health endpoints
 standard

Consolidate health endpoints on metrics server (port 8080) following
HyperFleet health-endpoints.md standard:

- Add /healthz (liveness) and /readyz (readiness) endpoints
- Remove separate healthcheck server (port 8083)
- Integrate readiness state with graceful shutdown
- Update Kubernetes probes in deployment.yaml
- Update documentation
---
 AGENTS.md                                     |  11 +-
 README.md                                     |   3 +-
 charts/templates/deployment.yaml              |  18 +-
 charts/templates/service.yaml                 |   4 -
 charts/values.yaml                            |   1 -
 cmd/hyperfleet-api/servecmd/cmd.go            |  13 +-
 .../server/healthcheck_server.go              |  87 --------
 .../logging/request_logging_middleware.go     |   6 -
 cmd/hyperfleet-api/server/metrics_server.go   |   6 +
 docs/deployment.md                            |   7 +-
 docs/development.md                           |   3 +-
 docs/hyperfleet-api.http                      |   9 +-
 pkg/config/config.go                          |  24 +-
 pkg/config/health_check.go                    |  26 ---
 pkg/health/handlers.go                        | 104 +++++++++
 pkg/health/handlers_test.go                   | 209 ++++++++++++++++++
 pkg/health/readiness.go                       |  68 ++++++
 pkg/health/readiness_test.go                  | 100 +++++++++
 test/helper.go                                |  37 +---
 19 files changed, 542 insertions(+), 194 deletions(-)
 delete mode 100755 cmd/hyperfleet-api/server/healthcheck_server.go
 delete mode 100755 pkg/config/health_check.go
 create mode 100644 pkg/health/handlers.go
 create mode 100644 pkg/health/handlers_test.go
 create mode 100644 pkg/health/readiness.go
 create mode 100644 pkg/health/readiness_test.go

diff --git a/AGENTS.md b/AGENTS.md
index 54e47e2..2382b9b 100755
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -344,9 +344,7 @@ Serves the hyperfleet REST API with full authentication, database connectivity,
   - `--ocm-debug` - Enable OCM API debug logging
 
 - **Monitoring & Health Checks:**
-  - `--health-check-server-bindaddress` - Health check server address (default: "localhost:8083")
-  - `--enable-health-check-https` - Enable HTTPS for health check server
-  - `--metrics-server-bindaddress` - Metrics server address (default: "localhost:8080")
+  - `--metrics-server-bindaddress` - Metrics and health endpoints server address (default: "localhost:8080")
   - `--enable-metrics-https` - Enable HTTPS for metrics server
 
 - **Performance Tuning:**
@@ -686,8 +684,7 @@ The server is configured in cmd/hyperfleet/server/:
 
 **Ports**:
 - `8000` - Main API server
-- `8080` - Metrics endpoint
-- `8083` - Health check endpoint
+- `8080` - Metrics and health endpoints (`/metrics`, `/healthz`, `/readyz`)
 
 **Middleware Chain**:
 1. Request logging
@@ -772,7 +769,9 @@ The API is designed to be stateless and horizontally scalable:
 - No event creation or message queues
 - Kubernetes-ready (multiple replicas)
 
-**Health Check**: `GET /healthcheck` returns 200 OK when database is accessible
+**Health Endpoints**:
+- `GET /healthz` - Liveness probe, returns 200 OK if the process is alive
+- `GET /readyz` - Readiness probe, returns 200 OK when ready to receive traffic (checks database connection)
 
 **Metrics**: Prometheus metrics available at `/metrics`
 
diff --git a/README.md b/README.md
index f5e27fe..66fcb90 100755
--- a/README.md
+++ b/README.md
@@ -80,7 +80,8 @@ The service starts on `localhost:8000`:
 - **REST API**: `http://localhost:8000/api/hyperfleet/v1/`
 - **OpenAPI spec**: `http://localhost:8000/api/hyperfleet/v1/openapi`
 - **Swagger UI**: `http://localhost:8000/api/hyperfleet/v1/openapi.html`
-- **Health check**: `http://localhost:8083/healthcheck`
+- **Liveness probe**: `http://localhost:8080/healthz`
+- **Readiness probe**: `http://localhost:8080/readyz`
 - **Metrics**: `http://localhost:8080/metrics`
 
 ```bash
diff --git a/charts/templates/deployment.yaml b/charts/templates/deployment.yaml
index 7ea0b7c..5e3d600 100644
--- a/charts/templates/deployment.yaml
+++ b/charts/templates/deployment.yaml
@@ -53,15 +53,11 @@ spec:
         args:
         - serve
         - --api-server-bindaddress={{ .Values.server.bindAddress | default ":8000" }}
-        - --health-check-server-bindaddress={{ .Values.server.healthBindAddress | default ":8083" }}
         - --metrics-server-bindaddress={{ .Values.server.metricsBindAddress | default ":8080" }}
         ports:
         - name: http
           containerPort: 8000
           protocol: TCP
-        - name: health
-          containerPort: 8083
-          protocol: TCP
         - name: metrics
           containerPort: 8080
           protocol: TCP
@@ -82,18 +78,18 @@ spec:
         {{- end }}
         livenessProbe:
           httpGet:
-            path: /healthcheck
-            port: health
-          initialDelaySeconds: 30
-          periodSeconds: 10
+            path: /healthz
+            port: metrics
+          initialDelaySeconds: 15
+          periodSeconds: 20
           timeoutSeconds: 5
           failureThreshold: 3
         readinessProbe:
           httpGet:
-            path: /healthcheck
-            port: health
+            path: /readyz
+            port: metrics
           initialDelaySeconds: 5
-          periodSeconds: 5
+          periodSeconds: 10
           timeoutSeconds: 3
           failureThreshold: 3
         resources:
diff --git a/charts/templates/service.yaml b/charts/templates/service.yaml
index 6eb3628..6f6bbd3 100644
--- a/charts/templates/service.yaml
+++ b/charts/templates/service.yaml
@@ -11,10 +11,6 @@ spec:
     targetPort: http
     protocol: TCP
     name: http
-  - port: 8083
-    targetPort: health
-    protocol: TCP
-    name: health
   - port: 8080
     targetPort: metrics
     protocol: TCP
diff --git a/charts/values.yaml b/charts/values.yaml
index d5535f8..d95e53d 100644
--- a/charts/values.yaml
+++ b/charts/values.yaml
@@ -17,7 +17,6 @@ fullnameOverride: ""
 # Use ":PORT" format to bind to all interfaces (required for Kubernetes)
 server:
   bindAddress: ":8000"
-  healthBindAddress: ":8083"
   metricsBindAddress: ":8080"
 
 serviceAccount:
diff --git a/cmd/hyperfleet-api/servecmd/cmd.go b/cmd/hyperfleet-api/servecmd/cmd.go
index 6835d3f..2d3ce86 100755
--- a/cmd/hyperfleet-api/servecmd/cmd.go
+++ b/cmd/hyperfleet-api/servecmd/cmd.go
@@ -13,6 +13,7 @@ import (
 	"github.com/openshift-hyperfleet/hyperfleet-api/cmd/hyperfleet-api/environments"
 	"github.com/openshift-hyperfleet/hyperfleet-api/cmd/hyperfleet-api/server"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/api"
+	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/health"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/telemetry"
 )
@@ -75,8 +76,9 @@ func runServe(cmd *cobra.Command, args []string) {
 	metricsServer := server.NewMetricsServer()
 	go metricsServer.Start()
 
-	healthcheckServer := server.NewHealthCheckServer()
-	go healthcheckServer.Start()
+	// Mark application as ready to receive traffic
+	health.GetReadinessState().SetReady()
+	logger.Info(ctx, "Application ready to receive traffic")
 
 	sigChan := make(chan os.Signal, 1)
 	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
@@ -84,9 +86,10 @@ func runServe(cmd *cobra.Command, args []string) {
 
 	logger.Info(ctx, "Shutdown signal received, starting graceful shutdown...")
 
-	if err := healthcheckServer.Stop(); err != nil {
-		logger.WithError(ctx, err).Error("Failed to stop healthcheck server")
-	}
+	// Mark application as not ready (returns 503 on /readyz)
+	health.GetReadinessState().SetShuttingDown()
+	logger.Info(ctx, "Marked as not ready, draining in-flight requests...")
+
 	if err := apiServer.Stop(); err != nil {
 		logger.WithError(ctx, err).Error("Failed to stop API server")
 	}
diff --git a/cmd/hyperfleet-api/server/healthcheck_server.go b/cmd/hyperfleet-api/server/healthcheck_server.go
deleted file mode 100755
index 63f8631..0000000
--- a/cmd/hyperfleet-api/server/healthcheck_server.go
+++ /dev/null
@@ -1,87 +0,0 @@
-package server
-
-import (
-	"context"
-	"fmt"
-	"net"
-	"net/http"
-
-	health "github.com/docker/go-healthcheck"
-	"github.com/gorilla/mux"
-
-	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
-)
-
-var (
-	updater = health.NewStatusUpdater()
-)
-
-var _ Server = &healthCheckServer{}
-
-type healthCheckServer struct {
-	httpServer *http.Server
-}
-
-func NewHealthCheckServer() *healthCheckServer {
-	router := mux.NewRouter()
-	health.DefaultRegistry = health.NewRegistry()
-	health.Register("maintenance_status", updater)
-	router.HandleFunc("/healthcheck", health.StatusHandler).Methods(http.MethodGet)
-	router.HandleFunc("/healthcheck/down", downHandler).Methods(http.MethodPost)
-	router.HandleFunc("/healthcheck/up", upHandler).Methods(http.MethodPost)
-
-	srv := &http.Server{
-		Handler: router,
-		Addr:    env().Config.HealthCheck.BindAddress,
-	}
-
-	return &healthCheckServer{
-		httpServer: srv,
-	}
-}
-
-func (s healthCheckServer) Start() {
-	ctx := context.Background()
-	var err error
-	if env().Config.HealthCheck.EnableHTTPS {
-		if env().Config.Server.HTTPSCertFile == "" || env().Config.Server.HTTPSKeyFile == "" {
-			check(
-				fmt.Errorf("unspecified required --https-cert-file, --https-key-file"),
-				"Can't start https server",
-			)
-		}
-
-		// Serve with TLS
-		logger.With(ctx, logger.FieldBindAddress, env().Config.HealthCheck.BindAddress).Info("Serving HealthCheck with TLS")
-		err = s.httpServer.ListenAndServeTLS(env().Config.Server.HTTPSCertFile, env().Config.Server.HTTPSKeyFile)
-	} else {
-		logger.With(ctx, logger.FieldBindAddress, env().Config.HealthCheck.BindAddress).Info("Serving HealthCheck without TLS")
-		err = s.httpServer.ListenAndServe()
-	}
-	if err != nil && err != http.ErrServerClosed {
-		check(err, "HealthCheck server terminated with errors")
-	} else {
-		logger.Info(ctx, "HealthCheck server terminated")
-	}
-}
-
-func (s healthCheckServer) Stop() error {
-	return s.httpServer.Shutdown(context.Background())
-}
-
-// Listen Unimplemented
-func (s healthCheckServer) Listen() (listener net.Listener, err error) {
-	return nil, nil
-}
-
-// Serve Unimplemented
-func (s healthCheckServer) Serve(listener net.Listener) {
-}
-
-func upHandler(w http.ResponseWriter, r *http.Request) {
-	updater.Update(nil)
-}
-
-func downHandler(w http.ResponseWriter, r *http.Request) {
-	updater.Update(fmt.Errorf("maintenance mode"))
-}
diff --git a/cmd/hyperfleet-api/server/logging/request_logging_middleware.go b/cmd/hyperfleet-api/server/logging/request_logging_middleware.go
index 2f0bddf..2885a43 100755
--- a/cmd/hyperfleet-api/server/logging/request_logging_middleware.go
+++ b/cmd/hyperfleet-api/server/logging/request_logging_middleware.go
@@ -7,7 +7,6 @@ import (
 	"log/slog"
 	"net"
 	"net/http"
-	"strings"
 	"time"
 
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
@@ -19,11 +18,6 @@ func RequestLoggingMiddleware(masker *middleware.MaskingMiddleware) func(http.Ha
 		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 			ctx := r.Context()
 
-			if strings.TrimSuffix(r.URL.Path, "/") == "/healthcheck" {
-				handler.ServeHTTP(w, r)
-				return
-			}
-
 			var maskedHeaders http.Header
 			if masker != nil {
 				maskedHeaders = masker.MaskHeaders(r.Header)
diff --git a/cmd/hyperfleet-api/server/metrics_server.go b/cmd/hyperfleet-api/server/metrics_server.go
index 198ad69..a9b8001 100755
--- a/cmd/hyperfleet-api/server/metrics_server.go
+++ b/cmd/hyperfleet-api/server/metrics_server.go
@@ -10,6 +10,7 @@ import (
 
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/api"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/handlers"
+	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/health"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
 )
 
@@ -21,6 +22,11 @@ func NewMetricsServer() Server {
 	prometheusMetricsHandler := handlers.NewPrometheusMetricsHandler()
 	mainRouter.Handle("/metrics", prometheusMetricsHandler.Handler())
 
+	// health endpoints (HyperFleet standard)
+	healthHandler := health.NewHandler(env().Database.SessionFactory)
+	mainRouter.HandleFunc("/healthz", healthHandler.LivenessHandler).Methods(http.MethodGet)
+	mainRouter.HandleFunc("/readyz", healthHandler.ReadinessHandler).Methods(http.MethodGet)
+
 	var mainHandler http.Handler = mainRouter
 
 	s := &metricsServer{}
diff --git a/docs/deployment.md b/docs/deployment.md
index b374b28..ebbff37 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -78,8 +78,7 @@ export OPENAPI_SCHEMA_PATH=/path/to/custom-schema.yaml
 
 **Server:**
 - `PORT` - API server port (default: `8000`)
-- `METRICS_PORT` - Metrics endpoint port (default: `8080`)
-- `HEALTH_PORT` - Health check port (default: `8083`)
+- `METRICS_PORT` - Metrics and health endpoints port (default: `8080`)
 
 **Logging:**
 - `LOG_LEVEL` - Logging level: `debug`, `info`, `warn`, `error` (default: `info`)
@@ -272,7 +271,9 @@ kubectl get configmaps --namespace hyperfleet-system
 
 ## Health Checks
 
-The deployment includes liveness and readiness probes at `GET /healthcheck` (port 8083).
+The deployment includes:
+- Liveness probe: `GET /healthz` (port 8080) - Returns 200 if the process is alive
+- Readiness probe: `GET /readyz` (port 8080) - Returns 200 when ready to receive traffic, 503 during startup/shutdown
 
 ## Scaling
 
diff --git a/docs/development.md b/docs/development.md
index e0107eb..20acf92 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -95,7 +95,8 @@ The service starts on `localhost:8000`:
 - REST API: `http://localhost:8000/api/hyperfleet/v1/`
 - OpenAPI spec: `http://localhost:8000/api/hyperfleet/v1/openapi`
 - Swagger UI: `http://localhost:8000/api/hyperfleet/v1/openapi.html`
-- Health check: `http://localhost:8083/healthcheck`
+- Liveness probe: `http://localhost:8080/healthz`
+- Readiness probe: `http://localhost:8080/readyz`
 - Metrics: `http://localhost:8080/metrics`
 
 ### Testing the API
diff --git a/docs/hyperfleet-api.http b/docs/hyperfleet-api.http
index 969cc9f..40d658a 100644
--- a/docs/hyperfleet-api.http
+++ b/docs/hyperfleet-api.http
@@ -5,7 +5,6 @@
 @host = localhost
 @port = 8000
 @metrics_port = 8080
-@health_port = 8083
 @baseUrl = http://{{host}}:{{port}}
 @authToken = <your-jwt-token-here>
 
@@ -193,8 +192,12 @@ Content-Type: application/json
 }
 
 ###
-# @name healthCheck
-GET http://{{host}}:{{health_port}}/healthcheck
+# @name livenessProbe
+GET http://{{host}}:{{metrics_port}}/healthz
+
+###
+# @name readinessProbe
+GET http://{{host}}:{{metrics_port}}/readyz
 
 ###
 # @name metrics
diff --git a/pkg/config/config.go b/pkg/config/config.go
index d5851d9..68875cb 100755
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -13,22 +13,20 @@ import (
 )
 
 type ApplicationConfig struct {
-	Server      *ServerConfig      `json:"server"`
-	Metrics     *MetricsConfig     `json:"metrics"`
-	HealthCheck *HealthCheckConfig `json:"health_check"`
-	Database    *DatabaseConfig    `json:"database"`
-	OCM         *OCMConfig         `json:"ocm"`
-	Logging     *LoggingConfig     `json:"logging"`
+	Server   *ServerConfig   `json:"server"`
+	Metrics  *MetricsConfig  `json:"metrics"`
+	Database *DatabaseConfig `json:"database"`
+	OCM      *OCMConfig      `json:"ocm"`
+	Logging  *LoggingConfig  `json:"logging"`
 }
 
 func NewApplicationConfig() *ApplicationConfig {
 	return &ApplicationConfig{
-		Server:      NewServerConfig(),
-		Metrics:     NewMetricsConfig(),
-		HealthCheck: NewHealthCheckConfig(),
-		Database:    NewDatabaseConfig(),
-		OCM:         NewOCMConfig(),
-		Logging:     NewLoggingConfig(),
+		Server:   NewServerConfig(),
+		Metrics:  NewMetricsConfig(),
+		Database: NewDatabaseConfig(),
+		OCM:      NewOCMConfig(),
+		Logging:  NewLoggingConfig(),
 	}
 }
 
@@ -36,7 +34,6 @@ func (c *ApplicationConfig) AddFlags(flagset *pflag.FlagSet) {
 	flagset.AddGoFlagSet(flag.CommandLine)
 	c.Server.AddFlags(flagset)
 	c.Metrics.AddFlags(flagset)
-	c.HealthCheck.AddFlags(flagset)
 	c.Database.AddFlags(flagset)
 	c.OCM.AddFlags(flagset)
 	c.Logging.AddFlags(flagset)
@@ -51,7 +48,6 @@ func (c *ApplicationConfig) ReadFiles() []string {
 		{c.Database.ReadFiles, "Database"},
 		{c.OCM.ReadFiles, "OCM"},
 		{c.Metrics.ReadFiles, "Metrics"},
-		{c.HealthCheck.ReadFiles, "HealthCheck"},
 		{c.Logging.ReadFiles, "Logging"},
 	}
 	var messages []string
diff --git a/pkg/config/health_check.go b/pkg/config/health_check.go
deleted file mode 100755
index e3aaa79..0000000
--- a/pkg/config/health_check.go
+++ /dev/null
@@ -1,26 +0,0 @@
-package config
-
-import (
-	"github.com/spf13/pflag"
-)
-
-type HealthCheckConfig struct {
-	BindAddress string `json:"bind_address"`
-	EnableHTTPS bool   `json:"enable_https"`
-}
-
-func NewHealthCheckConfig() *HealthCheckConfig {
-	return &HealthCheckConfig{
-		BindAddress: "localhost:8083",
-		EnableHTTPS: false,
-	}
-}
-
-func (c *HealthCheckConfig) AddFlags(fs *pflag.FlagSet) {
-	fs.StringVar(&c.BindAddress, "health-check-server-bindaddress", c.BindAddress, "Health check server bind adddress")
-	fs.BoolVar(&c.EnableHTTPS, "enable-health-check-https", c.EnableHTTPS, "Enable HTTPS for health check server")
-}
-
-func (c *HealthCheckConfig) ReadFiles() error {
-	return nil
-}
diff --git a/pkg/health/handlers.go b/pkg/health/handlers.go
new file mode 100644
index 0000000..c5bf23e
--- /dev/null
+++ b/pkg/health/handlers.go
@@ -0,0 +1,104 @@
+package health
+
+import (
+	"encoding/json"
+	"net/http"
+
+	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/db"
+)
+
+// CheckFunc is a function that performs a health check and returns an error if unhealthy
+type CheckFunc func() error
+
+// Handler holds the health check handlers and their dependencies
+type Handler struct {
+	sessionFactory db.SessionFactory
+}
+
+// NewHandler creates a new health handler with the given session factory
+func NewHandler(sf db.SessionFactory) *Handler {
+	return &Handler{
+		sessionFactory: sf,
+	}
+}
+
+// HealthResponse represents the JSON response for health endpoints
+type HealthResponse struct {
+	Status  string            `json:"status"`
+	Message string            `json:"message,omitempty"`
+	Checks  map[string]string `json:"checks,omitempty"`
+}
+
+// LivenessHandler handles GET /healthz requests
+// Returns 200 OK if the process is alive, 503 if unhealthy
+func (h *Handler) LivenessHandler(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "application/json")
+
+	response := HealthResponse{
+		Status: "ok",
+	}
+
+	w.WriteHeader(http.StatusOK)
+	_ = json.NewEncoder(w).Encode(response)
+}
+
+// ReadinessHandler handles GET /readyz requests
+// Returns 200 OK if ready to receive traffic, 503 during startup or shutdown
+func (h *Handler) ReadinessHandler(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "application/json")
+
+	readiness := GetReadinessState()
+
+	// Check if shutting down
+	if readiness.IsShuttingDown() {
+		response := HealthResponse{
+			Status:  "error",
+			Message: "shutting down",
+		}
+		w.WriteHeader(http.StatusServiceUnavailable)
+		_ = json.NewEncoder(w).Encode(response)
+		return
+	}
+
+	// Perform readiness checks
+	checks := make(map[string]string)
+	allOK := true
+
+	// Check database connection
+	if h.sessionFactory != nil {
+		if err := h.sessionFactory.CheckConnection(); err != nil {
+			checks["database"] = "error"
+			allOK = false
+		} else {
+			checks["database"] = "ok"
+		}
+	} else {
+		checks["database"] = "ok"
+	}
+
+	// Check if marked as ready
+	if !readiness.IsReady() {
+		checks["ready"] = "error"
+		allOK = false
+	} else {
+		checks["ready"] = "ok"
+	}
+
+	if !allOK {
+		response := HealthResponse{
+			Status:  "error",
+			Message: "not ready",
+			Checks:  checks,
+		}
+		w.WriteHeader(http.StatusServiceUnavailable)
+		_ = json.NewEncoder(w).Encode(response)
+		return
+	}
+
+	response := HealthResponse{
+		Status: "ok",
+		Checks: checks,
+	}
+	w.WriteHeader(http.StatusOK)
+	_ = json.NewEncoder(w).Encode(response)
+}
diff --git a/pkg/health/handlers_test.go b/pkg/health/handlers_test.go
new file mode 100644
index 0000000..22a3f33
--- /dev/null
+++ b/pkg/health/handlers_test.go
@@ -0,0 +1,209 @@
+package health
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+func TestLivenessHandler_ReturnsOK(t *testing.T) {
+	handler := &Handler{sessionFactory: nil}
+
+	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
+	w := httptest.NewRecorder()
+
+	handler.LivenessHandler(w, req)
+
+	resp := w.Result()
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusOK {
+		t.Errorf("expected status %d, got %d", http.StatusOK, resp.StatusCode)
+	}
+
+	var response HealthResponse
+	if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+
+	if response.Status != "ok" {
+		t.Errorf("expected status 'ok', got '%s'", response.Status)
+	}
+}
+
+func TestReadinessHandler_ReturnsOKWhenReady(t *testing.T) {
+	// Reset and set ready state
+	state := GetReadinessState()
+	state.Reset()
+	state.SetReady()
+
+	handler := &Handler{sessionFactory: nil}
+
+	req := httptest.NewRequest(http.MethodGet, "/readyz", nil)
+	w := httptest.NewRecorder()
+
+	handler.ReadinessHandler(w, req)
+
+	resp := w.Result()
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusOK {
+		t.Errorf("expected status %d, got %d", http.StatusOK, resp.StatusCode)
+	}
+
+	var response HealthResponse
+	if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+
+	if response.Status != "ok" {
+		t.Errorf("expected status 'ok', got '%s'", response.Status)
+	}
+
+	// Reset state for other tests
+	state.Reset()
+}
+
+func TestReadinessHandler_Returns503WhenNotReady(t *testing.T) {
+	// Reset state to not ready
+	state := GetReadinessState()
+	state.Reset()
+
+	handler := &Handler{sessionFactory: nil}
+
+	req := httptest.NewRequest(http.MethodGet, "/readyz", nil)
+	w := httptest.NewRecorder()
+
+	handler.ReadinessHandler(w, req)
+
+	resp := w.Result()
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusServiceUnavailable {
+		t.Errorf("expected status %d, got %d", http.StatusServiceUnavailable, resp.StatusCode)
+	}
+
+	var response HealthResponse
+	if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+
+	if response.Status != "error" {
+		t.Errorf("expected status 'error', got '%s'", response.Status)
+	}
+}
+
+func TestReadinessHandler_Returns503WhenShuttingDown(t *testing.T) {
+	state := GetReadinessState()
+	state.Reset()
+	state.SetReady()
+	state.SetShuttingDown()
+
+	handler := &Handler{sessionFactory: nil}
+
+	req := httptest.NewRequest(http.MethodGet, "/readyz", nil)
+	w := httptest.NewRecorder()
+
+	handler.ReadinessHandler(w, req)
+
+	resp := w.Result()
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusServiceUnavailable {
+		t.Errorf("expected status %d, got %d", http.StatusServiceUnavailable, resp.StatusCode)
+	}
+
+	var response HealthResponse
+	if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+
+	if response.Status != "error" {
+		t.Errorf("expected status 'error', got '%s'", response.Status)
+	}
+
+	if response.Message != "shutting down" {
+		t.Errorf("expected message 'shutting down', got '%s'", response.Message)
+	}
+
+	// Reset state for other tests
+	state.Reset()
+}
+
+func TestReadinessHandler_IncludesChecks(t *testing.T) {
+	state := GetReadinessState()
+	state.Reset()
+	state.SetReady()
+
+	handler := &Handler{sessionFactory: nil}
+
+	req := httptest.NewRequest(http.MethodGet, "/readyz", nil)
+	w := httptest.NewRecorder()
+
+	handler.ReadinessHandler(w, req)
+
+	resp := w.Result()
+	defer func() { _ = resp.Body.Close() }()
+
+	var response HealthResponse
+	if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+
+	if response.Checks == nil {
+		t.Fatal("expected checks to be present")
+	}
+
+	if response.Checks["database"] != "ok" {
+		t.Errorf("expected database check 'ok', got '%s'", response.Checks["database"])
+	}
+
+	if response.Checks["ready"] != "ok" {
+		t.Errorf("expected ready check 'ok', got '%s'", response.Checks["ready"])
+	}
+
+	// Reset state for other tests
+	state.Reset()
+}
+
+func TestReadinessHandler_ContentType(t *testing.T) {
+	state := GetReadinessState()
+	state.Reset()
+	state.SetReady()
+
+	handler := &Handler{sessionFactory: nil}
+
+	req := httptest.NewRequest(http.MethodGet, "/readyz", nil)
+	w := httptest.NewRecorder()
+
+	handler.ReadinessHandler(w, req)
+
+	resp := w.Result()
+	defer func() { _ = resp.Body.Close() }()
+
+	contentType := resp.Header.Get("Content-Type")
+	if contentType != "application/json" {
+		t.Errorf("expected Content-Type 'application/json', got '%s'", contentType)
+	}
+
+	// Reset state for other tests
+	state.Reset()
+}
+
+func TestLivenessHandler_ContentType(t *testing.T) {
+	handler := &Handler{sessionFactory: nil}
+
+	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
+	w := httptest.NewRecorder()
+
+	handler.LivenessHandler(w, req)
+
+	resp := w.Result()
+	defer func() { _ = resp.Body.Close() }()
+
+	contentType := resp.Header.Get("Content-Type")
+	if contentType != "application/json" {
+		t.Errorf("expected Content-Type 'application/json', got '%s'", contentType)
+	}
+}
diff --git a/pkg/health/readiness.go b/pkg/health/readiness.go
new file mode 100644
index 0000000..84a38a3
--- /dev/null
+++ b/pkg/health/readiness.go
@@ -0,0 +1,68 @@
+package health
+
+import (
+	"sync"
+)
+
+// ReadinessState represents the current readiness state of the application
+type ReadinessState struct {
+	mu       sync.RWMutex
+	ready    bool
+	shutting bool
+}
+
+// globalReadiness is the singleton readiness state for the application
+var globalReadiness = &ReadinessState{
+	ready:    false,
+	shutting: false,
+}
+
+// GetReadinessState returns the global readiness state
+func GetReadinessState() *ReadinessState {
+	return globalReadiness
+}
+
+// SetReady marks the application as ready to receive traffic
+func (r *ReadinessState) SetReady() {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.ready = true
+}
+
+// SetNotReady marks the application as not ready to receive traffic
+func (r *ReadinessState) SetNotReady() {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.ready = false
+}
+
+// SetShuttingDown marks the application as shutting down
+// This also sets ready to false
+func (r *ReadinessState) SetShuttingDown() {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.shutting = true
+	r.ready = false
+}
+
+// IsReady returns true if the application is ready to receive traffic
+func (r *ReadinessState) IsReady() bool {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return r.ready && !r.shutting
+}
+
+// IsShuttingDown returns true if the application is shutting down
+func (r *ReadinessState) IsShuttingDown() bool {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return r.shutting
+}
+
+// Reset resets the readiness state (useful for testing)
+func (r *ReadinessState) Reset() {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.ready = false
+	r.shutting = false
+}
diff --git a/pkg/health/readiness_test.go b/pkg/health/readiness_test.go
new file mode 100644
index 0000000..17a1df4
--- /dev/null
+++ b/pkg/health/readiness_test.go
@@ -0,0 +1,100 @@
+package health
+
+import (
+	"testing"
+)
+
+func TestReadinessState_InitialState(t *testing.T) {
+	state := &ReadinessState{}
+
+	if state.IsReady() {
+		t.Error("expected IsReady() to be false initially")
+	}
+
+	if state.IsShuttingDown() {
+		t.Error("expected IsShuttingDown() to be false initially")
+	}
+}
+
+func TestReadinessState_SetReady(t *testing.T) {
+	state := &ReadinessState{}
+
+	state.SetReady()
+
+	if !state.IsReady() {
+		t.Error("expected IsReady() to be true after SetReady()")
+	}
+
+	if state.IsShuttingDown() {
+		t.Error("expected IsShuttingDown() to be false after SetReady()")
+	}
+}
+
+func TestReadinessState_SetNotReady(t *testing.T) {
+	state := &ReadinessState{}
+
+	state.SetReady()
+	state.SetNotReady()
+
+	if state.IsReady() {
+		t.Error("expected IsReady() to be false after SetNotReady()")
+	}
+}
+
+func TestReadinessState_SetShuttingDown(t *testing.T) {
+	state := &ReadinessState{}
+
+	state.SetReady()
+	state.SetShuttingDown()
+
+	if state.IsReady() {
+		t.Error("expected IsReady() to be false after SetShuttingDown()")
+	}
+
+	if !state.IsShuttingDown() {
+		t.Error("expected IsShuttingDown() to be true after SetShuttingDown()")
+	}
+}
+
+func TestReadinessState_Reset(t *testing.T) {
+	state := &ReadinessState{}
+
+	state.SetReady()
+	state.SetShuttingDown()
+	state.Reset()
+
+	if state.IsReady() {
+		t.Error("expected IsReady() to be false after Reset()")
+	}
+
+	if state.IsShuttingDown() {
+		t.Error("expected IsShuttingDown() to be false after Reset()")
+	}
+}
+
+func TestReadinessState_IsReadyFalseWhenShuttingDown(t *testing.T) {
+	state := &ReadinessState{}
+
+	// Set ready first
+	state.SetReady()
+	if !state.IsReady() {
+		t.Error("expected IsReady() to be true after SetReady()")
+	}
+
+	// Then set shutting down
+	state.SetShuttingDown()
+
+	// IsReady should be false even if ready was set before
+	if state.IsReady() {
+		t.Error("expected IsReady() to be false when shutting down")
+	}
+}
+
+func TestGetReadinessState_ReturnsSingleton(t *testing.T) {
+	state1 := GetReadinessState()
+	state2 := GetReadinessState()
+
+	if state1 != state2 {
+		t.Error("expected GetReadinessState() to return the same instance")
+	}
+}
diff --git a/test/helper.go b/test/helper.go
index 75a3e45..40e8607 100755
--- a/test/helper.go
+++ b/test/helper.go
@@ -54,18 +54,17 @@ var jwkURL string
 type TimeFunc func() time.Time
 
 type Helper struct {
-	Ctx               context.Context
-	DBFactory         db.SessionFactory
-	AppConfig         *config.ApplicationConfig
-	APIServer         server.Server
-	MetricsServer     server.Server
-	HealthCheckServer server.Server
-	TimeFunc          TimeFunc
-	JWTPrivateKey     *rsa.PrivateKey
-	JWTCA             *rsa.PublicKey
-	T                 *testing.T
-	teardowns         []func() error
-	Factories         factories.Factories
+	Ctx           context.Context
+	DBFactory     db.SessionFactory
+	AppConfig     *config.ApplicationConfig
+	APIServer     server.Server
+	MetricsServer server.Server
+	TimeFunc      TimeFunc
+	JWTPrivateKey *rsa.PrivateKey
+	JWTCA         *rsa.PublicKey
+	T             *testing.T
+	teardowns     []func() error
+	Factories     factories.Factories
 }
 
 func NewHelper(t *testing.T) *Helper {
@@ -115,7 +114,6 @@ func NewHelper(t *testing.T) *Helper {
 		}
 		helper.startAPIServer()
 		helper.startMetricsServer()
-		helper.startHealthCheckServer()
 	})
 	helper.T = t
 	return helper
@@ -180,15 +178,6 @@ func (helper *Helper) stopMetricsServer() error {
 	return nil
 }
 
-func (helper *Helper) startHealthCheckServer() {
-	ctx := context.Background()
-	helper.HealthCheckServer = server.NewHealthCheckServer()
-	go func() {
-		logger.Debug(ctx, "Test health check server started")
-		helper.HealthCheckServer.Start()
-		logger.Debug(ctx, "Test health check server stopped")
-	}()
-}
 
 func (helper *Helper) RestartServer() {
 	ctx := context.Background()
@@ -257,10 +246,6 @@ func (helper *Helper) MetricsURL(path string) string {
 	return fmt.Sprintf("http://%s%s", helper.AppConfig.Metrics.BindAddress, path)
 }
 
-func (helper *Helper) HealthCheckURL(path string) string {
-	return fmt.Sprintf("http://%s%s", helper.AppConfig.HealthCheck.BindAddress, path)
-}
-
 func (helper *Helper) NewApiClient() *openapi.APIClient {
 	config := openapi.NewConfiguration()
 	// Override the server URL to use the local test server

From 28df99584d907d839e0f19579a98cc16002649de Mon Sep 17 00:00:00 2001
From: Rafael Benevides <rafabene@gmail.com>
Date: Tue, 13 Jan 2026 16:36:50 -0300
Subject: [PATCH 2/2] HYPERFLEET-453 - feat: implement graceful shutdown with
 signal handling

Add configurable graceful shutdown with SHUTDOWN_TIMEOUT environment
variable (default 20s). Use signal.NotifyContext for SIGTERM/SIGINT
handling and implement proper shutdown sequence: mark not ready,
drain HTTP connections, cleanup OpenTelemetry, close database.
---
 AGENTS.md                                   |   3 +
 cmd/hyperfleet-api/servecmd/cmd.go          |  65 ++++++++----
 cmd/hyperfleet-api/server/api_server.go     |   4 +-
 cmd/hyperfleet-api/server/metrics_server.go |   4 +-
 cmd/hyperfleet-api/server/server.go         |   2 +-
 docs/deployment.md                          |   3 +
 pkg/config/config.go                        |   3 +
 pkg/config/shutdown.go                      |  41 ++++++++
 pkg/config/shutdown_test.go                 | 108 ++++++++++++++++++++
 test/helper.go                              |   4 +-
 10 files changed, 212 insertions(+), 25 deletions(-)
 create mode 100644 pkg/config/shutdown.go
 create mode 100644 pkg/config/shutdown_test.go

diff --git a/AGENTS.md b/AGENTS.md
index 2382b9b..3b5d7c5 100755
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -347,6 +347,9 @@ Serves the hyperfleet REST API with full authentication, database connectivity,
   - `--metrics-server-bindaddress` - Metrics and health endpoints server address (default: "localhost:8080")
   - `--enable-metrics-https` - Enable HTTPS for metrics server
 
+- **Graceful Shutdown:**
+  - `--shutdown-timeout` - Graceful shutdown timeout (default: 20s, env: `SHUTDOWN_TIMEOUT`)
+
 - **Performance Tuning:**
   - `--http-read-timeout` - HTTP server read timeout (default: 5s)
   - `--http-write-timeout` - HTTP server write timeout (default: 30s)
diff --git a/cmd/hyperfleet-api/servecmd/cmd.go b/cmd/hyperfleet-api/servecmd/cmd.go
index 2d3ce86..e3f66c0 100755
--- a/cmd/hyperfleet-api/servecmd/cmd.go
+++ b/cmd/hyperfleet-api/servecmd/cmd.go
@@ -44,11 +44,15 @@ func runServe(cmd *cobra.Command, args []string) {
 		os.Exit(1)
 	}
 
-	// Bind environment variables for advanced configuration (OTel, Masking)
+	// Bind environment variables for advanced configuration (OTel, Masking, Shutdown)
 	environments.Environment().Config.Logging.BindEnv(cmd.PersistentFlags())
+	environments.Environment().Config.Shutdown.BindEnv()
 
 	initLogger()
 
+	shutdownTimeout := environments.Environment().Config.Shutdown.Timeout
+	logger.With(ctx, "shutdown_timeout", shutdownTimeout.String()).Info("Shutdown timeout configured")
+
 	var tp *trace.TracerProvider
 	if environments.Environment().Config.Logging.OTel.Enabled {
 		samplingRate := environments.Environment().Config.Logging.OTel.SamplingRate
@@ -80,30 +84,55 @@ func runServe(cmd *cobra.Command, args []string) {
 	health.GetReadinessState().SetReady()
 	logger.Info(ctx, "Application ready to receive traffic")
 
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
-	<-sigChan
+	// Wait for shutdown signal using signal.NotifyContext
+	signalCtx, stop := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
+	defer stop()
+	<-signalCtx.Done()
 
 	logger.Info(ctx, "Shutdown signal received, starting graceful shutdown...")
 
-	// Mark application as not ready (returns 503 on /readyz)
-	health.GetReadinessState().SetShuttingDown()
-	logger.Info(ctx, "Marked as not ready, draining in-flight requests...")
+	// Create shutdown context with timeout
+	shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
+	defer cancel()
 
-	if err := apiServer.Stop(); err != nil {
-		logger.WithError(ctx, err).Error("Failed to stop API server")
-	}
-	if err := metricsServer.Stop(); err != nil {
-		logger.WithError(ctx, err).Error("Failed to stop metrics server")
-	}
+	// Channel to signal shutdown completion
+	shutdownComplete := make(chan struct{})
 
-	if tp != nil {
-		if err := telemetry.Shutdown(context.Background(), tp); err != nil {
-			logger.WithError(ctx, err).Error("Failed to shutdown OpenTelemetry")
+	go func() {
+		defer close(shutdownComplete)
+
+		// Phase 1: Mark application as not ready (returns 503 on /readyz)
+		health.GetReadinessState().SetShuttingDown()
+		logger.Info(ctx, "Marked as not ready, draining in-flight requests...")
+
+		// Phase 2-3: Stop servers (stops accepting new connections and drains in-flight requests)
+		if err := apiServer.Stop(shutdownCtx); err != nil {
+			logger.WithError(ctx, err).Error("Failed to stop API server")
+		}
+		if err := metricsServer.Stop(shutdownCtx); err != nil {
+			logger.WithError(ctx, err).Error("Failed to stop metrics server")
+		}
+
+		// Phase 4: Cleanup resources
+		if tp != nil {
+			if err := telemetry.Shutdown(shutdownCtx, tp); err != nil {
+				logger.WithError(ctx, err).Error("Failed to shutdown OpenTelemetry")
+			}
 		}
-	}
 
-	logger.Info(ctx, "Graceful shutdown completed")
+		// Close database connections
+		environments.Environment().Teardown()
+		logger.Info(ctx, "Resources cleaned up")
+	}()
+
+	// Wait for shutdown to complete or timeout
+	select {
+	case <-shutdownComplete:
+		logger.Info(ctx, "Graceful shutdown completed")
+	case <-shutdownCtx.Done():
+		logger.Error(ctx, "Shutdown timeout exceeded, forcing exit")
+		os.Exit(1)
+	}
 }
 
 // initLogger initializes the global slog logger from configuration
diff --git a/cmd/hyperfleet-api/server/api_server.go b/cmd/hyperfleet-api/server/api_server.go
index 2e23d0c..a5b2939 100755
--- a/cmd/hyperfleet-api/server/api_server.go
+++ b/cmd/hyperfleet-api/server/api_server.go
@@ -154,6 +154,6 @@ func (s apiServer) Start() {
 	}
 }
 
-func (s apiServer) Stop() error {
-	return s.httpServer.Shutdown(context.Background())
+func (s apiServer) Stop(ctx context.Context) error {
+	return s.httpServer.Shutdown(ctx)
 }
diff --git a/cmd/hyperfleet-api/server/metrics_server.go b/cmd/hyperfleet-api/server/metrics_server.go
index a9b8001..2f154eb 100755
--- a/cmd/hyperfleet-api/server/metrics_server.go
+++ b/cmd/hyperfleet-api/server/metrics_server.go
@@ -74,6 +74,6 @@ func (s metricsServer) Start() {
 	}
 }
 
-func (s metricsServer) Stop() error {
-	return s.httpServer.Shutdown(context.Background())
+func (s metricsServer) Stop(ctx context.Context) error {
+	return s.httpServer.Shutdown(ctx)
 }
diff --git a/cmd/hyperfleet-api/server/server.go b/cmd/hyperfleet-api/server/server.go
index 02a5d14..ae85330 100755
--- a/cmd/hyperfleet-api/server/server.go
+++ b/cmd/hyperfleet-api/server/server.go
@@ -12,7 +12,7 @@ import (
 
 type Server interface {
 	Start()
-	Stop() error
+	Stop(ctx context.Context) error
 	Listen() (net.Listener, error)
 	Serve(net.Listener)
 }
diff --git a/docs/deployment.md b/docs/deployment.md
index ebbff37..b0fac4e 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -80,6 +80,9 @@ export OPENAPI_SCHEMA_PATH=/path/to/custom-schema.yaml
 - `PORT` - API server port (default: `8000`)
 - `METRICS_PORT` - Metrics and health endpoints port (default: `8080`)
 
+**Graceful Shutdown:**
+- `SHUTDOWN_TIMEOUT` - Graceful shutdown timeout (default: `20s`)
+
 **Logging:**
 - `LOG_LEVEL` - Logging level: `debug`, `info`, `warn`, `error` (default: `info`)
 - `LOG_FORMAT` - Log format: `json`, `text` (default: `json`)
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 68875cb..e22e10f 100755
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -18,6 +18,7 @@ type ApplicationConfig struct {
 	Database *DatabaseConfig `json:"database"`
 	OCM      *OCMConfig      `json:"ocm"`
 	Logging  *LoggingConfig  `json:"logging"`
+	Shutdown *ShutdownConfig `json:"shutdown"`
 }
 
 func NewApplicationConfig() *ApplicationConfig {
@@ -27,6 +28,7 @@ func NewApplicationConfig() *ApplicationConfig {
 		Database: NewDatabaseConfig(),
 		OCM:      NewOCMConfig(),
 		Logging:  NewLoggingConfig(),
+		Shutdown: NewShutdownConfig(),
 	}
 }
 
@@ -37,6 +39,7 @@ func (c *ApplicationConfig) AddFlags(flagset *pflag.FlagSet) {
 	c.Database.AddFlags(flagset)
 	c.OCM.AddFlags(flagset)
 	c.Logging.AddFlags(flagset)
+	c.Shutdown.AddFlags(flagset)
 }
 
 func (c *ApplicationConfig) ReadFiles() []string {
diff --git a/pkg/config/shutdown.go b/pkg/config/shutdown.go
new file mode 100644
index 0000000..0851be7
--- /dev/null
+++ b/pkg/config/shutdown.go
@@ -0,0 +1,41 @@
+package config
+
+import (
+	"os"
+	"time"
+
+	"github.com/spf13/pflag"
+)
+
+const (
+	// DefaultShutdownTimeout is the default timeout for graceful shutdown
+	DefaultShutdownTimeout = 20 * time.Second
+)
+
+type ShutdownConfig struct {
+	Timeout time.Duration `json:"timeout"`
+}
+
+func NewShutdownConfig() *ShutdownConfig {
+	return &ShutdownConfig{
+		Timeout: DefaultShutdownTimeout,
+	}
+}
+
+func (s *ShutdownConfig) AddFlags(fs *pflag.FlagSet) {
+	fs.DurationVar(&s.Timeout, "shutdown-timeout", s.Timeout, "Graceful shutdown timeout")
+}
+
+func (s *ShutdownConfig) ReadFiles() error {
+	return nil
+}
+
+// BindEnv binds environment variables to config values
+// Environment variables take precedence over flags
+func (s *ShutdownConfig) BindEnv() {
+	if val := os.Getenv("SHUTDOWN_TIMEOUT"); val != "" {
+		if d, err := time.ParseDuration(val); err == nil {
+			s.Timeout = d
+		}
+	}
+}
diff --git a/pkg/config/shutdown_test.go b/pkg/config/shutdown_test.go
new file mode 100644
index 0000000..9e7905d
--- /dev/null
+++ b/pkg/config/shutdown_test.go
@@ -0,0 +1,108 @@
+package config
+
+import (
+	"testing"
+	"time"
+
+	"github.com/spf13/pflag"
+)
+
+func TestNewShutdownConfig_Defaults(t *testing.T) {
+	cfg := NewShutdownConfig()
+
+	if cfg.Timeout != DefaultShutdownTimeout {
+		t.Errorf("expected default timeout %v, got %v", DefaultShutdownTimeout, cfg.Timeout)
+	}
+
+	if cfg.Timeout != 20*time.Second {
+		t.Errorf("expected timeout 20s, got %v", cfg.Timeout)
+	}
+}
+
+func TestShutdownConfig_AddFlags(t *testing.T) {
+	cfg := NewShutdownConfig()
+	fs := pflag.NewFlagSet("test", pflag.ContinueOnError)
+
+	cfg.AddFlags(fs)
+
+	flag := fs.Lookup("shutdown-timeout")
+	if flag == nil {
+		t.Fatal("expected shutdown-timeout flag to be defined")
+	}
+
+	if flag.DefValue != "20s" {
+		t.Errorf("expected default value '20s', got '%s'", flag.DefValue)
+	}
+}
+
+func TestShutdownConfig_AddFlags_CustomValue(t *testing.T) {
+	cfg := NewShutdownConfig()
+	fs := pflag.NewFlagSet("test", pflag.ContinueOnError)
+
+	cfg.AddFlags(fs)
+
+	err := fs.Parse([]string{"--shutdown-timeout=30s"})
+	if err != nil {
+		t.Fatalf("failed to parse flags: %v", err)
+	}
+
+	if cfg.Timeout != 30*time.Second {
+		t.Errorf("expected timeout 30s, got %v", cfg.Timeout)
+	}
+}
+
+func TestShutdownConfig_BindEnv(t *testing.T) {
+	tests := []struct {
+		name     string
+		envValue string
+		expected time.Duration
+	}{
+		{
+			name:     "valid duration",
+			envValue: "45s",
+			expected: 45 * time.Second,
+		},
+		{
+			name:     "valid duration in minutes",
+			envValue: "1m",
+			expected: 1 * time.Minute,
+		},
+		{
+			name:     "invalid duration keeps default",
+			envValue: "invalid",
+			expected: DefaultShutdownTimeout,
+		},
+		{
+			name:     "empty value keeps default",
+			envValue: "",
+			expected: DefaultShutdownTimeout,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := NewShutdownConfig()
+
+			if tt.envValue != "" {
+				t.Setenv("SHUTDOWN_TIMEOUT", tt.envValue)
+			} else {
+				t.Setenv("SHUTDOWN_TIMEOUT", "")
+			}
+
+			cfg.BindEnv()
+
+			if cfg.Timeout != tt.expected {
+				t.Errorf("expected timeout %v, got %v", tt.expected, cfg.Timeout)
+			}
+		})
+	}
+}
+
+func TestShutdownConfig_ReadFiles(t *testing.T) {
+	cfg := NewShutdownConfig()
+
+	err := cfg.ReadFiles()
+	if err != nil {
+		t.Errorf("expected no error, got %v", err)
+	}
+}
diff --git a/test/helper.go b/test/helper.go
index 40e8607..6736b43 100755
--- a/test/helper.go
+++ b/test/helper.go
@@ -155,7 +155,7 @@ func (helper *Helper) startAPIServer() {
 }
 
 func (helper *Helper) stopAPIServer() error {
-	if err := helper.APIServer.Stop(); err != nil {
+	if err := helper.APIServer.Stop(context.Background()); err != nil {
 		return fmt.Errorf("unable to stop api server: %s", err.Error())
 	}
 	return nil
@@ -172,7 +172,7 @@ func (helper *Helper) startMetricsServer() {
 }
 
 func (helper *Helper) stopMetricsServer() error {
-	if err := helper.MetricsServer.Stop(); err != nil {
+	if err := helper.MetricsServer.Stop(context.Background()); err != nil {
 		return fmt.Errorf("unable to stop metrics server: %s", err.Error())
 	}
 	return nil