openshift-hyperfleet · rafabene · Jan 13, 2026 · Jan 13, 2026 · yasun1 · Jan 14, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -344,11 +344,12 @@ Serves the hyperfleet REST API with full authentication, database connectivity,
   - `--ocm-debug` - Enable OCM API debug logging
 
 - **Monitoring & Health Checks:**
-  - `--health-check-server-bindaddress` - Health check server address (default: "localhost:8083")
-  - `--enable-health-check-https` - Enable HTTPS for health check server
-  - `--metrics-server-bindaddress` - Metrics server address (default: "localhost:8080")
+  - `--metrics-server-bindaddress` - Metrics and health endpoints server address (default: "localhost:8080")
   - `--enable-metrics-https` - Enable HTTPS for metrics server
 
+- **Graceful Shutdown:**
+  - `--shutdown-timeout` - Graceful shutdown timeout (default: 20s, env: `SHUTDOWN_TIMEOUT`)
+
 - **Performance Tuning:**
   - `--http-read-timeout` - HTTP server read timeout (default: 5s)
   - `--http-write-timeout` - HTTP server write timeout (default: 30s)
@@ -686,8 +687,7 @@ The server is configured in cmd/hyperfleet/server/:
 
 **Ports**:
 - `8000` - Main API server
-- `8080` - Metrics endpoint
-- `8083` - Health check endpoint
+- `8080` - Metrics and health endpoints (`/metrics`, `/healthz`, `/readyz`)
 
 **Middleware Chain**:
 1. Request logging
@@ -772,7 +772,9 @@ The API is designed to be stateless and horizontally scalable:
 - No event creation or message queues
 - Kubernetes-ready (multiple replicas)
 
-**Health Check**: `GET /healthcheck` returns 200 OK when database is accessible
+**Health Endpoints**:
+- `GET /healthz` - Liveness probe, returns 200 OK if the process is alive
+- `GET /readyz` - Readiness probe, returns 200 OK when ready to receive traffic (checks database connection) and 503 during startup or shutdown
 
 **Metrics**: Prometheus metrics available at `/metrics`
 

diff --git a/README.md b/README.md
@@ -80,7 +80,8 @@ The service starts on `localhost:8000`:
 - **REST API**: `http://localhost:8000/api/hyperfleet/v1/`
 - **OpenAPI spec**: `http://localhost:8000/api/hyperfleet/v1/openapi`
 - **Swagger UI**: `http://localhost:8000/api/hyperfleet/v1/openapi.html`
-- **Health check**: `http://localhost:8083/healthcheck`
+- **Liveness probe**: `http://localhost:8080/healthz`
+- **Readiness probe**: `http://localhost:8080/readyz`
 - **Metrics**: `http://localhost:8080/metrics`
 
 ```bash

diff --git a/charts/templates/deployment.yaml b/charts/templates/deployment.yaml
@@ -53,15 +53,11 @@ spec:
         args:
         - serve
         - --api-server-bindaddress={{ .Values.server.bindAddress | default ":8000" }}
-        - --health-check-server-bindaddress={{ .Values.server.healthBindAddress | default ":8083" }}
         - --metrics-server-bindaddress={{ .Values.server.metricsBindAddress | default ":8080" }}
         ports:
         - name: http
           containerPort: 8000
           protocol: TCP
-        - name: health
-          containerPort: 8083
-          protocol: TCP
         - name: metrics
           containerPort: 8080
           protocol: TCP
@@ -82,18 +78,18 @@ spec:
         {{- end }}
         livenessProbe:
           httpGet:
-            path: /healthcheck
-            port: health
-          initialDelaySeconds: 30
-          periodSeconds: 10
+            path: /healthz
+            port: metrics
+          initialDelaySeconds: 15
+          periodSeconds: 20
           timeoutSeconds: 5
           failureThreshold: 3
         readinessProbe:
           httpGet:
-            path: /healthcheck
-            port: health
+            path: /readyz
+            port: metrics
           initialDelaySeconds: 5
-          periodSeconds: 5
+          periodSeconds: 10
           timeoutSeconds: 3
           failureThreshold: 3
         resources:

diff --git a/charts/templates/service.yaml b/charts/templates/service.yaml
@@ -11,10 +11,6 @@ spec:
     targetPort: http
     protocol: TCP
     name: http
-  - port: 8083
-    targetPort: health
-    protocol: TCP
-    name: health
   - port: 8080
     targetPort: metrics
     protocol: TCP

diff --git a/charts/values.yaml b/charts/values.yaml
@@ -17,7 +17,6 @@ fullnameOverride: ""
 # Use ":PORT" format to bind to all interfaces (required for Kubernetes)
 server:
   bindAddress: ":8000"
-  healthBindAddress: ":8083"
   metricsBindAddress: ":8080"
 
 serviceAccount:

diff --git a/cmd/hyperfleet-api/servecmd/cmd.go b/cmd/hyperfleet-api/servecmd/cmd.go
@@ -13,6 +13,7 @@ import (
 	"github.com/openshift-hyperfleet/hyperfleet-api/cmd/hyperfleet-api/environments"
 	"github.com/openshift-hyperfleet/hyperfleet-api/cmd/hyperfleet-api/server"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/api"
+	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/health"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/telemetry"
 )
@@ -43,11 +44,15 @@ func runServe(cmd *cobra.Command, args []string) {
 		os.Exit(1)
 	}
 
-	// Bind environment variables for advanced configuration (OTel, Masking)
+	// Bind environment variables for advanced configuration (OTel, Masking, Shutdown)
 	environments.Environment().Config.Logging.BindEnv(cmd.PersistentFlags())
+	environments.Environment().Config.Shutdown.BindEnv()
 
 	initLogger()
 
+	shutdownTimeout := environments.Environment().Config.Shutdown.Timeout
+	logger.With(ctx, "shutdown_timeout", shutdownTimeout.String()).Info("Shutdown timeout configured")
+
 	var tp *trace.TracerProvider
 	if environments.Environment().Config.Logging.OTel.Enabled {
 		samplingRate := environments.Environment().Config.Logging.OTel.SamplingRate
@@ -75,32 +80,59 @@ func runServe(cmd *cobra.Command, args []string) {
 	metricsServer := server.NewMetricsServer()
 	go metricsServer.Start()
 
-	healthcheckServer := server.NewHealthCheckServer()
-	go healthcheckServer.Start()
+	// Mark application as ready to receive traffic
+	health.GetReadinessState().SetReady()
+	logger.Info(ctx, "Application ready to receive traffic")
 
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
-	<-sigChan
+	// Wait for shutdown signal using signal.NotifyContext
+	signalCtx, stop := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
+	defer stop()
+	<-signalCtx.Done()
 
 	logger.Info(ctx, "Shutdown signal received, starting graceful shutdown...")
 
-	if err := healthcheckServer.Stop(); err != nil {
-		logger.WithError(ctx, err).Error("Failed to stop healthcheck server")
-	}
-	if err := apiServer.Stop(); err != nil {
-		logger.WithError(ctx, err).Error("Failed to stop API server")
-	}
-	if err := metricsServer.Stop(); err != nil {
-		logger.WithError(ctx, err).Error("Failed to stop metrics server")
-	}
+	// Create shutdown context with timeout
+	shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
+	defer cancel()
+
+	// Channel to signal shutdown completion
+	shutdownComplete := make(chan struct{})
+
+	go func() {
+		defer close(shutdownComplete)
+
+		// Phase 1: Mark application as not ready (returns 503 on /readyz)
+		health.GetReadinessState().SetShuttingDown()
+		logger.Info(ctx, "Marked as not ready, draining in-flight requests...")
 
-	if tp != nil {
-		if err := telemetry.Shutdown(context.Background(), tp); err != nil {
-			logger.WithError(ctx, err).Error("Failed to shutdown OpenTelemetry")
+		// Phase 2-3: Stop servers (stops accepting new connections and drains in-flight requests)
+		if err := apiServer.Stop(shutdownCtx); err != nil {
+			logger.WithError(ctx, err).Error("Failed to stop API server")
+		}
+		if err := metricsServer.Stop(shutdownCtx); err != nil {
+			logger.WithError(ctx, err).Error("Failed to stop metrics server")
+		}
+
+		// Phase 4: Cleanup resources
+		if tp != nil {
+			if err := telemetry.Shutdown(shutdownCtx, tp); err != nil {
+				logger.WithError(ctx, err).Error("Failed to shutdown OpenTelemetry")
+			}
 		}
-	}
 
-	logger.Info(ctx, "Graceful shutdown completed")
+		// Close database connections
+		environments.Environment().Teardown()
+		logger.Info(ctx, "Resources cleaned up")
+	}()
+
+	// Wait for shutdown to complete or timeout
+	select {
+	case <-shutdownComplete:
+		logger.Info(ctx, "Graceful shutdown completed")
+	case <-shutdownCtx.Done():
+		logger.Error(ctx, "Shutdown timeout exceeded, forcing exit")
+		os.Exit(1)
+	}
 }
 
 // initLogger initializes the global slog logger from configuration

diff --git a/cmd/hyperfleet-api/server/api_server.go b/cmd/hyperfleet-api/server/api_server.go
@@ -154,6 +154,6 @@ func (s apiServer) Start() {
 	}
 }
 
-func (s apiServer) Stop() error {
-	return s.httpServer.Shutdown(context.Background())
+func (s apiServer) Stop(ctx context.Context) error {
+	return s.httpServer.Shutdown(ctx)
 }
diff --git a/cmd/hyperfleet-api/server/healthcheck_server.go b/cmd/hyperfleet-api/server/healthcheck_server.go
diff --git a/cmd/hyperfleet-api/server/logging/request_logging_middleware.go b/cmd/hyperfleet-api/server/logging/request_logging_middleware.go
@@ -7,7 +7,6 @@ import (
 	"log/slog"
 	"net"
 	"net/http"
-	"strings"
 	"time"
 
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
@@ -19,11 +18,6 @@ func RequestLoggingMiddleware(masker *middleware.MaskingMiddleware) func(http.Ha
 		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 			ctx := r.Context()
 
-			if strings.TrimSuffix(r.URL.Path, "/") == "/healthcheck" {
-				handler.ServeHTTP(w, r)
-				return
-			}
-
 			var maskedHeaders http.Header
 			if masker != nil {
 				maskedHeaders = masker.MaskHeaders(r.Header)

diff --git a/cmd/hyperfleet-api/server/metrics_server.go b/cmd/hyperfleet-api/server/metrics_server.go
@@ -10,6 +10,7 @@ import (
 
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/api"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/handlers"
+	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/health"
 	"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
 )
 
@@ -21,6 +22,11 @@ func NewMetricsServer() Server {
 	prometheusMetricsHandler := handlers.NewPrometheusMetricsHandler()
 	mainRouter.Handle("/metrics", prometheusMetricsHandler.Handler())
 
+	// health endpoints (HyperFleet standard)
+	healthHandler := health.NewHandler(env().Database.SessionFactory)
+	mainRouter.HandleFunc("/healthz", healthHandler.LivenessHandler).Methods(http.MethodGet)
+	mainRouter.HandleFunc("/readyz", healthHandler.ReadinessHandler).Methods(http.MethodGet)
+
 	var mainHandler http.Handler = mainRouter
 
 	s := &metricsServer{}
@@ -68,6 +74,6 @@ func (s metricsServer) Start() {
 	}
 }
 
-func (s metricsServer) Stop() error {
-	return s.httpServer.Shutdown(context.Background())
+func (s metricsServer) Stop(ctx context.Context) error {
+	return s.httpServer.Shutdown(ctx)
 }
diff --git a/cmd/hyperfleet-api/server/server.go b/cmd/hyperfleet-api/server/server.go
@@ -12,7 +12,7 @@ import (
 
 type Server interface {
 	Start()
-	Stop() error
+	Stop(ctx context.Context) error
 	Listen() (net.Listener, error)
 	Serve(net.Listener)
 }

diff --git a/docs/deployment.md b/docs/deployment.md
@@ -78,8 +78,10 @@ export OPENAPI_SCHEMA_PATH=/path/to/custom-schema.yaml
 
 **Server:**
 - `PORT` - API server port (default: `8000`)
-- `METRICS_PORT` - Metrics endpoint port (default: `8080`)
-- `HEALTH_PORT` - Health check port (default: `8083`)
+- `METRICS_PORT` - Metrics and health endpoints port (default: `8080`)
+
+**Graceful Shutdown:**
+- `SHUTDOWN_TIMEOUT` - Graceful shutdown timeout (default: `20s`)
 
 **Logging:**
 - `LOG_LEVEL` - Logging level: `debug`, `info`, `warn`, `error` (default: `info`)
@@ -272,7 +274,9 @@ kubectl get configmaps --namespace hyperfleet-system
 
 ## Health Checks
 
-The deployment includes liveness and readiness probes at `GET /healthcheck` (port 8083).
+The deployment includes:
+- Liveness probe: `GET /healthz` (port 8080) - Returns 200 if the process is alive
+- Readiness probe: `GET /readyz` (port 8080) - Returns 200 when ready to receive traffic (checks database connection), 503 during startup/shutdown
 
 ## Scaling