From 1b9e5faae48473e62d3db3c66635622e4113f15b Mon Sep 17 00:00:00 2001 From: wass3r <1301201+wass3r@users.noreply.github.com> Date: Fri, 7 Nov 2025 21:55:16 -0600 Subject: [PATCH] fix: record proper step state on timeout --- executor/linux/step.go | 42 +++++++++++ executor/linux/step_test.go | 137 ++++++++++++++++++++++++++++++++++++ executor/local/step.go | 42 +++++++++++ executor/local/step_test.go | 105 +++++++++++++++++++++++++++ mock/docker/container.go | 12 ++++ 5 files changed, 338 insertions(+) diff --git a/executor/linux/step.go b/executor/linux/step.go index 8568bc06..361eb113 100644 --- a/executor/linux/step.go +++ b/executor/linux/step.go @@ -6,6 +6,7 @@ import ( "bufio" "bytes" "context" + "errors" "fmt" "io" "strings" @@ -213,6 +214,7 @@ func (c *client) ExecStep(ctx context.Context, ctn *pipeline.Container) error { // wait for the runtime container err = c.Runtime.WaitContainer(ctx, ctn) if err != nil { + recordStepRuntimeError(ctn, _step, err) return err } @@ -220,12 +222,52 @@ func (c *client) ExecStep(ctx context.Context, ctn *pipeline.Container) error { // inspect the runtime container err = c.Runtime.InspectContainer(ctx, ctn) if err != nil { + recordStepRuntimeError(ctn, _step, err) return err } return nil } +// recordStepRuntimeError updates the in-memory step so Snapshot/Upload won't report success. +func recordStepRuntimeError(ctn *pipeline.Container, s *api.Step, err error) { + if s == nil || err == nil { + return + } + + s.SetError(err.Error()) + + exitCode := int32(0) + if ctn != nil { + exitCode = ctn.ExitCode + } + + switch { + case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded): + s.SetStatus(constants.StatusFailure) + + if exitCode == 0 { + exitCode = 137 + } + default: + s.SetStatus(constants.StatusError) + + if exitCode == 0 { + exitCode = 1 + } + } + + if exitCode != 0 { + if ctn != nil { + ctn.ExitCode = exitCode + } + + if s.GetExitCode() == 0 { + s.SetExitCode(exitCode) + } + } +} + // StreamStep tails the output for a step. func (c *client) StreamStep(ctx context.Context, ctn *pipeline.Container) error { // TODO: remove hardcoded reference diff --git a/executor/linux/step_test.go b/executor/linux/step_test.go index 30aaee0e..5af1630b 100644 --- a/executor/linux/step_test.go +++ b/executor/linux/step_test.go @@ -14,6 +14,7 @@ import ( "github.com/go-vela/sdk-go/vela" api "github.com/go-vela/server/api/types" "github.com/go-vela/server/compiler/types/pipeline" + "github.com/go-vela/server/constants" "github.com/go-vela/server/mock/server" "github.com/go-vela/worker/internal/message" "github.com/go-vela/worker/runtime" @@ -524,6 +525,142 @@ func TestLinux_ExecStep(t *testing.T) { } } +func TestLinux_ExecStep_WaitTimeout(t *testing.T) { + _build := testBuild() + + gin.SetMode(gin.TestMode) + + s := httptest.NewServer(server.FakeHandler()) + defer s.Close() + + _client, err := vela.NewClient(s.URL, "", nil) + if err != nil { + t.Fatalf("unable to create Vela API client: %v", err) + } + + _docker, err := docker.NewMock() + if err != nil { + t.Fatalf("unable to create docker runtime engine: %v", err) + } + + streamRequests, done := message.MockStreamRequestsWithCancel(context.Background()) + defer done() + + _engine, err := New( + WithBuild(_build), + WithPipeline(new(pipeline.Build)), + WithRuntime(_docker), + WithVelaClient(_client), + withStreamRequests(streamRequests), + ) + if err != nil { + t.Fatalf("unable to create executor engine: %v", err) + } + + container := &pipeline.Container{ + ID: "step_github_octocat_1_wait-timeout", + Directory: "/vela/src/github.com/github/octocat", + Environment: map[string]string{"FOO": "bar"}, + Image: "alpine:latest", + Name: "echo", + Number: 1, + Pull: "not_present", + } + + stepEntry := api.StepFromBuildContainer(_build, container) + _engine.steps.Store(container.ID, stepEntry) + _engine.stepLogs.Store(container.ID, new(api.Log)) + + err = _engine.ExecStep(context.Background(), container) + if err == nil { + t.Fatalf("ExecStep should have returned err") + } + + if got := stepEntry.GetStatus(); got != constants.StatusFailure { + t.Errorf("step status = %s, want %s", got, constants.StatusFailure) + } + + if got := stepEntry.GetExitCode(); got != 137 { + t.Errorf("step exit code = %d, want %d", got, 137) + } + + if container.ExitCode != 137 { + t.Errorf("container exit code = %d, want %d", container.ExitCode, 137) + } + + if stepEntry.GetError() == "" { + t.Error("expected step error to be recorded") + } +} + +func TestLinux_ExecStep_WaitError(t *testing.T) { + _build := testBuild() + + gin.SetMode(gin.TestMode) + + s := httptest.NewServer(server.FakeHandler()) + defer s.Close() + + _client, err := vela.NewClient(s.URL, "", nil) + if err != nil { + t.Fatalf("unable to create Vela API client: %v", err) + } + + _docker, err := docker.NewMock() + if err != nil { + t.Fatalf("unable to create docker runtime engine: %v", err) + } + + streamRequests, done := message.MockStreamRequestsWithCancel(context.Background()) + defer done() + + _engine, err := New( + WithBuild(_build), + WithPipeline(new(pipeline.Build)), + WithRuntime(_docker), + WithVelaClient(_client), + withStreamRequests(streamRequests), + ) + if err != nil { + t.Fatalf("unable to create executor engine: %v", err) + } + + container := &pipeline.Container{ + ID: "step_github_octocat_1_wait-error", + Directory: "/vela/src/github.com/github/octocat", + Environment: map[string]string{"FOO": "bar"}, + Image: "alpine:latest", + Name: "echo", + Number: 1, + Pull: "not_present", + } + + stepEntry := api.StepFromBuildContainer(_build, container) + _engine.steps.Store(container.ID, stepEntry) + _engine.stepLogs.Store(container.ID, new(api.Log)) + + err = _engine.ExecStep(context.Background(), container) + if err == nil { + t.Fatalf("ExecStep should have returned err") + } + + if got := stepEntry.GetStatus(); got != constants.StatusError { + t.Errorf("step status = %s, want %s", got, constants.StatusError) + } + + if got := stepEntry.GetExitCode(); got != 1 { + t.Errorf("step exit code = %d, want %d", got, 1) + } + + if container.ExitCode != 1 { + t.Errorf("container exit code = %d, want %d", container.ExitCode, 1) + } + + if stepEntry.GetError() == "" { + t.Error("expected step error to be recorded") + } +} + func TestLinux_StreamStep(t *testing.T) { // setup types _build := testBuild() diff --git a/executor/local/step.go b/executor/local/step.go index b097aed9..a3bcb2d1 100644 --- a/executor/local/step.go +++ b/executor/local/step.go @@ -5,6 +5,7 @@ package local import ( "bufio" "context" + "errors" "fmt" "time" @@ -111,18 +112,59 @@ func (c *client) ExecStep(ctx context.Context, ctn *pipeline.Container) error { // wait for the runtime container err = c.Runtime.WaitContainer(ctx, ctn) if err != nil { + recordStepRuntimeError(ctn, _step, err) return err } // inspect the runtime container err = c.Runtime.InspectContainer(ctx, ctn) if err != nil { + recordStepRuntimeError(ctn, _step, err) return err } return nil } +// recordStepRuntimeError updates the in-memory step so Snapshot/Upload won't report success. +func recordStepRuntimeError(ctn *pipeline.Container, s *api.Step, err error) { + if s == nil || err == nil { + return + } + + s.SetError(err.Error()) + + exitCode := int32(0) + if ctn != nil { + exitCode = ctn.ExitCode + } + + switch { + case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded): + s.SetStatus(constants.StatusFailure) + + if exitCode == 0 { + exitCode = 137 + } + default: + s.SetStatus(constants.StatusError) + + if exitCode == 0 { + exitCode = 1 + } + } + + if exitCode != 0 { + if ctn != nil { + ctn.ExitCode = exitCode + } + + if s.GetExitCode() == 0 { + s.SetExitCode(exitCode) + } + } +} + // StreamStep tails the output for a step. func (c *client) StreamStep(ctx context.Context, ctn *pipeline.Container) error { // TODO: remove hardcoded reference diff --git a/executor/local/step_test.go b/executor/local/step_test.go index 5916b174..af89bdf4 100644 --- a/executor/local/step_test.go +++ b/executor/local/step_test.go @@ -8,6 +8,7 @@ import ( api "github.com/go-vela/server/api/types" "github.com/go-vela/server/compiler/types/pipeline" + "github.com/go-vela/server/constants" "github.com/go-vela/worker/internal/message" "github.com/go-vela/worker/runtime/docker" ) @@ -278,6 +279,110 @@ func TestLocal_ExecStep(t *testing.T) { } } +func TestLocal_ExecStep_WaitTimeout(t *testing.T) { + _build := testBuild() + + _runtime, err := docker.NewMock() + if err != nil { + t.Fatalf("unable to create runtime engine: %v", err) + } + + streamRequests, done := message.MockStreamRequestsWithCancel(context.Background()) + defer done() + + _engine, err := New( + WithBuild(_build), + WithPipeline(new(pipeline.Build)), + WithRuntime(_runtime), + withStreamRequests(streamRequests), + ) + if err != nil { + t.Fatalf("unable to create executor engine: %v", err) + } + + container := &pipeline.Container{ + ID: "step_github_octocat_1_wait-timeout", + Directory: "/vela/src/github.com/github/octocat", + Environment: map[string]string{"FOO": "bar"}, + Image: "alpine:latest", + Name: "echo", + Number: 1, + Pull: "not_present", + } + + stepEntry := api.StepFromBuildContainer(_build, container) + _engine.steps.Store(container.ID, stepEntry) + + err = _engine.ExecStep(context.Background(), container) + if err == nil { + t.Fatalf("ExecStep should have returned err") + } + + if got := stepEntry.GetStatus(); got != constants.StatusFailure { + t.Errorf("step status = %s, want %s", got, constants.StatusFailure) + } + + if container.ExitCode != 137 { + t.Errorf("container exit code = %d, want %d", container.ExitCode, 137) + } + + if stepEntry.GetError() == "" { + t.Error("expected step error to be recorded") + } +} + +func TestLocal_ExecStep_WaitError(t *testing.T) { + _build := testBuild() + + _runtime, err := docker.NewMock() + if err != nil { + t.Fatalf("unable to create runtime engine: %v", err) + } + + streamRequests, done := message.MockStreamRequestsWithCancel(context.Background()) + defer done() + + _engine, err := New( + WithBuild(_build), + WithPipeline(new(pipeline.Build)), + WithRuntime(_runtime), + withStreamRequests(streamRequests), + ) + if err != nil { + t.Fatalf("unable to create executor engine: %v", err) + } + + container := &pipeline.Container{ + ID: "step_github_octocat_1_wait-error", + Directory: "/vela/src/github.com/github/octocat", + Environment: map[string]string{"FOO": "bar"}, + Image: "alpine:latest", + Name: "echo", + Number: 1, + Pull: "not_present", + } + + stepEntry := api.StepFromBuildContainer(_build, container) + _engine.steps.Store(container.ID, stepEntry) + + err = _engine.ExecStep(context.Background(), container) + if err == nil { + t.Fatalf("ExecStep should have returned err") + } + + if got := stepEntry.GetStatus(); got != constants.StatusError { + t.Errorf("step status = %s, want %s", got, constants.StatusError) + } + + if container.ExitCode != 1 { + t.Errorf("container exit code = %d, want %d", container.ExitCode, 1) + } + + if stepEntry.GetError() == "" { + t.Error("expected step error to be recorded") + } +} + func TestLocal_StreamStep(t *testing.T) { // setup types _build := testBuild() diff --git a/mock/docker/container.go b/mock/docker/container.go index e1df4360..2a485aff 100644 --- a/mock/docker/container.go +++ b/mock/docker/container.go @@ -467,6 +467,18 @@ func (c *ContainerService) ContainerWait(ctx context.Context, ctn string, condit return ctnCh, errCh } + if strings.Contains(ctn, "wait-timeout") { + errCh <- context.DeadlineExceeded + + return ctnCh, errCh + } + + if strings.Contains(ctn, "wait-error") { + errCh <- errors.New("container wait error") + + return ctnCh, errCh + } + // create goroutine for responding to call go func() { // create response object to return