From b35653dec132268b149494efba8c4aa1aa6bcd55 Mon Sep 17 00:00:00 2001 From: Piotr Konopka Date: Mon, 10 Nov 2025 10:51:50 +0100 Subject: [PATCH] Improvements in logging failures of auto-environments - Environment failure is reported as an Error, not Warn - we simplify "MesosCommand_Transition timed out for task 2xhyfnmHttq" to "Transition timed out", so the full error is more readable. For example: "Transition canceled with error: CONFIGURE could not complete for critical tasks, errors: task 'readout' on alio2-cr1-mvs03 (id 2y3E83DDK6E) failed with error: Transition timed out; task 'stfb' on alio2-cr1-mvs03 (id 2y3E83DDeaA) failed with error: Transition timed out; task 'stfs' on alio2-cr1-mvs03 (id 2y3E83DDz46) failed with error: Transition timed out" Closes OCTRL-1059. --- core/controlcommands/mesoscommandservent.go | 4 +++- core/environment/manager.go | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/core/controlcommands/mesoscommandservent.go b/core/controlcommands/mesoscommandservent.go index 2185eea2..4eded55e 100644 --- a/core/controlcommands/mesoscommandservent.go +++ b/core/controlcommands/mesoscommandservent.go @@ -26,6 +26,7 @@ package controlcommands import ( "fmt" + "strings" "sync" "time" @@ -144,7 +145,8 @@ func (s *Servent) RunCommand(cmd MesosCommand, receiver MesosCommandTarget) (Mes // By the time we get here, ProcessResponse should have already added a Response to the // pending call, and removed it from servent.pending. case <-time.After(cmd.GetResponseTimeout()): - call.Error = fmt.Errorf("%s timed out for task %s", cmd.GetName(), receiver.TaskId.Value) + userFriendlyCommandName, _ := strings.CutPrefix(cmd.GetName(), "MesosCommand_") + call.Error = fmt.Errorf("%s timed out", userFriendlyCommandName) log.WithPrefix("servent"). WithField("partition", cmd.GetEnvironmentId().String()). 
diff --git a/core/environment/manager.go b/core/environment/manager.go index 33ad5c09..8546bd0c 100644 --- a/core/environment/manager.go +++ b/core/environment/manager.go @@ -487,7 +487,7 @@ func (envs *Manager) CreateEnvironment(workflowPath string, userVars map[string] log.WithField("state", envState). WithField("partition", env.Id().String()). WithError(err). - Warnf("auto-transitioning environment failed %s, cleanup in progress", op) + Errorf("auto-transitioning environment failed %s, cleanup in progress", op) the.EventWriterWithTopic(topic.Environment).WriteEvent( NewEnvGoErrorEvent(env, fmt.Sprintf("%s failed: %v", op, err)), @@ -1460,7 +1460,7 @@ func (envs *Manager) CreateAutoEnvironment(workflowPath string, userVars map[str log.WithField("state", envState). WithField("partition", env.Id().String()). WithError(err). - Warnf("auto-transitioning environment failed %s, cleanup in progress", op) + Errorf("auto-transitioning environment failed %s, cleanup in progress", op) the.EventWriterWithTopic(topic.Environment).WriteEvent( NewEnvGoErrorEvent(env, fmt.Sprintf("%s failed: %v", op, err)),