From c1f4ed0aba063771125302297912dec3c80ea555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zandr=C3=A9=20Witte?= Date: Thu, 11 Aug 2022 09:59:11 +0200 Subject: [PATCH 1/5] graceful shutdown for process-agent --- cmd/agent/collector.go | 2 +- cmd/agent/main_nodocker.go | 33 +++++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/cmd/agent/collector.go b/cmd/agent/collector.go index d22b127c..0ba63753 100644 --- a/cmd/agent/collector.go +++ b/cmd/agent/collector.go @@ -140,7 +140,7 @@ func (l *Collector) run(exit chan bool) { } log.Infof("Starting process-agent for host=%s, endpoints=%s, enabled checks=%v", l.cfg.HostName, eps, l.cfg.EnabledChecks) - go handleSignals(exit) + handleSignals(exit) heartbeat := time.NewTicker(15 * time.Second) queueSizeTicker := time.NewTicker(10 * time.Second) featuresTicker := time.NewTicker(5 * time.Second) diff --git a/cmd/agent/main_nodocker.go b/cmd/agent/main_nodocker.go index 1656fe10..ad6fd1dd 100644 --- a/cmd/agent/main_nodocker.go +++ b/cmd/agent/main_nodocker.go @@ -13,16 +13,29 @@ import ( // Handles signals - tells us whether we should exit. func handleSignals(exit chan bool) { - sigIn := make(chan os.Signal, 100) - signal.Notify(sigIn) - // unix only in all likelihood; but we don't care. - for sig := range sigIn { - switch sig { - case syscall.SIGINT, syscall.SIGTERM: - log.Criticalf("Caught signal '%s'; terminating.", sig) - close(exit) + signalCh := make(chan os.Signal, 1) + signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM) + + go func() { + // Set up the signals async so we can Start the agent + select { + case sig := <-signalCh: + log.Infof("Received signal '%s', shutting down...", sig) + signalCh <- nil default: - log.Warnf("Caught signal %s; continuing/ignoring.", sig) + // continue + } + }() + + // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. + // Go ignores SIGPIPE signals unless it is when stdout or stdout is closed, in this case the agent is stopped. + // We never want the agent to stop upon receiving SIGPIPE, so we intercept the SIGPIPE signals and just discard them. + sigpipeCh := make(chan os.Signal, 1) + signal.Notify(sigpipeCh, syscall.SIGPIPE) + go func() { + for range sigpipeCh { + // do nothing } - } + }() + } From ab1158a589578270a9ebd6160a4022753261eb33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zandr=C3=A9=20van=20Heerden?= Date: Fri, 12 Aug 2022 08:45:33 +0200 Subject: [PATCH 2/5] STAC-16788: Updated process agent graceful shutdown --- cmd/agent/main_docker.go | 35 ++++++++++++++++++++++------------- cmd/network-tracer/main.go | 34 +++++++++++++++++++++++----------- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/cmd/agent/main_docker.go b/cmd/agent/main_docker.go index 52495fdc..66bb92ce 100644 --- a/cmd/agent/main_docker.go +++ b/cmd/agent/main_docker.go @@ -14,19 +14,28 @@ import ( // Handles signals - tells us whether we should exit. func handleSignals(exit chan bool) { - sigIn := make(chan os.Signal, 100) - signal.Notify(sigIn) - // unix only in all likelihood; but we don't care. - for sig := range sigIn { - switch sig { - case syscall.SIGINT, syscall.SIGTERM: - log.Criticalf("Caught signal '%s'; terminating.", sig) - close(exit) - case syscall.SIGCHLD: - // Running docker.GetDockerStat() spins up / kills a new process - continue + signalCh := make(chan os.Signal, 1) + signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM) + + go func() { + // Set up the signals async so we can Start the agent + select { + case sig := <-signalCh: + log.Infof("Received signal '%s', shutting down...", sig) + signalCh <- nil default: - log.Warnf("Caught signal %s; continuing/ignoring.", sig) + // continue + } + }() + + // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. + // Go ignores SIGPIPE signals unless it is when stdout or stdout is closed, in this case the agent is stopped. + // We never want the agent to stop upon receiving SIGPIPE, so we intercept the SIGPIPE signals and just discard them. + sigpipeCh := make(chan os.Signal, 1) + signal.Notify(sigpipeCh, syscall.SIGPIPE) + go func() { + for range sigpipeCh { + // do nothing } - } + }() } diff --git a/cmd/network-tracer/main.go b/cmd/network-tracer/main.go index 5be268fa..b25f836b 100644 --- a/cmd/network-tracer/main.go +++ b/cmd/network-tracer/main.go @@ -93,7 +93,7 @@ func main() { // Handles signals, which tells us whether we should exit. e := make(chan bool) - go handleSignals(e) + handleSignals(e) <-e } @@ -106,18 +106,30 @@ func gracefulExit() { } func handleSignals(exit chan bool) { - sigIn := make(chan os.Signal, 100) - signal.Notify(sigIn) - // unix only in all likelihood; but we don't care. - for sig := range sigIn { - switch sig { - case syscall.SIGINT, syscall.SIGTERM, syscall.SIGKILL, syscall.SIGQUIT: - log.Criticalf("Caught signal '%s'; terminating.", sig) - close(exit) + signalCh := make(chan os.Signal, 1) + signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM) + + go func() { + // Set up the signals async so we can Start the agent + select { + case sig := <-signalCh: + log.Infof("Received signal '%s', shutting down...", sig) + signalCh <- nil default: - log.Warnf("Caught signal %s; continuing/ignoring.", sig) + // continue } - } + }() + + // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. + // Go ignores SIGPIPE signals unless it is when stdout or stdout is closed, in this case the agent is stopped. + // We never want the agent to stop upon receiving SIGPIPE, so we intercept the SIGPIPE signals and just discard them. + sigpipeCh := make(chan os.Signal, 1) + signal.Notify(sigpipeCh, syscall.SIGPIPE) + go func() { + for range sigpipeCh { + // do nothing + } + }() } // versionString returns the version information filled in at build time From d772a7ea0e9c90daa6067b72efd3c4306445bf68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zandr=C3=A9=20van=20Heerden?= Date: Fri, 12 Aug 2022 11:48:48 +0200 Subject: [PATCH 3/5] STAC-16788: Return true on exit channel --- cmd/agent/main_docker.go | 1 + cmd/agent/main_nodocker.go | 1 + cmd/network-tracer/main.go | 1 + 3 files changed, 3 insertions(+) diff --git a/cmd/agent/main_docker.go b/cmd/agent/main_docker.go index 66bb92ce..4877fabf 100644 --- a/cmd/agent/main_docker.go +++ b/cmd/agent/main_docker.go @@ -23,6 +23,7 @@ func handleSignals(exit chan bool) { case sig := <-signalCh: log.Infof("Received signal '%s', shutting down...", sig) signalCh <- nil + exit <- true default: // continue } diff --git a/cmd/agent/main_nodocker.go b/cmd/agent/main_nodocker.go index ad6fd1dd..47434bac 100644 --- a/cmd/agent/main_nodocker.go +++ b/cmd/agent/main_nodocker.go @@ -22,6 +22,7 @@ func handleSignals(exit chan bool) { case sig := <-signalCh: log.Infof("Received signal '%s', shutting down...", sig) signalCh <- nil + exit <- true default: // continue } diff --git a/cmd/network-tracer/main.go b/cmd/network-tracer/main.go index b25f836b..4f2deed6 100644 --- a/cmd/network-tracer/main.go +++ b/cmd/network-tracer/main.go @@ -115,6 +115,7 @@ func handleSignals(exit chan bool) { case sig := <-signalCh: log.Infof("Received signal '%s', shutting down...", sig) signalCh <- nil + exit <- true default: // continue } From 9c4da94aa8e665d112f7976c9fad5d8c2ba51d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zandr=C3=A9=20van=20Heerden?= Date: Tue, 16 Aug 2022 13:48:07 +0200 Subject: [PATCH 4/5] STAC-16788: Updated process agent graceful shutdown --- cmd/agent/collector.go | 15 ++++++------ cmd/agent/main_docker.go | 44 ++++++++++++++-------------------- cmd/agent/main_nodocker.go | 45 ++++++++++++++-------------------- cmd/network-tracer/main.go | 49 ++++++++++++++++---------------------- 4 files changed, 65 insertions(+), 88 deletions(-) diff --git a/cmd/agent/collector.go b/cmd/agent/collector.go index 0ba63753..0f6fa4b8 100644 --- a/cmd/agent/collector.go +++ b/cmd/agent/collector.go @@ -5,12 +5,6 @@ import ( "context" "encoding/json" "fmt" - "github.com/StackVista/stackstate-agent/pkg/aggregator" - "github.com/StackVista/stackstate-agent/pkg/batcher" - "github.com/StackVista/stackstate-agent/pkg/collector/check" - "github.com/StackVista/stackstate-agent/pkg/telemetry" - "github.com/StackVista/stackstate-agent/pkg/topology" - "github.com/StackVista/stackstate-process-agent/cmd/agent/features" "io" "io/ioutil" "math/rand" @@ -19,6 +13,13 @@ import ( "sync/atomic" "time" + "github.com/StackVista/stackstate-agent/pkg/aggregator" + "github.com/StackVista/stackstate-agent/pkg/batcher" + "github.com/StackVista/stackstate-agent/pkg/collector/check" + "github.com/StackVista/stackstate-agent/pkg/telemetry" + "github.com/StackVista/stackstate-agent/pkg/topology" + "github.com/StackVista/stackstate-process-agent/cmd/agent/features" + log "github.com/cihub/seelog" "github.com/StackVista/stackstate-process-agent/checks" @@ -140,7 +141,7 @@ func (l *Collector) run(exit chan bool) { } log.Infof("Starting process-agent for host=%s, endpoints=%s, enabled checks=%v", l.cfg.HostName, eps, l.cfg.EnabledChecks) - handleSignals(exit) + go HandleSignals(exit) heartbeat := time.NewTicker(15 * time.Second) queueSizeTicker := time.NewTicker(10 * time.Second) featuresTicker := time.NewTicker(5 * time.Second) diff --git a/cmd/agent/main_docker.go b/cmd/agent/main_docker.go index 4877fabf..3abc82c2 100644 --- a/cmd/agent/main_docker.go +++ b/cmd/agent/main_docker.go @@ -12,31 +12,23 @@ import ( log "github.com/cihub/seelog" ) -// Handles signals - tells us whether we should exit. -func handleSignals(exit chan bool) { - signalCh := make(chan os.Signal, 1) - signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM) - - go func() { - // Set up the signals async so we can Start the agent - select { - case sig := <-signalCh: - log.Infof("Received signal '%s', shutting down...", sig) - signalCh <- nil - exit <- true - default: - // continue - } - }() - - // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. - // Go ignores SIGPIPE signals unless it is when stdout or stdout is closed, in this case the agent is stopped. - // We never want the agent to stop upon receiving SIGPIPE, so we intercept the SIGPIPE signals and just discard them. - sigpipeCh := make(chan os.Signal, 1) - signal.Notify(sigpipeCh, syscall.SIGPIPE) - go func() { - for range sigpipeCh { - // do nothing +// HandleSignals tells us whether we should exit. +func HandleSignals(exit chan bool) { + sigIn := make(chan os.Signal, 100) + signal.Notify(sigIn, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE) + // unix only in all likelihood; but we don't care. + for sig := range sigIn { + switch sig { + case syscall.SIGINT, syscall.SIGTERM: + log.Infof("Caught signal '%s'; terminating.", sig) + close(exit) + return + case syscall.SIGPIPE: + // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. + // Go ignores SIGPIPE signals unless it is when stderr or stdout is closed, in this case the agent is stopped. + // We never want the agent to stop upon receiving SIGPIPE, so we intercept the SIGPIPE signals and just discard them. + // See https://golang.org/pkg/os/signal/#hdr-SIGPIPE + continue } - }() + } } diff --git a/cmd/agent/main_nodocker.go b/cmd/agent/main_nodocker.go index 47434bac..926334c4 100644 --- a/cmd/agent/main_nodocker.go +++ b/cmd/agent/main_nodocker.go @@ -11,32 +11,23 @@ import ( log "github.com/cihub/seelog" ) -// Handles signals - tells us whether we should exit. -func handleSignals(exit chan bool) { - signalCh := make(chan os.Signal, 1) - signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM) - - go func() { - // Set up the signals async so we can Start the agent - select { - case sig := <-signalCh: - log.Infof("Received signal '%s', shutting down...", sig) - signalCh <- nil - exit <- true - default: - // continue - } - }() - - // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. - // Go ignores SIGPIPE signals unless it is when stdout or stdout is closed, in this case the agent is stopped. - // We never want the agent to stop upon receiving SIGPIPE, so we intercept the SIGPIPE signals and just discard them. - sigpipeCh := make(chan os.Signal, 1) - signal.Notify(sigpipeCh, syscall.SIGPIPE) - go func() { - for range sigpipeCh { - // do nothing +// HandleSignals tells us whether we should exit. +func HandleSignals(exit chan bool) { + sigIn := make(chan os.Signal, 100) + signal.Notify(sigIn, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE) + // unix only in all likelihood; but we don't care. + for sig := range sigIn { + switch sig { + case syscall.SIGINT, syscall.SIGTERM: + log.Infof("Caught signal '%s'; terminating.", sig) + close(exit) + return + case syscall.SIGPIPE: + // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. + // Go ignores SIGPIPE signals unless it is when stderr or stdout is closed, in this case the agent is stopped. + // We never want the agent to stop upon receiving SIGPIPE, so we intercept the SIGPIPE signals and just discard them. + // See https://golang.org/pkg/os/signal/#hdr-SIGPIPE + continue } - }() - + } } diff --git a/cmd/network-tracer/main.go b/cmd/network-tracer/main.go index 4f2deed6..2c0d0505 100644 --- a/cmd/network-tracer/main.go +++ b/cmd/network-tracer/main.go @@ -92,9 +92,9 @@ func main() { log.Infof("network tracer started") // Handles signals, which tells us whether we should exit. - e := make(chan bool) - handleSignals(e) - <-e + exit := make(chan bool) + go HandleSignals(exit) + <-exit } func gracefulExit() { @@ -105,32 +105,25 @@ func gracefulExit() { os.Exit(0) } -func handleSignals(exit chan bool) { - signalCh := make(chan os.Signal, 1) - signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM) - - go func() { - // Set up the signals async so we can Start the agent - select { - case sig := <-signalCh: - log.Infof("Received signal '%s', shutting down...", sig) - signalCh <- nil - exit <- true - default: - // continue +// HandleSignals tells us whether we should exit. +func HandleSignals(exit chan bool) { + sigIn := make(chan os.Signal, 100) + signal.Notify(sigIn, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE) + // unix only in all likelihood; but we don't care. + for sig := range sigIn { + switch sig { + case syscall.SIGINT, syscall.SIGTERM: + log.Infof("Caught signal '%s'; terminating.", sig) + close(exit) + return + case syscall.SIGPIPE: + // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. + // Go ignores SIGPIPE signals unless it is when stderr or stdout is closed, in this case the agent is stopped. + // We never want the agent to stop upon receiving SIGPIPE, so we intercept the SIGPIPE signals and just discard them. + // See https://golang.org/pkg/os/signal/#hdr-SIGPIPE + continue } - }() - - // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. - // Go ignores SIGPIPE signals unless it is when stdout or stdout is closed, in this case the agent is stopped. - // We never want the agent to stop upon receiving SIGPIPE, so we intercept the SIGPIPE signals and just discard them. - sigpipeCh := make(chan os.Signal, 1) - signal.Notify(sigpipeCh, syscall.SIGPIPE) - go func() { - for range sigpipeCh { - // do nothing - } - }() + } } // versionString returns the version information filled in at build time From f7973bf0f65580668fe8c176dfca50e6fc24e3c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zandr=C3=A9=20van=20Heerden?= Date: Wed, 17 Aug 2022 09:50:42 +0200 Subject: [PATCH 5/5] STAC-16788: Updated process agent graceful shutdown --- cmd/agent/main_docker.go | 2 +- cmd/agent/main_nodocker.go | 2 +- cmd/network-tracer/main.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/agent/main_docker.go b/cmd/agent/main_docker.go index 3abc82c2..1b67a3b8 100644 --- a/cmd/agent/main_docker.go +++ b/cmd/agent/main_docker.go @@ -21,7 +21,7 @@ func HandleSignals(exit chan bool) { switch sig { case syscall.SIGINT, syscall.SIGTERM: log.Infof("Caught signal '%s'; terminating.", sig) - close(exit) + exit <- true return case syscall.SIGPIPE: // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. diff --git a/cmd/agent/main_nodocker.go b/cmd/agent/main_nodocker.go index 926334c4..c44fe3f6 100644 --- a/cmd/agent/main_nodocker.go +++ b/cmd/agent/main_nodocker.go @@ -20,7 +20,7 @@ func HandleSignals(exit chan bool) { switch sig { case syscall.SIGINT, syscall.SIGTERM: log.Infof("Caught signal '%s'; terminating.", sig) - close(exit) + exit <- true return case syscall.SIGPIPE: // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal. diff --git a/cmd/network-tracer/main.go b/cmd/network-tracer/main.go index 2c0d0505..88bc12d5 100644 --- a/cmd/network-tracer/main.go +++ b/cmd/network-tracer/main.go @@ -114,7 +114,7 @@ func HandleSignals(exit chan bool) { switch sig { case syscall.SIGINT, syscall.SIGTERM: log.Infof("Caught signal '%s'; terminating.", sig) - close(exit) + exit <- true return case syscall.SIGPIPE: // By default systemd redirects the stdout to journald. When journald is stopped or crashes we receive a SIGPIPE signal.