diff --git a/embed/templates/scripts/run_prometheus.sh.tpl b/embed/templates/scripts/run_prometheus.sh.tpl index b9d65b516b..3cc9cc591e 100644 --- a/embed/templates/scripts/run_prometheus.sh.tpl +++ b/embed/templates/scripts/run_prometheus.sh.tpl @@ -55,5 +55,10 @@ exec bin/prometheus/prometheus \ {{- end}} {{- end}} {{- if not .EnablePromAgentMode}} - --storage.tsdb.retention="{{.Retention}}" +{{- if .RetentionSize}} + --storage.tsdb.retention.size="{{.RetentionSize}}" +{{- end}} +{{- if .RetentionTime}} + --storage.tsdb.retention.time="{{.RetentionTime}}" +{{- end}} {{- end}} diff --git a/pkg/cluster/spec/monitoring.go b/pkg/cluster/spec/monitoring.go index e295dfa1e3..9386344ca9 100644 --- a/pkg/cluster/spec/monitoring.go +++ b/pkg/cluster/spec/monitoring.go @@ -30,6 +30,7 @@ import ( "github.com/pingcap/tiup/pkg/cluster/ctxt" "github.com/pingcap/tiup/pkg/cluster/template/config" "github.com/pingcap/tiup/pkg/cluster/template/scripts" + logprinter "github.com/pingcap/tiup/pkg/logger/printer" "github.com/pingcap/tiup/pkg/meta" "github.com/pingcap/tiup/pkg/set" "github.com/pingcap/tiup/pkg/utils" @@ -54,7 +55,9 @@ type PrometheusSpec struct { RemoteConfig Remote `yaml:"remote_config,omitempty" validate:"remote_config:ignore"` ExternalAlertmanagers []ExternalAlertmanager `yaml:"external_alertmanagers" validate:"external_alertmanagers:ignore"` PushgatewayAddrs []string `yaml:"pushgateway_addrs,omitempty" validate:"pushgateway_addrs:ignore"` - Retention string `yaml:"storage_retention,omitempty" validate:"storage_retention:editable"` + Retention string `yaml:"storage_retention,omitempty" validate:"storage_retention:editable"` // deprecated + RetentionSize string `yaml:"storage_retention_size,omitempty" validate:"storage_retention_size:editable"` + RetentionTime string `yaml:"storage_retention_time,omitempty" validate:"storage_retention_time:editable"` ResourceControl meta.ResourceControl `yaml:"resource_control,omitempty" validate:"resource_control:editable"` Arch 
string `yaml:"arch,omitempty"` OS string `yaml:"os,omitempty"` @@ -270,7 +273,6 @@ func (i *MonitorInstance) InitConfig( cfg := &scripts.PrometheusScript{ Port: spec.Port, WebExternalURL: fmt.Sprintf("http://%s", utils.JoinHostPort(spec.Host, spec.Port)), - Retention: getRetention(spec.Retention), EnableNG: spec.NgPort > 0, EnablePromAgentMode: spec.EnablePromAgentMode, // Get from spec directly @@ -282,6 +284,14 @@ func (i *MonitorInstance) InitConfig( AdditionalArgs: spec.AdditionalArgs, } + // Set retention policy + logPtr := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger) + if spec.RetentionTime == "" { // keep backward compatibility + cfg.RetentionTime = getRetentionTime(logPtr, spec.Retention) + } else { + cfg.RetentionTime = getRetentionTime(logPtr, spec.RetentionTime) + } + cfg.RetentionSize = getRetentionSize(logPtr, spec.RetentionSize) // Check if agent mode is enabled in additional arguments if !cfg.EnablePromAgentMode { @@ -675,9 +685,25 @@ func mergeAdditionalScrapeConf(source string, addition map[string]any) error { return utils.WriteFile(source, bytes, 0644) } -func getRetention(retention string) string { +func getRetentionSize(l *logprinter.Logger, retention string) string { + retention = strings.ToUpper(strings.TrimSpace(retention)) + valid, _ := regexp.MatchString("^[1-9]\\d*(B|KB|MB|GB|TB|PB|EB)$", retention) + if retention == "" || !valid { + if !valid && l != nil { + l.Warnf("invalid retention size %s, ignored.", retention) + } + return "" + } + return retention +} + +func getRetentionTime(l *logprinter.Logger, retention string) string { + retention = strings.TrimSpace(retention) valid, _ := regexp.MatchString("^[1-9]\\d*d$", retention) if retention == "" || !valid { + if !valid && l != nil { + l.Warnf("invalid retention time %s, using 30d as default", retention) + } return "30d" } return retention diff --git a/pkg/cluster/spec/monitoring_test.go b/pkg/cluster/spec/monitoring_test.go index 6d3a33addf..ce8b34e355 100644 --- 
a/pkg/cluster/spec/monitoring_test.go +++ b/pkg/cluster/spec/monitoring_test.go @@ -229,32 +229,68 @@ scrape_configs: func TestGetRetention(t *testing.T) { var val string - val = getRetention("-1d") + val = getRetentionTime(nil, "-1d") assert.EqualValues(t, "30d", val) - val = getRetention("0d") + val = getRetentionTime(nil, "0d") assert.EqualValues(t, "30d", val) - val = getRetention("01d") + val = getRetentionTime(nil, "01d") assert.EqualValues(t, "30d", val) - val = getRetention("1dd") + val = getRetentionTime(nil, "1dd") assert.EqualValues(t, "30d", val) - val = getRetention("*1d") + val = getRetentionTime(nil, "*1d") assert.EqualValues(t, "30d", val) - val = getRetention("1d ") - assert.EqualValues(t, "30d", val) + val = getRetentionTime(nil, "1d ") + assert.EqualValues(t, "1d", val) + + val = getRetentionTime(nil, " 1d") + assert.EqualValues(t, "1d", val) - val = getRetention("ddd") + val = getRetentionTime(nil, "ddd") assert.EqualValues(t, "30d", val) - val = getRetention("60d") + val = getRetentionTime(nil, "60d") assert.EqualValues(t, "60d", val) - val = getRetention("999d") + val = getRetentionTime(nil, "999d") assert.EqualValues(t, "999d", val) + + val = getRetentionSize(nil, "-1MB") + assert.EqualValues(t, "", val) + + val = getRetentionSize(nil, "30d") + assert.EqualValues(t, "", val) + + val = getRetentionSize(nil, "1k") + assert.EqualValues(t, "", val) + + val = getRetentionSize(nil, "01G") + assert.EqualValues(t, "", val) + + val = getRetentionSize(nil, "233mb") + assert.EqualValues(t, "233MB", val) + + val = getRetentionSize(nil, "*1GB") + assert.EqualValues(t, "", val) + + val = getRetentionSize(nil, "20GB ") + assert.EqualValues(t, "20GB", val) + + val = getRetentionSize(nil, " 20GB") + assert.EqualValues(t, "20GB", val) + + val = getRetentionSize(nil, "3TB") + assert.EqualValues(t, "3TB", val) + + val = getRetentionSize(nil, "30GB") + assert.EqualValues(t, "30GB", val) + + val = getRetentionSize(nil, "1EB") + assert.EqualValues(t, "1EB", val) } 
// TestHandleRemoteWrite verifies that remote write configurations are properly handled diff --git a/pkg/cluster/template/scripts/monitoring.go b/pkg/cluster/template/scripts/monitoring.go index 2e40e38bd4..3bdec0f439 100644 --- a/pkg/cluster/template/scripts/monitoring.go +++ b/pkg/cluster/template/scripts/monitoring.go @@ -26,7 +26,8 @@ import ( type PrometheusScript struct { Port int WebExternalURL string - Retention string + RetentionSize string + RetentionTime string EnableNG bool EnablePromAgentMode bool diff --git a/pkg/cluster/template/scripts/monitoring_test.go b/pkg/cluster/template/scripts/monitoring_test.go index c2b2f664b2..f11e8df6d4 100644 --- a/pkg/cluster/template/scripts/monitoring_test.go +++ b/pkg/cluster/template/scripts/monitoring_test.go @@ -32,7 +32,8 @@ func TestPrometheusScriptWithAgentMode(t *testing.T) { script := &PrometheusScript{ Port: 9090, WebExternalURL: "http://localhost:9090", - Retention: "30d", + RetentionTime: "30d", + RetentionSize: "100GB", EnableNG: false, EnablePromAgentMode: true, DeployDir: "/deploy",