From 2a7945e70694c8179397c6c550dbdac86c81c6c6 Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Wed, 21 Jan 2026 17:38:19 +0000
Subject: [PATCH 1/3] Add scheduled rolling restarts for API and Brainstore

Co-authored-by: mike02
---
Review note: $labels is merged into a fresh dict (`merge (dict) ...`) so that
rendering scheduled-restart.yaml does not mutate .Values.global.labels, which
is shared by every template in the same render. Label precedence is unchanged.

 braintrust/Chart.yaml                       |   2 +-
 braintrust/README.md                        |  29 +++
 braintrust/templates/scheduled-restart.yaml | 213 ++++++++++++++++++++
 braintrust/values.yaml                      |  33 +++
 4 files changed, 276 insertions(+), 1 deletion(-)
 create mode 100644 braintrust/templates/scheduled-restart.yaml

diff --git a/braintrust/Chart.yaml b/braintrust/Chart.yaml
index 7a1c171..9b265a6 100644
--- a/braintrust/Chart.yaml
+++ b/braintrust/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v2
 name: braintrust
-version: 3.0.6
+version: 3.0.7
 description: A Helm chart to run the Braintrust services for the self-hosted data plane
 type: application
 home: https://github.com/braintrustdata/helm
diff --git a/braintrust/README.md b/braintrust/README.md
index da4bbb6..d8e06b6 100644
--- a/braintrust/README.md
+++ b/braintrust/README.md
@@ -18,6 +18,35 @@ The `braintrust-secrets` secret must contain the following keys:
 | `GCS_ACCESS_KEY_ID` | Google HMAC Access ID string | Valid S3 API Key Id (only required if `cloud` is `google`) |
 | `GCS_SECRET_ACCESS_KEY` | Google HMAC Secret string | Valid S3 Secret string (only required if `cloud` is `google`) |
 
+## Scheduled Restarts
+
+By default, the chart creates CronJobs that perform rolling restarts of the API,
+Brainstore reader, and Brainstore writer Deployments once per hour using
+`kubectl rollout restart`. This keeps restarts graceful and leverages the
+Deployment rolling update strategy.
+
+You can customize or disable the schedules:
+
+```yaml
+scheduledRestart:
+  enabled: true
+  schedules:
+    api: "0 * * * *"
+    brainstoreReader: "10 * * * *"
+    brainstoreWriter: "20 * * * *"
+  targets:
+    brainstoreWriter: false  # Opt out of writer restarts
+  image:
+    tag: "v1.29.6"  # Optional: pin kubectl version
+```
+
+If `scheduledRestart.image.tag` is left blank, the chart defaults to the
+cluster's Kubernetes version for compatibility.
+
+If you already manage RBAC or service accounts, set
+`scheduledRestart.serviceAccount.create` and `scheduledRestart.rbac.create` to
+false and provide a `scheduledRestart.serviceAccount.name`.
+
 ## Azure Key Vault Driver Integration
 
 If you're using Azure, the Azure Key Vault CSI driver is default enabled and will automatically sync secrets from Azure Key Vault into Kubernetes. This eliminates the need to manually create and manage the `braintrust-secrets` Kubernetes secret.
diff --git a/braintrust/templates/scheduled-restart.yaml b/braintrust/templates/scheduled-restart.yaml
new file mode 100644
index 0000000..545eb06
--- /dev/null
+++ b/braintrust/templates/scheduled-restart.yaml
@@ -0,0 +1,213 @@
+{{- if .Values.scheduledRestart.enabled }}
+{{- $namespace := include "braintrust.namespace" . }}
+{{- $saName := default "braintrust-restart" .Values.scheduledRestart.serviceAccount.name }}
+{{- $labels := merge (dict) .Values.global.labels .Values.scheduledRestart.labels }}
+{{- if .Values.scheduledRestart.serviceAccount.create }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ $saName }}
+  namespace: {{ $namespace }}
+  {{- with .Values.scheduledRestart.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with $labels }}
+  labels:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+{{- end }}
+{{- if .Values.scheduledRestart.rbac.create }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ $saName }}
+  namespace: {{ $namespace }}
+  {{- with $labels }}
+  labels:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+rules:
+  - apiGroups: ["apps"]
+    resources: ["deployments"]
+    verbs: ["get", "list", "patch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ $saName }}
+  namespace: {{ $namespace }}
+  {{- with $labels }}
+  labels:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ $saName }}
+    namespace: {{ $namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: {{ $saName }}
+{{- end }}
+{{- if .Values.scheduledRestart.targets.api }}
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: {{ .Values.api.name }}-restart
+  namespace: {{ $namespace }}
+  {{- with $labels }}
+  labels:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with .Values.scheduledRestart.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  schedule: {{ required "scheduledRestart.schedules.api is required" .Values.scheduledRestart.schedules.api | quote }}
+  concurrencyPolicy: {{ .Values.scheduledRestart.concurrencyPolicy | default "Forbid" }}
+  startingDeadlineSeconds: {{ .Values.scheduledRestart.startingDeadlineSeconds | default 600 }}
+  successfulJobsHistoryLimit: {{ .Values.scheduledRestart.successfulJobsHistoryLimit | default 1 }}
+  failedJobsHistoryLimit: {{ .Values.scheduledRestart.failedJobsHistoryLimit | default 3 }}
+  jobTemplate:
+    spec:
+      backoffLimit: {{ .Values.scheduledRestart.backoffLimit | default 1 }}
+      {{- with .Values.scheduledRestart.ttlSecondsAfterFinished }}
+      ttlSecondsAfterFinished: {{ . }}
+      {{- end }}
+      template:
+        metadata:
+          {{- with $labels }}
+          labels:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+        spec:
+          serviceAccountName: {{ $saName }}
+          restartPolicy: Never
+          containers:
+            - name: restart
+              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default .Capabilities.KubeVersion.Version .Values.scheduledRestart.image.tag }}"
+              imagePullPolicy: {{ .Values.scheduledRestart.image.pullPolicy }}
+              command:
+                - kubectl
+              args:
+                - rollout
+                - restart
+                - deployment/{{ .Values.api.name }}
+                - --namespace
+                - {{ $namespace | quote }}
+              {{- with .Values.scheduledRestart.resources }}
+              resources:
+                {{- toYaml . | nindent 16 }}
+              {{- end }}
+{{- end }}
+{{- if .Values.scheduledRestart.targets.brainstoreReader }}
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: {{ .Values.brainstore.reader.name }}-restart
+  namespace: {{ $namespace }}
+  {{- with $labels }}
+  labels:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with .Values.scheduledRestart.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  schedule: {{ required "scheduledRestart.schedules.brainstoreReader is required" .Values.scheduledRestart.schedules.brainstoreReader | quote }}
+  concurrencyPolicy: {{ .Values.scheduledRestart.concurrencyPolicy | default "Forbid" }}
+  startingDeadlineSeconds: {{ .Values.scheduledRestart.startingDeadlineSeconds | default 600 }}
+  successfulJobsHistoryLimit: {{ .Values.scheduledRestart.successfulJobsHistoryLimit | default 1 }}
+  failedJobsHistoryLimit: {{ .Values.scheduledRestart.failedJobsHistoryLimit | default 3 }}
+  jobTemplate:
+    spec:
+      backoffLimit: {{ .Values.scheduledRestart.backoffLimit | default 1 }}
+      {{- with .Values.scheduledRestart.ttlSecondsAfterFinished }}
+      ttlSecondsAfterFinished: {{ . }}
+      {{- end }}
+      template:
+        metadata:
+          {{- with $labels }}
+          labels:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+        spec:
+          serviceAccountName: {{ $saName }}
+          restartPolicy: Never
+          containers:
+            - name: restart
+              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default .Capabilities.KubeVersion.Version .Values.scheduledRestart.image.tag }}"
+              imagePullPolicy: {{ .Values.scheduledRestart.image.pullPolicy }}
+              command:
+                - kubectl
+              args:
+                - rollout
+                - restart
+                - deployment/{{ .Values.brainstore.reader.name }}
+                - --namespace
+                - {{ $namespace | quote }}
+              {{- with .Values.scheduledRestart.resources }}
+              resources:
+                {{- toYaml . | nindent 16 }}
+              {{- end }}
+{{- end }}
+{{- if .Values.scheduledRestart.targets.brainstoreWriter }}
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: {{ .Values.brainstore.writer.name }}-restart
+  namespace: {{ $namespace }}
+  {{- with $labels }}
+  labels:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with .Values.scheduledRestart.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  schedule: {{ required "scheduledRestart.schedules.brainstoreWriter is required" .Values.scheduledRestart.schedules.brainstoreWriter | quote }}
+  concurrencyPolicy: {{ .Values.scheduledRestart.concurrencyPolicy | default "Forbid" }}
+  startingDeadlineSeconds: {{ .Values.scheduledRestart.startingDeadlineSeconds | default 600 }}
+  successfulJobsHistoryLimit: {{ .Values.scheduledRestart.successfulJobsHistoryLimit | default 1 }}
+  failedJobsHistoryLimit: {{ .Values.scheduledRestart.failedJobsHistoryLimit | default 3 }}
+  jobTemplate:
+    spec:
+      backoffLimit: {{ .Values.scheduledRestart.backoffLimit | default 1 }}
+      {{- with .Values.scheduledRestart.ttlSecondsAfterFinished }}
+      ttlSecondsAfterFinished: {{ . }}
+      {{- end }}
+      template:
+        metadata:
+          {{- with $labels }}
+          labels:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+        spec:
+          serviceAccountName: {{ $saName }}
+          restartPolicy: Never
+          containers:
+            - name: restart
+              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default .Capabilities.KubeVersion.Version .Values.scheduledRestart.image.tag }}"
+              imagePullPolicy: {{ .Values.scheduledRestart.image.pullPolicy }}
+              command:
+                - kubectl
+              args:
+                - rollout
+                - restart
+                - deployment/{{ .Values.brainstore.writer.name }}
+                - --namespace
+                - {{ $namespace | quote }}
+              {{- with .Values.scheduledRestart.resources }}
+              resources:
+                {{- toYaml . | nindent 16 }}
+              {{- end }}
+{{- end }}
+{{- end }}
diff --git a/braintrust/values.yaml b/braintrust/values.yaml
index e3847d8..8d11c14 100644
--- a/braintrust/values.yaml
+++ b/braintrust/values.yaml
@@ -68,6 +68,39 @@ objectStorage:
     # Single API bucket with paths for responses and code bundles
     apiBucket: ""
 
+scheduledRestart:
+  # Perform rolling restarts on a schedule using kubectl rollout restart.
+  enabled: true
+  targets:
+    api: true
+    brainstoreReader: true
+    brainstoreWriter: true
+  schedules:
+    api: "0 * * * *"
+    brainstoreReader: "10 * * * *"
+    brainstoreWriter: "20 * * * *"
+  image:
+    repository: "registry.k8s.io/kubectl"
+    # Defaults to the cluster Kubernetes version when left blank.
+    tag: ""
+    pullPolicy: IfNotPresent
+  serviceAccount:
+    create: true
+    name: "braintrust-restart"
+    annotations: {}
+  rbac:
+    create: true
+  concurrencyPolicy: "Forbid"
+  startingDeadlineSeconds: 600
+  successfulJobsHistoryLimit: 1
+  failedJobsHistoryLimit: 3
+  backoffLimit: 1
+  # Set to enable automatic cleanup of finished jobs.
+  ttlSecondsAfterFinished: ""
+  resources: {}
+  labels: {}
+  annotations: {}
+
 api:
   name: "braintrust-api"
   labels: {}

From fb5668e2ae902dd8278e2475ae224e74bae6935d Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Wed, 21 Jan 2026 17:52:00 +0000
Subject: [PATCH 2/3] Use chainguard kubectl image and keep chart version

Co-authored-by: mike02
---
 braintrust/Chart.yaml                       | 2 +-
 braintrust/README.md                        | 6 +++---
 braintrust/templates/scheduled-restart.yaml | 6 +++---
 braintrust/values.yaml                      | 5 ++---
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/braintrust/Chart.yaml b/braintrust/Chart.yaml
index 9b265a6..7a1c171 100644
--- a/braintrust/Chart.yaml
+++ b/braintrust/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v2
 name: braintrust
-version: 3.0.7
+version: 3.0.6
 description: A Helm chart to run the Braintrust services for the self-hosted data plane
 type: application
 home: https://github.com/braintrustdata/helm
diff --git a/braintrust/README.md b/braintrust/README.md
index d8e06b6..b7d5163 100644
--- a/braintrust/README.md
+++ b/braintrust/README.md
@@ -37,11 +37,11 @@ scheduledRestart:
   targets:
     brainstoreWriter: false  # Opt out of writer restarts
   image:
-    tag: "v1.29.6"  # Optional: pin kubectl version
+    repository: "chainguard/kubectl"
+    tag: "latest"  # Optional: pin a specific version
 ```
 
-If `scheduledRestart.image.tag` is left blank, the chart defaults to the
-cluster's Kubernetes version for compatibility.
+Defaults to `chainguard/kubectl:latest` from Docker Hub.
 
 If you already manage RBAC or service accounts, set
 `scheduledRestart.serviceAccount.create` and `scheduledRestart.rbac.create` to
diff --git a/braintrust/templates/scheduled-restart.yaml b/braintrust/templates/scheduled-restart.yaml
index 545eb06..5105ef4 100644
--- a/braintrust/templates/scheduled-restart.yaml
+++ b/braintrust/templates/scheduled-restart.yaml
@@ -89,7 +89,7 @@ spec:
           restartPolicy: Never
           containers:
             - name: restart
-              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default .Capabilities.KubeVersion.Version .Values.scheduledRestart.image.tag }}"
+              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default "latest" .Values.scheduledRestart.image.tag }}"
               imagePullPolicy: {{ .Values.scheduledRestart.image.pullPolicy }}
               command:
                 - kubectl
@@ -142,7 +142,7 @@ spec:
           restartPolicy: Never
           containers:
             - name: restart
-              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default .Capabilities.KubeVersion.Version .Values.scheduledRestart.image.tag }}"
+              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default "latest" .Values.scheduledRestart.image.tag }}"
               imagePullPolicy: {{ .Values.scheduledRestart.image.pullPolicy }}
               command:
                 - kubectl
@@ -195,7 +195,7 @@ spec:
           restartPolicy: Never
           containers:
             - name: restart
-              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default .Capabilities.KubeVersion.Version .Values.scheduledRestart.image.tag }}"
+              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default "latest" .Values.scheduledRestart.image.tag }}"
               imagePullPolicy: {{ .Values.scheduledRestart.image.pullPolicy }}
               command:
                 - kubectl
diff --git a/braintrust/values.yaml b/braintrust/values.yaml
index 8d11c14..0a12447 100644
--- a/braintrust/values.yaml
+++ b/braintrust/values.yaml
@@ -80,9 +80,8 @@ scheduledRestart:
     brainstoreReader: "10 * * * *"
     brainstoreWriter: "20 * * * *"
   image:
-    repository: "registry.k8s.io/kubectl"
-    # Defaults to the cluster Kubernetes version when left blank.
-    tag: ""
+    repository: "chainguard/kubectl"
+    tag: "latest"
     pullPolicy: IfNotPresent
   serviceAccount:
     create: true

From 3c13a2373be1fb982b04451e3d8a6e73534653f8 Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Wed, 21 Jan 2026 17:58:50 +0000
Subject: [PATCH 3/3] Consolidate scheduled restarts into one cronjob

Co-authored-by: mike02
---
 braintrust/README.md                        |   7 +-
 braintrust/templates/scheduled-restart.yaml | 115 ++------------------
 braintrust/values.yaml                      |   5 +-
 3 files changed, 12 insertions(+), 115 deletions(-)

diff --git a/braintrust/README.md b/braintrust/README.md
index b7d5163..89c1d8a 100644
--- a/braintrust/README.md
+++ b/braintrust/README.md
@@ -20,7 +20,7 @@ The `braintrust-secrets` secret must contain the following keys:
 
 ## Scheduled Restarts
 
-By default, the chart creates CronJobs that perform rolling restarts of the API,
+By default, the chart creates a CronJob that performs rolling restarts of the API,
 Brainstore reader, and Brainstore writer Deployments once per hour using
 `kubectl rollout restart`. This keeps restarts graceful and leverages the
 Deployment rolling update strategy.
@@ -30,10 +30,7 @@ You can customize or disable the schedules:
 ```yaml
 scheduledRestart:
   enabled: true
-  schedules:
-    api: "0 * * * *"
-    brainstoreReader: "10 * * * *"
-    brainstoreWriter: "20 * * * *"
+  schedule: "0 * * * *"
   targets:
     brainstoreWriter: false  # Opt out of writer restarts
   image:
diff --git a/braintrust/templates/scheduled-restart.yaml b/braintrust/templates/scheduled-restart.yaml
index 5105ef4..4d5b5d3 100644
--- a/braintrust/templates/scheduled-restart.yaml
+++ b/braintrust/templates/scheduled-restart.yaml
@@ -51,7 +51,8 @@ roleRef:
   kind: Role
   name: {{ $saName }}
 {{- end }}
-{{- if .Values.scheduledRestart.targets.api }}
+{{- $hasTargets := or .Values.scheduledRestart.targets.api (or .Values.scheduledRestart.targets.brainstoreReader .Values.scheduledRestart.targets.brainstoreWriter) }}
+{{- if $hasTargets }}
 ---
 apiVersion: batch/v1
 kind: CronJob
@@ -67,7 +68,7 @@ metadata:
     {{- toYaml . | nindent 4 }}
   {{- end }}
 spec:
-  schedule: {{ required "scheduledRestart.schedules.api is required" .Values.scheduledRestart.schedules.api | quote }}
+  schedule: {{ required "scheduledRestart.schedule is required" .Values.scheduledRestart.schedule | quote }}
   concurrencyPolicy: {{ .Values.scheduledRestart.concurrencyPolicy | default "Forbid" }}
   startingDeadlineSeconds: {{ .Values.scheduledRestart.startingDeadlineSeconds | default 600 }}
   successfulJobsHistoryLimit: {{ .Values.scheduledRestart.successfulJobsHistoryLimit | default 1 }}
@@ -96,113 +97,15 @@ spec:
               args:
                 - rollout
                 - restart
+                {{- if .Values.scheduledRestart.targets.api }}
                 - deployment/{{ .Values.api.name }}
-                - --namespace
-                - {{ $namespace | quote }}
-              {{- with .Values.scheduledRestart.resources }}
-              resources:
-                {{- toYaml . | nindent 16 }}
-              {{- end }}
-{{- end }}
-{{- if .Values.scheduledRestart.targets.brainstoreReader }}
----
-apiVersion: batch/v1
-kind: CronJob
-metadata:
-  name: {{ .Values.brainstore.reader.name }}-restart
-  namespace: {{ $namespace }}
-  {{- with $labels }}
-  labels:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-  {{- with .Values.scheduledRestart.annotations }}
-  annotations:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-spec:
-  schedule: {{ required "scheduledRestart.schedules.brainstoreReader is required" .Values.scheduledRestart.schedules.brainstoreReader | quote }}
-  concurrencyPolicy: {{ .Values.scheduledRestart.concurrencyPolicy | default "Forbid" }}
-  startingDeadlineSeconds: {{ .Values.scheduledRestart.startingDeadlineSeconds | default 600 }}
-  successfulJobsHistoryLimit: {{ .Values.scheduledRestart.successfulJobsHistoryLimit | default 1 }}
-  failedJobsHistoryLimit: {{ .Values.scheduledRestart.failedJobsHistoryLimit | default 3 }}
-  jobTemplate:
-    spec:
-      backoffLimit: {{ .Values.scheduledRestart.backoffLimit | default 1 }}
-      {{- with .Values.scheduledRestart.ttlSecondsAfterFinished }}
-      ttlSecondsAfterFinished: {{ . }}
-      {{- end }}
-      template:
-        metadata:
-          {{- with $labels }}
-          labels:
-            {{- toYaml . | nindent 12 }}
-          {{- end }}
-        spec:
-          serviceAccountName: {{ $saName }}
-          restartPolicy: Never
-          containers:
-            - name: restart
-              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default "latest" .Values.scheduledRestart.image.tag }}"
-              imagePullPolicy: {{ .Values.scheduledRestart.image.pullPolicy }}
-              command:
-                - kubectl
-              args:
-                - rollout
-                - restart
+                {{- end }}
+                {{- if .Values.scheduledRestart.targets.brainstoreReader }}
                 - deployment/{{ .Values.brainstore.reader.name }}
-                - --namespace
-                - {{ $namespace | quote }}
-              {{- with .Values.scheduledRestart.resources }}
-              resources:
-                {{- toYaml . | nindent 16 }}
-              {{- end }}
-{{- end }}
-{{- if .Values.scheduledRestart.targets.brainstoreWriter }}
----
-apiVersion: batch/v1
-kind: CronJob
-metadata:
-  name: {{ .Values.brainstore.writer.name }}-restart
-  namespace: {{ $namespace }}
-  {{- with $labels }}
-  labels:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-  {{- with .Values.scheduledRestart.annotations }}
-  annotations:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-spec:
-  schedule: {{ required "scheduledRestart.schedules.brainstoreWriter is required" .Values.scheduledRestart.schedules.brainstoreWriter | quote }}
-  concurrencyPolicy: {{ .Values.scheduledRestart.concurrencyPolicy | default "Forbid" }}
-  startingDeadlineSeconds: {{ .Values.scheduledRestart.startingDeadlineSeconds | default 600 }}
-  successfulJobsHistoryLimit: {{ .Values.scheduledRestart.successfulJobsHistoryLimit | default 1 }}
-  failedJobsHistoryLimit: {{ .Values.scheduledRestart.failedJobsHistoryLimit | default 3 }}
-  jobTemplate:
-    spec:
-      backoffLimit: {{ .Values.scheduledRestart.backoffLimit | default 1 }}
-      {{- with .Values.scheduledRestart.ttlSecondsAfterFinished }}
-      ttlSecondsAfterFinished: {{ . }}
-      {{- end }}
-      template:
-        metadata:
-          {{- with $labels }}
-          labels:
-            {{- toYaml . | nindent 12 }}
-          {{- end }}
-        spec:
-          serviceAccountName: {{ $saName }}
-          restartPolicy: Never
-          containers:
-            - name: restart
-              image: "{{ .Values.scheduledRestart.image.repository }}:{{ default "latest" .Values.scheduledRestart.image.tag }}"
-              imagePullPolicy: {{ .Values.scheduledRestart.image.pullPolicy }}
-              command:
-                - kubectl
-              args:
-                - rollout
-                - restart
+                {{- end }}
+                {{- if .Values.scheduledRestart.targets.brainstoreWriter }}
                 - deployment/{{ .Values.brainstore.writer.name }}
+                {{- end }}
                 - --namespace
                 - {{ $namespace | quote }}
               {{- with .Values.scheduledRestart.resources }}
diff --git a/braintrust/values.yaml b/braintrust/values.yaml
index 0a12447..3860849 100644
--- a/braintrust/values.yaml
+++ b/braintrust/values.yaml
@@ -75,10 +75,7 @@ scheduledRestart:
     api: true
     brainstoreReader: true
     brainstoreWriter: true
-  schedules:
-    api: "0 * * * *"
-    brainstoreReader: "10 * * * *"
-    brainstoreWriter: "20 * * * *"
+  schedule: "0 * * * *"
   image:
     repository: "chainguard/kubectl"
     tag: "latest"