diff --git a/ci-operator/config/redhat-developer/rhdh/redhat-developer-rhdh-main.yaml b/ci-operator/config/redhat-developer/rhdh/redhat-developer-rhdh-main.yaml
index 45c09236b3084..3dfca14db826e 100644
--- a/ci-operator/config/redhat-developer/rhdh/redhat-developer-rhdh-main.yaml
+++ b/ci-operator/config/redhat-developer/rhdh/redhat-developer-rhdh-main.yaml
@@ -38,6 +38,25 @@ tests:
     test:
     - ref: redhat-developer-rhdh-ocp-helm
     workflow: generic-claim
+- always_run: false
+  as: rerun-failed-tests
+  cluster_claim:
+    architecture: amd64
+    cloud: aws
+    labels:
+      region: us-east-2
+    owner: rhdh
+    product: ocp
+    timeout: 1h0m0s
+    version: "4.18"
+  optional: true
+  steps:
+    post:
+    - ref: redhat-developer-rhdh-send-data-router
+    - chain: gather
+    test:
+    - ref: redhat-developer-rhdh-ocp-rerun-failed-tests
+    workflow: generic-claim
 - always_run: false
   as: e2e-ocp-helm-nightly
   cluster_claim:
diff --git a/ci-operator/jobs/redhat-developer/rhdh/redhat-developer-rhdh-main-presubmits.yaml b/ci-operator/jobs/redhat-developer/rhdh/redhat-developer-rhdh-main-presubmits.yaml
index 8224cf719fc18..768047a62a7f1 100644
--- a/ci-operator/jobs/redhat-developer/rhdh/redhat-developer-rhdh-main-presubmits.yaml
+++ b/ci-operator/jobs/redhat-developer/rhdh/redhat-developer-rhdh-main-presubmits.yaml
@@ -1208,3 +1208,73 @@ presubmits:
         secret:
           secretName: result-aggregator
     trigger: (?m)^/test( | .* )e2e-osd-gcp-operator-nightly,?($|\s.*)
+  - agent: kubernetes
+    always_run: false
+    branches:
+    - ^main$
+    - ^main-
+    cluster: build10
+    context: ci/prow/rerun-failed-tests
+    decorate: true
+    decoration_config:
+      skip_cloning: true
+    labels:
+      ci.openshift.io/generator: prowgen
+      pj-rehearse.openshift.io/can-be-rehearsed: "true"
+    name: pull-ci-redhat-developer-rhdh-main-rerun-failed-tests
+    optional: true
+    rerun_command: /test rerun-failed-tests
+    spec:
+      containers:
+      - args:
+        - --gcs-upload-secret=/secrets/gcs/service-account.json
+        - --hive-kubeconfig=/secrets/hive-hive-credentials/kubeconfig
+        - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+        - --report-credentials-file=/etc/report/credentials
+        - --secret-dir=/secrets/ci-pull-credentials
+        - --target=rerun-failed-tests
+        command:
+        - ci-operator
+        image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest
+        imagePullPolicy: Always
+        name: ""
+        resources:
+          requests:
+            cpu: 10m
+        volumeMounts:
+        - mountPath: /secrets/ci-pull-credentials
+          name: ci-pull-credentials
+          readOnly: true
+        - mountPath: /secrets/gcs
+          name: gcs-credentials
+          readOnly: true
+        - mountPath: /secrets/hive-hive-credentials
+          name: hive-hive-credentials
+          readOnly: true
+        - mountPath: /secrets/manifest-tool
+          name: manifest-tool-local-pusher
+          readOnly: true
+        - mountPath: /etc/pull-secret
+          name: pull-secret
+          readOnly: true
+        - mountPath: /etc/report
+          name: result-aggregator
+          readOnly: true
+      serviceAccountName: ci-operator
+      volumes:
+      - name: ci-pull-credentials
+        secret:
+          secretName: ci-pull-credentials
+      - name: hive-hive-credentials
+        secret:
+          secretName: hive-hive-credentials
+      - name: manifest-tool-local-pusher
+        secret:
+          secretName: manifest-tool-local-pusher
+      - name: pull-secret
+        secret:
+          secretName: registry-pull-credentials
+      - name: result-aggregator
+        secret:
+          secretName: result-aggregator
+    trigger: (?m)^/test( | .* )rerun-failed-tests,?($|\s.*)
diff --git a/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/OWNERS b/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/OWNERS
new file mode 100644
index 0000000000000..f7227237a3adb
--- /dev/null
+++ b/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/OWNERS
@@ -0,0 +1,18 @@
+# DO NOT EDIT; this file is auto-generated using https://github.com/openshift/ci-tools.
+# Fetched from https://github.com/redhat-developer/rhdh root OWNERS
+# If the repo had OWNERS_ALIASES then the aliases were expanded
+# Logins who are not members of 'openshift' organization were filtered out
+# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md
+
+approvers:
+- josephca
+- subhashkhileri
+- gustavolira
+- zdrapela
+options: {}
+reviewers:
+- albarbaro
+- josephca
+- subhashkhileri
+- gustavolira
+- zdrapela
diff --git a/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-commands.sh b/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-commands.sh
new file mode 100644
index 0000000000000..21617bd005ca8
--- /dev/null
+++ b/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-commands.sh
@@ -0,0 +1,210 @@
+#!/bin/bash
+#
+# Step entrypoint for the rerun-failed-tests job on a claimed OCP cluster.
+#
+# Flow: derive repository/branch/PR variables from JOB_SPEC, log in to the
+# cluster as kubeadmin, create a cluster-admin service-account token, clone the
+# repository (merging the PR branch for presubmits), resolve the container image
+# tag to test, then run .ibm/pipelines/openshift-ci-tests.sh.
+#
+# Environment read: JOB_SPEC, JOB_TYPE, JOB_NAME, KUBECONFIG,
+# KUBEADMIN_PASSWORD_FILE, SHARED_DIR.
+
+echo "========== Repository, Branch, and PR Variables =========="
+GITHUB_ORG_NAME="redhat-developer"
+echo "GITHUB_ORG_NAME: $GITHUB_ORG_NAME"
+GITHUB_REPOSITORY_NAME="rhdh"
+echo "GITHUB_REPOSITORY_NAME: $GITHUB_REPOSITORY_NAME"
+# Use the first extra_refs entry, falling back to refs.base_ref. The jq `//`
+# alternative also covers an absent or empty extra_refs array.
+RELEASE_BRANCH_NAME=$(echo "${JOB_SPEC}" | jq -r '.extra_refs[0].base_ref // .refs.base_ref')
+echo "RELEASE_BRANCH_NAME: $RELEASE_BRANCH_NAME"
+GIT_PR_NUMBER=$(echo "${JOB_SPEC}" | jq -r '.refs.pulls[0].number')
+echo "GIT_PR_NUMBER: $GIT_PR_NUMBER"
+TAG_NAME=""
+export GITHUB_ORG_NAME GITHUB_REPOSITORY_NAME RELEASE_BRANCH_NAME GIT_PR_NUMBER TAG_NAME
+
+# Export PR number for the rerun-failed-tests script
+export PULL_NUMBER="${GIT_PR_NUMBER}"
+export REPO_OWNER="${GITHUB_ORG_NAME}"
+export REPO_NAME="${GITHUB_REPOSITORY_NAME}"
+
+echo "========== Workdir Setup =========="
+export HOME WORKSPACE
+HOME=/tmp
+WORKSPACE=$(pwd)
+cd /tmp || exit
+
+echo "========== Cluster Authentication =========="
+export OPENSHIFT_PASSWORD
+export OPENSHIFT_API
+export OPENSHIFT_USERNAME
+
+OPENSHIFT_API="$(yq e '.clusters[0].cluster.server' "$KUBECONFIG")"
+OPENSHIFT_USERNAME="kubeadmin"
+
+yq -i 'del(.clusters[].cluster.certificate-authority-data) | .clusters[].cluster.insecure-skip-tls-verify=true' "$KUBECONFIG"
+if [[ -s "$KUBEADMIN_PASSWORD_FILE" ]]; then
+  OPENSHIFT_PASSWORD="$(cat "$KUBEADMIN_PASSWORD_FILE")"
+elif [[ -s "${SHARED_DIR}/kubeadmin-password" ]]; then
+  # Recommendation from hypershift qe team in slack channel..
+  OPENSHIFT_PASSWORD="$(cat "${SHARED_DIR}/kubeadmin-password")"
+else
+  echo "Kubeadmin password file is empty... Aborting job"
+  exit 1
+fi
+
+# The delimiter is quoted, so the variables below are expanded by the child bash;
+# they are visible there because they were exported above.
+timeout --foreground 5m bash <<-"EOF"
+  while ! oc login "$OPENSHIFT_API" -u "$OPENSHIFT_USERNAME" -p "$OPENSHIFT_PASSWORD" --insecure-skip-tls-verify=true; do
+    sleep 20
+  done
+EOF
+if [ $? -ne 0 ]; then
+  echo "Timed out waiting for login"
+  exit 1
+fi
+
+echo "========== Cluster Service Account and Token Management =========="
+export K8S_CLUSTER_URL K8S_CLUSTER_TOKEN
+K8S_CLUSTER_URL=$(oc whoami --show-server)
+echo "K8S_CLUSTER_URL: $K8S_CLUSTER_URL"
+
+echo "Note: This cluster will be automatically deleted 4 hours after being claimed."
+echo "To debug issues or log in to the cluster manually, use the script: .ibm/pipelines/ocp-cluster-claim-login.sh"
+
+oc create serviceaccount tester-sa-2 -n default
+oc adm policy add-cluster-role-to-user cluster-admin system:serviceaccount:default:tester-sa-2
+K8S_CLUSTER_TOKEN=$(oc create token tester-sa-2 -n default --duration=4h)
+
+echo "========== Platform Environment Variables =========="
+echo "Setting platform environment variables:"
+export IS_OPENSHIFT="true"
+echo "IS_OPENSHIFT=${IS_OPENSHIFT}"
+export CONTAINER_PLATFORM="ocp"
+echo "CONTAINER_PLATFORM=${CONTAINER_PLATFORM}"
+echo "Getting container platform version"
+# A pipeline's exit status is that of its last command (cut), so a trailing
+# `|| echo "unknown"` never fires; apply the default explicitly instead.
+CONTAINER_PLATFORM_VERSION=$(oc version --output json 2> /dev/null | jq -r '.openshiftVersion // empty' | cut -d'.' -f1,2)
+CONTAINER_PLATFORM_VERSION="${CONTAINER_PLATFORM_VERSION:-unknown}"
+export CONTAINER_PLATFORM_VERSION
+echo "CONTAINER_PLATFORM_VERSION=${CONTAINER_PLATFORM_VERSION}"
+
+echo "========== Cluster kubeadmin logout =========="
+oc logout
+
+echo "========== Git Repository Setup & Checkout =========="
+QUAY_REPO="rhdh-community/rhdh"
+export QUAY_REPO
+
+# Clone and checkout the specific PR
+git clone "https://github.com/${GITHUB_ORG_NAME}/${GITHUB_REPOSITORY_NAME}.git"
+cd "${GITHUB_REPOSITORY_NAME}" || exit
+git checkout "$RELEASE_BRANCH_NAME" || exit
+
+git config --global user.name "rhdh-qe"
+git config --global user.email "rhdh-qe@redhat.com"
+
+echo "========== PR Branch Handling =========="
+if [ "$JOB_TYPE" == "presubmit" ] && [[ "$JOB_NAME" != rehearse-* ]]; then
+  # If executed as PR check of the repository, switch to PR branch.
+  git fetch origin pull/"${GIT_PR_NUMBER}"/head:PR"${GIT_PR_NUMBER}"
+  git checkout PR"${GIT_PR_NUMBER}"
+  git merge "origin/$RELEASE_BRANCH_NAME" --no-edit
+  GIT_PR_RESPONSE=$(curl -s "https://api.github.com/repos/${GITHUB_ORG_NAME}/${GITHUB_REPOSITORY_NAME}/pulls/${GIT_PR_NUMBER}")
+  LONG_SHA=$(echo "$GIT_PR_RESPONSE" | jq -r '.head.sha')
+  SHORT_SHA=$(git rev-parse --short=8 "${LONG_SHA}")
+  TAG_NAME="pr-${GIT_PR_NUMBER}-${SHORT_SHA}"
+  echo "TAG_NAME: $TAG_NAME"
+  IMAGE_NAME="${QUAY_REPO}:${TAG_NAME}"
+  echo "IMAGE_NAME: $IMAGE_NAME"
+fi
+
+echo "========== Changeset Analysis =========="
+PR_CHANGESET=$(git diff --name-only "$RELEASE_BRANCH_NAME")
+echo "Changeset: $PR_CHANGESET"
+
+# Check if changes are exclusively within the specified directories.
+# Dots are escaped so that e.g. ".ibm" matches only a literal leading dot.
+DIRECTORIES_TO_CHECK='\.ibm|e2e-tests|docs|\.claude|\.cursor|\.rulesync|\.vscode'
+ONLY_IN_DIRS=true
+
+# Read line by line so paths containing whitespace or glob characters stay intact.
+while IFS= read -r change; do
+  # An empty changeset yields one empty line from the here-string; skip it.
+  [[ -z "$change" ]] && continue
+  # Check if the change is not within the specified directories
+  if ! echo "$change" | grep -qE "^($DIRECTORIES_TO_CHECK)/"; then
+    ONLY_IN_DIRS=false
+    break
+  fi
+done <<< "$PR_CHANGESET"
+
+echo "ONLY_IN_DIRS: $ONLY_IN_DIRS"
+
+echo "========== Image Tag Resolution =========="
+if [[ "$JOB_NAME" == rehearse-* || "$JOB_TYPE" == "periodic" ]]; then
+  QUAY_REPO="rhdh/rhdh-hub-rhel9"
+  if [ "${RELEASE_BRANCH_NAME}" != "main" ]; then
+    # Get a branch-specific tag name (e.g., 'release-1.5' becomes '1.5')
+    TAG_NAME="$(echo "$RELEASE_BRANCH_NAME" | cut -d'-' -f2)"
+  else
+    TAG_NAME="next"
+  fi
+  echo "TAG_NAME: $TAG_NAME"
+elif [[ "$ONLY_IN_DIRS" == "true" && "$JOB_TYPE" == "presubmit" ]]; then
+  if [ "${RELEASE_BRANCH_NAME}" != "main" ]; then
+    QUAY_REPO="rhdh/rhdh-hub-rhel9"
+    # Get a branch-specific tag name (e.g., 'release-1.5' becomes '1.5')
+    TAG_NAME="$(echo "$RELEASE_BRANCH_NAME" | cut -d'-' -f2)"
+  else
+    QUAY_REPO="rhdh-community/rhdh"
+    TAG_NAME="next"
+  fi
+  echo "INFO: Bypassing PR image build wait, using tag: ${TAG_NAME}"
+  echo "INFO: Container image will be tagged as: ${QUAY_REPO}:${TAG_NAME}"
+else
+  echo "Waiting for Docker image availability..."
+  # Timeout configuration for waiting for Docker image availability
+  MAX_WAIT_TIME_SECONDS=$((80*60)) # Maximum wait time: 1 hour 20 minutes
+  POLL_INTERVAL_SECONDS=60 # Check every 60 seconds
+
+  ELAPSED_TIME=0
+
+  while true; do
+    # Check image availability
+    response=$(curl -s "https://quay.io/api/v1/repository/${QUAY_REPO}/tag/?specificTag=$TAG_NAME")
+
+    # Use jq to parse the JSON and see if the tag exists. An empty or
+    # non-JSON response counts as zero tags so the numeric test stays valid.
+    tag_count=$(echo "$response" | jq '.tags | length' 2> /dev/null)
+    tag_count="${tag_count:-0}"
+
+    if [ "$tag_count" -gt "0" ]; then
+      echo "Docker image $IMAGE_NAME is now available. Time elapsed: $(($ELAPSED_TIME / 60)) minute(s)."
+      break
+    fi
+
+    # Wait for the interval duration
+    sleep "$POLL_INTERVAL_SECONDS"
+
+    # Increment the elapsed time
+    ELAPSED_TIME=$(($ELAPSED_TIME + $POLL_INTERVAL_SECONDS))
+
+    # If the elapsed time exceeds the timeout, exit with an error
+    if [ $ELAPSED_TIME -ge $MAX_WAIT_TIME_SECONDS ]; then
+      echo "Timed out waiting for Docker image $IMAGE_NAME. Time elapsed: $(($ELAPSED_TIME / 60)) minute(s)."
+      exit 1
+    fi
+  done
+fi
+
+echo "========== Current branch =========="
+echo "Current branch: $(git branch --show-current)"
+echo "Using Image: ${QUAY_REPO}:${TAG_NAME}"
+
+echo "========== Test Execution =========="
+echo "Executing openshift-ci-tests.sh for rerun-failed-tests"
+bash ./.ibm/pipelines/openshift-ci-tests.sh
diff --git a/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-ref.metadata.json b/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-ref.metadata.json
new file mode 100644
index 0000000000000..2f2127214fac8
--- /dev/null
+++ b/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-ref.metadata.json
@@ -0,0 +1,18 @@
+{
+	"path": "redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-ref.yaml",
+	"owners": {
+		"approvers": [
+			"josephca",
+			"subhashkhileri",
+			"gustavolira",
+			"zdrapela"
+		],
+		"reviewers": [
+			"albarbaro",
+			"josephca",
+			"subhashkhileri",
+			"gustavolira",
+			"zdrapela"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-ref.yaml b/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-ref.yaml
new file mode 100644
index 0000000000000..58ac7e158bfba
--- /dev/null
+++ b/ci-operator/step-registry/redhat-developer/rhdh/ocp/rerun-failed-tests/redhat-developer-rhdh-ocp-rerun-failed-tests-ref.yaml
@@ -0,0 +1,23 @@
+ref:
+  as: redhat-developer-rhdh-ocp-rerun-failed-tests
+  cli: latest
+  commands: "redhat-developer-rhdh-ocp-rerun-failed-tests-commands.sh"
+  credentials:
+  - mount_path: /tmp/secrets
+    name: rhdh
+    namespace: test-credentials
+  documentation: |-
+    Runs the rerun-failed-tests flow for RHDH on a claimed OCP cluster: logs in
+    to the cluster, checks out the repository under test, resolves the image
+    tag, and executes .ibm/pipelines/openshift-ci-tests.sh.
+  from_image:
+    name: rhdh-e2e-runner
+    namespace: ci
+    tag: main
+  resources:
+    limits:
+      cpu: "2"
+      memory: 6Gi
+    requests:
+      cpu: "2"
+      memory: 6Gi