diff --git a/deploy/infrastructure-manager/gcp-credentials-json/README.md b/deploy/infrastructure-manager/gcp-credentials-json/README.md new file mode 100644 index 0000000000..da1f44671b --- /dev/null +++ b/deploy/infrastructure-manager/gcp-credentials-json/README.md @@ -0,0 +1,91 @@ +## GCP Credentials JSON (Service Account Key) + +Deploy a GCP service account with JSON credentials for Elastic Agent GCP integration using GCP Infrastructure Manager. + +This creates a service account with the necessary permissions and stores the JSON key in Secret Manager for use in the Elastic Agent GCP integration in Kibana. + +### Prerequisites + +1. GCP project with required permissions +2. `gcloud` CLI configured with your project + +### Quick Deploy + +#### Option 1: Cloud Shell (Recommended) + +[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https://github.com/elastic/cloudbeat.git&cloudshell_git_branch=main&cloudshell_workspace=deploy/infrastructure-manager/gcp-credentials-json&show=terminal&ephemeral=true) + +```bash +# For project-level monitoring (default) +./deploy.sh + +# For organization-level monitoring +export ORG_ID="" +./deploy.sh +``` + +#### Option 2: GCP Console + +1. Go to [Infrastructure Manager Console](https://console.cloud.google.com/infra-manager/deployments/create) +2. Configure: + - **Source**: Git repository + - **Repository URL**: `https://github.com/elastic/cloudbeat.git` + - **Branch**: `main` + - **Directory**: `deploy/infrastructure-manager/gcp-credentials-json` + - **Location**: `us-central1` +3. Add input variables: + - `project_id`: Your GCP project ID + - `resource_suffix`: Unique suffix (e.g., `abc123`) + - `scope`: `projects` or `organizations` + - `parent_id`: Project ID or Organization ID +4. 
Click **Create** + +### Environment Variables + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| `ORG_ID` | No | - | Organization ID for org-level monitoring | +| `DEPLOYMENT_NAME` | No | `elastic-agent-credentials` | Deployment name prefix | +| `LOCATION` | No | `us-central1` | GCP region for Infrastructure Manager | + +### Resources Created + +- Service account with `cloudasset.viewer` and `browser` roles +- Service account key (stored securely in Secret Manager and saved locally) +- Secret Manager secret containing the JSON credentials +- IAM bindings (project or organization level) +- Local `KEY_FILE.json` with the service account credentials + +### Output + +After successful deployment, the script saves the service account credentials to `KEY_FILE.json` in the current directory. + +**To use the credentials:** + +1. Run `cat KEY_FILE.json` to view the service account key +2. Copy the entire JSON content +3. Paste it in the Elastic Agent GCP integration in Kibana + +> **Note:** The key is also stored in Secret Manager for future access. The script outputs the `gcloud` command to retrieve it if needed. + +### Management + +**View deployment:** +```bash +gcloud infra-manager deployments describe ${DEPLOYMENT_NAME} --location=${LOCATION} +``` + +**Delete deployment:** +```bash +gcloud infra-manager deployments delete ${DEPLOYMENT_NAME} --location=${LOCATION} +``` + +### Troubleshooting + +**Common Issues:** + +1. **Permission denied**: Ensure your account has the required IAM roles +2. **API not enabled**: The setup script enables required APIs automatically +3. 
**Organization scope fails**: Verify the ORG_ID is correct and you have org-level permissions + +**Console:** [Infrastructure Manager Deployments](https://console.cloud.google.com/infra-manager/deployments) diff --git a/deploy/infrastructure-manager/gcp-credentials-json/deploy.sh b/deploy/infrastructure-manager/gcp-credentials-json/deploy.sh new file mode 100755 index 0000000000..95b6b966ab --- /dev/null +++ b/deploy/infrastructure-manager/gcp-credentials-json/deploy.sh @@ -0,0 +1,113 @@ +#!/bin/bash +set -eo pipefail # pipefail: a failing gcloud must not be masked by the last stage of a pipeline + +# This script: +# 1. Enables necessary APIs for Elastic Agent GCP integration +# 2. Deploys Terraform via GCP Infrastructure Manager to create a service account with roles and key +# 3. Stores the key in Secret Manager +# 4. Saves the key locally to KEY_FILE.json for easy access + +# Get the directory where this script lives (for Terraform source files) +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +# Configure GCP project +PROJECT_ID=$(gcloud config get-value core/project) +SERVICE_ACCOUNT="infra-manager-deployer" + +# Ensure prerequisites are configured +"${SCRIPT_DIR}/setup.sh" "${PROJECT_ID}" "${SERVICE_ACCOUNT}" + +# Optional environment variables (defaults are in variables.tf or below) +# ORG_ID - Set for org-level monitoring +# DEPLOYMENT_NAME - Deployment name prefix (default: elastic-agent-credentials) +# LOCATION - GCP region for deployment (default: us-central1) + +# Generate unique suffix for resource names (8 hex characters) +RESOURCE_SUFFIX=$(openssl rand -hex 4) + +# Set deployment name with suffix +DEPLOYMENT_NAME="${DEPLOYMENT_NAME:-elastic-agent-credentials}-${RESOURCE_SUFFIX}" + +# Set location (not a TF variable, only used by gcloud) +LOCATION="${LOCATION:-us-central1}" + +RED='\033[0;31m' +GREEN='\033[0;32m' +RESET='\033[0m' + +# Build input values - only include values that are set +# Defaults are defined in variables.tf (single source of truth) +INPUT_VALUES="project_id=${PROJECT_ID}" 
+INPUT_VALUES="${INPUT_VALUES},resource_suffix=${RESOURCE_SUFFIX}" + +# Set scope and parent_id based on ORG_ID +if [ -n "${ORG_ID}" ]; then + INPUT_VALUES="${INPUT_VALUES},scope=organizations" + INPUT_VALUES="${INPUT_VALUES},parent_id=${ORG_ID}" +else + INPUT_VALUES="${INPUT_VALUES},scope=projects" + INPUT_VALUES="${INPUT_VALUES},parent_id=${PROJECT_ID}" +fi + +echo -e "${GREEN}Starting deployment '${DEPLOYMENT_NAME}'...${RESET}" + +# Deploy from local source +if ! gcloud infra-manager deployments apply "${DEPLOYMENT_NAME}" \ + --location="${LOCATION}" \ + --service-account="projects/${PROJECT_ID}/serviceAccounts/${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com" \ + --local-source="${SCRIPT_DIR}" \ + --input-values="${INPUT_VALUES}"; then + echo "" + echo -e "${RED}Deployment failed${RESET}" + echo "" + echo "Common failure reasons:" + echo " - Service account permissions missing for ${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com" + echo " - Organization ID incorrect (if using organization scope)" + echo "" + echo "Useful debugging commands:" + echo " # View deployment status" + echo " gcloud infra-manager deployments describe ${DEPLOYMENT_NAME} --location=${LOCATION}" + echo "" + echo " # Verify service account permissions" + echo " gcloud projects get-iam-policy ${PROJECT_ID} --flatten='bindings[].members' --filter='bindings.members:serviceAccount:${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com' --format='table(bindings.role)'" + echo "" + exit 1 +fi + +# Get the latest revision name from the deployment +REVISION=$(gcloud infra-manager deployments describe "${DEPLOYMENT_NAME}" \ + --location="${LOCATION}" \ + --format='value(latestRevision)') + +if [ -z "$REVISION" ]; then + echo -e "${RED}Error: Could not find deployment revision.${RESET}" + exit 1 +fi + +# Extract the secret name from revision outputs (outputs are on revisions, not deployments) +SECRET_NAME=$(gcloud infra-manager revisions describe "${REVISION}" \ + 
--location="${LOCATION}" \ + --format='value(applyResults.outputs.secret_name.value)') + +if [ -z "$SECRET_NAME" ]; then + echo -e "${RED}Error: Secret name not found in revision outputs.${RESET}" + exit 1 +fi + +# Retrieve the key from Secret Manager and save locally (umask 077 in the subshell: the key file is created readable by its owner only) +KEY_FILE="KEY_FILE.json" +if ! (umask 077 && gcloud secrets versions access latest --secret="${SECRET_NAME}" --project="${PROJECT_ID}" | base64 -d >"${KEY_FILE}"); then + echo -e "${RED}Error: Failed to retrieve key from Secret Manager.${RESET}" + exit 1 +fi + +echo "" +echo -e "${GREEN}Deployment complete.${RESET}" +gcloud infra-manager deployments describe "${DEPLOYMENT_NAME}" --location="${LOCATION}" --format='table(resources)' + +echo "" +echo -e "${GREEN}Run 'cat ${KEY_FILE}' to view the service account key. Copy and paste it in the Elastic Agent GCP integration." +echo -e "Save the key securely for future use.${RESET}" +echo "" +echo -e "${GREEN}The key is also stored in Secret Manager for future access:${RESET}" +echo " gcloud secrets versions access latest --secret=\"${SECRET_NAME}\" --project=\"${PROJECT_ID}\" | base64 -d" diff --git a/deploy/infrastructure-manager/gcp-credentials-json/main.tf b/deploy/infrastructure-manager/gcp-credentials-json/main.tf new file mode 100644 index 0000000000..d882ce494e --- /dev/null +++ b/deploy/infrastructure-manager/gcp-credentials-json/main.tf @@ -0,0 +1,77 @@ +terraform { + required_version = ">= 1.0" + required_providers { + google = { + source = "hashicorp/google" + version = "~> 5.0" + } + } +} + +provider "google" { + project = var.project_id +} + +locals { + # Use suffix from deploy.sh to ensure all resource names stay within GCP limits and allow multiple deployments + resource_suffix = var.resource_suffix + sa_name = "elastic-agent-sa-${local.resource_suffix}" +} + +# Service Account +resource "google_service_account" "elastic_agent" { + account_id = local.sa_name + display_name = "Elastic Agent service account" + project = var.project_id +} + +# Service Account 
Key +resource "google_service_account_key" "elastic_agent_key" { + service_account_id = google_service_account.elastic_agent.name +} + +# Project-level IAM bindings +resource "google_project_iam_member" "cloudasset_viewer" { + count = var.scope == "projects" ? 1 : 0 + project = var.parent_id + role = "roles/cloudasset.viewer" + member = "serviceAccount:${google_service_account.elastic_agent.email}" +} + +resource "google_project_iam_member" "browser" { + count = var.scope == "projects" ? 1 : 0 + project = var.parent_id + role = "roles/browser" + member = "serviceAccount:${google_service_account.elastic_agent.email}" +} + +# Organization-level IAM bindings +resource "google_organization_iam_member" "cloudasset_viewer_org" { + count = var.scope == "organizations" ? 1 : 0 + org_id = var.parent_id + role = "roles/cloudasset.viewer" + member = "serviceAccount:${google_service_account.elastic_agent.email}" +} + +resource "google_organization_iam_member" "browser_org" { + count = var.scope == "organizations" ? 
1 : 0 + org_id = var.parent_id + role = "roles/browser" + member = "serviceAccount:${google_service_account.elastic_agent.email}" +} + +# Secret Manager secret to store the service account key securely +resource "google_secret_manager_secret" "sa_key" { + secret_id = "elastic-agent-sa-key-${local.resource_suffix}" + project = var.project_id + + replication { + auto {} + } +} + +# Store the service account key in Secret Manager +resource "google_secret_manager_secret_version" "sa_key" { + secret = google_secret_manager_secret.sa_key.id + secret_data = google_service_account_key.elastic_agent_key.private_key +} diff --git a/deploy/infrastructure-manager/gcp-credentials-json/outputs.tf b/deploy/infrastructure-manager/gcp-credentials-json/outputs.tf new file mode 100644 index 0000000000..642647c07c --- /dev/null +++ b/deploy/infrastructure-manager/gcp-credentials-json/outputs.tf @@ -0,0 +1,9 @@ +output "service_account_email" { + description = "Email of the created service account" + value = google_service_account.elastic_agent.email +} + +output "secret_name" { + description = "Secret Manager secret ID containing the service account key" + value = google_secret_manager_secret.sa_key.secret_id +} diff --git a/deploy/infrastructure-manager/gcp-credentials-json/setup.sh b/deploy/infrastructure-manager/gcp-credentials-json/setup.sh new file mode 100755 index 0000000000..baa15b7c54 --- /dev/null +++ b/deploy/infrastructure-manager/gcp-credentials-json/setup.sh @@ -0,0 +1,44 @@ +#!/bin/bash +set -e + +# Accept parameters: $1 = GCP project ID, $2 = deployer service account name; abort when either is missing or empty +PROJECT_ID="${1:?PROJECT_ID is required (usage: setup.sh PROJECT_ID SERVICE_ACCOUNT)}" +SERVICE_ACCOUNT="${2:?SERVICE_ACCOUNT is required (usage: setup.sh PROJECT_ID SERVICE_ACCOUNT)}" +SERVICE_ACCOUNT_EMAIL="${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com" + +REQUIRED_APIS=( + iam.googleapis.com + config.googleapis.com + cloudresourcemanager.googleapis.com + cloudasset.googleapis.com + secretmanager.googleapis.com +) + +REQUIRED_ROLES=( + roles/iam.serviceAccountAdmin + roles/iam.serviceAccountKeyAdmin + roles/resourcemanager.projectIamAdmin + roles/config.admin + roles/storage.admin + 
roles/secretmanager.admin +) + +echo "Setting up GCP Infrastructure Manager prerequisites..." + +# Enable APIs (in the project passed as $1, not whatever project gcloud happens to have configured) +gcloud services enable "${REQUIRED_APIS[@]}" --project="${PROJECT_ID}" --quiet + +# Create service account if it doesn't exist +if ! gcloud iam service-accounts describe "${SERVICE_ACCOUNT_EMAIL}" --project="${PROJECT_ID}" >/dev/null 2>&1; then + gcloud iam service-accounts create "${SERVICE_ACCOUNT}" --project="${PROJECT_ID}" \ + --display-name="Infra Manager Deployment Account" --quiet +fi + +# Grant permissions +for role in "${REQUIRED_ROLES[@]}"; do + gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ + --member="serviceAccount:${SERVICE_ACCOUNT_EMAIL}" \ + --role="${role}" --condition=None --quiet >/dev/null +done + +echo "✓ Setup complete" diff --git a/deploy/infrastructure-manager/gcp-credentials-json/variables.tf b/deploy/infrastructure-manager/gcp-credentials-json/variables.tf new file mode 100644 index 0000000000..b3e1036931 --- /dev/null +++ b/deploy/infrastructure-manager/gcp-credentials-json/variables.tf @@ -0,0 +1,25 @@ +variable "project_id" { + description = "GCP Project ID" + type = string +} + +variable "resource_suffix" { + description = "Unique suffix for resource names (8 hex characters)" + type = string +} + +variable "scope" { + description = "Scope for IAM bindings (projects or organizations)" + type = string + default = "projects" + + validation { + condition = contains(["projects", "organizations"], var.scope) + error_message = "Scope must be either 'projects' or 'organizations'." 
+ } +} + +variable "parent_id" { + description = "Parent ID (project ID or organization ID depending on scope)" + type = string +} diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/README.md b/deploy/infrastructure-manager/gcp-elastic-agent/README.md new file mode 100644 index 0000000000..465e8cf100 --- /dev/null +++ b/deploy/infrastructure-manager/gcp-elastic-agent/README.md @@ -0,0 +1,156 @@ +## Elastic Agent Infrastructure Manager (Terraform) + +Deploy Elastic Agent for CIS GCP integration using GCP Infrastructure Manager. Creates a compute instance with Elastic Agent pre-installed and configured with necessary permissions. + +### Prerequisites + +1. Elastic Stack with Fleet Server deployed +2. GCP project with required permissions (see [Required Permissions](#required-permissions)) +3. Fleet URL and enrollment token from Kibana + +### Quick Deploy + +#### Option 1: Cloud Shell (Recommended) + +[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https://github.com/elastic/cloudbeat.git&cloudshell_git_branch=main&cloudshell_workspace=deploy/infrastructure-manager/gcp-elastic-agent&show=terminal&ephemeral=true) + +```bash +# Set required configuration +export FLEET_URL="" +export ENROLLMENT_TOKEN="" +export STACK_VERSION="" + +# Optional: Set these to override defaults +# export ORG_ID="" # For org-level monitoring +# export DEPLOYMENT_NAME="elastic-agent-deployment" # Default: elastic-agent-deployment +# export ZONE="us-central1-a" # Default: us-central1-a +# export ELASTIC_ARTIFACT_SERVER="" # Default: https://artifacts.elastic.co/downloads/beats/elastic-agent + +# Deploy using the deploy script +./deploy.sh +``` + +#### Option 2: GCP Console + +1. Go to [Infrastructure Manager Console](https://console.cloud.google.com/infra-manager/deployments/create) +2. 
Configure: + - **Source**: Git repository + - **Repository URL**: `https://github.com/elastic/cloudbeat.git` + - **Branch**: `main` + - **Directory**: `deploy/infrastructure-manager/gcp-elastic-agent` + - **Location**: `us-central1` +3. Add input variables (see table below; also set `resource_suffix` to a unique suffix, e.g., `abc123`) +4. Click **Create** + +### Input Variables + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| `project_id` | Yes | - | GCP Project ID | +| `fleet_url` | Yes | - | Fleet Server URL | +| `enrollment_token` | Yes | - | Enrollment token (sensitive) | +| `elastic_agent_version` | Yes | - | Agent version (e.g., `8.15.0`) | +| `elastic_artifact_server` | No | `https://artifacts.elastic.co/downloads/beats/elastic-agent` | Artifact server URL for downloading Elastic Agent | +| `zone` | No | `us-central1-a` | GCP zone | +| `scope` | No | `projects` | `projects` or `organizations` | +| `parent_id` | Yes | - | Project ID or Organization ID | +| `startup_validation_enabled` | No | `true` | Enable validation of startup script completion | +| `startup_timeout_seconds` | No | `600` | Maximum time to wait for startup (seconds) | + +### Resources Created + +- Compute instance (Ubuntu, n2-standard-4, 32GB disk) +- Service account with `cloudasset.viewer` and `browser` roles +- VPC network with auto-created subnets +- IAM bindings (project or organization level) + +### Startup Validation + +By default, Terraform waits for the startup script to complete and validates success: +- **Enabled**: Deployment fails if agent installation fails +- **Timeout**: 600 seconds (10 minutes) by default (configurable via `startup_timeout_seconds`) +- **Requires**: `gcloud` CLI installed where Terraform runs + +**Disable validation** (for testing or debugging): +```bash +# Via environment variable (for deploy.sh) +export STARTUP_VALIDATION_ENABLED=false +./deploy.sh + +# Or pass to gcloud directly +gcloud infra-manager deployments apply ${DEPLOYMENT_NAME} \ + --location=${LOCATION} \ + 
--input-values="...,startup_validation_enabled=false" +``` + +**Guest Attributes Written**: + +The startup script writes these attributes for monitoring: +- `elastic-agent/startup-status`: `"in-progress"`, `"success"`, or `"failed"` +- `elastic-agent/startup-error`: Error message (only when failed) +- `elastic-agent/startup-timestamp`: Completion timestamp (UTC) + +Query manually: +```bash +gcloud compute instances get-guest-attributes ${INSTANCE_NAME} \ + --zone ${ZONE} \ + --query-path=elastic-agent/ +``` + +### Management + +**View deployment:** +```bash +gcloud infra-manager deployments describe ${DEPLOYMENT_NAME} --location=${LOCATION} +``` + +**Delete deployment:** +```bash +gcloud infra-manager deployments delete ${DEPLOYMENT_NAME} --location=${LOCATION} +``` + +### Troubleshooting + +**Check deployment status:** +```bash +# The instance name is based on the deployment name with a random suffix +# Format: elastic-agent-vm- +# Example: elastic-agent-vm-0bc08b82 + +# Check startup script status via guest attributes +gcloud compute instances get-guest-attributes elastic-agent-vm- \ + --zone ${ZONE} \ + --query-path=elastic-agent/startup-status + +# Expected values: +# - "in-progress": Installation is running +# - "success": Installation completed successfully +# - "failed": Installation failed (check logs below) + +# To find your instance name: +gcloud compute instances list --filter="name~^elastic-agent-vm-" +``` + +**Check agent logs (without SSH):** +```bash +# View serial console output (includes startup script execution) +gcloud compute instances get-serial-port-output ${INSTANCE_NAME} --zone ${ZONE} + +# Filter for elastic-agent specific logs +gcloud compute instances get-serial-port-output ${INSTANCE_NAME} --zone ${ZONE} \ + | grep elastic-agent-setup +``` + +**Check agent logs (with SSH):** +```bash +gcloud compute ssh ${INSTANCE_NAME} --zone ${ZONE} +sudo journalctl -u google-startup-scripts.service +``` + +**Common Issues:** + +1. 
**404 error downloading agent**: Check `ELASTIC_ARTIFACT_SERVER` and `STACK_VERSION` are correct +2. **Guest attributes show "failed"**: Check serial console logs for error details +3. **Guest attributes not available**: Guest attributes are enabled by default and populate during startup + +**Console:** [Infrastructure Manager Deployments](https://console.cloud.google.com/infra-manager/deployments) diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/deploy.sh b/deploy/infrastructure-manager/gcp-elastic-agent/deploy.sh new file mode 100755 index 0000000000..d32c6df76a --- /dev/null +++ b/deploy/infrastructure-manager/gcp-elastic-agent/deploy.sh @@ -0,0 +1,101 @@ +#!/bin/bash +set -e + +# Required environment variables (no defaults) - checked first so a missing value aborts before any GCP change +: "${FLEET_URL:?FLEET_URL must be set}" "${ENROLLMENT_TOKEN:?ENROLLMENT_TOKEN must be set}" "${STACK_VERSION:?STACK_VERSION must be set}" + +# Get the directory where this script lives (for Terraform source files) +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +# Configure GCP project +PROJECT_ID=$(gcloud config get-value core/project) +SERVICE_ACCOUNT="infra-manager-deployer" + +# Ensure prerequisites are configured +"${SCRIPT_DIR}/setup.sh" "${PROJECT_ID}" "${SERVICE_ACCOUNT}" + +# Optional environment variables (defaults are in variables.tf) +# ORG_ID - Set for org-level monitoring +# ZONE - GCP zone (default: us-central1-a) +# DEPLOYMENT_NAME - Deployment name prefix (default: elastic-agent-deployment) +# ELASTIC_ARTIFACT_SERVER - Artifact server URL +# STARTUP_VALIDATION_ENABLED - Set to false to skip startup validation (passed through as startup_validation_enabled) + +# Generate unique suffix for resource names (8 hex characters) +RESOURCE_SUFFIX=$(openssl rand -hex 4) + +# Set deployment name with suffix +DEPLOYMENT_NAME="${DEPLOYMENT_NAME:-elastic-agent-deployment}-${RESOURCE_SUFFIX}" + +# Determine zone for location extraction +# We need the zone to derive the region (location) - use default if not set +EFFECTIVE_ZONE="${ZONE:-us-central1-a}" +LOCATION="${EFFECTIVE_ZONE%-?}" # Extract region from zone + +# Build input values - only include values that are set +# Defaults are defined in variables.tf (single 
source of truth) +INPUT_VALUES="project_id=${PROJECT_ID}" +INPUT_VALUES="${INPUT_VALUES},resource_suffix=${RESOURCE_SUFFIX}" + +# Required values +INPUT_VALUES="${INPUT_VALUES},fleet_url=${FLEET_URL}" +INPUT_VALUES="${INPUT_VALUES},enrollment_token=${ENROLLMENT_TOKEN}" +INPUT_VALUES="${INPUT_VALUES},elastic_agent_version=${STACK_VERSION}" + +# Optional values - only add if explicitly set (let TF use its defaults otherwise) +if [ -n "${ZONE}" ]; then + INPUT_VALUES="${INPUT_VALUES},zone=${ZONE}" +fi + +if [ -n "${ELASTIC_ARTIFACT_SERVER}" ]; then + # Remove trailing slash if present + ELASTIC_ARTIFACT_SERVER="${ELASTIC_ARTIFACT_SERVER%/}" + INPUT_VALUES="${INPUT_VALUES},elastic_artifact_server=${ELASTIC_ARTIFACT_SERVER}" +fi + +if [ -n "${STARTUP_VALIDATION_ENABLED}" ]; then + INPUT_VALUES="${INPUT_VALUES},startup_validation_enabled=${STARTUP_VALIDATION_ENABLED}" +fi + +# Set scope and parent_id based on ORG_ID +if [ -n "${ORG_ID}" ]; then + INPUT_VALUES="${INPUT_VALUES},scope=organizations" + INPUT_VALUES="${INPUT_VALUES},parent_id=${ORG_ID}" +else + INPUT_VALUES="${INPUT_VALUES},scope=projects" + INPUT_VALUES="${INPUT_VALUES},parent_id=${PROJECT_ID}" +fi + +# Deploy from local source (repo already cloned by Cloud Shell) +echo "Starting deployment ${DEPLOYMENT_NAME}..." +if ! 
gcloud infra-manager deployments apply "${DEPLOYMENT_NAME}" \ + --location="${LOCATION}" \ + --service-account="projects/${PROJECT_ID}/serviceAccounts/${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com" \ + --local-source="${SCRIPT_DIR}" \ + --input-values="${INPUT_VALUES}"; then + echo "" + echo "Deployment failed" + echo "" + echo "Common failure reasons:" + echo " - Wrong artifacts server for pre-release artifact (check ELASTIC_ARTIFACT_SERVER for snapshots/pre-releases)" + echo " - Service account permissions missing for ${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com" + echo " - Invalid input values (fleet_url, enrollment_token, etc.)" + echo "" + echo "Useful debugging commands:" + echo " # View deployment status" + echo " gcloud infra-manager deployments describe ${DEPLOYMENT_NAME} --location=${LOCATION}" + echo "" + echo " # Verify service account permissions" + echo " gcloud projects get-iam-policy ${PROJECT_ID} --flatten='bindings[].members' --filter='bindings.members:serviceAccount:${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com' --format='table(bindings.role)'" + echo "" + echo " # View Cloud Build logs" + echo " gsutil cat \$(gcloud infra-manager revisions describe \$(gcloud infra-manager deployments describe ${DEPLOYMENT_NAME} --location=${LOCATION} --format='value(latestRevision)') --location=${LOCATION} --format='value(logs)')/*.txt" + echo "" + echo " # View VM startup script logs" + echo " gcloud compute instances get-serial-port-output elastic-agent-vm-${RESOURCE_SUFFIX} --zone=${EFFECTIVE_ZONE} --project=${PROJECT_ID}" + echo "" + exit 1 +fi + +echo "" +echo "Deployment successful!" 
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/deploy_service_account.sh b/deploy/infrastructure-manager/gcp-elastic-agent/deploy_service_account.sh new file mode 100755 index 0000000000..680e3285b7 --- /dev/null +++ b/deploy/infrastructure-manager/gcp-elastic-agent/deploy_service_account.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set -e + +# Shortcut to gcp-credentials-json/deploy.sh +# Creates a service account with roles and key for Elastic Agent GCP integration. +# +# See ../gcp-credentials-json/README.md for details. + +"$(dirname "$0")/../gcp-credentials-json/deploy.sh" "$@" diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/main.tf b/deploy/infrastructure-manager/gcp-elastic-agent/main.tf new file mode 100644 index 0000000000..ff40738486 --- /dev/null +++ b/deploy/infrastructure-manager/gcp-elastic-agent/main.tf @@ -0,0 +1,74 @@ +terraform { + required_version = ">= 1.0" + required_providers { + google = { + source = "hashicorp/google" + version = "~> 5.0" + } + null = { + source = "hashicorp/null" + version = "~> 3.0" + } + } +} + +provider "google" { + project = var.project_id +} + +locals { + # Use suffix from deploy.sh to ensure all resource names stay within GCP limits and allow multiple deployments + resource_suffix = var.resource_suffix + sa_name = "elastic-agent-sa-${local.resource_suffix}" + sa_email = module.service_account.email + network_name = "elastic-agent-net-${local.resource_suffix}" + instance_name = "elastic-agent-vm-${local.resource_suffix}" +} + +# Resource suffix for all resource names +variable "resource_suffix" { + description = "Unique suffix for resource names (8 hex characters)" + type = string +} + +module "service_account" { + source = "./modules/service_account" + + project_id = var.project_id + service_account_name = local.sa_name + scope = var.scope + parent_id = var.parent_id +} + +module "compute_instance" { + source = "./modules/compute_instance" + + instance_name = local.instance_name + network_name = 
local.network_name + machine_type = var.machine_type + zone = var.zone + sa_email = local.sa_email + elastic_agent_version = var.elastic_agent_version + elastic_artifact_server = var.elastic_artifact_server + fleet_url = var.fleet_url + enrollment_token = var.enrollment_token + + depends_on = [ + module.service_account + ] +} + +module "startup_validation" { + source = "./modules/startup_validation" + + enabled = var.startup_validation_enabled + project_id = var.project_id + instance_name = local.instance_name + instance_id = module.compute_instance.id + zone = var.zone + timeout = var.startup_timeout_seconds + + depends_on = [ + module.compute_instance + ] +} diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/compute_instance/main.tf b/deploy/infrastructure-manager/gcp-elastic-agent/modules/compute_instance/main.tf new file mode 100644 index 0000000000..1ab1f950db --- /dev/null +++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/compute_instance/main.tf @@ -0,0 +1,133 @@ +locals { + # Determine install command based on version + install_command = startswith(var.elastic_agent_version, "9.") ? 
"sudo ./elastic-agent install --non-interactive --install-servers" : "sudo ./elastic-agent install --non-interactive" +} + +# VPC Network +resource "google_compute_network" "elastic_agent" { + name = var.network_name + auto_create_subnetworks = true + routing_mode = "REGIONAL" +} + +# Compute Instance +resource "google_compute_instance" "elastic_agent" { + name = var.instance_name + machine_type = var.machine_type + zone = var.zone + + labels = { + name = "elastic-agent" + } + + boot_disk { + initialize_params { + image = "ubuntu-os-cloud/ubuntu-minimal-2204-lts" + size = 32 + type = "pd-standard" + } + auto_delete = true + } + + network_interface { + network = google_compute_network.elastic_agent.self_link + + access_config { + # Ephemeral public IP + } + } + + service_account { + email = var.sa_email + scopes = [ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/cloudplatformorganizations", + ] + } + + metadata = { + enable-guest-attributes = "TRUE" + startup-script = <<-EOT + #!/bin/bash + set -x # Enable debug output + + # Logging function - logs to both stdout and Cloud Logging + log() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" + logger -t elastic-agent-setup "$1" + } + + # Function to set guest attribute + set_guest_attribute() { + local key=$1 + local value=$2 + curl -X PUT --data "$value" \ + -H "Metadata-Flavor: Google" \ + "http://metadata.google.internal/computeMetadata/v1/instance/guest-attributes/elastic-agent/$key" \ + || log "WARNING: Failed to set guest attribute $key" + } + + # Function to report failure + report_failure() { + local error_msg="$1" + log "ERROR: $error_msg" + set_guest_attribute "startup-status" "failed" + set_guest_attribute "startup-error" "$error_msg" + set_guest_attribute "startup-timestamp" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" + exit 1 + } + + log "Starting Elastic Agent installation" + set_guest_attribute "startup-status" "in-progress" + + # Download Elastic Agent + 
+ ElasticAgentArtifact=elastic-agent-${var.elastic_agent_version}-linux-x86_64 + ARTIFACT_URL="${var.elastic_artifact_server}/$ElasticAgentArtifact.tar.gz" + + log "Downloading Elastic Agent from $ARTIFACT_URL" + if ! curl -f -L -O --connect-timeout 30 --max-time 300 "$ARTIFACT_URL"; then + report_failure "Failed to download Elastic Agent from $ARTIFACT_URL" + fi + log "Download successful" + + # Verify download + if [ ! -f "$ElasticAgentArtifact.tar.gz" ]; then + report_failure "Downloaded file not found: $ElasticAgentArtifact.tar.gz" + fi + + # Extract archive + log "Extracting $ElasticAgentArtifact.tar.gz" + if ! tar xzvf "$ElasticAgentArtifact.tar.gz"; then + report_failure "Failed to extract $ElasticAgentArtifact.tar.gz" + fi + + # Verify extraction + if [ ! -d "$ElasticAgentArtifact" ]; then + report_failure "Extracted directory not found: $ElasticAgentArtifact" + fi + + cd "$ElasticAgentArtifact" || report_failure "Cannot enter extracted directory: $ElasticAgentArtifact" + + # Install Elastic Agent (tracing is switched off inside the subshell so that set -x does not echo the enrollment token into the startup log) + log "Installing Elastic Agent with command: ${local.install_command}" + if ! (set +x; ${local.install_command} --url="${var.fleet_url}" --enrollment-token="${var.enrollment_token}"); then + report_failure "Elastic Agent installation command failed" + fi + + # Verify installation + log "Verifying Elastic Agent installation" + if systemctl is-active --quiet elastic-agent; then + log "SUCCESS: Elastic Agent is running" + set_guest_attribute "startup-status" "success" + set_guest_attribute "startup-timestamp" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" + else + report_failure "Elastic Agent service is not running after installation" + fi + + # Cleanup downloaded files + cd .. 
+ rm -rf "$ElasticAgentArtifact.tar.gz" "$ElasticAgentArtifact" + log "Cleanup completed" + EOT + } +} diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/compute_instance/outputs.tf b/deploy/infrastructure-manager/gcp-elastic-agent/modules/compute_instance/outputs.tf new file mode 100644 index 0000000000..c497d2365d --- /dev/null +++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/compute_instance/outputs.tf @@ -0,0 +1,24 @@ +output "name" { + description = "Name of the compute instance" + value = google_compute_instance.elastic_agent.name +} + +output "id" { + description = "ID of the compute instance" + value = google_compute_instance.elastic_agent.id +} + +output "zone" { + description = "Zone of the compute instance" + value = google_compute_instance.elastic_agent.zone +} + +output "network_name" { + description = "Name of the VPC network" + value = google_compute_network.elastic_agent.name +} + +output "instance_self_link" { + description = "Self-link to the compute instance (check startup status in Cloud Console or Cloud Logging)" + value = google_compute_instance.elastic_agent.self_link +} diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/compute_instance/variables.tf b/deploy/infrastructure-manager/gcp-elastic-agent/modules/compute_instance/variables.tf new file mode 100644 index 0000000000..94dfeb4c2a --- /dev/null +++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/compute_instance/variables.tf @@ -0,0 +1,45 @@ +variable "instance_name" { + description = "Name of the compute instance" + type = string +} + +variable "network_name" { + description = "Name of the VPC network" + type = string +} + +variable "machine_type" { + description = "Machine type for the compute instance" + type = string +} + +variable "zone" { + description = "GCP Zone for the compute instance" + type = string +} + +variable "sa_email" { + description = "Email of the service account to be used by the instance" + type = string +} 
+
+variable "elastic_agent_version" {
+  description = "Elastic Agent Version"
+  type        = string
+}
+
+variable "elastic_artifact_server" {
+  description = "Elastic Artifact Server URL"
+  type        = string
+}
+
+variable "fleet_url" {
+  description = "Elastic Agent Fleet URL"
+  type        = string
+}
+
+variable "enrollment_token" {
+  description = "Elastic Agent Enrollment Token"
+  type        = string
+  sensitive   = true
+}
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/service_account/main.tf b/deploy/infrastructure-manager/gcp-elastic-agent/modules/service_account/main.tf
new file mode 100644
index 0000000000..79545dfcfb
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/service_account/main.tf
@@ -0,0 +1,36 @@
+# Service Account
+resource "google_service_account" "elastic_agent" {
+  account_id   = var.service_account_name
+  display_name = "Elastic agent service account for Cloud Assets"
+  project      = var.project_id
+}
+
+# Project-level IAM bindings
+resource "google_project_iam_member" "cloudasset_viewer" {
+  count   = var.scope == "projects" ? 1 : 0
+  project = var.parent_id
+  role    = "roles/cloudasset.viewer"
+  member  = "serviceAccount:${google_service_account.elastic_agent.email}"
+}
+
+resource "google_project_iam_member" "browser" {
+  count   = var.scope == "projects" ? 1 : 0
+  project = var.parent_id
+  role    = "roles/browser"
+  member  = "serviceAccount:${google_service_account.elastic_agent.email}"
+}
+
+# Organization-level IAM bindings
+resource "google_organization_iam_member" "cloudasset_viewer_org" {
+  count  = var.scope == "organizations" ? 1 : 0
+  org_id = var.parent_id
+  role   = "roles/cloudasset.viewer"
+  member = "serviceAccount:${google_service_account.elastic_agent.email}"
+}
+
+resource "google_organization_iam_member" "browser_org" {
+  count  = var.scope == "organizations" ? 1 : 0
+  org_id = var.parent_id
+  role   = "roles/browser"
+  member = "serviceAccount:${google_service_account.elastic_agent.email}"
+}
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/service_account/outputs.tf b/deploy/infrastructure-manager/gcp-elastic-agent/modules/service_account/outputs.tf
new file mode 100644
index 0000000000..5325696f49
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/service_account/outputs.tf
@@ -0,0 +1,4 @@
+output "email" {
+  description = "Service account email"
+  value       = google_service_account.elastic_agent.email
+}
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/service_account/variables.tf b/deploy/infrastructure-manager/gcp-elastic-agent/modules/service_account/variables.tf
new file mode 100644
index 0000000000..656e3f8fa8
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/service_account/variables.tf
@@ -0,0 +1,27 @@
+variable "project_id" {
+  description = "GCP Project ID"
+  type        = string
+}
+
+variable "service_account_name" {
+  description = "Service account name"
+  type        = string
+}
+
+variable "scope" {
+  description = "Scope for IAM bindings (projects or organizations)"
+  type        = string
+  default     = "projects"
+
+  # main.tf creates IAM bindings only for these two values; any other value
+  # would leave the service account without role bindings, so reject it.
+  validation {
+    condition     = contains(["projects", "organizations"], var.scope)
+    error_message = "The scope must be either \"projects\" or \"organizations\"."
+  }
+}
+
+variable "parent_id" {
+  description = "Parent ID (project ID or organization ID)"
+  type        = string
+}
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/main.tf b/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/main.tf
new file mode 100644
index 0000000000..b3ee582884
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/main.tf
@@ -0,0 +1,20 @@
+# Supplies the access token that is handed to validate_startup.sh below
+data "google_client_config" "default" {}
+
+resource "terraform_data" "validate_startup" {
+  count = var.enabled ? 1 : 0
+
+  provisioner "local-exec" {
+    command = "bash ${path.module}/validate_startup.sh '${var.project_id}' '${var.zone}' '${var.instance_name}' '${var.timeout}'"
+    # The token is passed through the environment, not on the command line
+    environment = {
+      GCP_ACCESS_TOKEN = data.google_client_config.default.access_token
+    }
+  }
+
+  # A change of instance_id replaces this resource, which runs the provisioner
+  # again
+  triggers_replace = {
+    instance_id = var.instance_id
+  }
+}
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/outputs.tf b/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/outputs.tf
new file mode 100644
index 0000000000..09dedc6a41
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/outputs.tf
@@ -0,0 +1,8 @@
+output "validated" {
+  description = "Whether validation was performed and succeeded"
+  value       = var.enabled
+
+  # Hold the output until the validation provisioner has run, so that it is
+  # not resolved ahead of the check it reports on.
+  depends_on = [terraform_data.validate_startup]
+}
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/validate_startup.sh b/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/validate_startup.sh
new file mode 100644
index 0000000000..18385c0327
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/validate_startup.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+#
+# Waits for the Elastic Agent startup script on a Compute Engine instance to
+# finish by polling the instance's guest attributes under "elastic-agent/".
+#
+# Usage: validate_startup.sh PROJECT_ID ZONE INSTANCE_NAME TIMEOUT_SECONDS
+# Environment:
+#   GCP_ACCESS_TOKEN  bearer token sent to the Compute Engine API (sensitive)
+# Exit status:
+#   0  startup-status is "success"
+#   1  startup-status is "failed", the timeout elapsed, or the input is invalid
+set -eu
+set +x # never trace commands: the bearer token would be written to the log
+
+# Token from environment variable (sensitive)
+TOKEN="${GCP_ACCESS_TOKEN:?GCP_ACCESS_TOKEN must be set}"
+
+# Arguments passed from Terraform
+PROJECT_ID="${1:?usage: validate_startup.sh PROJECT_ID ZONE INSTANCE_NAME TIMEOUT_SECONDS}"
+ZONE="${2:?ZONE argument is required}"
+INSTANCE_NAME="${3:?INSTANCE_NAME argument is required}"
+TIMEOUT="${4:?TIMEOUT_SECONDS argument is required}"
+
+# Seconds between polls; also used as the per-request curl time limit
+readonly POLL_INTERVAL=10
+
+case "$TIMEOUT" in
+  *[!0-9]*)
+    echo "TIMEOUT_SECONDS must be a non-negative integer, got '$TIMEOUT'" >&2
+    exit 1
+    ;;
+esac
+
+# Round up and poll at least once, so a timeout shorter than POLL_INTERVAL does
+# not report a timeout without ever querying the instance. 10# forces base 10
+# so a value such as "090" is not parsed as octal.
+MAX_ATTEMPTS=$(((10#$TIMEOUT + POLL_INTERVAL - 1) / POLL_INTERVAL))
+if [ "$MAX_ATTEMPTS" -lt 1 ]; then
+  MAX_ATTEMPTS=1
+fi
+ATTEMPT=0
+
+# Prints the value of guest attribute elastic-agent/<key>, or nothing when the
+# attribute is missing or the request fails.
+get_guest_attribute() {
+  local key=$1
+  local response
+  # --max-time keeps a stalled connection from blocking past the overall timeout
+  response=$(curl -s --max-time "$POLL_INTERVAL" -H "Authorization: Bearer $TOKEN" \
+    "https://compute.googleapis.com/compute/v1/projects/${PROJECT_ID}/zones/${ZONE}/instances/${INSTANCE_NAME}/getGuestAttributes?queryPath=elastic-agent/$key" \
+    2>/dev/null || echo '{}')
+  # jq is not assumed to be installed; extract the first "value" string with sed
+  printf '%s\n' "$response" | sed -n 's/.*"value":[[:space:]]*"\([^"]*\)".*/\1/p' | head -1
+}
+
+echo "Waiting for Elastic Agent startup script to complete..."
+
+while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
+  STATUS=$(get_guest_attribute "startup-status")
+  STATUS=${STATUS:-unknown}
+
+  if [ "$STATUS" = "success" ]; then
+    TIMESTAMP=$(get_guest_attribute "startup-timestamp")
+    echo "✓ Elastic Agent installation successful (completed at $TIMESTAMP)"
+    exit 0
+  elif [ "$STATUS" = "failed" ]; then
+    ERROR=$(get_guest_attribute "startup-error")
+    ERROR=${ERROR:-Unknown error}
+    TIMESTAMP=$(get_guest_attribute "startup-timestamp")
+
+    # Failure details go to stderr
+    echo "✗ Elastic Agent installation failed (at $TIMESTAMP)" >&2
+    echo "Error: $ERROR" >&2
+    echo "" >&2
+    echo "View detailed logs:" >&2
+    echo " gcloud compute instances get-serial-port-output ${INSTANCE_NAME} --zone ${ZONE} --project ${PROJECT_ID}" >&2
+    echo "" >&2
+    echo "STARTUP_VALIDATION_FAILED: $ERROR" >&2
+    exit 1
+  fi
+
+  ATTEMPT=$((ATTEMPT + 1))
+  echo "Attempt $ATTEMPT/$MAX_ATTEMPTS: Status is '$STATUS', waiting..."
+  sleep "$POLL_INTERVAL"
+done
+
+# A timeout is a failure too, so report it on stderr like the "failed" branch
+echo "✗ Timeout waiting for agent installation (${TIMEOUT}s)" >&2
+echo "Check status manually:" >&2
+echo " gcloud compute instances get-guest-attributes ${INSTANCE_NAME} --zone ${ZONE} --project ${PROJECT_ID} --query-path=elastic-agent/" >&2
+exit 1
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/variables.tf b/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/variables.tf
new file mode 100644
index 0000000000..837e2b420f
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/modules/startup_validation/variables.tf
@@ -0,0 +1,29 @@
+variable "enabled" {
+  description = "Enable startup validation"
+  type        = bool
+}
+
+variable "project_id" {
+  description = "GCP project ID"
+  type        = string
+}
+
+variable "instance_name" {
+  description = "Name of the instance to validate"
+  type        = string
+}
+
+variable "instance_id" {
+  description = "ID of the instance (for triggering re-validation)"
+  type        = string
+}
+
+variable "zone" {
+  description = "Zone of the instance"
+  type        = string
+}
+
+variable "timeout" {
+  description = "Validation timeout in seconds"
+  type        = number
+}
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/outputs.tf b/deploy/infrastructure-manager/gcp-elastic-agent/outputs.tf
new file mode 100644
index 0000000000..25ffe07752
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/outputs.tf
@@ -0,0 +1,29 @@
+output "instance_name" {
+  description = "Name of the compute instance"
+  value       = module.compute_instance.name
+}
+
+output "instance_id" {
+  description = "ID of the compute instance"
+  value       = module.compute_instance.id
+}
+
+output "instance_zone" {
+  description = "Zone of the compute instance"
+  value       = module.compute_instance.zone
+}
+
+output "network_name" {
+  description = "Name of the VPC network"
+  value       = module.compute_instance.network_name
+}
+
+output "service_account_email" {
+  description = "Email of the service account used by the instance"
+  value       = local.sa_email
+}
+
+output "startup_validation_enabled" {
+  description = "Whether startup script validation was enabled"
+  value       = var.startup_validation_enabled
+}
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/setup.sh b/deploy/infrastructure-manager/gcp-elastic-agent/setup.sh
new file mode 100755
index 0000000000..d87e90f1e7
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/setup.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+#
+# Prepares a GCP project for Infrastructure Manager deployments: enables the
+# required APIs, creates the deployment service account when it does not exist,
+# and grants it the roles listed in REQUIRED_ROLES.
+#
+# Usage: setup.sh PROJECT_ID SERVICE_ACCOUNT
+#   PROJECT_ID       ID of the project to prepare
+#   SERVICE_ACCOUNT  account ID (the part before "@") of the deployment account
+set -eu
+
+# Accept parameters; fail early instead of building an address like
+# "@.iam.gserviceaccount.com" from missing arguments
+PROJECT_ID="${1:?usage: setup.sh PROJECT_ID SERVICE_ACCOUNT}"
+SERVICE_ACCOUNT="${2:?usage: setup.sh PROJECT_ID SERVICE_ACCOUNT}"
+SERVICE_ACCOUNT_EMAIL="${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com"
+
+REQUIRED_APIS=(
+  iam.googleapis.com
+  config.googleapis.com
+  compute.googleapis.com
+  cloudresourcemanager.googleapis.com
+  cloudasset.googleapis.com
+)
+
+REQUIRED_ROLES=(
+  roles/compute.admin
+  roles/iam.serviceAccountAdmin
+  roles/iam.serviceAccountUser
+  roles/resourcemanager.projectIamAdmin
+  roles/config.admin
+  roles/storage.admin
+)
+
+echo "Setting up GCP Infrastructure Manager prerequisites..."
+
+# Every gcloud call names the project explicitly, so all steps act on PROJECT_ID
+# even when the active gcloud configuration points at a different project.
+
+# Enable APIs
+gcloud services enable "${REQUIRED_APIS[@]}" --project="${PROJECT_ID}" --quiet
+
+# Create service account if it doesn't exist
+if ! gcloud iam service-accounts describe "${SERVICE_ACCOUNT_EMAIL}" \
+  --project="${PROJECT_ID}" >/dev/null 2>&1; then
+  gcloud iam service-accounts create "${SERVICE_ACCOUNT}" \
+    --project="${PROJECT_ID}" \
+    --display-name="Infra Manager Deployment Account" --quiet
+fi
+
+# Grant permissions
+for role in "${REQUIRED_ROLES[@]}"; do
+  gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
+    --member="serviceAccount:${SERVICE_ACCOUNT_EMAIL}" \
+    --role="${role}" --condition=None --quiet >/dev/null
+done
+
+echo "✓ Setup complete"
diff --git a/deploy/infrastructure-manager/gcp-elastic-agent/variables.tf b/deploy/infrastructure-manager/gcp-elastic-agent/variables.tf
new file mode 100644
index 0000000000..04097d0c58
--- /dev/null
+++ b/deploy/infrastructure-manager/gcp-elastic-agent/variables.tf
@@ -0,0 +1,67 @@
+variable "project_id" {
+  description = "GCP Project ID"
+  type        = string
+}
+
+variable "zone" {
+  description = "GCP Zone for the compute instance"
+  type        = string
+  default     = "us-central1-a"
+}
+
+variable "fleet_url" {
+  description = "Elastic Agent Fleet URL"
+  type        = string
+}
+
+variable "enrollment_token" {
+  description = "Elastic Agent Enrollment Token"
+  type        = string
+  sensitive   = true
+}
+
+variable "elastic_agent_version" {
+  description = "Elastic Agent Version (e.g., 8.8.0 or 8.8.0-SNAPSHOT)"
+  type        = string
+}
+
+variable "elastic_artifact_server" {
+  description = "Elastic Artifact Server URL"
+  type        = string
+  default     = "https://artifacts.elastic.co/downloads/beats/elastic-agent"
+}
+
+variable "scope" {
+  description = "Scope for IAM bindings (projects or organizations)"
+  type        = string
+  default     = "projects"
+
+  # Reject values other than the two documented in the description
+  validation {
+    condition     = contains(["projects", "organizations"], var.scope)
+    error_message = "The scope must be either \"projects\" or \"organizations\"."
+  }
+}
+
+variable "parent_id" {
+  description = "Parent ID (project ID or organization ID)"
+  type        = string
+}
+
+variable "machine_type" {
+  description = "Machine type for the compute instance"
+  type        = string
+  default     = "n2-standard-4"
+}
+
+variable "startup_validation_enabled" {
+  description = "Enable validation of startup script completion via guest attributes"
+  type        = bool
+  default     = true
+}
+
+variable "startup_timeout_seconds" {
+  description = "Maximum time to wait for startup script to complete (seconds)"
+  type        = number
+  default     = 300
+}