diff --git a/.gitignore b/.gitignore
index a17f406..341a9e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -115,3 +115,6 @@ relmons/*
 
 # pid file
 *.pid
+
+# Docker deployment
+deploy/mongo/data
diff --git a/deploy/README.md b/deploy/README.md
new file mode 100644
index 0000000..36c1fe9
--- /dev/null
+++ b/deploy/README.md
@@ -0,0 +1,37 @@
+# Deploy a development version
+
+> [!IMPORTANT]
+> There are several prerequisites you must fulfill before deploying a development version of this tool:
+
+- Use a development runtime environment reachable from inside the CERN general purpose network, as the HTCondor jobs need to be able to send callback signals.
+- Have a CERN account with access to the [lxplus](https://lxplusdoc.web.cern.ch/) service or to a VM node located inside the CERN general purpose network.
+- Have an X.509 certificate for authenticating to the CMS Web Services configured in your CERN account. If you need to request one, please follow these guidelines: [Personal certificates from CERN](https://twiki.cern.ch/twiki/bin/view/CMSPublic/PersonalCertificate) - [Basic requirements for using the Grid](https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookStartingGrid#BasicGrid).
+- Have `docker` and `docker compose` available in your development environment. If you do not have them already, you can install them by following the guidelines at [Install Docker Engine](https://docs.docker.com/engine/install/).
+- Have `python3.11` installed in your development environment. If you do not have it already, you can install it via [pyenv](https://github.com/pyenv/pyenv).
+- Have `node` installed in your development environment. It is recommended to use the latest LTS version; nevertheless, any version `node > 16` should work. If you do not have it already, you can install it via [nvm](https://github.com/nvm-sh/nvm).
+- Have your own copy of this repository: fork it.
+
+After you fulfill these prerequisites, follow the next steps:
+
+1. Clone the forked repository you created before.
+2. Edit the file `deploy/env.sh` and complete the `TODO` items available there.
+3. Source the environment variables via `source deploy/env.sh`.
+4. Edit the file `deploy/nginx/conf.d/proxy.conf` and complete the `TODO` items available there.
+5. Deploy the application's components via `docker compose -f deploy/compose.yml up -d`.
+
+At this stage, you have deployed the database and a reverse proxy to forward and authenticate requests. Next, let's deploy the RelMonService2 application.
+
+6. Create a virtual environment, activate it, and install the required dependencies: `python3.11 -m venv venv && source ./venv/bin/activate && pip install -r requirements.txt`
+7. Install `node` dependencies: `cd frontend/ && npm i && cd ..`
+
+To conclude, deploy the RelMon Report page. This web server will be used to render the generated reports available via SQLite files. To ease this step, deploy an Apache server via the [CERN Web Services](https://webservices-portal.web.cern.ch/) portal using a WebEOS site.
+
+8. Set the category of this site as `Test`.
+9. Create a folder inside the `/eos` personal filesystem for the `SERVICE_ACCOUNT_USERNAME` account. This will be the path you will set in the `WEB_LOCATION_PATH` variable.
+10. Complete the required steps to share the folder with the web server - [Details](https://cernbox.docs.cern.ch/advanced/web-pages/personal_website_content/).
+11. Enable `.htaccess` files for the web server.
+12. Copy all the content available in the folder `report_website/` to `${WEB_LOCATION_PATH}/`. Do not forget to copy the `.htaccess` file too.
+
+With these steps, the web application should be deployed successfully and you are ready to continue with your development tasks!
+
+13. Start the development application via `./relmonsvc.sh dev`. Press `Ctrl+C` to stop the services.
diff --git a/deploy/compose.yml b/deploy/compose.yml
new file mode 100644
index 0000000..e575bd9
--- /dev/null
+++ b/deploy/compose.yml
@@ -0,0 +1,22 @@
+name: relmonservice2
+
+services:
+  nginx:
+    image: nginx:1.26.3-alpine3.20-perl@sha256:ecf827c698f16db476cdf5ebd094881b7fde88514a1cbf0d11780bdd8120041f
+    volumes:
+      - "${PWD}/deploy/nginx/conf.d:/etc/nginx/conf.d:ro"
+      - "${PWD}/report_website:/var/www/html:ro"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    ports:
+      - "10000:10000"
+  mongodb:
+    image: mongodb/mongodb-community-server:7.0.7-ubi9@sha256:0ab7391ec61a618ff2d9b999146812e0c1c29631cdc86bc096c60c77ba9e2cfb
+    user: "${UID}:${GID}"
+    environment:
+      - "MONGODB_INITDB_ROOT_USERNAME=${MONGO_DB_USER}"
+      - "MONGODB_INITDB_ROOT_PASSWORD=${MONGO_DB_PASSWORD}"
+    volumes:
+      - "${PWD}/deploy/mongo/data:/data/db"
+    ports:
+      - "${MONGO_DB_PORT}:27017"
diff --git a/deploy/env.sh b/deploy/env.sh
new file mode 100644
index 0000000..c00792e
--- /dev/null
+++ b/deploy/env.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# This script sets the required environment variables for
+# deploying the application. Source its content!
+
+# RelMonService2 endpoints
+export CALLBACK_URL="http://$(hostname):10000/relmonservice/api/update"
+export SERVICE_URL="http://$(hostname):10000/relmonservice"
+
+# RelMon Reports page
+# TODO: Set the web server URL related to the WebEOS site
+# deployed via CERN Web Services. This URL will be something like
+# e.g.: relmon-mytest.web.cern.ch
+export REPORTS_URL=""
+
+# Submission host to access HTCondor
+# In case you have issues opening the SSH session, change
+# this property to a specific node inside the lxplus8 pool.
+export SUBMISSION_HOST="lxplus8.cern.ch"
+
+# Default CMSSW release for picking the RelMon generation module.
+export CMSSW_RELEASE='CMSSW_14_1_0_pre7'
+
+# The following directory is relative to $HOME for the
+# user used in the $SUBMISSION_HOST. Make sure the directory exists!
+# e.g.: export REMOTE_DIRECTORY="test/relmonservice/jobs/"
+# TODO: Create the folder and assign the relative path.
+export REMOTE_DIRECTORY=""
+
+# Service account to access the submission host
+# TODO: Set the username for opening the SSH session.
+export SERVICE_ACCOUNT_USERNAME=""
+
+# Service account password, provide the real one only if
+# your runtime environment does not have Kerberos enabled.
+export SERVICE_ACCOUNT_PASSWORD="NotRequiredToSet"
+
+# Absolute path to a folder to store the reports.
+# This is the folder you will link on the WebEOS site for rendering the
+# reports.
+# This folder must be reachable from the submission host and the HTCondor
+# pool running the job so it can copy the file directly.
+# e.g.: export WEB_LOCATION_PATH="/eos/user/u/user/test/relmonservice/reports"
+#
+# TODO: Set the WebEOS site path
+export WEB_LOCATION_PATH=""
+
+# MongoDB configuration
+export MONGO_DB_HOST="127.0.0.1"
+
+# TODO: Set the MongoDB port to be exposed on the host!
+export MONGO_DB_PORT=""
+
+# TODO: Set the MongoDB administrator user
+export MONGO_DB_USER=""
+
+# TODO: Set its password
+export MONGO_DB_PASSWORD=""
+
+# Flask configuration for session cookies
+export SECRET_KEY="$(openssl rand -base64 64 | sed 's#/#!#g')"
+
+# The following is used to perform callbacks. Set real values
+# in case an authentication middleware is set for your
+# development deployment.
+
+# Related to the RelMonService2 deployment.
+export CLIENT_ID="NotRequiredToSet"
+
+# Credentials for requesting a token.
+export CALLBACK_CLIENT_ID="NotRequiredToSet"
+export CALLBACK_CLIENT_SECRET="NotRequiredToSet"
+export DISABLE_CALLBACK_CREDENTIALS="True"
+
+# The following is required by Docker Compose to properly
+# match the running host user so that access to host
+# volumes is granted properly.
+export UID=$(id -u)
+export GID=$(id -g)
diff --git a/deploy/mongo/data/.keep b/deploy/mongo/data/.keep
new file mode 100644
index 0000000..047b4dd
--- /dev/null
+++ b/deploy/mongo/data/.keep
@@ -0,0 +1,3 @@
+// This is just a dummy file to instruct Git to include
+// this folder. The purpose of this subfolder is to store the
+// data of your MongoDB deployment.
\ No newline at end of file
diff --git a/deploy/nginx/conf.d/proxy.conf b/deploy/nginx/conf.d/proxy.conf
new file mode 100644
index 0000000..49c6bd8
--- /dev/null
+++ b/deploy/nginx/conf.d/proxy.conf
@@ -0,0 +1,21 @@
+server {
+    listen 10000;
+    root /var/www/html;
+
+    # RelMonService 2
+    location /relmonservice {
+        # Authentication headers
+        proxy_set_header Adfs-Group "cms-pdmv-serv";
+        proxy_set_header Adfs-Login ""; # TODO: Set your username
+        proxy_set_header Adfs-Fullname ""; # TODO: Set your full name
+        proxy_set_header Adfs-Firstname ""; # TODO: Set your first name
+        proxy_set_header Adfs-Lastname ""; # TODO: Set your last name
+        proxy_set_header Adfs-Email ""; # TODO: Set your email address
+
+        # The following is a reference to the localhost scope
+        # on the host machine.
+        proxy_pass http://host.docker.internal:8000/;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+    }
+}
diff --git a/environment.py b/environment.py
index 2d74abc..c930bb0 100644
--- a/environment.py
+++ b/environment.py
@@ -44,6 +44,16 @@
     CALLBACK_CLIENT_ID (str): Client ID for CLI integration application.
     CALLBACK_CLIENT_SECRET (str): Client secret for CLI integration application.
     CMSSW_RELEASE (str): cms-sw version to use for generating the monitoring report.
+    HTCONDOR_CAF_POOL (bool): If this environment variable is provided,
+        RelMon batch jobs will be configured to run inside the dedicated CMS CAF pool.
+        Otherwise, they will run in the public shared pool.
+    FILE_CREATOR_GIT_SOURCE (str): RelMonService2 source code to load inside the
+        HTCondor batch jobs.
+    FILE_CREATOR_GIT_BRANCH (str): Branch to use from `FILE_CREATOR_GIT_SOURCE`.
+    _CMSSW_CUSTOM_REPO (str): Optional setting that allows users to
+        compile the RelMon module from a custom source instead of using `cmssw` releases.
+    _CMSSW_CUSTOM_BRANCH (str): If `_CMSSW_CUSTOM_REPO` is set, this is the branch
+        to take the code from.
""" import os import inspect @@ -78,12 +88,27 @@ CLIENT_ID: str = os.getenv("CLIENT_ID", "") CALLBACK_CLIENT_ID: str = os.getenv("CALLBACK_CLIENT_ID", "") CALLBACK_CLIENT_SECRET: str = os.getenv("CALLBACK_CLIENT_SECRET", "") +DISABLE_CALLBACK_CREDENTIALS = bool(os.getenv("DISABLE_CALLBACK_CREDENTIALS")) + +# HTCondor submission pool +HTCONDOR_CAF_POOL = bool(os.getenv("HTCONDOR_CAF_POOL")) +HTCONDOR_MODULE = "lxbatch/tzero" if HTCONDOR_CAF_POOL else "lxbatch/share" + +# Repository source for the remote execution in HTCondor. +FILE_CREATOR_GIT_SOURCE: str = os.getenv("FILE_CREATOR_GIT_SOURCE", "https://github.com/cms-PdmV/relmonservice2.git") +FILE_CREATOR_GIT_BRANCH: str = os.getenv("FILE_CREATOR_GIT_BRANCH", "master") + +# Custom `cmssw` sources for development +# Use this to override the RelMon module from a custom source +# instead of using a release +_CMSSW_CUSTOM_REPO: str = os.getenv("CMSSW_CUSTOM_REPO", "") +_CMSSW_CUSTOM_BRANCH: str = os.getenv("CMSSW_CUSTOM_BRANCH", "") # Check that all environment variables are provided missing_environment_variables: dict[str, str] = { k: v for k, v in globals().items() - if not k.startswith("__") + if not k.startswith("_") and not inspect.ismodule(v) and not isinstance(v, bool) and not v diff --git a/local/controller.py b/local/controller.py index 7b4b637..da4ac92 100644 --- a/local/controller.py +++ b/local/controller.py @@ -11,14 +11,18 @@ import json from multiprocessing import Manager from mongodb_database import Database -from local.ssh_executor import SSHExecutor +from core_lib.utils.ssh_executor import SSHExecutor from local.relmon import RelMon from local.file_creator import FileCreator from local.email_sender import EmailSender from environment import ( + SUBMISSION_HOST, + SERVICE_ACCOUNT_USERNAME, + SERVICE_ACCOUNT_PASSWORD, SERVICE_URL, REPORTS_URL, REMOTE_DIRECTORY, + HTCONDOR_MODULE ) @@ -54,7 +58,11 @@ def set_config(self): if self.remote_directory[-1] == "/": self.remote_directory = self.remote_directory[:-1] - self.ssh_executor = SSHExecutor() + self.ssh_executor = SSHExecutor( + host=SUBMISSION_HOST, + username=SERVICE_ACCOUNT_USERNAME, + password=SERVICE_ACCOUNT_PASSWORD + ) self.file_creator = FileCreator() self.email_sender = EmailSender() self.service_url = SERVICE_URL @@ -183,8 +191,7 @@ def rename_relmon_reports(self, relmon_id, new_name): """ Rename relmon reports file """ - ssh_executor = SSHExecutor() - ssh_executor.execute_command( + self.ssh_executor.execute_command( [ "cd %s" % (self.file_creator.web_location), "EXISTING_REPORT=$(ls -1 %s*.sqlite | head -n 1)" % (relmon_id), @@ -327,12 +334,12 @@ def __submit_to_condor(self, relmon, database): # Run condor_submit # Submission happens through lxplus as condor is not available on website machine # It is easier to ssh to lxplus than set up condor locally - stdout, stderr = self.ssh_executor.execute_command( + stdout, stderr, _ = self.ssh_executor.execute_command( [ "cd %s" % (remote_relmon_directory), "voms-proxy-init -voms cms --valid 24:00 --out $(pwd)/proxy.txt", - "module load lxbatch/tzero && condor_submit RELMON_%s.sub" - % (relmon_id), + "module load %s && condor_submit RELMON_%s.sub" + % (HTCONDOR_MODULE, relmon_id), ] ) # Parse result of condor_submit @@ -369,9 +376,9 @@ def __check_if_running(self, relmon, database): self.logger.info( "Will check if %s is running in HTCondor, id: %s", relmon, relmon_condor_id ) - stdout, stderr = self.ssh_executor.execute_command( - "module load lxbatch/tzero && condor_q -af:h ClusterId JobStatus | " - "grep %s" % 
+        stdout, stderr, _ = self.ssh_executor.execute_command(
+            "module load %s && condor_q -af:h ClusterId JobStatus | "
+            "grep %s" % (HTCONDOR_MODULE, relmon_condor_id)
         )
         new_condor_status = ""
         if stdout and not stderr:
@@ -506,7 +513,7 @@
         condor_id = relmon.get_condor_id()
         if condor_id > 0:
             self.ssh_executor.execute_command(
-                "module load lxbatch/tzero && condor_rm %s" % (condor_id)
+                "module load %s && condor_rm %s" % (HTCONDOR_MODULE, condor_id)
             )
         else:
             self.logger.info(
diff --git a/local/file_creator.py b/local/file_creator.py
index 54830c4..16b9c7a 100644
--- a/local/file_creator.py
+++ b/local/file_creator.py
@@ -10,7 +10,13 @@
     CALLBACK_CLIENT_ID,
     CALLBACK_CLIENT_SECRET,
     CLIENT_ID,
-    CMSSW_RELEASE
+    CMSSW_RELEASE,
+    HTCONDOR_CAF_POOL,
+    DISABLE_CALLBACK_CREDENTIALS,
+    FILE_CREATOR_GIT_SOURCE,
+    FILE_CREATOR_GIT_BRANCH,
+    _CMSSW_CUSTOM_REPO,
+    _CMSSW_CUSTOM_BRANCH
 )
 
 
@@ -28,6 +34,26 @@ def __init__(self):
         self.cookie_url = SERVICE_URL
         self.callback_url = CALLBACK_URL
 
+    def load_custom_cmssw(self):
+        """Include some bash instructions to use a custom cms-sw source."""
+        if not (_CMSSW_CUSTOM_REPO and _CMSSW_CUSTOM_BRANCH):
+            return []
+
+        return [
+            'echo "Using a custom cms-sw source - Source: %s - Branch: %s"' % (_CMSSW_CUSTOM_REPO, _CMSSW_CUSTOM_BRANCH),
+            'git clone --no-checkout --sparse --filter=blob:none --branch "%s" --depth 1 "%s" cmssw' % (_CMSSW_CUSTOM_BRANCH, _CMSSW_CUSTOM_REPO),
+            'cd cmssw/',
+            'git sparse-checkout add Utilities/RelMon',
+            'git checkout',
+            'echo "Latest commit available: $(git rev-parse HEAD)"',
+            'cd ..',
+            # Pull the desired module
+            'mv ./cmssw/Utilities .',
+            'rm -rf ./cmssw',
+            # Recompile
+            "scram b -j 4",
+        ]
+
     def create_job_script_file(self, relmon):
         """
         Create bash executable for condor
@@ -42,6 +68,11 @@
             relmon_id,
             relmon_name,
         )
+        callback_credentials = (
+            "--callback-credentials"
+            if not DISABLE_CALLBACK_CREDENTIALS
+            else ""
+        )
         script_file_content = [
             "#!/bin/bash",
             "DIR=$(pwd)",
@@ -50,7 +81,7 @@
             'echo "Python version: $(python3 -V)"',
             'echo "CMSSW release to use: $RELMON_CMSSW_RELEASE"',
             # Clone the relmon service
-            "git clone https://github.com/cms-PdmV/relmonservice2.git",
+            "git clone --branch %s %s relmonservice2" % (FILE_CREATOR_GIT_BRANCH, FILE_CREATOR_GIT_SOURCE),
             # Fallback for github hiccups
             "if [ ! -d relmonservice2 ]; then",
             "    wget https://github.com/cms-PdmV/RelmonService2/archive/master.zip",
@@ -64,13 +95,21 @@
             # Open scope for CMSSW
             "(",
             "eval `scramv1 runtime -sh`",
+        ]
+
+        # Check if a custom cms-sw source is requested to be loaded
+        custom_cmssw_content = self.load_custom_cmssw()
+        if custom_cmssw_content:
+            script_file_content += custom_cmssw_content
+
+        script_file_content += [
             "cd ../..",
             # Create reports directory
             "mkdir -p Reports",
             # Run the remote apparatus
             "python3 relmonservice2/remote/remote_apparatus.py "  # No newline
-            "-r RELMON_%s.json -p proxy.txt --cpus %s --callback %s"
-            % (relmon_id, cpus, self.callback_url),
+            "-r RELMON_%s.json -p proxy.txt --cpus %s --callback %s %s"
+            % (relmon_id, cpus, self.callback_url, callback_credentials),
             # Close scope for CMSSW
             ")",
             "cd $DIR",
@@ -116,8 +155,8 @@
             "cd $DIR",
             "cp cookie.txt relmonservice2/remote",
             "python3 relmonservice2/remote/remote_apparatus.py "  # No newlines here
-            "-r RELMON_%s.json --callback %s --notifydone"
-            % (relmon_id, self.callback_url),
+            "-r RELMON_%s.json --callback %s --notifydone %s"
+            % (relmon_id, self.callback_url, callback_credentials),
         ]
 
         script_file_content_string = "\n".join(script_file_content)
@@ -151,6 +190,7 @@
             f"APPLICATION_CLIENT_ID={CLIENT_ID}"
         )
         credentials_env_arg = f'"{credentials_env}"'
+        accounting_group = "group_u_CMS.CAF.PHYS" if HTCONDOR_CAF_POOL else "group_u_CMS.u_zh.users"
         condor_file_content = [
             "executable = RELMON_%s.sh" % (relmon_id),
             "environment = %s" % (credentials_env_arg),
@@ -168,7 +208,7 @@
             # Leave in queue when status is DONE for two hours - 7200 seconds
             "leave_in_queue = JobStatus == 4 && (CompletionDate =?= UNDEFINED"
             " || ((CurrentTime - CompletionDate) < 7200))",
-            '+AccountingGroup = "group_u_CMS.CAF.PHYS"',
+            '+AccountingGroup = "%s"' % (accounting_group),
             "queue",
         ]
 
diff --git a/local/ssh_executor.py b/local/ssh_executor.py
deleted file mode 100644
index be77b19..0000000
--- a/local/ssh_executor.py
+++ /dev/null
@@ -1,152 +0,0 @@
-"""
-Module that handles all SSH operations - both ssh and ftp
-"""
-import logging
-import time
-import paramiko
-from environment import (
-    SERVICE_ACCOUNT_PASSWORD,
-    SERVICE_ACCOUNT_USERNAME,
-    SUBMISSION_HOST,
-)
-
-
-class SSHExecutor:
-    """
-    SSH executor allows to perform remote commands and upload/download files
-    """
-
-    def __init__(self):
-        self.ssh_client = None
-        self.ftp_client = None
-        self.logger = logging.getLogger("logger")
-        self.remote_host = SUBMISSION_HOST
-        self.credentials = {
-            "username": SERVICE_ACCOUNT_USERNAME,
-            "password": SERVICE_ACCOUNT_PASSWORD,
-        }
-
-    def setup_ssh(self):
-        """
-        Initiate SSH connection and save it as self.ssh_client
-        """
-        self.logger.info("Will set up ssh")
-        if self.ssh_client:
-            self.close_connections()
-
-        self.logger.info(
-            "Credentials loaded successfully: %s", self.credentials["username"]
-        )
-        self.ssh_client = paramiko.SSHClient()
-        self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-        self.ssh_client.connect(
-            self.remote_host,
-            username=self.credentials["username"],
-            password=self.credentials["password"],
-            timeout=30,
-        )
-        self.logger.info("Done setting up ssh")
-
-    def setup_ftp(self):
-        """
-        Initiate SFTP connection and save it as self.ftp_client
-        If needed, SSH connection will be automatically set up
-        """
-        self.logger.info("Will set up ftp")
-        if self.ftp_client:
-            self.close_connections()
-
-        if not self.ssh_client:
-            self.setup_ssh()
-
-        self.ftp_client = self.ssh_client.open_sftp()
-        self.logger.info("Done setting up ftp")
-
-    def execute_command(self, command):
-        """
-        Execute command over SSH
-        """
-        if not self.ssh_client:
-            self.setup_ssh()
-
-        if isinstance(command, list):
-            command = "; ".join(command)
-
-        self.logger.info("Executing %s", command)
-        (_, stdout, stderr) = self.ssh_client.exec_command(command)
-        self.logger.info("Executed %s. Reading response", command)
-        # Close channel after minute of waiting for EOF
-        # This timeouts and closes channel if nothing was received
-        stdout_timeout = time.time() + 60
-        while not stdout.channel.eof_received:
-            time.sleep(1)
-            if time.time() > stdout_timeout:
-                stdout.channel.close()
-                break
-
-        stdout = stdout.read().decode("utf-8").strip()
-        # Same thing for stderr
-        stderr_timeout = time.time() + 60
-        while not stderr.channel.eof_received:
-            time.sleep(1)
-            if time.time() > stderr_timeout:
-                stderr.channel.close()
-                break
-
-        stderr = stderr.read().decode("utf-8").strip()
-        # Read output from stdout and stderr streams
-        if stdout:
-            self.logger.info("STDOUT (%s): %s", command, stdout)
-
-        if stderr:
-            self.logger.error("STDERR (%s): %s", command, stderr)
-
-        return stdout, stderr
-
-    def upload_file(self, copy_from, copy_to):
-        """
-        Upload a file
-        """
-        self.logger.info("Will upload file %s to %s", copy_from, copy_to)
-        if not self.ftp_client:
-            self.setup_ftp()
-
-        try:
-            self.ftp_client.put(copy_from, copy_to)
-            self.logger.info("Uploaded file to %s", copy_to)
-        except Exception as ex:
-            self.logger.error(
-                "Error uploading file from %s to %s. %s", copy_from, copy_to, ex
-            )
-
-    def download_file(self, copy_from, copy_to):
-        """
-        Download file from remote host
-        """
-        self.logger.info("Will download file %s to %s", copy_from, copy_to)
-        if not self.ftp_client:
-            self.setup_ftp()
-
-        try:
-            self.ftp_client.get(copy_from, copy_to)
-            self.logger.info("Downloaded file to %s", copy_to)
-        except Exception as ex:
-            self.logger.error(
-                "Error downloading file from %s to %s. %s", copy_from, copy_to, ex
%s", copy_from, copy_to, ex - ) - - def close_connections(self): - """ - Close any active connections - """ - if self.ftp_client: - self.logger.info("Closing ftp client") - self.ftp_client.close() - self.ftp_client = None - self.logger.info("Closed ftp client") - - if self.ssh_client: - self.logger.info("Closing ssh client") - self.ssh_client.close() - self.ssh_client = None - self.logger.info("Closed ssh client") diff --git a/remote/remote_apparatus.py b/remote/remote_apparatus.py index 7823ce2..108ae1c 100644 --- a/remote/remote_apparatus.py +++ b/remote/remote_apparatus.py @@ -165,7 +165,7 @@ def get_access_token(credentials): return header -def notify(relmon, callback_url): +def notify(relmon, callback_url, callback_credentials): """ Send a notification about progress back to RelMon service """ @@ -173,9 +173,6 @@ def notify(relmon, callback_url): with open("notify_data.json", "w") as json_file: json.dump(relmon, json_file, indent=2, sort_keys=True) - credentials = get_client_credentials() - access_token = get_access_token(credentials) - command = [ "curl", "-X", @@ -190,9 +187,15 @@ def notify(relmon, callback_url): "@notify_data.json", "-H", "'Content-Type: application/json'", - "-H", - "'Authorization: %s'" % access_token, ] + if callback_credentials: + credentials = get_client_credentials() + access_token = get_access_token(credentials) + command += [ + "-H", + "'Authorization: %s'" % access_token, + ] + command = " ".join(command) logging.info("Notifying...") proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) @@ -204,7 +207,7 @@ def notify(relmon, callback_url): time.sleep(0.05) -def download_root_files(relmon, cmsweb, callback_url): +def download_root_files(relmon, cmsweb, callback_url, callback_credentials): """ Download all files needed for comparison and fill relmon dictionary """ @@ -227,7 +230,7 @@ def download_root_files(relmon, cmsweb, callback_url): workflow = cmsweb.get_workflow(item["name"]) if not workflow: item["status"] = "no_workflow" - notify(relmon, callback_url) + notify(relmon, callback_url, callback_credentials) logging.warning( "Could not find workflow %s in ReqMgr2", item["name"] ) @@ -236,7 +239,7 @@ def download_root_files(relmon, cmsweb, callback_url): dqmio_dataset = get_dqmio_dataset(workflow) if not dqmio_dataset: item["status"] = "no_dqmio" - notify(relmon, callback_url) + notify(relmon, callback_url, callback_credentials) logging.warning( "Could not find DQMIO dataset in %s. 
                 item["name"],
@@ -249,7 +252,7 @@
         )
         if not file_urls:
             item["status"] = "no_root"
-            notify(relmon, callback_url)
+            notify(relmon, callback_url, callback_credentials)
             logging.warning(
                 "Could not get root file path for %s dataset of %s workflow",
                 dqmio_dataset,
@@ -266,7 +269,7 @@
         item["status"] = "downloading"
         item["file_name"] = item["file_url"].split("/")[-1]
         item["events"] = 0
-        notify(relmon, callback_url)
+        notify(relmon, callback_url, callback_credentials)
         try:
             item["file_name"] = cmsweb.get_big_file(item["file_url"])
             item["status"] = "downloaded"
@@ -283,7 +286,7 @@
             logging.error("Error getting %s for %s", item["file_url"], item["name"])
             item["status"] = "failed"
 
-    notify(relmon, callback_url)
+    notify(relmon, callback_url, callback_credentials)
 
 
 def get_local_subreport_path(category_name, hlt):
@@ -564,7 +567,7 @@ def compare_compress_move(
     proc.communicate()
 
 
-def run_validation_matrix(relmon, cpus, callback_url):
+def run_validation_matrix(relmon, cpus, callback_url, callback_credentials):
     """
     Iterate through categories and start comparison process
     """
@@ -588,7 +591,7 @@
         reference_list, target_list = get_dataset_lists(category)
         if reference_list and target_list:
             category["status"] = "comparing"
-            notify(relmon, callback_url)
+            notify(relmon, callback_url, callback_credentials)
             # Run Generator without HLT
             # Do not run Generator with HLT
             if hlt in ("only", "both") and category_name.lower() != "generator":
@@ -611,7 +614,7 @@
             )
 
         category["status"] = "done"
-        notify(relmon, callback_url)
+        notify(relmon, callback_url, callback_credentials)
 
 
 def main():
@@ -637,6 +640,11 @@
     parser.add_argument(
         "--notifydone", action="store_true", help="Just notify that job is completed"
     )
+    parser.add_argument(
+        "--callback-credentials",
+        action="store_true",
+        help="Request and send OAuth tokens to authenticate the callback"
+    )
     args = vars(parser.parse_args())
 
     logging.basicConfig(
@@ -652,6 +660,7 @@
     cpus = args.get("cpus", 1)
     callback_url = args.get("callback")
    notify_done = bool(args.get("notifydone"))
+    callback_credentials = bool(args.get("callback_credentials"))
     logging.info(
         "Arguments: %s; cert %s; key %s; proxy: %s; cpus %s; callback %s; notify %s",
         relmon_filename,
@@ -679,9 +688,9 @@
 
             cmsweb = CMSWebWrapper(cert_file, key_file)
             relmon["status"] = "running"
-            notify(relmon, callback_url)
-            download_root_files(relmon, cmsweb, callback_url)
-            run_validation_matrix(relmon, cpus, callback_url)
+            notify(relmon, callback_url, callback_credentials)
+            download_root_files(relmon, cmsweb, callback_url, callback_credentials)
+            run_validation_matrix(relmon, cpus, callback_url, callback_credentials)
             relmon["status"] = "finishing"
     except Exception as ex:
         logging.error(ex)
@@ -691,7 +700,7 @@
     with open(relmon_filename, "w") as relmon_file:
         json.dump(relmon, relmon_file, indent=2, sort_keys=True)
 
-    notify(relmon, callback_url)
+    notify(relmon, callback_url, callback_credentials)
 
 
 if __name__ == "__main__":
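
For reference, the knobs introduced by this change can be exercised together as sketched below. This is only an illustrative sketch: the fork URLs, branch names, and the `RELMON_1234.json` / `proxy.txt` file names are placeholders, and the final `remote_apparatus.py` invocation mirrors the command that `file_creator.py` writes into the generated job script.

```bash
# Run RelMon jobs in the dedicated CMS CAF pool (loads lxbatch/tzero and uses the
# group_u_CMS.CAF.PHYS accounting group); leave unset to stay in the shared pool.
export HTCONDOR_CAF_POOL="True"

# Make the HTCondor job clone RelMonService2 from a fork/branch instead of master.
export FILE_CREATOR_GIT_SOURCE="https://github.com/<your-user>/relmonservice2.git"  # placeholder fork
export FILE_CREATOR_GIT_BRANCH="my-feature-branch"                                  # placeholder branch

# Compile Utilities/RelMon from a custom cms-sw source instead of the release copy.
export CMSSW_CUSTOM_REPO="https://github.com/<your-user>/cmssw.git"  # placeholder fork
export CMSSW_CUSTOM_BRANCH="my-relmon-fix"                           # placeholder branch

# When DISABLE_CALLBACK_CREDENTIALS is unset, the generated job script appends
# --callback-credentials, so the remote apparatus requests an OAuth token before
# sending the callback; with it set, the callback is sent without an Authorization header.
python3 relmonservice2/remote/remote_apparatus.py \
    -r RELMON_1234.json -p proxy.txt --cpus 4 \
    --callback "${CALLBACK_URL}" --callback-credentials
```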