diff --git a/ansible/playbooks/paas/coredns.yml b/ansible/playbooks/paas/coredns.yml index 284f0a85..85dff61f 100644 --- a/ansible/playbooks/paas/coredns.yml +++ b/ansible/playbooks/paas/coredns.yml @@ -5,5 +5,4 @@ gather_facts: true become: true roles: - - golang - coredns diff --git a/ansible/playbooks/paas/main.yml b/ansible/playbooks/paas/main.yml index 85c0b7f3..b1c0b2e4 100644 --- a/ansible/playbooks/paas/main.yml +++ b/ansible/playbooks/paas/main.yml @@ -5,6 +5,12 @@ gather_facts: true become: true pre_tasks: + + - name: Set fqdn hostname + ansible.builtin.hostname: + name: "{{ inventory_hostname }}" + use: systemd + - name: Create ansible facts.d directory become: true ansible.builtin.file: @@ -14,20 +20,6 @@ group: "root" mode: '0755' - - name: Get ipinfo.io - ansible.builtin.uri: - url: https://ipinfo.io - http_agent: curl/7.81.0 - register: register_uri - check_mode: false - - - name: Set ipinfo local_fact - ansible.builtin.copy: - content: | - {{ register_uri.json | to_nice_json }} - dest: /etc/ansible/facts.d/ipinfo.fact - mode: '0644' - - name: Install mandatories packages ansible.builtin.apt: pkg: @@ -42,5 +34,17 @@ until: apt_status is success delay: 6 retries: 10 + roles: - unattended-upgrades + +- name: Configure systemd resolved + ansible.builtin.import_playbook: systemd-resolved.yml +- name: Configure docker + ansible.builtin.import_playbook: docker.yml +- name: Configure nomad + ansible.builtin.import_playbook: nomad.yml +- name: Configure coredns + ansible.builtin.import_playbook: coredns.yml +- name: Configure metrology + ansible.builtin.import_playbook: metrology.yml diff --git a/ansible/playbooks/paas/metrology.yml b/ansible/playbooks/paas/metrology.yml index 2f334268..56718234 100644 --- a/ansible/playbooks/paas/metrology.yml +++ b/ansible/playbooks/paas/metrology.yml @@ -4,16 +4,51 @@ hosts: "{{ hosts_limit | default('infrastructure') }}" gather_facts: true become: true - roles: - - prometheus - - promtail - - phpfpm_exporter - - 
node_exporter - - mysqld_exporter - - systemd_exporter - - mongodb_exporter - - blackbox_exporter - - nginx_exporter - - scan_exporter - - dns_exporter - - script_exporter + vars_prompt: + - name: project + prompt: project name + private: false + tasks: + - name: End the play for hosts that are not in admins group + ansible.builtin.meta: end_host + when: fact_instance.location != 'admins' + + - name: Install prometheus + ansible.builtin.import_role: + name: prometheus + +- name: Install exporters + any_errors_fatal: true + hosts: "{{ hosts_limit | default('infrastructure') }}" + gather_facts: true + become: true + tasks: + - name: Create prometheus group + ansible.builtin.group: + name: prometheus + system: true + + - name: Create prometheus user + ansible.builtin.user: + name: prometheus + create_home: false + system: true + + - name: Install exporters + ansible.builtin.include_role: + name: "{{ exporter }}" + loop: + - promtail + - phpfpm_exporter + - node_exporter + - mysqld_exporter + - systemd_exporter + - mongodb_exporter + - blackbox_exporter + - nginx_exporter + - scan_exporter + - dns_exporter + - script_exporter + - nvidia_gpu_exporter + loop_control: + loop_var: exporter diff --git a/ansible/playbooks/paas/nomad-juicefs.yml b/ansible/playbooks/paas/nomad-juicefs.yml new file mode 100644 index 00000000..f4de3129 --- /dev/null +++ b/ansible/playbooks/paas/nomad-juicefs.yml @@ -0,0 +1,11 @@ +--- +- name: Install nomad juicefs CSI driver + any_errors_fatal: true + hosts: "{{ hosts_limit | default('infrastructure') }}" + gather_facts: true + become: true + tasks: + - name: Install nomad juicefs CSI driver + ansible.builtin.import_role: + name: nomad + tasks_from: 10_juicefs diff --git a/ansible/playbooks/paas/nvidia.yml b/ansible/playbooks/paas/nvidia.yml new file mode 100644 index 00000000..d8f2314d --- /dev/null +++ b/ansible/playbooks/paas/nvidia.yml @@ -0,0 +1,142 @@ +--- +- name: Install nomad nvidia plugin + any_errors_fatal: true + hosts: "{{ 
hosts_limit | default('infrastructure') }}" + gather_facts: true + become: true + vars: + build_work_dir: /tmp + upstream_file_url: https://github.com/hashicorp/nomad-device-nvidia.git + nvidia_container_toolkit_version: "1.17.8-1" + nvidia_gpg_key_url: "https://nvidia.github.io/libnvidia-container/gpgkey" + nvidia_repo_list_url: "https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list" + nvidia_keyring_path: "/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg" + nvidia_list_path: "/etc/apt/sources.list.d/nvidia-container-toolkit.list" + + roles: + - golang + + pre_tasks: + + - name: Créer le répertoire du keyring s'il n'existe pas + ansible.builtin.file: + path: "{{ nvidia_keyring_path | dirname }}" + state: directory + mode: "0755" + + - name: Télécharger et enregistrer la clé GPG NVIDIA + ansible.builtin.get_url: + url: "{{ nvidia_gpg_key_url }}" + dest: /tmp/nvidia-container-toolkit.gpg + mode: "0644" + + - name: Convertir la clé GPG en format keyring + ansible.builtin.command: + cmd: "gpg --dearmor -o {{ nvidia_keyring_path }} /tmp/nvidia-container-toolkit.gpg" + creates: "{{ nvidia_keyring_path }}" + + - name: Télécharger le fichier de dépôt NVIDIA et ajouter le signed-by + ansible.builtin.shell: | + curl -s -L {{ nvidia_repo_list_url }} | \ + sed 's#deb https://#deb [signed-by={{ nvidia_keyring_path }}] https://#g' > {{ nvidia_list_path }} + args: + creates: "{{ nvidia_list_path }}" + + - name: Activer la section experimental (décommenter) + ansible.builtin.replace: + path: "{{ nvidia_list_path }}" + regexp: '^#(.*experimental.*)$' + replace: '\1' + + - name: Mettre à jour la liste des paquets + ansible.builtin.apt: + update_cache: true + + - name: Installer les paquets NVIDIA Container Toolkit + ansible.builtin.apt: + name: + - "nvidia-container-toolkit={{ nvidia_container_toolkit_version }}" + - "nvidia-container-toolkit-base={{ nvidia_container_toolkit_version }}" + - "libnvidia-container-tools={{ 
nvidia_container_toolkit_version }}" + - "libnvidia-container1={{ nvidia_container_toolkit_version }}" + state: present + + tasks: + - name: Install dependencies + ansible.builtin.apt: + pkg: + - nvidia-utils-580 + - nvidia-driver-580 + - nvidia-container-runtime + - nomad-device-nvidia + state: present + install_recommends: true + update_cache: true + register: apt_status + until: apt_status is success + delay: 6 + retries: 10 + + - name: Nomad-nvidia-plugin | Git checkout + ansible.builtin.git: + repo: https://github.com/hashicorp/nomad-device-nvidia.git + dest: "{{ build_work_dir }}/nomad-device-nvidia" + version: main + force: true + + - name: Nomad-nvidia-plugin | Build binary + ansible.builtin.command: + cmd: make compile + chdir: "{{ build_work_dir }}/nomad-device-nvidia" + environment: + PATH: "/usr/local/go/bin:{{ ansible_env.PATH }}" + register: my_output + changed_when: my_output.rc != 0 + + - name: Create nomad plugin directory + ansible.builtin.file: + path: /opt/nomad/plugins + state: directory + owner: root + group: root + mode: "0755" + + - name: Nomad-nvidia-plugin | Copy binary + ansible.builtin.copy: + src: /tmp/nomad-plugins/nomad-device-nvidia + dest: /opt/nomad/plugins/nomad-device-nvidia + owner: root + group: root + mode: '0755' + remote_src: true + + + - name: Copy using inline content + ansible.builtin.copy: + content: | + plugin "nomad-device-nvidia" { + config { + enabled = true + fingerprint_period = "5s" + } + } + dest: /etc/nomad.d/nvidia.hcl + owner: root + group: root + mode: '0644' + + # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html + - name: Nomad-nvidia-plugin | Test nvidia support + ansible.builtin.command: nvidia-ctk runtime configure --runtime=docker + + - name: Nomad-nvidia-plugin | Restart docker + ansible.builtin.command: systemctl restart docker + + # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/sample-workload.html + - name: Nomad-nvidia-plugin | 
Test nvidia support + ansible.builtin.command: docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi + register: docker_run + + - name: Nomad-nvidia-plugin | Debug + ansible.builtin.debug: + msg: "{{ docker_run }}" diff --git a/ansible/playbooks/paas/partition-data.yml b/ansible/playbooks/paas/partition-data.yml new file mode 100644 index 00000000..eff0d7cd --- /dev/null +++ b/ansible/playbooks/paas/partition-data.yml @@ -0,0 +1,32 @@ +--- +- name: Configure sdb partition + any_errors_fatal: true + hosts: "{{ hosts_limit | default('infrastructure') }}" + gather_facts: true + become: true + tasks: + - name: Create default directory + ansible.builtin.file: + path: /data + state: directory + owner: root + group: root + mode: '0755' + + - name: Create a new primary partition /dev/sdb1 + community.general.parted: + device: /dev/sdb + number: 1 + state: present + + - name: Create a ext4 filesystem on /dev/sdb1 + community.general.filesystem: + fstype: ext4 + dev: /dev/sdb1 + + - name: Mount up device + ansible.posix.mount: + path: /data + src: /dev/sdb1 + fstype: ext4 + state: present diff --git a/ansible/playbooks/paas/roles/ansible-docker/defaults/main.yml b/ansible/playbooks/paas/roles/ansible-docker/defaults/main.yml index 53e0e926..5123c51c 100644 --- a/ansible/playbooks/paas/roles/ansible-docker/defaults/main.yml +++ b/ansible/playbooks/paas/roles/ansible-docker/defaults/main.yml @@ -13,7 +13,9 @@ docker_private_registry_state: false docker_private_registry_url: "" docker_private_registry_username: "" docker_private_registry_password: "" -docker_private_registry_config: /etc/docker/config.json +docker_private_registry_config: + - /etc/docker/config.json + - /root/.docker/config.json # DNS docker_dns_configuration: true diff --git a/ansible/playbooks/paas/roles/ansible-docker/tasks/install.yml b/ansible/playbooks/paas/roles/ansible-docker/tasks/install.yml index 01bb2add..dd13b02f 100644 --- a/ansible/playbooks/paas/roles/ansible-docker/tasks/install.yml +++ 
b/ansible/playbooks/paas/roles/ansible-docker/tasks/install.yml @@ -21,14 +21,7 @@ url: "https://download.docker.com/linux/{{ ansible_distribution | lower }}/gpg" dest: /etc/apt/keyrings/docker.asc -- name: Add Docker repository on ubuntu < 24.04 - ansible.builtin.apt_repository: - repo: "deb [arch={{ upstream_default_arch }} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" - state: present - filename: docker - when: ansible_distribution_version is version('24.04', '<') - -- name: Add Docker repository on ubuntu >= 24.04 +- name: Add Docker repository on ubuntu ansible.builtin.copy: content: | Components: stable @@ -42,7 +35,6 @@ owner: root group: root mode: '0644' - when: ansible_distribution_version is version('24.04', '>=') - name: Install Docker ansible.builtin.apt: @@ -68,14 +60,24 @@ append: true notify: Docker_restart +- name: Create home docker directory + ansible.builtin.file: + path: "{{ item }}" + recurse: true + state: directory + mode: '0755' + loop: + - /root/.docker + - name: Copy config.json ansible.builtin.template: src: config.json.j2 - dest: "{{ docker_private_registry_config }}" + dest: "{{ item }}" owner: root group: root mode: '0600' when: docker_private_registry_state + loop: "{{ docker_private_registry_config }}" notify: Docker_restart - name: Copy daemon.json for DNS resolution diff --git a/ansible/playbooks/paas/roles/coredns/README.md b/ansible/playbooks/paas/roles/coredns/README.md index e03dc9f4..55beefb1 100644 --- a/ansible/playbooks/paas/roles/coredns/README.md +++ b/ansible/playbooks/paas/roles/coredns/README.md @@ -1,3 +1,7 @@ # Role: `coredns` ## How to use this Ansible role? 
+ +### nomad cluster mode + +nomad_primary_master_node: Set a primary nomad master node to get nomad_management_token \ No newline at end of file diff --git a/ansible/playbooks/paas/roles/coredns/tasks/build.yml b/ansible/playbooks/paas/roles/coredns/tasks/build.yml index 86cc7946..fc9b0c1c 100644 --- a/ansible/playbooks/paas/roles/coredns/tasks/build.yml +++ b/ansible/playbooks/paas/roles/coredns/tasks/build.yml @@ -14,37 +14,18 @@ url: "{{ upstream_file_url }}" dest: "{{ build_work_dir }}/download/" mode: '0644' - force: no register: download_result -- name: Coredns | Git checkout - ansible.builtin.git: - repo: https://github.com/coredns/coredns - dest: "{{ build_work_dir }}/download/coredns" - version: master - force: true - - name: Coredns | Unarchive GitHub release ansible.builtin.unarchive: - src: "{{ build_work_dir }}/download/{{ image.upstream.repo }}-{{ upstream_file_name }}" + src: "{{ build_work_dir }}/download/{{ upstream_file_name }}" dest: "{{ build_work_dir }}/download" remote_src: true when: download_result.changed -- name: Coredns | Build binary - ansible.builtin.shell: - cmd: "{{ item }}" - chdir: "{{ build_work_dir }}/download/coredns" - environment: - PATH: "/usr/local/go/bin:{{ ansible_env.PATH }}" - loop: - - echo "nomad:github.com/ituoga/coredns-nomad" >> plugin.cfg - - go mod edit -replace github.com/ituoga/coredns-nomad={{ build_work_dir }}/download/coredns-nomad-{{ latest_version }} - - make gen coredns - - name: Coredns | Copy binary ansible.builtin.copy: - src: "{{ build_work_dir }}/download/coredns/{{ image.upstream.binary }}" + src: "{{ build_work_dir }}/download/coredns" dest: /usr/local/bin/coredns owner: root group: root diff --git a/ansible/playbooks/paas/roles/coredns/tasks/main.yml b/ansible/playbooks/paas/roles/coredns/tasks/main.yml index a7b94816..8f5799e0 100644 --- a/ansible/playbooks/paas/roles/coredns/tasks/main.yml +++ b/ansible/playbooks/paas/roles/coredns/tasks/main.yml @@ -3,13 +3,25 @@ ansible.builtin.include_vars: 
upstream.yml - name: Coredns | Get binary - include_tasks: build.yml + ansible.builtin.include_tasks: build.yml when: ansible_local[image.name] is not defined or ansible_local[image.name] != latest_version +- name: Coredns | Create group + ansible.builtin.group: + name: coredns + system: true + +- name: Coredns | Create user + ansible.builtin.user: + name: coredns + create_home: false + system: true + - name: Coredns | Create custom directories ansible.builtin.file: dest: "{{ item.dest }}" state: directory + mode: '0755' owner: "{{ item.owner | default('root') }}" group: "{{ item.group | default('root') }}" loop: @@ -19,7 +31,7 @@ ansible.builtin.template: src: "{{ item.src }}" dest: "{{ item.dest }}" - mode: 0600 + mode: '0640' owner: coredns group: coredns loop: @@ -46,5 +58,5 @@ Cache=no DNSStubListenerExtra=172.17.0.1:53 dest: /etc/systemd/resolved.conf.d/coredns.conf - mode: 0644 + mode: '0644' notify: Restart systemd-resolved diff --git a/ansible/playbooks/paas/roles/coredns/templates/Corefile.j2 b/ansible/playbooks/paas/roles/coredns/templates/Corefile.j2 index 491b7723..5cf6dbf6 100644 --- a/ansible/playbooks/paas/roles/coredns/templates/Corefile.j2 +++ b/ansible/playbooks/paas/roles/coredns/templates/Corefile.j2 @@ -4,9 +4,8 @@ service.nomad.:1053 { #debug #log nomad { - zone service.nomad - address https://127.0.0.1:4646 - token {{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }} + address https://{{ hostvars[nomad_primary_master_node | default(inventory_hostname)]['ansible_ens3']['ipv4']['address'] }}:4646 + token {{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }} ttl 10 } prometheus 127.0.0.1:9153 diff --git a/ansible/playbooks/paas/roles/coredns/templates/postinst.j2 b/ansible/playbooks/paas/roles/coredns/templates/postinst.j2 deleted file mode 100644 index 6284ae86..00000000 --- 
a/ansible/playbooks/paas/roles/coredns/templates/postinst.j2 +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -set -e - -SERVICE_NAME="{{ image.name }}" -USER_NAME="coredns" -GROUP_NAME="coredns" - -if ! getent group "$GROUP_NAME" >/dev/null; then - echo "Creating group $GROUP_NAME..." - groupadd --system $GROUP_NAME -fi - -if ! id -u "$USER_NAME" >/dev/null 2>&1; then - echo "Creating user $USER_NAME..." - useradd --system --gid $GROUP_NAME --shell /usr/sbin/nologin --no-create-home $USER_NAME -fi - - -if command -v systemctl >/dev/null 2>&1; then - echo "Reloading systemd configuration..." - systemctl daemon-reload - - echo "Enabling and starting $SERVICE_NAME service..." - systemctl enable "$SERVICE_NAME" - systemctl start "$SERVICE_NAME" -fi diff --git a/ansible/playbooks/paas/roles/coredns/templates/prerm.j2 b/ansible/playbooks/paas/roles/coredns/templates/prerm.j2 deleted file mode 100644 index f21147ef..00000000 --- a/ansible/playbooks/paas/roles/coredns/templates/prerm.j2 +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -set -e - -# Variables -SERVICE_NAME="{{ image.name }}" - -# Stop and disable the service -if command -v systemctl >/dev/null 2>&1; then - echo "Stopping and disabling $SERVICE_NAME service..." 
- systemctl stop "$SERVICE_NAME" || true - systemctl disable "$SERVICE_NAME" || true -fi diff --git a/ansible/playbooks/paas/roles/coredns/vars/main.yml b/ansible/playbooks/paas/roles/coredns/vars/main.yml index 11175960..262e254c 100644 --- a/ansible/playbooks/paas/roles/coredns/vars/main.yml +++ b/ansible/playbooks/paas/roles/coredns/vars/main.yml @@ -3,14 +3,14 @@ image: build: false upstream: source: github - user: ituoga - repo: coredns-nomad - type: archive - format: tar.gz - file: VERSION.FORMAT + user: coredns + repo: coredns + type: release + format: tgz + file: coredns_VERSION_OS_ARCH.FORMAT os: linux binary: coredns labels: {} name: coredns -build_work_dir: /tmp/coredns-nomad +build_work_dir: /tmp/coredns diff --git a/ansible/playbooks/paas/roles/coredns/vars/upstream.yml b/ansible/playbooks/paas/roles/coredns/vars/upstream.yml index f6893ce2..036aa64c 100644 --- a/ansible/playbooks/paas/roles/coredns/vars/upstream.yml +++ b/ansible/playbooks/paas/roles/coredns/vars/upstream.yml @@ -1,4 +1,4 @@ --- latest_version: "{{ (lookup('url', 'https://api.github.com/repos/{{ image.upstream.user }}/{{ image.upstream.repo }}/releases/latest', headers={'Accept': 'application/vnd.github+json', 'Authorization': 'Bearer ' + lookup('ansible.builtin.env', 'GITHUB_API_TOKEN') }) | from_json).get('tag_name') | replace('v', '') }}" upstream_file_name: "{{ image.upstream.file | replace('REPO', image.upstream.repo) | replace('VERSION', latest_version) | replace('OS', image.upstream.os) | replace('ARCH', upstream_default_arch) | replace('FORMAT', image.upstream.format) }}" -upstream_file_url: "https://github.com/{{ image.upstream.user }}/{{ image.upstream.repo }}/archive/refs/tags/v{{ upstream_file_name }}" +upstream_file_url: "https://github.com/{{ image.upstream.user }}/{{ image.upstream.repo }}/releases/download/v{{ latest_version }}/{{ upstream_file_name }}" diff --git a/ansible/playbooks/paas/roles/node_exporter/templates/default.j2 
b/ansible/playbooks/paas/roles/node_exporter/templates/default.j2 index acb52c40..1289c4c7 100644 --- a/ansible/playbooks/paas/roles/node_exporter/templates/default.j2 +++ b/ansible/playbooks/paas/roles/node_exporter/templates/default.j2 @@ -1,5 +1,5 @@ ARGS="--log.level=info \ ---web.listen-address=127.0.0.1:9100 \ +--web.listen-address={{ hostvars[inventory_hostname]['ansible_' + nomad_iface]['ipv4']['address'] }}:9100 \ --web.telemetry-path=/metrics \ --collector.diskstats.ignored-devices='^(ram|loop|fd|(h|s|v|xv)d[a-z]|nbd|nvme[0-9]+n[0-9]+p|md|dm-)[0-9]+$' \ --collector.filesystem.mount-points-exclude='^/(dev(/shm)?|proc|run(/.+)?|sys|var/tmp|(var/lib|home)/(docker|kubelet)/.+)($|/)' \ diff --git a/ansible/playbooks/paas/roles/nomad/defaults/main.yml b/ansible/playbooks/paas/roles/nomad/defaults/main.yml index b4945a05..86c61ac5 100644 --- a/ansible/playbooks/paas/roles/nomad/defaults/main.yml +++ b/ansible/playbooks/paas/roles/nomad/defaults/main.yml @@ -15,7 +15,7 @@ nomad_timezone: "Europe/Paris" nomad_group: simplestack # Configuration -nomad_dc_name: "dc1" +nomad_dc_name: dc1 nomad_project: "{{ fact_instance.project }}" nomad_region: "{{ fact_instance.region }}" @@ -43,15 +43,15 @@ nomad_leave_on_interrupt: true nomad_client_auto_join: true nomad_server_auto_join: true -nomad_s3_storage_enabled: true +nomad_s3_storage_enabled: false # Network nomad_http_scheme: https nomad_http_ip: "127.0.0.1" nomad_http_port: 4646 -nomad_cluster_bridge: "ens3" -nomad_iface: "ens3" +nomad_cluster_bridge: ens3 +nomad_iface: ens3 nomad_bind_address: "0.0.0.0" nomad_advertise_address: "{{ hostvars[inventory_hostname]['ansible_' + nomad_iface]['ipv4']['address'] }}" @@ -146,25 +146,38 @@ nomad_client_host_network_cluster: name: cluster interface: "{{ nomad_cluster_bridge }}" -nomad_client_meta_list: {"arch": "{{ architecture_map[ansible_facts.architecture] }}", "location": "{{ fact_instance.location }}", "instance": "{{ inventory_hostname }}"} +nomad_client_meta_list: >- + 
{"arch": "{{ architecture_map[ansible_facts.architecture] }}", + "location": "{{ fact_instance.location }}", + "instance": "{{ inventory_hostname }}"} + +nomad_server_join: >- + {% if nomad_mode == 'single' %}127.0.0.1{% else %}{{ (groups[nomad_deploy_cluster_name] | + map('extract', hostvars) | + selectattr('nomad_node_role', 'equalto', 'both') | + map(attribute='ansible_br0.ipv4.address')) or + (groups[nomad_deploy_cluster_name] | + map('extract', hostvars) | + selectattr('nomad_node_role', 'equalto', 'both') | + map(attribute='ansible_br0.ipv4.address')) | + unique | list }}{% endif %} -nomad_server_join: "{% if nomad_mode == 'single' %}127.0.0.1{% else %}{{ (groups[nomad_deploy_cluster_name] | map('extract', hostvars) | selectattr('nomad_node_role', 'equalto', 'both') | map(attribute='ansible_br0.ipv4.address')) or (groups[nomad_deploy_cluster_name] | map('extract', hostvars) | selectattr('nomad_node_role', 'equalto', 'both') | map(attribute='ansible_br0.ipv4.address')) | unique | list }}{% endif %}" nomad_server_join_retry_max: 3 -nomad_server_join_retry_interval: "15s" +nomad_server_join_retry_interval: 15s nomad_client_server_join_retry_max: 3 -nomad_client_server_join_retry_interval: "15s" +nomad_client_server_join_retry_interval: 15s -nomad_client_drain_on_shutdown_deadline: "1m" -nomad_client_drain_on_shutdown_force: "true" -nomad_client_drain_on_shutdown_ignore_system_jobs: "true" +nomad_client_drain_on_shutdown_deadline: 1m +nomad_client_drain_on_shutdown_force: true +nomad_client_drain_on_shutdown_ignore_system_jobs: true nomad_client_cpu_total_compute: 0 nomad_client_memory_total_mb: 0 nomad_client_disk_total_mb: 0 nomad_client_disk_free_mb: 0 -nomad_client_gc_interval: "1m" +nomad_client_gc_interval: 1m nomad_client_gc_disk_usage_threshold: 80 nomad_client_gc_inode_usage_threshold: 70 nomad_client_gc_parallel_destroys: 2 @@ -175,12 +188,15 @@ nomad_client_reserved_disk: 0 # TLS nomad_tls_ca_host: localhost -nomad_tls_ca_host_dir: 
"~/.simple-stack/tls" -nomad_tls_ca_pubkey: "simplestack-ca.pem" -nomad_tls_ca_privatekey: "simplestack-ca-key.pem" -nomad_tls_ca_provider: "ownca" -nomad_tls_host_certificate_dir: "/etc/ssl/simplestack" -nomad_tls_common_name: "nomad" +nomad_tls_ca_host_dir: ~/.simple-stack/tls +nomad_tls_ca_pubkey: simplestack-ca.pem +nomad_tls_ca_privatekey: simplestack-ca-key.pem +nomad_tls_ca_provider: ownca +nomad_tls_host_certificate_dir: /etc/ssl/simplestack + +nomad_tls_common_name: nomad +# IP range for 192.168.0.0/24 (all 256 addresses) +nomad_tls_ip_range: "{{ range(0,256) | map('regex_replace', '^', 'IP:192.168.0.') | list | join(',') }}" nomad_tls_check_delay: "+2w" # TLS Server @@ -188,36 +204,32 @@ nomad_tls_cert_server: "{{ nomad_dc_name }}-server-nomad.pem" nomad_tls_privatekey_server: "{{ nomad_dc_name }}-server-nomad.key" nomad_tls_common_name_server: "*.{{ nomad_dc_name }}.{{ nomad_tls_common_name }}" -# nomad_tls_subject_alt_name_server: "DNS:localhost,IP:127.0.0.1,DNS:server.global.{{ certificate_subject_alt_name }},DNS:server.{{ nomad_region }}.{{ certificate_subject_alt_name }},DNS:server.{{ nomad_dc_name }}.{{ certificate_subject_alt_name }},DNS:*.{{ nomad_dc_name }}.{{ certificate_subject_alt_name }},IP:172.26.64.1,IP:172.17.0.1,IP:172.18.0.1" -# nomad_tls_subject_alt_name_server: "DNS:localhost,IP:127.0.0.1,DNS:server.global.nomad,DNS:server.{{ nomad_region }}.nomad,DNS:server.{{ nomad_dc_name }}.nomad,DNS:*.{{ nomad_dc_name }}.nomad,IP:172.26.64.1,IP:172.17.0.1,IP:172.18.0.1" -nomad_tls_subject_alt_name_server: "DNS:localhost,IP:127.0.0.1,IP:172.17.0.1,DNS:server.global.nomad,DNS:server.{{ nomad_region }}.nomad,DNS:server.{{ nomad_dc_name }}.nomad,DNS:*.{{ nomad_dc_name }}.nomad" +nomad_tls_subject_alt_name_server: "DNS:localhost,IP:127.0.0.1,IP:172.17.0.1,{{ nomad_tls_ip_range }},DNS:server.global.nomad,DNS:server.{{ nomad_region }}.nomad,DNS:server.{{ nomad_dc_name }}.nomad,DNS:*.{{ nomad_dc_name }}.nomad" # TLS client nomad_tls_cert_client: "{{ 
inventory_hostname }}-{{ nomad_dc_name }}-client-nomad.pem" nomad_tls_privatekey_client: "{{ inventory_hostname }}-{{ nomad_dc_name }}-client-nomad.key" nomad_tls_common_name_client: "*.{{ nomad_dc_name }}.{{ nomad_tls_common_name }}" -# nomad_tls_subject_alt_name_client: "DNS:localhost,IP:127.0.0.1,DNS:client.global.{{ certificate_subject_alt_name }},DNS:client.{{ nomad_region }}.{{ nomad_tls_common_name }},DNS:client.{{ nomad_dc_name }}.{{ nomad_tls_common_name }},DNS:*.{{ nomad_dc_name }}.{{ nomad_tls_common_name }},IP:172.26.64.1,IP:172.17.0.1,IP:172.18.0.1" -# nomad_tls_subject_alt_name_client: "DNS:localhost,IP:127.0.0.1,DNS:client.global.nomad,DNS:client.{{ nomad_region }}.nomad,DNS:client.{{ nomad_dc_name }}.nomad,DNS:*.{{ nomad_dc_name }}.nomad,IP:172.26.64.1,IP:172.17.0.1,IP:172.18.0.1" -nomad_tls_subject_alt_name_client: "DNS:localhost,IP:127.0.0.1,IP:172.17.0.1,DNS:client.global.nomad,DNS:client.{{ nomad_region }}.nomad,DNS:client.{{ nomad_dc_name }}.nomad,DNS:*.{{ nomad_dc_name }}.nomad" +nomad_tls_subject_alt_name_client: "DNS:localhost,IP:127.0.0.1,IP:172.17.0.1,{{ nomad_tls_ip_range }},DNS:client.global.nomad,DNS:client.{{ nomad_region }}.nomad,DNS:client.{{ nomad_dc_name }}.nomad,DNS:*.{{ nomad_dc_name }}.nomad" -nomad_tls_rpc_upgrade_mode: "false" -nomad_tls_verify_server_hostname: "true" -nomad_tls_verify_https_client: "false" +nomad_tls_rpc_upgrade_mode: false +nomad_tls_verify_server_hostname: true +nomad_tls_verify_https_client: false # ACL nomad_acl_enabled: true nomad_acl_token_ttl: 30s nomad_acl_policy_ttl: 30s -nomad_acl_replication_token: "" +nomad_acl_replication_token: "" # Docker -nomad_docker_client_dc_name: "dc1" +nomad_docker_client_dc_name: "{{ nomad_dc_name }}" nomad_docker_tcp_listen_address: "127.0.0.1" nomad_docker_tcp_listen_port: 2376 -docker_tls_configuration: false +nomad_docker_tls_configuration: false nomad_docker_client_tls_host_certificate_dir: "/etc/ssl/docker" nomad_docker_client_tls_cert: "{{ nomad_docker_client_dc_name 
}}-client-docker.pem" @@ -241,40 +253,40 @@ nomad_docker_client_allow_caps: - sys_ptrace - sys_admin -nomad_docker_private_registry_state: false +nomad_docker_private_registry_state: true nomad_docker_private_registry_config: /etc/docker/config.json -nomad_docker_allow_privileged: "{% if nomad_s3_storage_enabled %}true{% else %}false{% endif %}" -nomad_docker_volume_enable: "true" -nomad_docker_gc_image: "true" -nomad_docker_gc_image_delay: "1h" -nomad_docker_gc_container: "true" -nomad_docker_gc_dangling_containers_enabled: "true" -nomad_docker_gc_dangling_containers_dry_run: "false" -nomad_docker_gc_dangling_containers_period: "5m" -nomad_docker_gc_dangling_containers_creation_grace: "5m" +nomad_docker_allow_privileged: true +nomad_docker_volume_enable: true +nomad_docker_gc_image: true +nomad_docker_gc_image_delay: 1h +nomad_docker_gc_container: true +nomad_docker_gc_dangling_containers_enabled: true +nomad_docker_gc_dangling_containers_dry_run: false +nomad_docker_gc_dangling_containers_period: 5m +nomad_docker_gc_dangling_containers_creation_grace: 5m # Telemetry -nomad_telemetry_disable_hostname: "false" -nomad_telemetry_collection_interval: "5s" -nomad_telemetry_use_node_name: "false" -nomad_telemetry_publish_allocation_metrics: "true" -nomad_telemetry_publish_node_metrics: "true" -nomad_telemetry_filter_default: "true" +nomad_telemetry_disable_hostname: false +nomad_telemetry_collection_interval: 5s +nomad_telemetry_use_node_name: false +nomad_telemetry_publish_allocation_metrics: true +nomad_telemetry_publish_node_metrics: true +nomad_telemetry_filter_default: true # nomad_telemetry_prefix_filter: -nomad_telemetry_disable_dispatched_job_summary_metrics: "false" +nomad_telemetry_disable_dispatched_job_summary_metrics: false # nomad_telemetry_statsite_address: "" # nomad_telemetry_statsd_address: "" # nomad_telemetry_datadog_address: "" # nomad_telemetry_datadog_tags: -nomad_telemetry_prometheus_metrics: "true" +nomad_telemetry_prometheus_metrics: true # 
nomad_telemetry_circonus_api_token: "" nomad_telemetry_circonus_api_app: "nomad" nomad_telemetry_circonus_api_url: "https://api.circonus.com/v2" nomad_telemetry_circonus_submission_interval: "10s" # nomad_telemetry_circonus_submission_url: "" # nomad_telemetry_circonus_check_id: "" -nomad_telemetry_circonus_check_force_metric_activation: "false" +nomad_telemetry_circonus_check_force_metric_activation: false # nomad_telemetry_circonus_check_instance_id: "" # nomad_telemetry_circonus_check_search_tag: "" # nomad_telemetry_circonus_check_display_name: "" @@ -299,5 +311,5 @@ nomad_ui_content_security_policy_script_src: "'self'" nomad_ui_content_security_policy_style_src: "" nomad_ui_label_text: "{{ inventory_hostname }}" -nomad_ui_label_background_color: "blue" -nomad_ui_label_text_color: "white" +nomad_ui_label_background_color: blue +nomad_ui_label_text_color: white diff --git a/ansible/playbooks/paas/roles/nomad/tasks/04_tls_certs.yml b/ansible/playbooks/paas/roles/nomad/tasks/04_tls_certs.yml index 54f88757..43fc09ae 100644 --- a/ansible/playbooks/paas/roles/nomad/tasks/04_tls_certs.yml +++ b/ansible/playbooks/paas/roles/nomad/tasks/04_tls_certs.yml @@ -45,24 +45,22 @@ run_once: true when: not cert_tls_server_present.stat.exists or (cert_tls_server_present.stat.exists and not tls_check_server.valid_at.delay) - - name: "Nomad | Copy cert private server key on nodes" + - name: "Nomad | Copy certificates on server nodes" ansible.builtin.copy: - src: "{{ nomad_tls_ca_host_dir }}/{{ nomad_tls_privatekey_server }}" - dest: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_privatekey_server }}" - owner: "root" - group: "{{ nomad_group }}" - mode: "0640" - - - name: "Nomad | Copy cert server on nodes" - ansible.builtin.copy: - src: "{{ nomad_tls_ca_host_dir }}/{{ nomad_tls_cert_server }}" - dest: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_cert_server }}" - owner: "root" + src: "{{ item.src }}" + dest: "{{ item.dest }}" + owner: root group: "{{ nomad_group }}" 
mode: "0640" + loop: + - src: "{{ nomad_tls_ca_host_dir }}/{{ nomad_tls_privatekey_server }}" + dest: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_privatekey_server }}" + - src: "{{ nomad_tls_ca_host_dir }}/{{ nomad_tls_cert_server }}" + dest: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_cert_server }}" + notify: Nomad_restart - name: Nomad | Copy certificate on client nodes - when: nomad_node_role == 'client' + when: nomad_node_role in ['client', 'both'] block: - name: "Nomad | Check if TLS cert exists for Client" ansible.builtin.stat: @@ -90,20 +88,18 @@ certificate_client_privatekey: "{{ nomad_tls_privatekey_client }}" certificate_common_name: "{{ nomad_tls_common_name_client }}" certificate_subject_alt_name: "{{ nomad_tls_subject_alt_name_client }}" - when: nomad_mode == 'cluster' - - - name: "Nomad | Copy cert client key on nodes" - ansible.builtin.copy: - src: "{{ nomad_tls_ca_host_dir }}/{{ nomad_tls_cert_client }}" - dest: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_cert_client }}" - owner: "root" - group: "{{ nomad_group }}" - mode: "0640" + # when: nomad_mode == 'cluster' - - name: "Nomad | Copy cert private client key on nodes" + - name: "Nomad | Copy certificates on client nodes" ansible.builtin.copy: - src: "{{ nomad_tls_ca_host_dir }}/{{ nomad_tls_privatekey_client }}" - dest: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_privatekey_client }}" - owner: "root" + src: "{{ item.src }}" + dest: "{{ item.dest }}" + owner: root group: "{{ nomad_group }}" mode: "0640" + loop: + - src: "{{ nomad_tls_ca_host_dir }}/{{ nomad_tls_cert_client }}" + dest: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_cert_client }}" + - src: "{{ nomad_tls_ca_host_dir }}/{{ nomad_tls_privatekey_client }}" + dest: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_privatekey_client }}" + notify: Nomad_restart diff --git a/ansible/playbooks/paas/roles/nomad/tasks/06_configuration.yml b/ansible/playbooks/paas/roles/nomad/tasks/06_configuration.yml index 
fc3057c1..b32cecc8 100644 --- a/ansible/playbooks/paas/roles/nomad/tasks/06_configuration.yml +++ b/ansible/playbooks/paas/roles/nomad/tasks/06_configuration.yml @@ -68,10 +68,11 @@ port: "{{ nomad_http_port }}" - name: Block + when: nomad_node_role in ['server', 'both'] block: - name: "Nomad Install | Read Nomad management token from UI" ansible.builtin.set_fact: - nomad_management_token: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + nomad_management_token: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" rescue: - name: "Nomad ACL | Generate Bootstrap token" ansible.builtin.uri: @@ -88,7 +89,7 @@ - name: "Nomad Install | Set Nomad management token and insert in UI" ansible.builtin.set_fact: - nomad_management_token: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='create', userpass=nomad_management_token_result.json.SecretID) }}" + nomad_management_token: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='create', userpass=nomad_management_token_result.json.SecretID) }}" - name: "Nomad Configuration | Enable MemoryOversubscription" ansible.builtin.uri: @@ -109,18 +110,18 @@ register: nomad_memoryoversubscription ignore_errors: true -- name: "Nomad Configuration | Add S3 storage plugin job templates" - ansible.builtin.template: - src: "{{ item }}.j2" - dest: "{{ nomad_job_files_dir }}/{{ item }}" - mode: '0644' - loop: - - "plugin-s3-controller.hcl" - - "plugin-s3-node.hcl" - when: - - nomad_s3_storage_enabled - - nomad_node_role in ['client', 'both'] - notify: Nomad_s3_jobs +# - name: "Nomad Configuration | Add S3 storage plugin job templates" +# ansible.builtin.template: +# src: "{{ item }}.j2" +# dest: "{{ 
nomad_job_files_dir }}/{{ item }}" +# mode: '0644' +# loop: +# - "plugin-s3-controller.hcl" +# - "plugin-s3-node.hcl" +# when: +# - nomad_s3_storage_enabled +# - nomad_node_role in ['client', 'both'] +# notify: Nomad_s3_jobs -- name: "Nomad Configuration | Flush handlers" - ansible.builtin.meta: flush_handlers +# - name: "Nomad Configuration | Flush handlers" +# ansible.builtin.meta: flush_handlers diff --git a/ansible/playbooks/paas/roles/nomad/tasks/10_juicefs.yml b/ansible/playbooks/paas/roles/nomad/tasks/10_juicefs.yml new file mode 100644 index 00000000..d5414e97 --- /dev/null +++ b/ansible/playbooks/paas/roles/nomad/tasks/10_juicefs.yml @@ -0,0 +1,59 @@ +--- +- name: "Nomad Juicefs | Copy configuration" + ansible.builtin.template: + src: "{{ item }}.j2" + dest: "{{ nomad_job_files_dir }}/{{ item }}" + mode: '0644' + loop: + - juicefs-controller.hcl + - juicefs-node.hcl + - juicefs-volume.hcl + +- name: "Nomad Juicefs | Copy volume configuration" + ansible.builtin.template: + src: juicefs-volume.hcl.j2 + dest: "{{ nomad_job_files_dir }}/juicefs-volume-{{ item }}.hcl" + mode: '0644' + loop: + - volume + - test + - llm + +- name: Nomad Juicefs | Run jobs + ansible.builtin.command: "/usr/bin/nomad job run {{ nomad_job_files_dir }}/{{ item }}" + args: + chdir: "{{ nomad_job_files_dir }}" + environment: + NOMAD_ADDR: "https://{{ nomad_http_ip }}:4646" + NOMAD_TOKEN: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + NOMAD_CLIENT_CERT: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_cert_server }}" + NOMAD_CLIENT_KEY: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_privatekey_server }}" + NOMAD_CACERT: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_ca_pubkey }}" + register: nomad_job_start + loop: + - juicefs-controller.hcl + - juicefs-node.hcl + failed_when: nomad_job_start.rc >= 2 + changed_when: + - '"error" in nomad_job_start.stdout' + - nomad_job_start.rc >= 2 + +- name: Nomad 
Juicefs | Create volume + ansible.builtin.command: "/usr/bin/nomad volume create {{ nomad_job_files_dir }}/juicefs-volume-{{ item }}.hcl" + args: + chdir: "{{ nomad_job_files_dir }}" + environment: + NOMAD_ADDR: "https://{{ nomad_http_ip }}:4646" + NOMAD_TOKEN: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + NOMAD_CLIENT_CERT: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_cert_server }}" + NOMAD_CLIENT_KEY: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_privatekey_server }}" + NOMAD_CACERT: "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_ca_pubkey }}" + register: nomad_job_start + loop: + - volume + - test + - llm + failed_when: nomad_job_start.rc >= 2 + changed_when: + - '"error" in nomad_job_start.stdout' + - nomad_job_start.rc >= 2 diff --git a/ansible/playbooks/paas/roles/nomad/tasks/main.yml b/ansible/playbooks/paas/roles/nomad/tasks/main.yml index 75a72cd8..51957285 100644 --- a/ansible/playbooks/paas/roles/nomad/tasks/main.yml +++ b/ansible/playbooks/paas/roles/nomad/tasks/main.yml @@ -7,8 +7,8 @@ name: "{{ nomad_timezone }}" hwclock: local -- name: "Nomad | Install CNI" - ansible.builtin.include_tasks: "02_network.yml" +# - name: "Nomad | Install CNI" +# ansible.builtin.include_tasks: "02_network.yml" - name: "Nomad | Commons tasks" ansible.builtin.include_tasks: "03_commons_tasks.yml" @@ -27,4 +27,3 @@ - name: "Nomad | Change SystemD configuration" ansible.builtin.include_tasks: "08_systemd_tuning.yml" - diff --git a/ansible/playbooks/paas/roles/nomad/templates/client.hcl.j2 b/ansible/playbooks/paas/roles/nomad/templates/client.hcl.j2 index 184a5b9d..a7db1122 100644 --- a/ansible/playbooks/paas/roles/nomad/templates/client.hcl.j2 +++ b/ansible/playbooks/paas/roles/nomad/templates/client.hcl.j2 @@ -1,12 +1,12 @@ client { - enabled = {{ nomad_client_enabled | bool | lower }} + enabled = {{ nomad_client_enabled | lower }} state_dir = "{{ nomad_state_dir_client }}" 
node_class = "{{ nomad_client_node_class }}" node_pool = "{{ nomad_client_node_pool }}" - no_host_uuid = {{ nomad_client_no_host_uuid | bool | lower }} + no_host_uuid = {{ nomad_client_no_host_uuid | lower }} servers = [ {%- set comma = joiner(",") -%} @@ -51,8 +51,8 @@ client { {% if nomad_mode == 'cluster' %} drain_on_shutdown { deadline = "{{ nomad_client_drain_on_shutdown_deadline }}" - force = {{ nomad_client_drain_on_shutdown_force }} - ignore_system_jobs = {{ nomad_client_drain_on_shutdown_ignore_system_jobs }} + force = {{ nomad_client_drain_on_shutdown_force | lower }} + ignore_system_jobs = {{ nomad_client_drain_on_shutdown_ignore_system_jobs | lower }} } {% endif %} diff --git a/ansible/playbooks/paas/roles/nomad/templates/docker.hcl.j2 b/ansible/playbooks/paas/roles/nomad/templates/docker.hcl.j2 index 14043718..a82ce90c 100644 --- a/ansible/playbooks/paas/roles/nomad/templates/docker.hcl.j2 +++ b/ansible/playbooks/paas/roles/nomad/templates/docker.hcl.j2 @@ -1,6 +1,6 @@ plugin "docker" { config { -{% if docker_tls_configuration == true %} +{% if nomad_docker_tls_configuration %} endpoint = "tcp://{{ nomad_docker_tcp_listen_address }}:{{ nomad_docker_tcp_listen_port }}" tls { @@ -12,27 +12,27 @@ plugin "docker" { endpoint = "unix:///var/run/docker.sock" {% endif %} -{% if nomad_docker_private_registry_state == true %} +{% if nomad_docker_private_registry_state %} auth { config = "{{ nomad_docker_private_registry_config }}" } {% endif %} - allow_privileged = {{ nomad_docker_allow_privileged }} + allow_privileged = {{ nomad_docker_allow_privileged | lower }} volumes { - enabled = {{ nomad_docker_volume_enable }} + enabled = {{ nomad_docker_volume_enable | lower }} } allow_caps = [{% for item in nomad_docker_client_allow_caps %}"{{ item }}"{% if not loop.last %}, {% endif %}{% endfor %}] gc { - image = {{ nomad_docker_gc_image }} - image_delay = "{{ nomad_docker_gc_image_delay }}" - container = {{ nomad_docker_gc_container }} + image = {{ 
nomad_docker_gc_image | lower }} + image_delay = "{{ nomad_docker_gc_image_delay | lower }}" + container = {{ nomad_docker_gc_container | lower }} dangling_containers { - enabled = {{ nomad_docker_gc_dangling_containers_enabled }} - dry_run = {{ nomad_docker_gc_dangling_containers_dry_run }} + enabled = {{ nomad_docker_gc_dangling_containers_enabled | lower }} + dry_run = {{ nomad_docker_gc_dangling_containers_dry_run | lower }} period = "{{ nomad_docker_gc_dangling_containers_period }}" creation_grace = "{{ nomad_docker_gc_dangling_containers_creation_grace }}" } diff --git a/ansible/playbooks/paas/roles/nomad/templates/juicefs-controller.hcl.j2 b/ansible/playbooks/paas/roles/nomad/templates/juicefs-controller.hcl.j2 new file mode 100644 index 00000000..c42da612 --- /dev/null +++ b/ansible/playbooks/paas/roles/nomad/templates/juicefs-controller.hcl.j2 @@ -0,0 +1,43 @@ +job "jfs-controller" { + datacenters = ["dc1"] + type = "system" + + group "controller" { + + constraint { + attribute = "${meta.instance}" + set_contains = "{{ nomad_constraints_juicefs_controller_instance }}" + } + + task "plugin" { + driver = "docker" + + config { + image = "juicedata/juicefs-csi-driver:v0.30.0" + + args = [ + "--endpoint=unix://csi/csi.sock", + "--logtostderr", + "--nodeid=test", + "--v=5", + "--by-process=true" + ] + + privileged = true + } + + csi_plugin { + id = "juicefs0" + type = "controller" + mount_dir = "/csi" + } + resources { + cpu = 100 + memory = 512 + } + env { + POD_NAME = "csi-controller" + } + } + } +} \ No newline at end of file diff --git a/ansible/playbooks/paas/roles/nomad/templates/juicefs-node.hcl.j2 b/ansible/playbooks/paas/roles/nomad/templates/juicefs-node.hcl.j2 new file mode 100644 index 00000000..f503ef6d --- /dev/null +++ b/ansible/playbooks/paas/roles/nomad/templates/juicefs-node.hcl.j2 @@ -0,0 +1,48 @@ +job "jfs-node" { + datacenters = ["dc1"] + type = "system" + + group "nodes" { + + constraint { + attribute = "${meta.location}" + operator = 
"set_contains_any" + value = "{{ nomad_constraints_juicefs_controller_nodes | join(',') }}" + } + + task "juicefs-plugin" { + driver = "docker" + + config { + image = "juicedata/juicefs-csi-driver:v0.30.0" + + args = [ + "--endpoint=unix://csi/csi.sock", + "--logtostderr", + "--v=5", + "--nodeid=test", + "--by-process=true", + #"--cache-dir", "/var/jfs-cache", + #"--cache-size", "50G", + #"--writeback", + #"--prefetch", "2" + ] + + privileged = true + } + + csi_plugin { + id = "juicefs0" + type = "node" + mount_dir = "/csi" + } + resources { + cpu = 1000 + memory = 1024 + } + env { + POD_NAME = "csi-node" + } + } + } +} \ No newline at end of file diff --git a/ansible/playbooks/paas/roles/nomad/templates/juicefs-volume.hcl.j2 b/ansible/playbooks/paas/roles/nomad/templates/juicefs-volume.hcl.j2 new file mode 100644 index 00000000..f45cf80f --- /dev/null +++ b/ansible/playbooks/paas/roles/nomad/templates/juicefs-volume.hcl.j2 @@ -0,0 +1,18 @@ +type = "csi" +id = "juicefs-{{ item }}" +name = "juicefs-{{ item }}" + +capability { + access_mode = "multi-node-multi-writer" + attachment_mode = "file-system" +} +plugin_id = "juicefs0" + +secrets { + name="juicefs-volume" + metaurl="redis://{{ nomad_juicefs_secrets.valkey.address }}:{{ nomad_juicefs_secrets.valkey.port }}/0" + bucket="http://{{ nomad_juicefs_secrets.minio.address }}:{{ nomad_juicefs_secrets.minio.port }}/minio/{{ item }}" + storage="minio" + access-key="{{ lookup('simple-stack-ui', type='secret', key=nomad_juicefs_secrets.minio.domain, subkey='user', missing='error') }}" + secret-key="{{ lookup('simple-stack-ui', type='secret', key=nomad_juicefs_secrets.minio.domain, subkey='passwd', missing='error') }}" +} \ No newline at end of file diff --git a/ansible/playbooks/paas/roles/nomad/templates/nomad.hcl.j2 b/ansible/playbooks/paas/roles/nomad/templates/nomad.hcl.j2 index 7d027483..c67aa932 100644 --- a/ansible/playbooks/paas/roles/nomad/templates/nomad.hcl.j2 +++ 
b/ansible/playbooks/paas/roles/nomad/templates/nomad.hcl.j2 @@ -2,8 +2,8 @@ name = "{{ nomad_node_name }}" region = "{{ nomad_region }}" datacenter = "{{ nomad_dc_name }}" -disable_anonymous_signature = {{ nomad_disable_anonymous_signature | bool | lower }} -disable_update_check = {{ nomad_disable_update_check | bool | lower }} +disable_anonymous_signature = {{ nomad_disable_anonymous_signature | lower }} +disable_update_check = {{ nomad_disable_update_check | lower }} data_dir = "{{ nomad_data_dir }}" @@ -20,15 +20,15 @@ ports { serf = {{ nomad_ports.serf }} } -enable_debug = {{ nomad_debug | bool | lower }} +enable_debug = {{ nomad_debug | lower }} log_file = "{{ nomad_log_file }}" log_level = "{{ nomad_log_level }}" log_rotate_bytes = {{ nomad_log_rotate_bytes }} log_rotate_duration = "{{ nomad_log_rotate_duration }}" log_rotate_max_files = {{ nomad_log_rotate_max_files }} -leave_on_terminate = {{ nomad_leave_on_terminate | bool | lower }} -leave_on_interrupt = {{ nomad_leave_on_interrupt | bool | lower }} +leave_on_terminate = {{ nomad_leave_on_terminate | lower }} +leave_on_interrupt = {{ nomad_leave_on_interrupt | lower }} tls { http = true @@ -36,39 +36,39 @@ tls { ca_file = "{{ nomad_tls_host_certificate_dir }}/{{ nomad_tls_ca_pubkey }}" cert_file = "{{ nomad_tls_host_certificate_dir }}/{{ (nomad_node_role == 'client') | ternary(nomad_tls_cert_client, nomad_tls_cert_server) }}" key_file = "{{ nomad_tls_host_certificate_dir }}/{{ (nomad_node_role == 'client') | ternary(nomad_tls_privatekey_client, nomad_tls_privatekey_server) }}" - rpc_upgrade_mode = {{ nomad_tls_rpc_upgrade_mode }} - verify_server_hostname = "{{ nomad_tls_verify_server_hostname }}" - verify_https_client = "{{ nomad_tls_verify_https_client }}" + rpc_upgrade_mode = {{ nomad_tls_rpc_upgrade_mode | lower }} + verify_server_hostname = {{ nomad_tls_verify_server_hostname | lower }} + verify_https_client = {{ nomad_tls_verify_https_client | lower }} } acl { - enabled = {{ nomad_acl_enabled | bool 
| lower }} + enabled = {{ nomad_acl_enabled | lower }} token_ttl = "{{ nomad_acl_token_ttl }}" policy_ttl = "{{ nomad_acl_policy_ttl }}" replication_token = "{{ nomad_acl_replication_token }}" } telemetry { - disable_hostname = {{ nomad_telemetry_disable_hostname }} + disable_hostname = {{ nomad_telemetry_disable_hostname | lower }} collection_interval = "{{ nomad_telemetry_collection_interval }}" - use_node_name = {{ nomad_telemetry_use_node_name }} - publish_allocation_metrics = {{ nomad_telemetry_publish_allocation_metrics }} - publish_node_metrics = {{ nomad_telemetry_publish_node_metrics }} - filter_default = {{ nomad_telemetry_filter_default }} + use_node_name = {{ nomad_telemetry_use_node_name | lower }} + publish_allocation_metrics = {{ nomad_telemetry_publish_allocation_metrics | lower }} + publish_node_metrics = {{ nomad_telemetry_publish_node_metrics | lower }} + filter_default = {{ nomad_telemetry_filter_default | lower }} prefix_filter = [] - disable_dispatched_job_summary_metrics = {{ nomad_telemetry_disable_dispatched_job_summary_metrics }} + disable_dispatched_job_summary_metrics = {{ nomad_telemetry_disable_dispatched_job_summary_metrics | lower }} statsite_address = "" statsd_address = "" datadog_address = "" datadog_tags = [] - prometheus_metrics = {{ nomad_telemetry_prometheus_metrics }} + prometheus_metrics = {{ nomad_telemetry_prometheus_metrics | lower }} circonus_api_token = "" circonus_api_app = "{{ nomad_telemetry_circonus_api_app }}" circonus_api_url = "{{ nomad_telemetry_circonus_api_url }}" circonus_submission_interval = "{{ nomad_telemetry_circonus_submission_interval }}" circonus_submission_url = "" circonus_check_id = "" - circonus_check_force_metric_activation = {{ nomad_telemetry_circonus_check_force_metric_activation }} + circonus_check_force_metric_activation = {{ nomad_telemetry_circonus_check_force_metric_activation | lower }} circonus_check_instance_id = "" circonus_check_search_tag = "" circonus_check_display_name = "" @@ 
-78,7 +78,7 @@ telemetry { } autopilot { - cleanup_dead_servers = {{ nomad_autopilot_cleanup_dead_servers | bool | lower }} + cleanup_dead_servers = {{ nomad_autopilot_cleanup_dead_servers | lower }} last_contact_threshold = "{{ nomad_autopilot_last_contact_threshold }}" max_trailing_logs = {{ nomad_autopilot_max_trailing_logs }} server_stabilization_time = "{{ nomad_autopilot_server_stabilization_time }}" @@ -90,7 +90,7 @@ limits { } ui { - enabled = {{ nomad_ui_enabled | bool | lower }} + enabled = {{ nomad_ui_enabled | lower }} content_security_policy { connect_src = ["{{ nomad_ui_content_security_policy_connect_src }}"] diff --git a/ansible/playbooks/paas/roles/nomad/templates/server.hcl.j2 b/ansible/playbooks/paas/roles/nomad/templates/server.hcl.j2 index e9edfca8..7a8e8e17 100644 --- a/ansible/playbooks/paas/roles/nomad/templates/server.hcl.j2 +++ b/ansible/playbooks/paas/roles/nomad/templates/server.hcl.j2 @@ -1,11 +1,11 @@ server { - enabled = {{ nomad_server_enabled | bool | lower }} + enabled = {{ nomad_server_enabled | lower }} bootstrap_expect = {{ nomad_servers | length }} data_dir = "{{ nomad_data_dir_server }}" - {% if nomad_server_retry_join | bool -%} + {% if nomad_server_retry_join -%} retry_join = [ {%- set comma = joiner(",") -%} {% for server in nomad_servers_advertise_address -%} @@ -21,7 +21,7 @@ server { {%- endfor -%} ] {%- endif %} - rejoin_after_leave = {{ nomad_server_rejoin_after_leave | bool | lower }} + rejoin_after_leave = {{ nomad_server_rejoin_after_leave | lower }} enabled_schedulers = [ {%- set comma = joiner(",") -%} diff --git a/ansible/playbooks/paas/roles/nvidia_gpu_exporter/README.md b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/README.md new file mode 100644 index 00000000..73adf688 --- /dev/null +++ b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/README.md @@ -0,0 +1 @@ +# Role: `nvidia_gpu_exporter` diff --git a/ansible/playbooks/paas/roles/nvidia_gpu_exporter/defaults/main.yml 
b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/defaults/main.yml new file mode 100644 index 00000000..bc455e64 --- /dev/null +++ b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/defaults/main.yml @@ -0,0 +1,2 @@ +--- +nvidia_gpu_exporter_enable: false diff --git a/ansible/playbooks/paas/roles/nvidia_gpu_exporter/handlers/main.yml b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/handlers/main.yml new file mode 100644 index 00000000..a017bade --- /dev/null +++ b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/handlers/main.yml @@ -0,0 +1,7 @@ +--- +- name: Restart nvidia_gpu_exporter + listen: Restart nvidia_gpu_exporter + ansible.builtin.service: + name: nvidia_gpu_exporter + state: restarted + enabled: true diff --git a/ansible/playbooks/paas/roles/nvidia_gpu_exporter/tasks/build.yml b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/tasks/build.yml new file mode 100644 index 00000000..2dacd7a4 --- /dev/null +++ b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/tasks/build.yml @@ -0,0 +1,57 @@ +--- +- name: Nvidia_gpu_exporter | Create temporary build directory + ansible.builtin.file: + path: "{{ item }}" + recurse: true + state: directory + mode: '0755' + loop: + - "{{ build_work_dir }}/download" + - "{{ build_work_dir }}/{{ upstream_default_arch }}" + +- name: Nvidia_gpu_exporter | Download Github release + ansible.builtin.get_url: + url: "{{ upstream_file_url }}" + dest: "{{ build_work_dir }}/download/" + mode: '0644' + register: download_result + +- name: Nvidia_gpu_exporter | Unarchive GitHub release + ansible.builtin.unarchive: + src: "{{ build_work_dir }}/download/{{ upstream_file_name }}" + dest: "{{ build_work_dir }}/download" + remote_src: true + when: download_result.changed + +- name: Nvidia_gpu_exporter | Find binary + ansible.builtin.include_role: + name: upstream + tasks_from: find-binary + loop: + - "{{ image.upstream.binary }}" + +- name: Nvidia_gpu_exporter | Copy binary + ansible.builtin.copy: + src: "{{ build_work_dir }}/{{ 
upstream_default_arch }}/{{ image.upstream.binary }}" + dest: /usr/local/bin/nvidia_gpu_exporter + owner: root + group: root + mode: '0755' + remote_src: true + +- name: Nvidia_gpu_exporter | Clean up + ansible.builtin.file: + path: "{{ build_work_dir }}" + state: absent + +- name: Nvidia_gpu_exporter | Backup software version + ansible.builtin.copy: + content: | + #!/bin/bash + cat << EOF + "{{ latest_version }}" + EOF + dest: "/etc/ansible/facts.d/{{ image.name }}.fact" + owner: root + group: root + mode: '0755' diff --git a/ansible/playbooks/paas/roles/nvidia_gpu_exporter/tasks/main.yml b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/tasks/main.yml new file mode 100644 index 00000000..8500b652 --- /dev/null +++ b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/tasks/main.yml @@ -0,0 +1,23 @@ +--- +- name: End the play for hosts that don't have nvidia gpu + ansible.builtin.meta: end_host + when: not nvidia_gpu_exporter_enable + +- name: Nvidia_gpu_exporter | Include upstream variables + ansible.builtin.include_vars: upstream.yml + +- name: Nvidia_gpu_exporter | Get binary + ansible.builtin.include_tasks: build.yml + when: ansible_local[image.name] is not defined or ansible_local[image.name] != latest_version + +- name: Nvidia_gpu_exporter | Copy templates + ansible.builtin.template: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + mode: '0644' + owner: prometheus + group: prometheus + loop: + - src: service.j2 + dest: /etc/systemd/system/nvidia_gpu_exporter.service + notify: Restart nvidia_gpu_exporter diff --git a/ansible/playbooks/paas/roles/nvidia_gpu_exporter/templates/service.j2 b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/templates/service.j2 new file mode 100644 index 00000000..1b213d68 --- /dev/null +++ b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/templates/service.j2 @@ -0,0 +1,16 @@ +[Unit] +Description=Nvidia GPU Exporter +Documentation=https://github.com/utkuozdemir/nvidia_gpu_exporter +After=network-online.target + +[Service] 
+Type=simple +User=prometheus +Group=prometheus +ExecStart=/usr/local/bin/nvidia_gpu_exporter +SyslogIdentifier=nvidia_gpu_exporter +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target \ No newline at end of file diff --git a/ansible/playbooks/paas/roles/nvidia_gpu_exporter/vars/main.yml b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/vars/main.yml new file mode 100644 index 00000000..8b1563b4 --- /dev/null +++ b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/vars/main.yml @@ -0,0 +1,23 @@ +--- +image: + build: false + upstream: + source: github + user: utkuozdemir + repo: nvidia_gpu_exporter + type: release + format: tar.gz + file: nvidia_gpu_exporter_VERSION_OS_ARCH.FORMAT + os: linux + binary: nvidia_gpu_exporter + labels: {} + name: nvidia_gpu_exporter + +build_work_dir: /tmp/nvidia_gpu_exporter + +architecture_map: + amd64: amd64 + x86_64: x86_64 + armv7l: arm + aarch64: arm64 + arm64: arm64 diff --git a/ansible/playbooks/paas/roles/nvidia_gpu_exporter/vars/upstream.yml b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/vars/upstream.yml new file mode 100644 index 00000000..036aa64c --- /dev/null +++ b/ansible/playbooks/paas/roles/nvidia_gpu_exporter/vars/upstream.yml @@ -0,0 +1,4 @@ +--- +latest_version: "{{ (lookup('url', 'https://api.github.com/repos/' + image.upstream.user + '/' + image.upstream.repo + '/releases/latest', headers={'Accept': 'application/vnd.github+json', 'Authorization': 'Bearer ' + lookup('ansible.builtin.env', 'GITHUB_API_TOKEN') }) | from_json).get('tag_name') | replace('v', '') }}" +upstream_file_name: "{{ image.upstream.file | replace('REPO', image.upstream.repo) | replace('VERSION', latest_version) | replace('OS', image.upstream.os) | replace('ARCH', upstream_default_arch) | replace('FORMAT', image.upstream.format) }}" +upstream_file_url: "https://github.com/{{ image.upstream.user }}/{{ image.upstream.repo }}/releases/download/v{{ latest_version }}/{{ upstream_file_name }}" diff --git 
a/ansible/playbooks/paas/roles/prometheus/tasks/main.yml b/ansible/playbooks/paas/roles/prometheus/tasks/main.yml index 1911b2de..55e64042 100644 --- a/ansible/playbooks/paas/roles/prometheus/tasks/main.yml +++ b/ansible/playbooks/paas/roles/prometheus/tasks/main.yml @@ -33,7 +33,7 @@ ansible.builtin.template: src: "{{ item.src }}" dest: "{{ item.dest }}" - mode: 0644 + mode: '0644' owner: prometheus group: prometheus loop: diff --git a/ansible/playbooks/paas/roles/prometheus/templates/config.j2 b/ansible/playbooks/paas/roles/prometheus/templates/config.j2 index 360d93c9..a4530264 100644 --- a/ansible/playbooks/paas/roles/prometheus/templates/config.j2 +++ b/ansible/playbooks/paas/roles/prometheus/templates/config.j2 @@ -15,9 +15,11 @@ remote_write: headers: X-Scope-OrgID: demo send_exemplars: true +{% if prometheus_remote_write.basic_auth %} basic_auth: username: "{{ prometheus_remote_write.login }}" password: "{{ prometheus_remote_write.password }}" +{% endif %} queue_config: capacity: 25000 # Capacité totale de la file d'attente max_shards: 10 # Nombre de shards parallèles (trop haut = surcharge CPU) @@ -64,14 +66,10 @@ scrape_configs: regex: "^(__tmp_keep_me)$" static_configs: - - targets: ['127.0.0.1:9100'] +{% for item in groups['infrastructure'] | default([]) if item.split('.')[4] == project %} + - targets: ['{{ hostvars[item]['ansible_' + hostvars[item].nomad_iface]['ipv4']['address'] }}:9100'] labels: - instance: "{{ inventory_hostname }}" - project: "{{ prometheus_project }}" -{% for item in prometheus_nodes_exporter | default([]) %} - - targets: ['{{ item.target }}:9100'] - labels: - instance: "{{ item.instance }}" + instance: "{{ item }}" project: "{{ prometheus_project }}" {% endfor %} scrape_interval: 60s @@ -169,11 +167,39 @@ scrape_configs: project: "{{ prometheus_project }}" - job_name: "systemd_exporter" + metric_relabel_configs: + - action: drop + regex: "^(go_|prometheus_|promhttp_).*" + source_labels: [__name__] + bearer_token: "{{ 
lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" + params: + format: ['prometheus'] + metrics_path: /metrics + tls_config: + insecure_skip_verify: true static_configs: - - targets: ['127.0.0.1:9558'] +{% for item in groups['infrastructure'] | default([]) if item.split('.')[4] == project %} + - targets: ['{{ hostvars[item]['ansible_' + hostvars[item].nomad_iface]['ipv4']['address'] }}:9558'] labels: - instance: "{{ inventory_hostname }}" + instance: "{{ item }}" + project: "{{ prometheus_project }}" +{% endfor %} + + - job_name: "nvidia_gpu_exporter" + metric_relabel_configs: + - action: drop + regex: "^(go_|prometheus_|promhttp_).*" + source_labels: [__name__] + params: + format: ['prometheus'] + metrics_path: /metrics + static_configs: +{% for item in groups['infrastructure'] | default([]) if item.split('.')[4] == project %} + - targets: ['{{ hostvars[item]['ansible_' + hostvars[item].nomad_iface]['ipv4']['address'] }}:9835'] + labels: + instance: "{{ item }}" project: "{{ prometheus_project }}" +{% endfor %} - job_name: "nomad_exporter" metric_relabel_configs: @@ -181,38 +207,51 @@ scrape_configs: regex: "^(go_|prometheus_|promhttp_).*" source_labels: [__name__] scheme: https - bearer_token: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + bearer_token: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" params: format: ['prometheus'] metrics_path: /v1/metrics tls_config: insecure_skip_verify: true static_configs: - - targets: ['127.0.0.1:4646'] +{% for item in groups['infrastructure'] | default([]) if item.split('.')[4] == project %} + - targets: ['{{ hostvars[item]['ansible_' + hostvars[item].nomad_iface]['ipv4']['address'] }}:4646'] labels: - instance: "{{ inventory_hostname 
}}" + instance: "{{ item }}" project: "{{ prometheus_project }}" - +{% endfor %} - job_name: 'mimir_exporter' nomad_sd_configs: - - server: "https://127.0.0.1:4646" + - server: "https://{{ nomad_primary_master_address | default(inventory_hostname) }}:4646" region: "{{ fact_instance.region }}" tls_config: insecure_skip_verify: true authorization: - credentials: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + credentials: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" relabel_configs: - source_labels: ['__meta_nomad_service'] regex: 'mimir-exporter' action: keep - job_name: 'promtail' + metric_relabel_configs: + - action: drop + regex: "^(go_|prometheus_|promhttp_).*" + source_labels: [__name__] + bearer_token: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" + params: + format: ['prometheus'] + metrics_path: /metrics + tls_config: + insecure_skip_verify: true static_configs: - - targets: ['127.0.0.1:9080'] +{% for item in groups['infrastructure'] | default([]) if item.split('.')[4] == project %} + - targets: ['{{ hostvars[item]['ansible_' + hostvars[item].nomad_iface]['ipv4']['address'] }}:9080'] labels: - instance: "{{ inventory_hostname }}" + instance: "{{ item }}" project: "{{ prometheus_project }}" +{% endfor %} - job_name: 'blackbox' static_configs: @@ -223,12 +262,12 @@ scrape_configs: - job_name: 'traefik' nomad_sd_configs: - - server: "https://127.0.0.1:4646" + - server: "https://{{ nomad_primary_master_address | default(inventory_hostname) }}:4646" region: "{{ fact_instance.region }}" tls_config: insecure_skip_verify: true authorization: - credentials: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', 
missing='error') }}" + credentials: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" relabel_configs: - target_label: instance replacement: "{{ inventory_hostname }}" @@ -248,16 +287,22 @@ scrape_configs: - source_labels: [__tmp_fqdn] target_label: fqdn + # Traefik exposes its metrics on a different port + - source_labels: [__address__] + regex: '(.+):\d+' + target_label: __address__ + replacement: '${1}:8081' + - job_name: 'minio' metrics_path: /minio/v2/metrics/cluster scheme: http nomad_sd_configs: - - server: "https://127.0.0.1:4646" + - server: "https://{{ nomad_primary_master_address | default(inventory_hostname) }}:4646" region: "{{ fact_instance.region }}" tls_config: insecure_skip_verify: true authorization: - credentials: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + credentials: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" relabel_configs: - target_label: instance replacement: "{{ inventory_hostname }}" @@ -279,12 +324,12 @@ scrape_configs: - job_name: 'caddy' nomad_sd_configs: - - server: "https://127.0.0.1:4646" + - server: "https://{{ nomad_primary_master_address | default(inventory_hostname) }}:4646" region: "{{ fact_instance.region }}" tls_config: insecure_skip_verify: true authorization: - credentials: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + credentials: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" relabel_configs: - target_label: instance replacement: "{{ inventory_hostname }}" @@ -308,14 +353,47 @@ scrape_configs: # replacement: '$1' # target_label: 
instance + + - job_name: 'vllm' + nomad_sd_configs: + - server: "https://{{ nomad_primary_master_address | default(inventory_hostname) }}:4646" + region: "{{ fact_instance.region }}" + tls_config: + insecure_skip_verify: true + authorization: + credentials: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" + relabel_configs: + - target_label: instance + replacement: "{{ inventory_hostname }}" + - target_label: project + replacement: "{{ prometheus_project }}" + + - source_labels: ['__meta_nomad_service'] + regex: 'vllm' + action: keep + + - source_labels: [__meta_nomad_tags] + regex: .*,fqdn:([^,]+),.* + target_label: __tmp_fqdn + replacement: $1 + action: replace + + - source_labels: [__tmp_fqdn] + target_label: fqdn + + # - source_labels: ['__meta_nomad_node'] + # replacement: '$1' + # target_label: instance + + - job_name: 'mysql_exporter' nomad_sd_configs: - - server: "https://127.0.0.1:4646" + - server: "https://{{ nomad_primary_master_address | default(inventory_hostname) }}:4646" region: "{{ fact_instance.region }}" tls_config: insecure_skip_verify: true authorization: - credentials: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + credentials: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" relabel_configs: - target_label: instance replacement: "{{ inventory_hostname }}" @@ -341,12 +419,12 @@ scrape_configs: - job_name: 'nginx_exporter' nomad_sd_configs: - - server: "https://127.0.0.1:4646" + - server: "https://{{ nomad_primary_master_address | default(inventory_hostname) }}:4646" region: "{{ fact_instance.region }}" tls_config: insecure_skip_verify: true authorization: - credentials: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, 
subkey='nomad_management_token', missing='error') }}" + credentials: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" relabel_configs: - target_label: instance replacement: "{{ inventory_hostname }}" @@ -372,12 +450,12 @@ scrape_configs: - job_name: 'phpfpm_exporter' nomad_sd_configs: - - server: "https://127.0.0.1:4646" + - server: "https://{{ nomad_primary_master_address | default(inventory_hostname) }}:4646" region: "{{ fact_instance.region }}" tls_config: insecure_skip_verify: true authorization: - credentials: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + credentials: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" relabel_configs: - target_label: instance replacement: "{{ inventory_hostname }}" @@ -406,12 +484,12 @@ scrape_configs: metrics_path: /api/prometheus scheme: http nomad_sd_configs: - - server: "https://127.0.0.1:4646" + - server: "https://{{ nomad_primary_master_address | default(inventory_hostname) }}:4646" region: "{{ fact_instance.region }}" tls_config: insecure_skip_verify: true authorization: - credentials: "{{ lookup('simple-stack-ui', type='secret', key=inventory_hostname, subkey='nomad_management_token', missing='error') }}" + credentials: "{{ lookup('simple-stack-ui', type='secret', key=nomad_primary_master_node | default(inventory_hostname), subkey='nomad_management_token', missing='error') }}" relabel_configs: - target_label: instance diff --git a/ansible/playbooks/paas/roles/promtail/tasks/build.yml b/ansible/playbooks/paas/roles/promtail/tasks/build.yml index c5b28439..f3b6b120 100644 --- a/ansible/playbooks/paas/roles/promtail/tasks/build.yml +++ b/ansible/playbooks/paas/roles/promtail/tasks/build.yml @@ -14,7 +14,7 @@ url: "{{ 
upstream_file_url }}" dest: "{{ build_work_dir }}/download/" mode: '0644' - force: no + force: false register: download_result - name: Promtail | Unarchive GitHub release diff --git a/ansible/playbooks/paas/roles/promtail/tasks/main.yml b/ansible/playbooks/paas/roles/promtail/tasks/main.yml index 69fd20f6..21864a15 100644 --- a/ansible/playbooks/paas/roles/promtail/tasks/main.yml +++ b/ansible/playbooks/paas/roles/promtail/tasks/main.yml @@ -1,43 +1,40 @@ --- -- name: Promtail | Service is enabled - when: loki_remote_write is not defined - block: - - name: Promtail | Include upstream variables - ansible.builtin.include_vars: upstream.yml +- name: Promtail | Include upstream variables + ansible.builtin.include_vars: upstream.yml - - name: Promtail | Set custom variables - ansible.builtin.set_fact: - image_version: "{{ latest_version }}" - image_name: "{{ image.name }}" +- name: Promtail | Set custom variables + ansible.builtin.set_fact: + image_version: "{{ latest_version }}" + image_name: "{{ image.name }}" - - name: Promtail | Get binary - include_tasks: build.yml - when: ansible_local[image.name] is not defined or ansible_local[image.name] != latest_version +- name: Promtail | Get binary + ansible.builtin.include_tasks: build.yml + when: ansible_local[image.name] is not defined or ansible_local[image.name] != latest_version - - name: Promtail | Create custom directories - ansible.builtin.file: - dest: "{{ item }}" - state: directory - recurse: true - with_items: - - /etc/promtail - - /var/lib/promtail +- name: Promtail | Create custom directories + ansible.builtin.file: + dest: "{{ item }}" + state: directory + recurse: true + with_items: + - /etc/promtail + - /var/lib/promtail - - name: Promtail | Copy templates - ansible.builtin.template: - src: "{{ item.src }}" - dest: "{{ item.dest }}" - mode: 0644 - owner: root - group: root - loop: - - src: default.j2 - dest: /etc/default/promtail - - src: config.yaml.j2 - dest: /etc/promtail/config.yaml - - src: service.j2 
- dest: /etc/systemd/system/promtail.service - notify: Restart promtail +- name: Promtail | Copy templates + ansible.builtin.template: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + mode: '0644' + owner: root + group: root + loop: + - src: default.j2 + dest: /etc/default/promtail + - src: config.yaml.j2 + dest: /etc/promtail/config.yaml + - src: service.j2 + dest: /etc/systemd/system/promtail.service + notify: Restart promtail - - name: Promtail | Flush handlers - ansible.builtin.meta: flush_handlers +- name: Promtail | Flush handlers + ansible.builtin.meta: flush_handlers diff --git a/ansible/playbooks/paas/roles/promtail/templates/config.yaml.j2 b/ansible/playbooks/paas/roles/promtail/templates/config.yaml.j2 index ac2220d3..50a2def9 100644 --- a/ansible/playbooks/paas/roles/promtail/templates/config.yaml.j2 +++ b/ansible/playbooks/paas/roles/promtail/templates/config.yaml.j2 @@ -1,5 +1,5 @@ server: - http_listen_address: 127.0.0.1 + http_listen_address: {{ hostvars[inventory_hostname]['ansible_' + nomad_iface]['ipv4']['address'] }} http_listen_port: 9080 grpc_listen_port: 0 log_level: warn @@ -7,11 +7,17 @@ server: positions: filename: /var/lib/promtail/positions.yaml +{% if loki_remote_write is defined %} clients: - url: {{ loki_remote_write.url }}/api/prom/push +{% if loki_remote_write.basic_auth %} basic_auth: username: {{ loki_remote_write.login }} password: {{ loki_remote_write.password }} +{% endif %} +{% else %} +clients: [] +{% endif %} scrape_configs: - job_name: system diff --git a/ansible/playbooks/paas/roles/systemd_exporter/templates/default.j2 b/ansible/playbooks/paas/roles/systemd_exporter/templates/default.j2 index 1a27d383..f64efe14 100644 --- a/ansible/playbooks/paas/roles/systemd_exporter/templates/default.j2 +++ b/ansible/playbooks/paas/roles/systemd_exporter/templates/default.j2 @@ -1,2 +1,2 @@ -ARGS="--web.listen-address=127.0.0.1:9558 \ +ARGS="--web.listen-address={{ hostvars[inventory_hostname]['ansible_' + 
nomad_iface]['ipv4']['address'] }}:9558 \ --systemd.collector.unit-include=docker.service|promtail.service|coredns.service|prometheus.service|blackbox_exporter.service|node_exporter.service|scan_exporter.service" diff --git a/ansible/playbooks/paas/systemd-resolved.yml b/ansible/playbooks/paas/systemd-resolved.yml index f07ac0bb..9cfc4f6d 100644 --- a/ansible/playbooks/paas/systemd-resolved.yml +++ b/ansible/playbooks/paas/systemd-resolved.yml @@ -16,6 +16,7 @@ content: | [Resolve] DNSStubListener=yes + DNSStubListenerExtra=172.17.0.1:53 dest: /etc/systemd/resolved.conf.d/systemd-resolved.conf mode: '0644' notify: Restart systemd-resolved diff --git a/ansible/playbooks/saas/basic_auth.yml b/ansible/playbooks/saas/basic_auth.yml index 06e2d7af..cb400913 100644 --- a/ansible/playbooks/saas/basic_auth.yml +++ b/ansible/playbooks/saas/basic_auth.yml @@ -14,4 +14,4 @@ tasks: - name: Debug hash ansible.builtin.debug: - msg: "{{ login }}:{{ password | password_hash('blowfish') }}" + msg: "{{ login }}:{{ password | ansible.builtin.password_hash('blowfish') }}" diff --git a/ansible/playbooks/saas/image.yml b/ansible/playbooks/saas/image.yml index 9f819d3d..63b71ebd 100644 --- a/ansible/playbooks/saas/image.yml +++ b/ansible/playbooks/saas/image.yml @@ -26,25 +26,9 @@ state: directory mode: '0755' loop: - - /root/.docker - "{{ build_work_dir }}/download" - "{{ build_work_dir }}/{{ upstream_default_arch }}" - - name: Copy docker config file - ansible.builtin.copy: - content: | - { - "auths": { - "{{ docker_private_registry.url }}": { - "auth": "{{ (docker_private_registry.username + ':' + docker_private_registry.password) | b64encode }}" - } - } - } - dest: /root/.docker/config.json - owner: root - group: root - mode: '0600' - tasks: - name: Install dependencies ansible.builtin.include_role: @@ -59,9 +43,9 @@ - name: Build when: image_build block: - - name: Build image + - name: Build and publish image community.docker.docker_image_build: - name: "{{ 
docker_private_registry.url }}/{{ image_name }}:{{ image_version }}" + name: "{{ docker_private_registry.url }}/{{ docker_private_registry.project is defined | ternary(docker_private_registry.project + '/', '') }}{{ image_name }}:{{ image_version }}" tag: latest path: "/tmp/{{ catalog }}" dockerfile: Dockerfile @@ -91,6 +75,7 @@ version: "{{ image_version }}" force_basic_auth: true status_code: 200 - ignore_errors: true + register: ui_update + failed_when: ui_update.status != 200 delegate_to: localhost - become: false \ No newline at end of file + become: false diff --git a/ansible/playbooks/saas/main.yml b/ansible/playbooks/saas/main.yml index 0097343b..07729907 100644 --- a/ansible/playbooks/saas/main.yml +++ b/ansible/playbooks/saas/main.yml @@ -30,8 +30,8 @@ software: "{{ lookup('simple-stack-ui', type='software', key=domain, subkey='', missing='warn') }}" - name: Debug software - debug: - msg: "{{ software }}" + ansible.builtin.debug: + msg: "{{ software }}" tasks: - name: Deploy service diff --git a/ansible/playbooks/saas/roles/adguard/templates/nomad.hcl b/ansible/playbooks/saas/roles/adguard/templates/nomad.hcl index c3a315a5..3bb996f3 100644 --- a/ansible/playbooks/saas/roles/adguard/templates/nomad.hcl +++ b/ansible/playbooks/saas/roles/adguard/templates/nomad.hcl @@ -3,7 +3,7 @@ job "{{ domain }}" { datacenters = ["{{ fact_instance.datacenter }}"] type = "service" -{% if software.constraints.location %} +{% if software.constraints is defined and software.constraints.location is defined %} constraint { attribute = "${meta.location}" set_contains = "{{ software.constraints.location }}" diff --git a/ansible/playbooks/saas/roles/arangodb/templates/nomad.hcl b/ansible/playbooks/saas/roles/arangodb/templates/nomad.hcl index e6956e43..ccd7cace 100644 --- a/ansible/playbooks/saas/roles/arangodb/templates/nomad.hcl +++ b/ansible/playbooks/saas/roles/arangodb/templates/nomad.hcl @@ -3,7 +3,7 @@ job "{{ domain }}" { datacenters = ["{{ fact_instance.datacenter }}"] 
type = "service" -{% if software.constraints.location %} +{% if software.constraints is defined and software.constraints.location is defined %} constraint { attribute = "${meta.location}" set_contains = "{{ software.constraints.location }}" diff --git a/ansible/playbooks/saas/roles/caddy/tasks/main.yml b/ansible/playbooks/saas/roles/caddy/tasks/main.yml index b45d0b3e..f61aa1fa 100644 --- a/ansible/playbooks/saas/roles/caddy/tasks/main.yml +++ b/ansible/playbooks/saas/roles/caddy/tasks/main.yml @@ -1,15 +1,4 @@ --- -- name: Create default directory - ansible.builtin.file: - path: "{{ item }}" - state: directory - owner: root - group: root - mode: "0755" - loop: - - "{{ software_path }}/etc/caddy" - delegate_to: "{{ software.instance }}" - - name: Copy nomad job to destination ansible.builtin.template: src: nomad.hcl diff --git a/ansible/playbooks/saas/roles/caddy/templates/Dockerfile.j2 b/ansible/playbooks/saas/roles/caddy/templates/Dockerfile.j2 index 387b89ba..a423587b 100644 --- a/ansible/playbooks/saas/roles/caddy/templates/Dockerfile.j2 +++ b/ansible/playbooks/saas/roles/caddy/templates/Dockerfile.j2 @@ -15,4 +15,4 @@ RUN mkdir -p /var/log/caddy /var/lib/caddy /etc/caddy \ USER caddy -CMD ["caddy", "run", "--config", "/etc/caddy/Caddyfile"] +CMD ["caddy", "run"] diff --git a/ansible/playbooks/saas/roles/caddy/templates/nomad.hcl b/ansible/playbooks/saas/roles/caddy/templates/nomad.hcl index edbe32a0..cb828014 100644 --- a/ansible/playbooks/saas/roles/caddy/templates/nomad.hcl +++ b/ansible/playbooks/saas/roles/caddy/templates/nomad.hcl @@ -3,17 +3,19 @@ job "{{ domain }}" { datacenters = ["{{ fact_instance.datacenter }}"] type = "service" -{% if software.constraints.location %} +{% if software.constraints is defined and software.constraints.location is defined %} constraint { attribute = "${meta.location}" set_contains = "{{ software.constraints.location }}" } {% endif %} +{% if software.constraints is defined and software.constraints.instance is defined %} 
constraint { attribute = "${meta.instance}" - set_contains = "{{ software.instance }}" + set_contains = "{{ software.constraints.instance }}" } +{% endif %} group "{{ domain }}" { count = {{ software.scale | default(1) }} @@ -49,22 +51,26 @@ job "{{ domain }}" { task "{{ domain }}-caddy" { driver = "docker" - config { - image = "{{ docker_private_registry.url }}/caddy:{{ softwares.caddy.version }}" - volumes = [ - "{{ software_path }}/etc/caddy:/etc/caddy:ro" - ] - ports = ["caddy", "metrics"] - } - template { - change_mode = "noop" - destination = "{{ software_path }}/etc/caddy/Caddyfile" + change_mode = "restart" + destination = "local/Caddyfile" + perms = "644" data = < fields that will be converted to json and stored in jsonData. Custom per app. -# # jsonData: -# # # key/value pairs of string to object -# # key: value -# # # fields that will be converted to json, encrypted and stored in secureJsonData. Custom per app. -# # secureJsonData: -# # # key/value pairs of string to string -# # key: value diff --git a/ansible/playbooks/saas/roles/grafana/tasks/destroy.yml b/ansible/playbooks/saas/roles/grafana/tasks/destroy.yml index ce77a12b..e00b7de1 100644 --- a/ansible/playbooks/saas/roles/grafana/tasks/destroy.yml +++ b/ansible/playbooks/saas/roles/grafana/tasks/destroy.yml @@ -8,3 +8,4 @@ ansible.builtin.file: path: "{{ software_path }}" state: absent + delegate_to: "{{ software.instance }}" \ No newline at end of file diff --git a/ansible/playbooks/saas/roles/grafana/tasks/main.yml b/ansible/playbooks/saas/roles/grafana/tasks/main.yml index 3211d5c1..32f26abd 100644 --- a/ansible/playbooks/saas/roles/grafana/tasks/main.yml +++ b/ansible/playbooks/saas/roles/grafana/tasks/main.yml @@ -1,4 +1,15 @@ --- +- name: Create default directory + ansible.builtin.file: + path: "{{ item }}" + state: directory + owner: root + group: root + mode: "0755" + loop: + - "{{ software_path }}" + delegate_to: "{{ software.instance }}" + - name: Copy Grafana content files 
ansible.builtin.copy: src: "{{ item }}" @@ -9,16 +20,21 @@ loop: - dashboards - provisioning + delegate_to: "{{ software.instance }}" - name: Copy Grafana content templates ansible.builtin.template: - src: "provisioning/datasources/{{ item }}.j2" - dest: "{{ software_path }}/provisioning/datasources/{{ item }}" + src: "provisioning/{{ item.path }}/{{ item.file }}.j2" + dest: "{{ software_path }}/provisioning/{{ item.path }}/{{ item.file }}" owner: root group: root mode: '0644' loop: - - prometheus.yaml + - path: datasources + file: prometheus.yaml + - path: plugins + file: llm.yaml + delegate_to: "{{ software.instance }}" - name: Copy nomad job ansible.builtin.template: diff --git a/ansible/playbooks/saas/roles/grafana/templates/nomad.hcl b/ansible/playbooks/saas/roles/grafana/templates/nomad.hcl index a17e4205..2b68803f 100644 --- a/ansible/playbooks/saas/roles/grafana/templates/nomad.hcl +++ b/ansible/playbooks/saas/roles/grafana/templates/nomad.hcl @@ -3,7 +3,7 @@ job "{{ domain }}" { datacenters = ["{{ fact_instance.datacenter }}"] type = "service" -{% if software.constraints.location %} +{% if software.constraints is defined and software.constraints.location is defined %} constraint { attribute = "${meta.location}" set_contains = "{{ software.constraints.location }}" @@ -41,7 +41,7 @@ job "{{ domain }}" { env { GF_LOG_MODE = "console" GF_SERVER_HTTP_PORT = "3000" - GF_INSTALL_PLUGINS = "grafana-piechart-panel" + GF_INSTALL_PLUGINS = "grafana-piechart-panel,grafana-llm-app" GF_SECURITY_ADMIN_USER = "{{ lookup('simple-stack-ui', type='secret', key=domain, subkey='user', missing='create', nosymbols=true, length=8) }}" GF_SECURITY_ADMIN_PASSWORD = "{{ lookup('simple-stack-ui', type='secret', key=domain, subkey='passwd', missing='create', length=12) }}" DS_PROMETHEUS = "prometheus" diff --git a/ansible/playbooks/saas/roles/grafana/templates/provisioning/datasources/prometheus.yaml.j2 
b/ansible/playbooks/saas/roles/grafana/templates/provisioning/datasources/prometheus.yaml.j2 index 81abd7e0..539aa307 100644 --- a/ansible/playbooks/saas/roles/grafana/templates/provisioning/datasources/prometheus.yaml.j2 +++ b/ansible/playbooks/saas/roles/grafana/templates/provisioning/datasources/prometheus.yaml.j2 @@ -1,5 +1,6 @@ apiVersion: 1 +{% if prometheus_remote_write is defined %} datasources: - name: Mimir uid: prometheus @@ -47,3 +48,6 @@ datasources: basicAuthPassword: "{{ loki_remote_write.password }}" isDefault: false {% endif %} +{% else %} +datasources: [] +{% endif %} \ No newline at end of file diff --git a/ansible/playbooks/saas/roles/grafana/templates/provisioning/plugins/llm.yaml.j2 b/ansible/playbooks/saas/roles/grafana/templates/provisioning/plugins/llm.yaml.j2 new file mode 100644 index 00000000..2d5cdd9e --- /dev/null +++ b/ansible/playbooks/saas/roles/grafana/templates/provisioning/plugins/llm.yaml.j2 @@ -0,0 +1,4 @@ +apiVersion: 1 + +apps: +{{ (lookup('simple-stack-ui', type='secret', key=domain, subkey='plugins', missing='error') | from_json) | to_nice_yaml }} \ No newline at end of file diff --git a/ansible/playbooks/saas/roles/homeassistant/templates/nomad.hcl b/ansible/playbooks/saas/roles/homeassistant/templates/nomad.hcl index ec5ba3a6..68e101f1 100644 --- a/ansible/playbooks/saas/roles/homeassistant/templates/nomad.hcl +++ b/ansible/playbooks/saas/roles/homeassistant/templates/nomad.hcl @@ -3,7 +3,7 @@ job "{{ domain }}" { datacenters = ["{{ fact_instance.datacenter }}"] type = "service" -{% if software.constraints.location %} +{% if software.constraints is defined and software.constraints.location is defined %} constraint { attribute = "${meta.location}" set_contains = "{{ software.constraints.location }}" diff --git a/ansible/playbooks/saas/roles/kresus/templates/nomad.hcl b/ansible/playbooks/saas/roles/kresus/templates/nomad.hcl index 75da59ff..1e8d7e7d 100644 --- a/ansible/playbooks/saas/roles/kresus/templates/nomad.hcl +++ 
b/ansible/playbooks/saas/roles/kresus/templates/nomad.hcl @@ -3,7 +3,7 @@ job "{{ domain }}" { datacenters = ["{{ fact_instance.datacenter }}"] type = "service" -{% if software.constraints.location %} +{% if software.constraints is defined and software.constraints.location is defined %} constraint { attribute = "${meta.location}" set_contains = "{{ software.constraints.location }}" diff --git a/ansible/playbooks/saas/roles/loki/tasks/main.yml b/ansible/playbooks/saas/roles/loki/tasks/main.yml index e10eec4a..10a2e33a 100644 --- a/ansible/playbooks/saas/roles/loki/tasks/main.yml +++ b/ansible/playbooks/saas/roles/loki/tasks/main.yml @@ -8,15 +8,7 @@ mode: '0755' loop: - "{{ software_path }}/var/lib/loki" - - "{{ software_path }}/etc/loki" - -- name: Copy config file - ansible.builtin.template: - src: config.yaml.j2 - dest: "{{ software_path }}/etc/loki/local-config.yaml" - owner: 10001 - group: 10001 - mode: '0644' + delegate_to: "{{ software.instance }}" - name: Copy nomad job ansible.builtin.template: diff --git a/ansible/playbooks/saas/roles/loki/templates/nomad.hcl b/ansible/playbooks/saas/roles/loki/templates/nomad.hcl index ed695b61..514509d3 100644 --- a/ansible/playbooks/saas/roles/loki/templates/nomad.hcl +++ b/ansible/playbooks/saas/roles/loki/templates/nomad.hcl @@ -3,7 +3,7 @@ job "{{ domain }}" { datacenters = ["{{ fact_instance.datacenter }}"] type = "service" -{% if software.constraints.location %} +{% if software.constraints is defined and software.constraints.location is defined %} constraint { attribute = "${meta.location}" set_contains = "{{ software.constraints.location }}" @@ -40,12 +40,23 @@ job "{{ domain }}" { config { image = "grafana/loki:{{ softwares.loki.version }}" volumes = [ - "{{ software_path }}/var/lib/loki:/var/lib/loki:rw", - "{{ software_path }}/etc/loki:/etc/loki:ro" + "{{ software_path }}/var/lib/loki:/var/lib/loki:rw" + ] + args = [ + "-config.file", + "/local/config.yaml" ] ports = ["loki"] } + template { + change_mode = 
"restart" + destination = "local/config.yaml" + data = < Softwares/execute'); // Variables - ROUTE('+API /api/ +variables --> Variables/list'); - ROUTE('+API /api/ +variables_read/{id} --> Variables/read'); - ROUTE('+API /api/ +variables_create --> Variables/create'); - ROUTE('+API /api/ +variables_update/{id} --> Variables/update'); - ROUTE('+POST /api/secret --> Variables/secret'); + ROUTE('+API /api/ +variables --> Variables/list'); + ROUTE('+API /api/ +variables_read/{id} --> Variables/read'); + ROUTE('+API /api/ +variables_create --> Variables/create'); + ROUTE('+API /api/ +variables_update/{id} --> Variables/update'); + ROUTE('+API /api/ +variables_remove/{id} --> Variables/remove'); + ROUTE('+POST /api/secret --> Variables/secret'); // 3dForceGraph ROUTE('+API /api/ -graphs --> Graphs/list'); diff --git a/ui/public/forms/catalogs.html b/ui/public/forms/catalogs.html index c00feb78..bad80e7c 100644 --- a/ui/public/forms/catalogs.html +++ b/ui/public/forms/catalogs.html @@ -17,6 +17,7 @@