From e5b9bc77511e921665e69a0d0f869ce4667f6862 Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Wed, 16 Apr 2025 15:12:09 -0400 Subject: [PATCH 01/14] Bump linode-api4 to 5.29.0 We need linode-api4 5.29 to assign firewalls to newly created linodes. --- apps/manual-kafka-cluster/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/manual-kafka-cluster/requirements.txt b/apps/manual-kafka-cluster/requirements.txt index f2e0448..d8932ec 100644 --- a/apps/manual-kafka-cluster/requirements.txt +++ b/apps/manual-kafka-cluster/requirements.txt @@ -8,6 +8,6 @@ pyyaml==6.0.1 dnspython==2.2.1 passlib==1.7.4 ## cloud.linode module dependancies ## -linode-api4==5.15.1 +linode-api4==5.29.0 polling==0.3.2 ansible-specdoc==0.0.14 From a1474259dd6ae8ed96088cdb787bae6eb3f2da8c Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Wed, 16 Apr 2025 15:16:09 -0400 Subject: [PATCH 02/14] Add ability to assign a firewall We can assign a firewall to newly created linodes if the user sets a firewall_label in `vars` --- apps/manual-kafka-cluster/provision.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/apps/manual-kafka-cluster/provision.yml b/apps/manual-kafka-cluster/provision.yml index 05bfe76..df5dfcb 100644 --- a/apps/manual-kafka-cluster/provision.yml +++ b/apps/manual-kafka-cluster/provision.yml @@ -8,7 +8,16 @@ tasks: + - name: get firewall info + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/firewall_info/ + linode.cloud.firewall_info: + label: '{{ firewall_label }}' + api_token: '{{ api_token }}' + register: firewall_info + when: firewall_label|d(False) + - name: creating kafka servers + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/instance/ linode.cloud.instance: label: '{{ instance_prefix }}{{ item }}' api_token: '{{ api_token }}' @@ -21,6 +30,7 @@ ua_prefix: 'docs-kafka-occ' tags: '{{ linode_tags }}' state: present + firewall_id: '{{ (firewall_info.firewall|default({})).id|default(omit) }}' with_sequence: count='{{ cluster_size }}' - name: get info about the instances From c75807ef5f2e41753ff0daf0c0b2b00efebdc286 Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Wed, 16 Apr 2025 15:17:32 -0400 Subject: [PATCH 03/14] Add firewall_label Add the firewall_label variable to vars that allows setting a firewall when deploying linodes --- apps/manual-kafka-cluster/group_vars/kafka/vars | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/manual-kafka-cluster/group_vars/kafka/vars b/apps/manual-kafka-cluster/group_vars/kafka/vars index 0a8513b..9858d49 100644 --- a/apps/manual-kafka-cluster/group_vars/kafka/vars +++ b/apps/manual-kafka-cluster/group_vars/kafka/vars @@ -9,6 +9,7 @@ region: us-southeast image: linode/ubuntu24.04 group: linode_tags: +firewall_label: cluster_size: 3 client_count: 2 @@ -26,4 +27,4 @@ state_or_province_name: Pennsylvania locality_name: Philadelphia organization_name: Akamai Technologies email_address: webmaster@example.com -ca_common_name: Kafka RootCA \ No newline at end of file +ca_common_name: Kafka RootCA From 739cbac7c64f38ee7f7fe45070e6835383a1a153 Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Wed, 16 Apr 2025 15:20:49 -0400 Subject: [PATCH 04/14] Use linode.cloud 0.37.1 --- apps/manual-kafka-cluster/collections.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/manual-kafka-cluster/collections.yml b/apps/manual-kafka-cluster/collections.yml index 51bd91c..a4ed148 100644 --- a/apps/manual-kafka-cluster/collections.yml +++ b/apps/manual-kafka-cluster/collections.yml @@ -2,6 +2,6 @@ collections: - name: community.crypto version: 2.15.1 - name: linode.cloud - version: 0.16.1 + version: 0.37.1 - name: community.general - version: 8.6.0 \ No newline at end of file + version: 8.6.0 From 73520ae0ecd82d01274a4c21cea0d157caadd546 Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Thu, 17 Apr 2025 13:36:34 -0400 Subject: [PATCH 05/14] Use rdns instead of IP for ssh Use the rdns name for the linode rather than the IP when doing ssh since this allows corporate DNS servers to reroute traffic through their own proxies. --- apps/manual-kafka-cluster/provision.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/manual-kafka-cluster/provision.yml b/apps/manual-kafka-cluster/provision.yml index df5dfcb..4625dd9 100644 --- a/apps/manual-kafka-cluster/provision.yml +++ b/apps/manual-kafka-cluster/provision.yml @@ -64,13 +64,13 @@ #jinja2: trim_blocks:False [kafka] {%- for count in range(cluster_size) %} - {{ info.results[count].instance.ipv4[0] }} {% if count < controller_count %}role='controller and broker'{%else%}role='broker only'{%endif%} + {{ info.results[count].networking.ipv4.public[0].rdns }} {% if count < controller_count %}role='controller and broker'{%else%}role='broker only'{%endif%} {%- endfor %} - name: wait for port 22 to become open wait_for: port: 22 - host: '{{ item.instance.ipv4[0] }}' + host: '{{ item.networking.ipv4.public[0].rdns }}' search_regex: OpenSSH delay: 10 connection: local From c423c463b9b45b9384d944716c269528b21b4cbb Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Thu, 17 Apr 2025 13:39:19 -0400 Subject: [PATCH 06/14] Add support for adding hosts to DNS If a `domain_name` is configured in `vars`, then we should also add these hosts to DNS. Before adding, we will remove any stale records. The hostname in `kafka_data` is used to generate self signed certificates. If we're using a FQDN name to connect to kafka, then the certificate needs to have this FQDN as well. --- apps/manual-kafka-cluster/provision.yml | 41 ++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/apps/manual-kafka-cluster/provision.yml b/apps/manual-kafka-cluster/provision.yml index 4625dd9..7570c98 100644 --- a/apps/manual-kafka-cluster/provision.yml +++ b/apps/manual-kafka-cluster/provision.yml @@ -40,6 +40,45 @@ register: info with_sequence: count='{{ cluster_size }}' + - name: check if hosts are in DNS + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/domain_info/ + linode.cloud.domain_info: + api_token: '{{ api_token }}' + domain: '{{ domain_name }}' + when: domain_name|d(False) + register: domain_info + + - name: remove old hosts from dns + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/domain_record/ + linode.cloud.domain_record: + api_token: '{{ api_token }}' + domain_id: '{{ domain_info.domain.id }}' + record_id: '{{ item.id }}' + state: absent + vars: + instance_ips: "{{ dict(info.results | map(attribute='instance.label') | zip(info.results | map(attribute='networking.ipv4.public.0.address'))) }}" + when: + - domain_name|d(False) + - domain_info|d(False) + - item.name in instance_ips + - item.target != instance_ips[item.name] + with_items: "{{ domain_info.records }}" + + - name: add new hosts to dns + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/domain_record/ + linode.cloud.domain_record: + api_token: '{{ api_token }}' + domain_id: '{{ domain_info.domain.id }}' + name: '{{ instance_prefix + item }}' + target: '{{ info.results[item|int-1].instance.ipv4[0] }}' + ttl_sec: '{{ ttl_sec | default(omit) }}' + type: 'A' + state: present + with_sequence: count='{{ cluster_size }}' + when: + - domain_name|d(False) + - domain_info|d(False) + - name: update group_vars blockinfile: path: ./group_vars/kafka/vars @@ -51,7 +90,7 @@ {%- for count in range(cluster_size) %} - kafka{{ count + 1 }}: instance: - hostname: kafka{{ count + 1 }} + hostname: kafka{{ count + 1 }}{% if domain_name|d(False) and domain_info %}.{{ domain_name }}{% endif %} ip_pub1: {{ info.results[count].instance.ipv4[0] }} ip_priv1: {{ info.results[count].instance.ipv4[1] }} {%- endfor %} From dfd2c2de8a7d384182701230c0756a62cdb1c2c5 Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Thu, 17 Apr 2025 13:40:40 -0400 Subject: [PATCH 07/14] Add placeholders for DNS --- apps/manual-kafka-cluster/group_vars/kafka/vars | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/apps/manual-kafka-cluster/group_vars/kafka/vars b/apps/manual-kafka-cluster/group_vars/kafka/vars index 9858d49..c766d0a 100644 --- a/apps/manual-kafka-cluster/group_vars/kafka/vars +++ b/apps/manual-kafka-cluster/group_vars/kafka/vars @@ -11,6 +11,10 @@ group: linode_tags: firewall_label: +# Optional settings for DNS +domain_name: +ttl_sec: + cluster_size: 3 client_count: 2 sudo_username: admin From 678af8d05df70375c79b98d586e8f50544034edb Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Tue, 22 Apr 2025 15:07:04 -0400 Subject: [PATCH 08/14] Use cluster hosts' FQDNs in their /etc/hosts file as well as the alias --- apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml b/apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml index 0f9d261..d18f019 100644 --- a/apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml +++ b/apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml @@ -8,7 +8,7 @@ block: | #jinja2: trim_blocks:False {%- for count in range(cluster_size) %} - {{ kafka_data.server[count].instance.ip_priv1 }} {{ kafka_data.server[count].instance.hostname }} + {{ kafka_data.server[count].instance.ip_priv1 }} {{ kafka_data.server[count].instance.hostname }} {{ instance_prefix }}{{ count+1 }} {%- endfor %} - name: configure hostnames @@ -19,4 +19,4 @@ delegate_to: "{{ item }}" loop: "{{ groups['kafka'] }}" loop_control: - index_var: count \ No newline at end of file + index_var: count From f026855ccad6be2605521f59e5c5928ddcf72574 Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Fri, 25 Apr 2025 14:35:51 -0400 Subject: [PATCH 09/14] Use instance_prefix in hostnames instead of hardcoding to kafka --- apps/manual-kafka-cluster/provision.yml | 4 ++-- apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/manual-kafka-cluster/provision.yml b/apps/manual-kafka-cluster/provision.yml index 7570c98..61b84f2 100644 --- a/apps/manual-kafka-cluster/provision.yml +++ b/apps/manual-kafka-cluster/provision.yml @@ -88,9 +88,9 @@ kafka_data: server: {%- for count in range(cluster_size) %} - - kafka{{ count + 1 }}: + - name: {{instance_prefix}}{{ count + 1 }} instance: - hostname: kafka{{ count + 1 }}{% if domain_name|d(False) and domain_info %}.{{ domain_name }}{% endif %} + hostname: {{instance_prefix}}{{ count + 1 }}{% if domain_name|d(False) and domain_info %}.{{ domain_name }}{% endif %} ip_pub1: {{ info.results[count].instance.ipv4[0] }} ip_priv1: {{ info.results[count].instance.ipv4[1] }} {%- endfor %} diff --git a/apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml b/apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml index d18f019..3086be8 100644 --- a/apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml +++ b/apps/manual-kafka-cluster/roles/kafka/tasks/hostname.yml @@ -8,7 +8,7 @@ block: | #jinja2: trim_blocks:False {%- for count in range(cluster_size) %} - {{ kafka_data.server[count].instance.ip_priv1 }} {{ kafka_data.server[count].instance.hostname }} {{ instance_prefix }}{{ count+1 }} + {{ kafka_data.server[count].instance.ip_priv1 }} {{ kafka_data.server[count].instance.hostname }} {{ kafka_data.server[count].name }} {%- endfor %} - name: configure hostnames From 0afebc4be5be89133390ea004fa4034b4c38aeaa Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Fri, 25 Apr 2025 14:53:33 -0400 Subject: [PATCH 10/14] Only use instance_prefix when creating or checking linodes. Use instance.label for everything else --- apps/manual-kafka-cluster/provision.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/manual-kafka-cluster/provision.yml b/apps/manual-kafka-cluster/provision.yml index 61b84f2..dcb451c 100644 --- a/apps/manual-kafka-cluster/provision.yml +++ b/apps/manual-kafka-cluster/provision.yml @@ -69,12 +69,12 @@ linode.cloud.domain_record: api_token: '{{ api_token }}' domain_id: '{{ domain_info.domain.id }}' - name: '{{ instance_prefix + item }}' - target: '{{ info.results[item|int-1].instance.ipv4[0] }}' + name: '{{ item.instance.label }}' + target: '{{ item.instance.ipv4[0] }}' ttl_sec: '{{ ttl_sec | default(omit) }}' type: 'A' state: present - with_sequence: count='{{ cluster_size }}' + with_items: "{{ info.results }}" when: - domain_name|d(False) - domain_info|d(False) @@ -88,9 +88,9 @@ kafka_data: server: {%- for count in range(cluster_size) %} - - name: {{instance_prefix}}{{ count + 1 }} + - name: {{ info.results[count].instance.label }} instance: - hostname: {{instance_prefix}}{{ count + 1 }}{% if domain_name|d(False) and domain_info %}.{{ domain_name }}{% endif %} + hostname: {{ info.results[count].instance.label }}{% if domain_name|d(False) and domain_info %}.{{ domain_name }}{% endif %} ip_pub1: {{ info.results[count].instance.ipv4[0] }} ip_priv1: {{ info.results[count].instance.ipv4[1] }} {%- endfor %} From 19e91d9ef917060082cd73607f87e34c55b385e4 Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Thu, 17 Apr 2025 13:33:59 -0400 Subject: [PATCH 11/14] Only create instances that do not already exist This commit checks which kafka instances have already been created before trying to create new ones. It will only create instances that do not already exist. instance_list gets us information about all instances in a single call which is more efficient than looping through instance_info just to check if an instance has been created. --- apps/manual-kafka-cluster/provision.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/apps/manual-kafka-cluster/provision.yml b/apps/manual-kafka-cluster/provision.yml index dcb451c..f868c14 100644 --- a/apps/manual-kafka-cluster/provision.yml +++ b/apps/manual-kafka-cluster/provision.yml @@ -16,6 +16,20 @@ register: firewall_info when: firewall_label|d(False) + - name: check if instances already created + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/instance_list/ + linode.cloud.instance_list: + api_token: '{{ api_token }}' + filters: + - name: label + values: "{{ [ instance_prefix ] | product(range(1, cluster_size+1)) | map('join') | list }}" + order_by: label + register: existing_instances + + - name: convert instances to a dict + set_fact: + existing_instances: "{{ dict(existing_instances.instances | map(attribute='label') | zip(existing_instances.instances)) }}" + - name: creating kafka servers # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/instance/ linode.cloud.instance: @@ -32,6 +46,7 @@ state: present firewall_id: '{{ (firewall_info.firewall|default({})).id|default(omit) }}' with_sequence: count='{{ cluster_size }}' + when: (instance_prefix + item) not in existing_instances - name: get info about the instances linode.cloud.instance_info: From f8140b57a6a1f4fa3d3a2d08555796bb6c01818d Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Tue, 22 Apr 2025 15:06:09 -0400 Subject: [PATCH 12/14] Allow retries when running the site playbook that will reuse the previously created cluster id --- .../roles/kafka/tasks/configure.yml | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/apps/manual-kafka-cluster/roles/kafka/tasks/configure.yml b/apps/manual-kafka-cluster/roles/kafka/tasks/configure.yml index 53124e4..3b1609b 100644 --- a/apps/manual-kafka-cluster/roles/kafka/tasks/configure.yml +++ b/apps/manual-kafka-cluster/roles/kafka/tasks/configure.yml @@ -51,16 +51,31 @@ index_var: count when: hostvars[groups['kafka'][count]].role == 'broker only' +- name: check existing kafka cluster uuid + shell: + cmd: grep -s cluster.id {{ kafka_data_directory }}/data/kraft-combined-logs/meta.properties | cut -f 2 -d = || /bin/true + removes: "{{ kafka_data_directory}}/data/kraft-combined-logs/meta.properties" + register: old_cluster_uuid + run_once: true + delegate_to: "{{ groups['kafka'][0] }}" + - name: create kafka cluster uuid command: cmd: "{{ kafka_bin_directory }}/kafka-storage.sh random-uuid" - register: cluster_uuid + creates: "{{ kafka_data_directory}}/data/kraft-combined-logs/meta.properties" + register: new_cluster_uuid + run_once: true + delegate_to: "{{ groups['kafka'][0] }}" + +- name: determine kafka cluster uuid + set_fact: + cluster_uuid: "{{ (old_cluster_uuid.changed) | ternary(old_cluster_uuid.stdout, new_cluster_uuid.stdout) }}" run_once: true delegate_to: "{{ groups['kafka'][0] }}" - name: format data directory for controller and broker nodes command: - cmd: "{{ kafka_bin_directory }}/kafka-storage.sh format -t {{ cluster_uuid.stdout }} -c {{ kafka_config_directory }}/config/kraft/server.properties" + cmd: "{{ kafka_bin_directory }}/kafka-storage.sh format -t {{ cluster_uuid}} -c {{ kafka_config_directory }}/config/kraft/server.properties --ignore-formatted" become: true become_user: kafka run_once: true @@ -72,7 +87,7 @@ - name: format data directory broker nodes command: - cmd: "{{ kafka_bin_directory }}/kafka-storage.sh format -t {{ cluster_uuid.stdout }} -c {{ kafka_config_directory }}/config/kraft/broker.properties" + cmd: "{{ kafka_bin_directory }}/kafka-storage.sh format -t {{ cluster_uuid}} -c {{ kafka_config_directory }}/config/kraft/broker.properties --ignore-formatted" become: true become_user: kafka run_once: true From 40bc42b7824e4bd06851b50a5eae6c51ed658aef Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Mon, 28 Apr 2025 09:55:23 -0400 Subject: [PATCH 13/14] Add a playbook to shutdown all nodes in the cluster --- apps/manual-kafka-cluster/shutdown.yml | 66 ++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 apps/manual-kafka-cluster/shutdown.yml diff --git a/apps/manual-kafka-cluster/shutdown.yml b/apps/manual-kafka-cluster/shutdown.yml new file mode 100644 index 0000000..0daf8e9 --- /dev/null +++ b/apps/manual-kafka-cluster/shutdown.yml @@ -0,0 +1,66 @@ +--- +# shutdown kafka server and client instances +- name: shutdown kafka instances + hosts: localhost + vars_files: + - group_vars/kafka/vars + - group_vars/kafka/secret_vars + + tasks: + + # DNS + + - name: check if hosts are in DNS + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/domain_info/ + linode.cloud.domain_info: + api_token: '{{ api_token }}' + domain: '{{ domain_name }}' + when: domain_name|d(False) + register: domain_info + + - name: remove hosts from dns + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/domain_record/ + linode.cloud.domain_record: + api_token: '{{ api_token }}' + domain_id: '{{ domain_info.domain.id }}' + record_id: '{{ item.id }}' + state: absent + when: + - domain_name|d(False) + - domain_info|d(False) + - item.target in (kafka_data.server | map(attribute='instance.ip_pub1')) + with_items: "{{ domain_info.records }}" + + + # Instances + + - name: get list of instances + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/instance_list/ + linode.cloud.instance_list: + api_token: '{{ api_token }}' + filters: + - name: label + values: "{{ [ instance_prefix ] | product(range(1, cluster_size+1)) | map('join') | list }}" + order_by: label + register: existing_instances + + - name: shutdown kafka servers + # https://galaxy.ansible.com/ui/repo/published/linode/cloud/content/module/instance/ + linode.cloud.instance: + label: '{{ item.label }}' + api_token: '{{ api_token }}' + region: '{{ region }}' + state: absent + with_items: '{{ existing_instances.instances }}' + + - name: update group_vars + blockinfile: + path: ./group_vars/kafka/vars + marker: "# {mark} INSTANCE VARS" + state: absent + + - name: remove kafka nodes from inventory + blockinfile: + path: ./hosts + marker: "# {mark} KAFKA INSTANCES" + state: absent From 960d82d2065c46c0288365f80a97e8a1618e5279 Mon Sep 17 00:00:00 2001 From: Philip Tellis Date: Fri, 25 Apr 2025 15:24:31 -0400 Subject: [PATCH 14/14] Reboot linode after installing packages but before starting kafka --- apps/manual-kafka-cluster/roles/common/tasks/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/manual-kafka-cluster/roles/common/tasks/main.yml b/apps/manual-kafka-cluster/roles/common/tasks/main.yml index 1648f23..49104c0 100644 --- a/apps/manual-kafka-cluster/roles/common/tasks/main.yml +++ b/apps/manual-kafka-cluster/roles/common/tasks/main.yml @@ -46,3 +46,6 @@ - name: apply ufw rules import_tasks: ufw_rules.yml + +- name: reboot linode + reboot: