From 5bebced657a9682b7808cb218b4d02a684564ce1 Mon Sep 17 00:00:00 2001 From: Xan Manning Date: Wed, 15 Jun 2022 21:03:39 +0000 Subject: [PATCH] fix: control plane start retries --- .github/workflows/ci.yml | 3 +-- tasks/ensure_cluster.yml | 26 +++++++++++++++++++ tasks/ensure_control_plane_started_openrc.yml | 14 +++++----- .../ensure_control_plane_started_systemd.yml | 8 +++--- tasks/ensure_installed.yml | 3 +++ tasks/post_checks_nodes.yml | 5 ++-- 6 files changed, 44 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6b3d47..10b5b51 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,11 +41,10 @@ jobs: - distro: geerlingguy/docker-fedora34-ansible:latest scenario: highavailabilitydb prebuilt: 'true' - - distro: geerlingguy/docker-fedora33-ansible:latest + - distro: geerlingguy/docker-debian10-ansible:latest scenario: autodeploy - distro: xanmanning/docker-alpine-ansible:3.16 scenario: highavailabilityetcd - command: "init" prebuilt: 'false' - distro: geerlingguy/docker-rockylinux8-ansible:latest scenario: highavailabilityetcd diff --git a/tasks/ensure_cluster.yml b/tasks/ensure_cluster.yml index 938c463..767a451 100644 --- a/tasks/ensure_cluster.yml +++ b/tasks/ensure_cluster.yml @@ -46,10 +46,36 @@ dest: "{{ k3s_systemd_unit_dir }}/k3s.service" mode: 0644 become: "{{ k3s_become }}" + when: + - k3s_service_handler[ansible_service_mgr] == 'systemd' notify: - "reload {{ k3s_service_handler[ansible_service_mgr] }}" - "restart k3s {{ k3s_service_handler[ansible_service_mgr] }}" +- name: Ensure k3s service file is present + ansible.builtin.template: + src: k3s.openrc.j2 + dest: "{{ k3s_openrc_service_dir }}/k3s" + mode: 0744 + when: + - k3s_service_handler[ansible_service_mgr] == 'service' + notify: + - "reload {{ k3s_service_handler[ansible_service_mgr] }}" + - "restart k3s {{ k3s_service_handler[ansible_service_mgr] }}" + become: "{{ k3s_become }}" + +- name: Ensure k3s logrotate file is present + ansible.builtin.template: + src: k3s.logrotate.j2 + dest: "{{ k3s_logrotate_dir }}/k3s" + mode: 0640 + when: + - k3s_service_handler[ansible_service_mgr] == 'service' + notify: + - "reload {{ k3s_service_handler[ansible_service_mgr] }}" + - "restart k3s {{ k3s_service_handler[ansible_service_mgr] }}" + become: "{{ k3s_become }}" + - name: Ensure k3s config file exists ansible.builtin.template: src: config.yaml.j2 diff --git a/tasks/ensure_control_plane_started_openrc.yml b/tasks/ensure_control_plane_started_openrc.yml index b7dcf52..9926173 100644 --- a/tasks/ensure_control_plane_started_openrc.yml +++ b/tasks/ensure_control_plane_started_openrc.yml @@ -1,15 +1,15 @@ --- -- name: Ensure k3s initial control plane server is started +- name: Ensure k3s control plane server is started ansible.builtin.service: name: k3s state: started enabled: "{{ k3s_start_on_boot }}" register: k3s_service_start_k3s - failed_when: - - k3s_service_start_k3s is not succeeded - - not ansible_check_mode - when: (k3s_control_node and k3s_controller_list | length == 1) - or (k3s_primary_control_node and k3s_controller_list | length > 1) - or k3s_token_cluster_check.stat.exists + until: k3s_service_start_k3s is succeeded + retries: 3 + delay: 3 + # failed_when: + # - k3s_service_start_k3s is not succeeded + # - not ansible_check_mode become: "{{ k3s_become }}" diff --git a/tasks/ensure_control_plane_started_systemd.yml b/tasks/ensure_control_plane_started_systemd.yml index e4b159e..e2855d4 100644 --- a/tasks/ensure_control_plane_started_systemd.yml +++ b/tasks/ensure_control_plane_started_systemd.yml @@ -1,16 +1,16 @@ --- -- name: Ensure k3s initial control plane server is started +- name: Ensure k3s control plane server is started ansible.builtin.systemd: name: k3s state: started enabled: "{{ k3s_start_on_boot }}" scope: "{{ k3s_systemd_context }}" register: k3s_systemd_start_k3s + until: k3s_systemd_start_k3s is succeeded + retries: 3 + delay: 3 failed_when: - k3s_systemd_start_k3s is not succeeded - not ansible_check_mode - when: (k3s_control_node and k3s_controller_list | length == 1) - or (k3s_primary_control_node and k3s_controller_list | length > 1) - or k3s_token_cluster_check.stat.exists become: "{{ k3s_become }}" diff --git a/tasks/ensure_installed.yml b/tasks/ensure_installed.yml index e1d973b..fc19ea9 100644 --- a/tasks/ensure_installed.yml +++ b/tasks/ensure_installed.yml @@ -23,3 +23,6 @@ register: k3s_token_cluster_check - include_tasks: ensure_control_plane_started_{{ ansible_service_mgr }}.yml + when: (k3s_control_node and k3s_controller_list | length == 1) + or (k3s_primary_control_node and k3s_controller_list | length > 1) + or k3s_token_cluster_check.stat.exists diff --git a/tasks/post_checks_nodes.yml b/tasks/post_checks_nodes.yml index 8383f9a..5a87485 100644 --- a/tasks/post_checks_nodes.yml +++ b/tasks/post_checks_nodes.yml @@ -4,8 +4,9 @@ ansible.builtin.command: cmd: "{{ k3s_install_dir }}/kubectl get nodes" changed_when: false - failed_when: kubectl_get_nodes_result.stdout.find("was refused") != -1 or - kubectl_get_nodes_result.stdout.find("ServiceUnavailable") != -1 + failed_when: >- + kubectl_get_nodes_result.stdout.find("was refused") != -1 or + kubectl_get_nodes_result.stdout.find("ServiceUnavailable") != -1 register: kubectl_get_nodes_result until: - kubectl_get_nodes_result.rc == 0