fix: control plane start retries

This commit is contained in:
Xan Manning 2022-06-15 21:03:39 +00:00
parent c1341eb62c
commit 5bebced657
6 changed files with 44 additions and 15 deletions

View File

@ -41,11 +41,10 @@ jobs:
- distro: geerlingguy/docker-fedora34-ansible:latest
scenario: highavailabilitydb
prebuilt: 'true'
- distro: geerlingguy/docker-fedora33-ansible:latest
- distro: geerlingguy/docker-debian10-ansible:latest
scenario: autodeploy
- distro: xanmanning/docker-alpine-ansible:3.16
scenario: highavailabilityetcd
command: "init"
prebuilt: 'false'
- distro: geerlingguy/docker-rockylinux8-ansible:latest
scenario: highavailabilityetcd

View File

@ -46,10 +46,36 @@
dest: "{{ k3s_systemd_unit_dir }}/k3s.service"
mode: 0644
become: "{{ k3s_become }}"
when:
- k3s_service_handler[ansible_service_mgr] == 'systemd'
notify:
- "reload {{ k3s_service_handler[ansible_service_mgr] }}"
- "restart k3s {{ k3s_service_handler[ansible_service_mgr] }}"
# Render the k3s.openrc.j2 template to "{{ k3s_openrc_service_dir }}/k3s".
# Runs only when the k3s_service_handler entry for this host's
# ansible_service_mgr is 'service'; a change notifies the matching
# "reload ..." and "restart k3s ..." handlers.
- name: Ensure k3s service file is present
  ansible.builtin.template:
    src: k3s.openrc.j2
    dest: "{{ k3s_openrc_service_dir }}/k3s"
    # Quoted so the mode reaches Ansible as a string, not a YAML integer.
    mode: "0744"
  become: "{{ k3s_become }}"
  when:
    - k3s_service_handler[ansible_service_mgr] == 'service'
  notify:
    - "reload {{ k3s_service_handler[ansible_service_mgr] }}"
    - "restart k3s {{ k3s_service_handler[ansible_service_mgr] }}"
# Render the k3s.logrotate.j2 template to "{{ k3s_logrotate_dir }}/k3s".
# Runs only when the k3s_service_handler entry for this host's
# ansible_service_mgr is 'service'.
# NOTE(review): a change here also notifies the k3s reload/restart handlers —
# confirm a logrotate config change is meant to restart the service.
- name: Ensure k3s logrotate file is present
  ansible.builtin.template:
    src: k3s.logrotate.j2
    dest: "{{ k3s_logrotate_dir }}/k3s"
    # Quoted so the mode reaches Ansible as a string, not a YAML integer.
    mode: "0640"
  become: "{{ k3s_become }}"
  when:
    - k3s_service_handler[ansible_service_mgr] == 'service'
  notify:
    - "reload {{ k3s_service_handler[ansible_service_mgr] }}"
    - "restart k3s {{ k3s_service_handler[ansible_service_mgr] }}"
- name: Ensure k3s config file exists
ansible.builtin.template:
src: config.yaml.j2

View File

@ -1,15 +1,15 @@
---
# Start the `k3s` service with ansible.builtin.service and register the result
# as k3s_service_start_k3s; start-on-boot follows k3s_start_on_boot.
# NOTE(review): this span looks like a rendered diff whose +/- markers and
# indentation were stripped, so superseded and replacement lines may sit side
# by side (two `- name:` lines, an active and a commented-out `failed_when`) —
# confirm against the real task file before editing.
- name: Ensure k3s initial control plane server is started
- name: Ensure k3s control plane server is started
ansible.builtin.service:
name: k3s
state: started
enabled: "{{ k3s_start_on_boot }}"
register: k3s_service_start_k3s
# Report failure only when the start did not succeed AND the play is not
# running in check mode.
failed_when:
- k3s_service_start_k3s is not succeeded
- not ansible_check_mode
# Run when k3s_control_node is set and k3s_controller_list has exactly one
# entry, or k3s_primary_control_node is set and the list has more than one
# entry, or the k3s_token_cluster_check stat result exists.
when: (k3s_control_node and k3s_controller_list | length == 1)
or (k3s_primary_control_node and k3s_controller_list | length > 1)
or k3s_token_cluster_check.stat.exists
# Repeat the start until the registered result is succeeded: 3 retries with a
# delay of 3 between attempts (Ansible's `delay` is expressed in seconds).
until: k3s_service_start_k3s is succeeded
retries: 3
delay: 3
# Disabled copy of the failure condition shown above.
# failed_when:
# - k3s_service_start_k3s is not succeeded
# - not ansible_check_mode
become: "{{ k3s_become }}"

View File

@ -1,16 +1,16 @@
---
# Start the `k3s` unit with ansible.builtin.systemd and register the result as
# k3s_systemd_start_k3s; start-on-boot follows k3s_start_on_boot and the
# systemd scope comes from k3s_systemd_context.
# NOTE(review): this span looks like a rendered diff whose +/- markers and
# indentation were stripped, so superseded and replacement lines may sit side
# by side (two `- name:` lines) — confirm against the real task file before
# editing.
- name: Ensure k3s initial control plane server is started
- name: Ensure k3s control plane server is started
ansible.builtin.systemd:
name: k3s
state: started
enabled: "{{ k3s_start_on_boot }}"
scope: "{{ k3s_systemd_context }}"
register: k3s_systemd_start_k3s
# Repeat the start until the registered result is succeeded: 3 retries with a
# delay of 3 between attempts (Ansible's `delay` is expressed in seconds).
until: k3s_systemd_start_k3s is succeeded
retries: 3
delay: 3
# Report failure only when the start did not succeed AND the play is not
# running in check mode.
failed_when:
- k3s_systemd_start_k3s is not succeeded
- not ansible_check_mode
# Run when k3s_control_node is set and k3s_controller_list has exactly one
# entry, or k3s_primary_control_node is set and the list has more than one
# entry, or the k3s_token_cluster_check stat result exists.
when: (k3s_control_node and k3s_controller_list | length == 1)
or (k3s_primary_control_node and k3s_controller_list | length > 1)
or k3s_token_cluster_check.stat.exists
become: "{{ k3s_become }}"

View File

@ -23,3 +23,6 @@
register: k3s_token_cluster_check
# Include the start task file named after this host's service manager:
# ensure_control_plane_started_<ansible_service_mgr>.yml.
# Included when k3s_control_node is set and k3s_controller_list has exactly one
# entry, or k3s_primary_control_node is set and the list has more than one
# entry, or the k3s_token_cluster_check stat result exists.
- name: Include control plane start tasks for the active service manager
  ansible.builtin.include_tasks:
    file: "ensure_control_plane_started_{{ ansible_service_mgr }}.yml"
  # Folded scalar keeps the three-part condition as one logical expression.
  when: >-
    (k3s_control_node and k3s_controller_list | length == 1)
    or (k3s_primary_control_node and k3s_controller_list | length > 1)
    or k3s_token_cluster_check.stat.exists

View File

@ -4,8 +4,9 @@
ansible.builtin.command:
cmd: "{{ k3s_install_dir }}/kubectl get nodes"
changed_when: false
failed_when: kubectl_get_nodes_result.stdout.find("was refused") != -1 or
kubectl_get_nodes_result.stdout.find("ServiceUnavailable") != -1
failed_when: >-
kubectl_get_nodes_result.stdout.find("was refused") != -1 or
kubectl_get_nodes_result.stdout.find("ServiceUnavailable") != -1
register: kubectl_get_nodes_result
until:
- kubectl_get_nodes_result.rc == 0