HA fixes and code tweaks

- Update the reboot playbook for HA testing
- Make small adjustments after testing HA with the playbook
- Fix the reset playbook failing in some cases

Signed-off-by: Derek Nola <derek.nola@suse.com>
This commit is contained in:
Nicholas Malcolm 2023-02-01 10:22:54 -05:00 committed by Derek Nola
parent cd725780f5
commit 7cc70ad609
5 changed files with 38 additions and 29 deletions

View File

@ -1,6 +1,16 @@
---
# Reboot playbook: servers first, then agents, each group one host at a time.
# NOTE(review): this view has no +/- diff markers and no indentation. The
# "Reboot cluster" / "hosts: k3s_cluster" pair below looks like the
# pre-change play header shown next to its replacement -- confirm against
# the real playbook before relying on it.
- name: Reboot cluster
hosts: k3s_cluster
# Play 1: server nodes. `serial: 1` makes Ansible finish one host before
# starting the next.
- name: Reboot cluster servers staggered
hosts: server
become: true
gather_facts: true
serial: 1
tasks:
- name: Reboot
ansible.builtin.reboot:
# Command the reboot module runs on the host to decide it is usable again.
test_command: kubectl get nodes
# Play 2: agent nodes, also one at a time. Its task list is not visible in
# this hunk.
- name: Reboot cluster agents staggered
hosts: agent
become: true
gather_facts: true
serial: 1

View File

@ -7,7 +7,7 @@ After=network-online.target
# Fragment of the k3s agent systemd unit template (Jinja placeholders are
# filled in by Ansible). The section header is outside this hunk.
Type=notify
# The leading "-" tells systemd to ignore a failure of the modprobe call.
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
# NOTE(review): the two ExecStart lines differ only in `| default("")` on
# extra_agent_args. This looks like the old and new form of a single line
# shown together (no +/- markers in this view) -- confirm which one the
# template really contains. Without the default filter, extra_agent_args
# presumably has to be defined elsewhere (e.g. role defaults); verify.
ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ hostvars[groups['server'][0]]['token'] }} {{ extra_agent_args | default("") }}
ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ hostvars[groups['server'][0]]['token'] }} {{ extra_agent_args }}
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead

View File

@ -1,25 +1,20 @@
---
# Best-effort cleanup of a leftover k3s-init unit: failed_when and
# changed_when are both false, so this task never reports failure or change.
- name: Clean previous runs of k3s-init
ansible.builtin.command: systemctl reset-failed k3s-init
failed_when: false
changed_when: false
# Bootstraps the first server through a transient systemd unit (k3s-init).
# Exactly one of the two inner tasks applies, chosen by the size of the
# `server` group.
- name: Init first server node
block:
# NOTE(review): two `- name:` lines appear back to back here and again for
# the single-server task. They look like old/new task names shown together
# (no +/- markers in this view) -- confirm which one is live.
# HA variant: runs when the server group has more than one host and passes
# --cluster-init. `creates` skips the command once the unit file exists.
# NOTE(review): the cmd body lists extra_server_args twice, with and without
# `| default('')`; `{{ extra_server_args}}` also lacks the usual inner space.
- name: Start temporary service with cluster-init
- name: Start temporary service for HA cluster
ansible.builtin.command:
cmd: >
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
--cluster-init --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }}
{{ extra_server_args | default('') }}
{{ extra_server_args}}
creates: "{{ systemd_dir }}/k3s-init.service"
when: groups['server'] | length > 1
# Single-server variant: same systemd-run call without --cluster-init; runs
# when the server group has exactly one host.
# NOTE(review): the first --tls-san line in cmd opens the extra_server_args
# expression with a single "{", so Jinja would not render it; the following
# line has the balanced "{{ ... }}" form.
- name: Start temporary service
- name: Start temporary service for single server cluster
ansible.builtin.command:
cmd: >
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
--tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} { extra_server_args | default('') }}
--tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }}
creates: "{{ systemd_dir }}/k3s-init.service"
when: groups['server'] | length == 1
@ -80,12 +75,12 @@
flat: true
when: ansible_hostname == groups['server'][0]
# Starts k3s-init on every server except the first entry of the `server`
# group (see `when`), joining it with the token read from the first
# server's hostvars. `creates` skips the command once the unit file exists.
# NOTE(review): two task names and two command bodies appear, apparently the
# old and new versions shown together (no +/- markers in this view). The
# first body points --server at the first server's ansible_host (falling
# back to its inventory name); the second points it at api_endpoint and adds
# --tls-san. Confirm which one is live.
# NOTE(review): "additonal" in the task name is a typo for "additional";
# left untouched here because the name is a runtime string.
- name: Init additonal server nodes
- name: Init additonal server nodes if any
ansible.builtin.command:
cmd: >
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server --token "{{ hostvars[groups['server'][0]]['token'] }}"
--server https://{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}:{{ api_port }}
--data-dir {{ k3s_server_location }} {{ extra_server_args | default('') }}
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
--token "{{ hostvars[groups['server'][0]]['token'] }}" --server https://{{ api_endpoint }}:{{ api_port }}
--tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }}
creates: "{{ systemd_dir }}/k3s-init.service"
when: ansible_hostname != groups['server'][0]
@ -107,13 +102,13 @@
failed_when: false
# Renders k3s-server.service.j2 into systemd_dir as k3s-server.service
# (root:root) and stores the task result in k3s_service.
# NOTE(review): `register` appears twice, and both `template:` and
# `ansible.builtin.template:` are present. This looks like the old and new
# layout of one task shown together (no +/- markers in this view); as plain
# YAML these would be duplicate/conflicting keys -- confirm the live form.
- name: Copy K3s service file
register: k3s_service
template:
ansible.builtin.template:
src: "k3s-server.service.j2"
dest: "{{ systemd_dir }}/k3s-server.service"
owner: root
group: root
# NOTE(review): unquoted 0644 is read as an octal integer (420) by YAML 1.1
# loaders; quoting it as "0644" avoids depending on that -- consider changing.
mode: 0644
register: k3s_service
- name: Enable and check K3s service
ansible.builtin.systemd:
@ -122,14 +117,11 @@
state: restarted
enabled: true
# Links /usr/local/bin/kubectl and /usr/local/bin/crictl to the k3s binary
# at /usr/local/bin/k3s.
# NOTE(review): this hunk shows two per-link tasks ("Create kubectl symlink",
# "Create crictl symlink") together with a single looped task
# ("Create symlinks", dest built from {{ item }}). They appear to be the old
# and new versions side by side (no +/- markers in this view) -- confirm
# which one is live.
- name: Create kubectl symlink
- name: Create symlinks
ansible.builtin.file:
src: /usr/local/bin/k3s
dest: /usr/local/bin/kubectl
state: link
- name: Create crictl symlink
ansible.builtin.file:
src: /usr/local/bin/k3s
dest: /usr/local/bin/crictl
dest: /usr/local/bin/{{ item }}
state: link
# Items substituted into dest by the looped variant.
with_items:
- kubectl
- crictl

View File

@ -7,7 +7,7 @@ After=network-online.target
# Fragment of the k3s server systemd unit template (Jinja placeholders are
# filled in by Ansible). The section header is outside this hunk.
Type=notify
# The leading "-" tells systemd to ignore a failure of the modprobe call.
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
# NOTE(review): the two ExecStart lines differ only in `| default("")` on
# extra_server_args. This looks like the old and new form of a single line
# shown together (no +/- markers in this view) -- confirm which one the
# template really contains. Without the default filter, extra_server_args
# presumably has to be defined elsewhere (e.g. role defaults); verify.
ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args | default("") }}
ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args }}
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead

View File

@ -1,4 +1,10 @@
---
# Clears any failed state left on the k3s-init unit. Best-effort: with
# failed_when and changed_when both false, the task never reports failure
# or change, whatever systemctl returns.
- name: Clean previous failed runs of k3s-init
# systemd builtin does not support reset-failed
ansible.builtin.command: systemctl reset-failed k3s-init
failed_when: false
changed_when: false
- name: Disable services
ansible.builtin.systemd:
name: "{{ item }}"
@ -6,12 +12,13 @@
enabled: false
failed_when: false
with_items:
- k3s-init
- k3s-server
- k3s-agent
# Sends signal 9 to every process whose full command line (-f) matches the
# containerd-shim-runc path under k3s/data/<dir>/bin.
# The task is marked changed only when pkill's return code is 0 and is never
# marked failed, so a run with nothing to kill stays green.
# NOTE(review): `register` appears twice; this looks like the key being moved
# below the command, with old and new positions shown together (no +/-
# markers in this view) -- confirm the live task has a single register.
- name: Kill container shim
register: pkill_containerd_shim_runc
ansible.builtin.command: pkill -9 -f "k3s/data/[^/]+/bin/containerd-shim-runc"
register: pkill_containerd_shim_runc
changed_when: "pkill_containerd_shim_runc.rc == 0"
failed_when: false
@ -25,7 +32,7 @@
loop_control:
loop_var: mounted_fs
- name: Remove service files, binaries and data
- name: Remove service files, binaries, and data
ansible.builtin.file:
name: "{{ item }}"
state: absent