Patch summary (free text ahead of the first "diff --git" header):

  * playbook/reboot.yml: the single "Reboot cluster" play becomes two plays,
    one for the `server` group and one for the `agent` group, each running
    with `serial: 1`. The server play's reboot task uses
    `kubectl get nodes` as its test_command.
  * roles/k3s/server/tasks/main.yml: the `systemctl reset-failed k3s-init`
    task is removed here (it is added to roles/reset instead); additional
    servers now join through `api_endpoint` and also receive `--tls-san`;
    the service-file task uses the `ansible.builtin.template` FQCN; the
    kubectl and crictl symlink tasks are merged into one looped task.
  * roles/reset/tasks/main.yml: gains the reset-failed task and adds
    `k3s-init` to the items handled by "Disable services".
  * Unit templates and init commands: the `| default(...)` filter on
    `extra_agent_args` / `extra_server_args` is dropped.

  NOTE(review): with the default filter gone, `extra_agent_args` and
  `extra_server_args` must be defined somewhere (role defaults, inventory);
  no such definition is part of this patch -- confirm before merging.
  NOTE(review): leading whitespace of the YAML lines below was inferred from
  the task structure; check it against the target tree, or apply with
  `git apply --ignore-whitespace`. The post-image hash on the
  roles/k3s/server/tasks/main.yml index line predates the wording fixes
  made to two added lines (task-name typo, Jinja spacing).

diff --git a/playbook/reboot.yml b/playbook/reboot.yml
index 8e5fbe5..cdd9804 100644
--- a/playbook/reboot.yml
+++ b/playbook/reboot.yml
@@ -1,6 +1,16 @@
 ---
-- name: Reboot cluster
-  hosts: k3s_cluster
+- name: Reboot cluster servers staggered
+  hosts: server
+  become: true
+  gather_facts: true
+  serial: 1
+  tasks:
+    - name: Reboot
+      ansible.builtin.reboot:
+        test_command: kubectl get nodes
+
+- name: Reboot cluster agents staggered
+  hosts: agent
   become: true
   gather_facts: true
   serial: 1
diff --git a/roles/k3s/agent/templates/k3s-agent.service.j2 b/roles/k3s/agent/templates/k3s-agent.service.j2
index b4a331d..a806bbb 100644
--- a/roles/k3s/agent/templates/k3s-agent.service.j2
+++ b/roles/k3s/agent/templates/k3s-agent.service.j2
@@ -7,7 +7,7 @@ After=network-online.target
 Type=notify
 ExecStartPre=-/sbin/modprobe br_netfilter
 ExecStartPre=-/sbin/modprobe overlay
-ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ hostvars[groups['server'][0]]['token'] }} {{ extra_agent_args | default("") }}
+ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ hostvars[groups['server'][0]]['token'] }} {{ extra_agent_args }}
 KillMode=process
 Delegate=yes
 # Having non-zero Limit*s causes performance problems due to accounting overhead
diff --git a/roles/k3s/server/tasks/main.yml b/roles/k3s/server/tasks/main.yml
index 116bfad..efd387d 100644
--- a/roles/k3s/server/tasks/main.yml
+++ b/roles/k3s/server/tasks/main.yml
@@ -1,25 +1,20 @@
 ---
-- name: Clean previous runs of k3s-init
-  ansible.builtin.command: systemctl reset-failed k3s-init
-  failed_when: false
-  changed_when: false
-
 - name: Init first server node
   block:
-    - name: Start temporary service with cluster-init
+    - name: Start temporary service for HA cluster
       ansible.builtin.command:
         cmd: >
           systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
           --cluster-init --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }}
-          {{ extra_server_args | default('') }}
+          {{ extra_server_args }}
         creates: "{{ systemd_dir }}/k3s-init.service"
       when: groups['server'] | length > 1
 
-    - name: Start temporary service
+    - name: Start temporary service for single server cluster
       ansible.builtin.command:
         cmd: >
           systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
-          --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} { extra_server_args | default('') }}
+          --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }}
         creates: "{{ systemd_dir }}/k3s-init.service"
       when: groups['server'] | length == 1
 
@@ -80,12 +75,12 @@
         flat: true
   when: ansible_hostname == groups['server'][0]
 
-- name: Init additonal server nodes
+- name: Init additional server nodes if any
   ansible.builtin.command:
     cmd: >
-      systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server --token "{{ hostvars[groups['server'][0]]['token'] }}"
-      --server https://{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}:{{ api_port }}
-      --data-dir {{ k3s_server_location }} {{ extra_server_args | default('') }}
+      systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
+      --token "{{ hostvars[groups['server'][0]]['token'] }}" --server https://{{ api_endpoint }}:{{ api_port }}
+      --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }}
     creates: "{{ systemd_dir }}/k3s-init.service"
   when: ansible_hostname != groups['server'][0]
 
@@ -107,13 +102,13 @@
       failed_when: false
 
 - name: Copy K3s service file
-  register: k3s_service
-  template:
+  ansible.builtin.template:
     src: "k3s-server.service.j2"
     dest: "{{ systemd_dir }}/k3s-server.service"
     owner: root
     group: root
     mode: 0644
+  register: k3s_service
 
 - name: Enable and check K3s service
   ansible.builtin.systemd:
@@ -122,14 +117,11 @@
     state: restarted
     enabled: true
 
-- name: Create kubectl symlink
+- name: Create symlinks
   ansible.builtin.file:
     src: /usr/local/bin/k3s
-    dest: /usr/local/bin/kubectl
-    state: link
-
-- name: Create crictl symlink
-  ansible.builtin.file:
-    src: /usr/local/bin/k3s
-    dest: /usr/local/bin/crictl
+    dest: /usr/local/bin/{{ item }}
     state: link
+  with_items:
+    - kubectl
+    - crictl
diff --git a/roles/k3s/server/templates/k3s-server.service.j2 b/roles/k3s/server/templates/k3s-server.service.j2
index a56ab10..6e898eb 100644
--- a/roles/k3s/server/templates/k3s-server.service.j2
+++ b/roles/k3s/server/templates/k3s-server.service.j2
@@ -7,7 +7,7 @@ After=network-online.target
 Type=notify
 ExecStartPre=-/sbin/modprobe br_netfilter
 ExecStartPre=-/sbin/modprobe overlay
-ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args | default("") }}
+ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args }}
 KillMode=process
 Delegate=yes
 # Having non-zero Limit*s causes performance problems due to accounting overhead
diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml
index 98a5645..750dbe2 100644
--- a/roles/reset/tasks/main.yml
+++ b/roles/reset/tasks/main.yml
@@ -1,4 +1,10 @@
 ---
+- name: Clean previous failed runs of k3s-init
+  # systemd builtin does not support reset-failed
+  ansible.builtin.command: systemctl reset-failed k3s-init
+  failed_when: false
+  changed_when: false
+
 - name: Disable services
   ansible.builtin.systemd:
     name: "{{ item }}"
@@ -6,12 +12,13 @@
     enabled: false
   failed_when: false
   with_items:
+    - k3s-init
     - k3s-server
     - k3s-agent
 
 - name: Kill container shim
-  register: pkill_containerd_shim_runc
   ansible.builtin.command: pkill -9 -f "k3s/data/[^/]+/bin/containerd-shim-runc"
+  register: pkill_containerd_shim_runc
   changed_when: "pkill_containerd_shim_runc.rc == 0"
   failed_when: false
 
@@ -25,7 +32,7 @@
   loop_control:
     loop_var: mounted_fs
 
-- name: Remove service files, binaries and data
+- name: Remove service files, binaries, and data
   ansible.builtin.file:
     name: "{{ item }}"
     state: absent