From 1d93c2115d8ef6fffca594aa51957f22fd41f54a Mon Sep 17 00:00:00 2001 From: Xan Manning Date: Sun, 28 Feb 2021 16:45:38 +0000 Subject: [PATCH] Cluster-init checks added, tidy up of task format --- .github/stale.yml | 18 +++++++++++++++++ CHANGELOG.md | 12 +++++++++++ handlers/main.yml | 8 ++++++++ tasks/build/configure-k3s-cluster.yml | 3 +++ tasks/build/get-version.yml | 1 + tasks/build/install-k3s.yml | 4 ++++ tasks/build/preconfigure-k3s.yml | 6 +++--- tasks/state-installed.yml | 5 +++++ tasks/teardown/drain-and-remove-nodes.yml | 6 +++++- tasks/validate/configuration/cluster-init.yml | 18 +++++++++++++++++ .../configuration/control-node-count.yml | 12 ++++++++--- tasks/validate/configuration/variables.yml | 2 +- tasks/validate/environment/local/packages.yml | 2 +- tasks/validate/state/uninstalled.yml | 20 ++++++------------- 14 files changed, 94 insertions(+), 23 deletions(-) create mode 100644 .github/stale.yml create mode 100644 tasks/validate/configuration/cluster-init.yml diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 0000000..82c0d00 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,18 @@ +--- +# Number of days of inactivity before an issue becomes stale +daysUntilStale: 60 +# Number of days of inactivity before a stale issue is closed +daysUntilClose: 7 +# Issues with these labels will never be considered stale +exemptLabels: + - pinned + - security +# Label to use when marking an issue as stale +staleLabel: wontfix +# Comment to post when marking an issue as stale. Set to `false` to disable +markComment: > + This issue has been automatically marked as stale because it has not had + recent activity. It will be closed if no further activity occurs. Thank you + for your contributions. +# Comment to post when closing a stale issue. Set to `false` to disable +closeComment: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 081a1ae..6e1e6ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,16 @@ --- --> +## 2021-02-27, v2.7.0 + +### Notable changes + + - Cluster init checks added. + - Tidy up of tasks, failed checks. + - Possible fix for #93 - force draining of nodes added. + +--- + ## 2021-02-27, v2.6.1 ### Notable changes @@ -21,6 +31,8 @@ - Bugfix: Templating error for single control plane nodes using Etcd. - Bugfix: a number of typos fixed. +--- + ## 2021-02-16, v2.6.0 ### Notable changes diff --git a/handlers/main.yml b/handlers/main.yml index 0b9fce1..2dd40eb 100644 --- a/handlers/main.yml +++ b/handlers/main.yml @@ -14,6 +14,10 @@ enabled: "{{ k3s_start_on_boot }}" retries: 3 delay: 3 + register: k3s_systemd_restart_k3s + failed_when: + - k3s_systemd_restart_k3s is not success + - not ansible_check_mode become: "{{ k3s_become_for_systemd | ternary(true, false, k3s_become_for_all) }}" - name: restart docker @@ -21,4 +25,8 @@ name: docker state: restarted enabled: true + register: k3s_systemd_restart_docker + failed_when: + - k3s_systemd_restart_docker is not success + - not ansible_check_mode become: "{{ k3s_become_for_systemd | ternary(true, false, k3s_become_for_all) }}" diff --git a/tasks/build/configure-k3s-cluster.yml b/tasks/build/configure-k3s-cluster.yml index 4a12f91..d285bbe 100644 --- a/tasks/build/configure-k3s-cluster.yml +++ b/tasks/build/configure-k3s-cluster.yml @@ -65,6 +65,9 @@ state: started enabled: "{{ k3s_start_on_boot }}" register: ensure_secondary_controllers_started + failed_when: + - ensure_secondary_controllers_started is not succeeded + - not ansible_check_mode until: ensure_secondary_controllers_started is succeeded retries: "{{ ansible_play_hosts_all | length }}" delay: 5 diff --git a/tasks/build/get-version.yml b/tasks/build/get-version.yml index 30dcd53..9969c54 100644 --- a/tasks/build/get-version.yml +++ b/tasks/build/get-version.yml @@ -19,6 +19,7 @@ return_content: true body_format: json register: k3s_latest_release + no_log: true check_mode: false - name: Ensure the release version is set as a fact diff --git a/tasks/build/install-k3s.yml b/tasks/build/install-k3s.yml index 5b7dccf..f6be16a 100644 --- a/tasks/build/install-k3s.yml +++ b/tasks/build/install-k3s.yml @@ -22,6 +22,10 @@ state: started enabled: "{{ k3s_start_on_boot }}" scope: "{{ k3s_systemd_context }}" + register: k3s_systemd_start_k3s + failed_when: + - k3s_systemd_start_k3s is not succeeded + - not ansible_check_mode when: (k3s_control_node and k3s_controller_list | length == 1) or (k3s_primary_control_node and k3s_controller_list | length > 1) become: "{{ k3s_become_for_systemd | ternary(true, false, k3s_become_for_all) }}" diff --git a/tasks/build/preconfigure-k3s.yml b/tasks/build/preconfigure-k3s.yml index 9a2a58d..0572e36 100644 --- a/tasks/build/preconfigure-k3s.yml +++ b/tasks/build/preconfigure-k3s.yml @@ -22,7 +22,7 @@ k3s_control_plane_port: "{{ k3s_runtime_config['https-listen-port'] | default(6443) }}" delegate_to: k3s_primary_control_node -- name: Ensure a count of control nodes is generated +- name: Ensure a count of control nodes is generated from ansible_play_hosts_all ansible.builtin.set_fact: k3s_controller_list: "{{ k3s_controller_list + [ item ] }}" when: @@ -79,7 +79,7 @@ check_mode: false when: hostvars[item].k3s_control_node is defined -- name: Delegate a control plane node +- name: Delegate an initializing control plane node block: - name: Lookup control node from file ansible.builtin.command: "grep '{{ 'P_True' if (k3s_controller_list | length > 1) else 'C_True' }}' /tmp/inventory.txt" @@ -87,7 +87,7 @@ check_mode: false register: k3s_control_delegate_raw - - name: Ensure control node is delegated to for obtaining a token + - name: Ensure control node is delegated for obtaining a cluster token ansible.builtin.set_fact: k3s_control_delegate: "{{ k3s_control_delegate_raw.stdout.split(' @@@ ')[0] }}" check_mode: false diff --git a/tasks/state-installed.yml b/tasks/state-installed.yml index 9c4f821..eeebc5c 100644 --- a/tasks/state-installed.yml +++ b/tasks/state-installed.yml @@ -40,6 +40,11 @@ - import_tasks: build/install-k3s.yml +- include_tasks: validate/configuration/cluster-init.yml + when: + - k3s_control_delegate is defined + - k3s_control_delegate == inventory_hostname + - import_tasks: build/configure-k3s-cluster.yml when: - k3s_build_cluster is defined diff --git a/tasks/teardown/drain-and-remove-nodes.yml b/tasks/teardown/drain-and-remove-nodes.yml index 2f92255..b99a843 100644 --- a/tasks/teardown/drain-and-remove-nodes.yml +++ b/tasks/teardown/drain-and-remove-nodes.yml @@ -19,7 +19,11 @@ become: "{{ k3s_become_for_kubectl | ternary(true, false, k3s_become_for_all) }}" - name: Ensure uninstalled nodes are drained - ansible.builtin.command: "{{ k3s_install_dir }}/kubectl drain {{ item }} --ignore-daemonsets --delete-local-data" + ansible.builtin.command: >- + {{ k3s_install_dir }}/kubectl drain {{ item }} + --ignore-daemonsets + --delete-local-data + --force delegate_to: "{{ k3s_control_delegate }}" run_once: true when: diff --git a/tasks/validate/configuration/cluster-init.yml b/tasks/validate/configuration/cluster-init.yml new file mode 100644 index 0000000..5bcde64 --- /dev/null +++ b/tasks/validate/configuration/cluster-init.yml @@ -0,0 +1,18 @@ +--- + +- name: Check that the initial control plane server is available to accept connections + ansible.builtin.wait_for: + port: "{{ k3s_runtime_config['https-listen-port'] | default('6443') }}" + host: "{{ k3s_runtime_config['bind-address'] | default('127.0.0.1') }}" + delay: 5 + sleep: 5 + timeout: 300 + +- name: Check that cluster-token exists + ansible.builtin.stat: + path: "{{ k3s_runtime_config['data-dir'] | default(k3s_data_dir) }}/server/token" + register: k3s_check_cluster_token + check_mode: false + failed_when: + - not k3s_check_cluster_token.stat.exists + - not ansible_check_mode diff --git a/tasks/validate/configuration/control-node-count.yml b/tasks/validate/configuration/control-node-count.yml index 8eaf18f..72f8581 100644 --- a/tasks/validate/configuration/control-node-count.yml +++ b/tasks/validate/configuration/control-node-count.yml @@ -7,7 +7,9 @@ - ("datastore-endpoint" not in k3s_runtime_config or not k3s_runtime_config['datastore-endpoint']) - (k3s_etcd_datastore is not defined or not k3s_etcd_datastore) success_msg: "Control plane configuration is valid." - fail_msg: "Control plane configuration is invalid. Please see notes about k3s_control_node and HA in README.md." + fail_msg: >- + Control plane configuration is invalid. + Please see notes about k3s_control_node and HA in README.md. when: - k3s_controller_list | length == 1 - not k3s_use_unsupported_config @@ -20,7 +22,9 @@ - (("datastore-endpoint" in k3s_runtime_config and k3s_runtime_config['datastore-endpoint']) or (k3s_etcd_datastore is defined and k3s_etcd_datastore)) success_msg: "Control plane configuration is valid." - fail_msg: "Control plane configuration is invalid. Please see notes about k3s_control_node and HA in README.md." + fail_msg: >- + Control plane configuration is invalid. Please see notes about + k3s_control_node and HA in README.md. when: - k3s_controller_list | length >= 2 - k3s_control_node @@ -31,7 +35,9 @@ - (k3s_controller_list | length >= 3) - (((k3s_controller_list | length) % 2) == 1) success_msg: "Control plane configuration is valid." - fail_msg: "Etcd should have a minimum of 3 defined members and the number of members should be odd. Please see notes about HA in README.md" + fail_msg: >- + Etcd should have a minimum of 3 defined members and the number of + members should be odd. Please see notes about HA in README.md when: - k3s_etcd_datastore is defined - k3s_etcd_datastore diff --git a/tasks/validate/configuration/variables.yml b/tasks/validate/configuration/variables.yml index 3230c70..38a9c7b 100644 --- a/tasks/validate/configuration/variables.yml +++ b/tasks/validate/configuration/variables.yml @@ -7,7 +7,7 @@ success_msg: "{{ k3s_release_version }} is supported by this role." fail_msg: "{{ k3s_release_version }} is not supported by this role, please use xanmanning.k3s v1.x." -- name: Check configuration in k3s_server and k3s_agent that needs special configuration +- name: Check configuration in k3s_server and k3s_agent that needs alternate configuration ansible.builtin.assert: that: - (item.setting not in k3s_runtime_config) diff --git a/tasks/validate/environment/local/packages.yml b/tasks/validate/environment/local/packages.yml index 7a24af3..d0d99b1 100644 --- a/tasks/validate/environment/local/packages.yml +++ b/tasks/validate/environment/local/packages.yml @@ -6,7 +6,7 @@ - ansible_version.string is version_compare(k3s_ansible_min_version, '>=') fail_msg: >- Ansible v{{ ansible_version.string }} is not supported by this role. - Please install >= v{{ k3s_ansible_min_version }} + Please install >= v{{ k3s_ansible_min_version }}. success_msg: "Ansible v{{ ansible_version.string }} is supported." become: false delegate_to: localhost diff --git a/tasks/validate/state/uninstalled.yml b/tasks/validate/state/uninstalled.yml index 4480838..c666281 100644 --- a/tasks/validate/state/uninstalled.yml +++ b/tasks/validate/state/uninstalled.yml @@ -2,31 +2,23 @@ - name: Check that k3s is not running ansible.builtin.command: pgrep k3s - ignore_errors: true + failed_when: + - check_k3s_process.rc == 0 + - not ansible_check_mode changed_when: false register: check_k3s_process -- name: Fail if k3s is still running - ansible.builtin.fail: - msg: k3s is still running, uninstall script failed. Please investigate. - when: check_k3s_process.rc == 0 - - name: Check that docker is not running ansible.builtin.command: pgrep docker - ignore_errors: true + failed_when: + - check_k3s_docker_process.rc == 0 + - not ansible_check_mode changed_when: false register: check_k3s_docker_process when: - k3s_runtime_config.docker is defined - k3s_runtime_config.docker -- name: Fail if docker is still running - ansible.builtin.fail: - msg: docker is still running, uninstall script failed. Please investigate. - when: - - k3s_runtime_config.docker is defined - - k3s_runtime_config.docker - - name: Fail if k3s binaries have not been removed ansible.builtin.stat: path: "{{ k3s_install_dir }}/{{ item }}"