Merge pull request #228 from k3s-io/default_k3s_script

Fix HA, simplify provisioning, add Vagrant test cluster
This commit is contained in:
Derek Nola 2023-11-08 15:40:11 -08:00 committed by GitHub
commit 2e1da471c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 237 additions and 133 deletions

5
.ansible-lint Normal file
View File

@ -0,0 +1,5 @@
---
warn_list:
- var-naming[no-role-prefix]
- yaml[comments-indentation]
- yaml[line-length]

View File

@ -22,7 +22,7 @@ on processor architecture:
## System requirements
Deployment environment must have Ansible 2.4.0+
Master and nodes must have passwordless SSH access
Server and agent nodes must have passwordless SSH access
## Usage
@ -48,7 +48,7 @@ k3s_cluster:
If needed, you can also edit `vars` section at the bottom to match your environment.
If multiple hosts are in the server group the playbook will automatically setup k3s in HA mode with embedded etcd.
An odd number of server nodes is recommended (3,5,7). Read the official documentation below for more information and options.
An odd number of server nodes is required (3,5,7). Read the official documentation below for more information and options.
https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/
Using a loadbalancer or VIP as the API endpoint is preferred but not covered here.
@ -61,8 +61,19 @@ ansible-playbook playbook/site.yml -i inventory.yml
## Kubeconfig
To confirm access to your **Kubernetes** cluster use the following:
After successful bringup, the kubeconfig of the cluster is copied to the control-node and set as default (`~/.kube/config`).
Assuming you have [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) installed, confirm access to your **Kubernetes** cluster with the following:
```bash
kubectl get nodes
```
## Local Testing
A Vagrantfile is provided that provisions a 5-node cluster using Vagrant with LibVirt or VirtualBox. To use it:
```bash
vagrant up
```
By default, each node is given 2 cores and 2GB of RAM and runs Ubuntu 20.04. You can customize these settings by editing the `Vagrantfile`.

56
Vagrantfile vendored Normal file
View File

@ -0,0 +1,56 @@
# ENV['VAGRANT_NO_PARALLEL'] = 'no'

# Hostname of every VM in the test cluster. The "server"/"agent" prefix is what
# places a node into the matching Ansible group.
NODE_ROLES = %w[server-0 server-1 server-2 agent-0 agent-1]
# Vagrant box used for each node, matched to NODE_ROLES by position.
NODE_BOXES = %w[
  generic/ubuntu2004
  generic/ubuntu2004
  generic/ubuntu2004
  generic/ubuntu2004
  generic/ubuntu2004
]
# CPU count and memory size applied to every VM by the provider settings.
NODE_CPUS = 2
NODE_MEMORY = 2_048
# Virtualbox >= 6.1.28 requires `/etc/vbox/network.conf` for expanded private networks
NETWORK_PREFIX = '10.10.10'
# Configure a single Vagrant machine and attach the Ansible provisioner to it.
#
# vm       - the machine's `vm` config object (receives box, hostname, network
#            and provisioner settings).
# role     - name assigned as the machine's hostname, e.g. "server-0".
# node_num - index of the node; picks its entry in NODE_BOXES and the last
#            octet of its static IP (100 + node_num).
def provision(vm, role, node_num)
  vm.box = NODE_BOXES[node_num]
  vm.hostname = role

  # We use a private network because the default IPs are dynamically assigned
  # during provisioning. This makes it impossible to know the server-0 IP when
  # provisioning subsequent servers and agents. A private network allows us to
  # assign static IPs to each node, and thus provide a known IP for the API endpoint.
  static_ip = [NETWORK_PREFIX, 100 + node_num].join('.')
  # An expanded netmask is required to allow VM<-->VM communication, virtualbox defaults to /32
  vm.network 'private_network', ip: static_ip, netmask: '255.255.255.0'

  # Inventory groups are derived from the hostname prefixes in NODE_ROLES.
  server_hosts = NODE_ROLES.grep(/^server/)
  agent_hosts = NODE_ROLES.grep(/^agent/)

  vm.provision 'ansible', run: 'once' do |ansible|
    ansible.compatibility_mode = '2.0'
    ansible.playbook = 'playbook/site.yml'
    ansible.groups = {
      'server' => server_hosts,
      'agent' => agent_hosts,
      'k3s_cluster:children' => %w[server agent]
    }
    ansible.extra_vars = {
      k3s_version: 'v1.26.5+k3s1',
      # Node 0 (the first entry of NODE_ROLES) gets the .100 address.
      api_endpoint: "#{NETWORK_PREFIX}.100",
      token: 'myvagrant',
      # Required to use the private network configured above
      extra_server_args: "--node-external-ip #{static_ip} --flannel-iface eth1",
      extra_agent_args: "--node-external-ip #{static_ip} --flannel-iface eth1"
    }
  end
end
# Entry point: size the VMs for both supported providers, then define one
# machine per entry in NODE_ROLES and hand it to `provision`.
Vagrant.configure('2') do |config|
  # Default provider is libvirt, virtualbox is only provided as a backup.
  # Both providers receive the same CPU and memory sizing.
  %w[libvirt virtualbox].each do |backend|
    config.vm.provider backend do |v|
      v.cpus = NODE_CPUS
      v.memory = NODE_MEMORY
    end
  end

  # The index doubles as the node number used for box and IP selection.
  NODE_ROLES.each_with_index do |role, index|
    config.vm.define(role) { |node| provision(node.vm, role, index) }
  end
end

View File

@ -9,14 +9,17 @@ k3s_cluster:
192.16.35.12
192.16.35.13
# Required Vars
vars:
ansible_port: 22
ansible_user: debian
k3s_version: v1.25.5+k3s2
k3s_server_location: /var/lib/rancher/k3s
systemd_dir: /etc/systemd/system
token: "mytoken" # Use ansible vault if you want to keep it secret
api_endpoint: "{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}"
api_port: 6443
extra_server_args: ""
extra_server_init_args: ""
extra_agent_args: ""
# Optional vars
# api_port: 6443
# k3s_server_location: /var/lib/rancher/k3s
# systemd_dir: /etc/systemd/system

View File

@ -1,38 +1,17 @@
---
- name: Download k3s binary x64
- name: Download k3s install script
ansible.builtin.get_url:
url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s
checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-amd64.txt
url: https://get.k3s.io/
timeout: 120
dest: /usr/local/bin/k3s
dest: /usr/local/bin/k3s-install.sh
owner: root
group: root
mode: 0755
when: ansible_facts.architecture == "x86_64"
- name: Download k3s binary arm64
ansible.builtin.get_url:
url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s-arm64
checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-arm64.txt
timeout: 120
dest: /usr/local/bin/k3s
owner: root
group: root
mode: 0755
when:
- ( ansible_facts.architecture is search("arm") and
ansible_facts.userspace_bits == "64" ) or
ansible_facts.architecture is search("aarch64")
- name: Download k3s binary armhf
ansible.builtin.get_url:
url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s-armhf
checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-arm.txt
timeout: 120
dest: /usr/local/bin/k3s
owner: root
group: root
mode: 0755
when:
- ansible_facts.architecture is search("arm")
- ansible_facts.userspace_bits == "32"
- name: Download k3s binary
ansible.builtin.command:
cmd: /usr/local/bin/k3s-install.sh
environment:
INSTALL_K3S_SKIP_START: "true"
INSTALL_K3S_VERSION: "{{ k3s_version }}"
changed_when: true

View File

@ -0,0 +1,4 @@
---
k3s_server_location: "/var/lib/rancher/k3s"
systemd_dir: "/etc/systemd/system"
api_port: 6443

View File

@ -1,4 +1,5 @@
---
- name: Copy K3s service file
ansible.builtin.template:
src: "k3s-agent.service.j2"
@ -11,5 +12,5 @@
ansible.builtin.systemd:
name: k3s-agent
daemon_reload: true
state: restarted
state: started
enabled: true

View File

@ -1,13 +1,17 @@
[Unit]
Description=Lightweight Kubernetes
Documentation=https://k3s.io
Wants=network-online.target
After=network-online.target
[Install]
WantedBy=multi-user.target
[Service]
Type=notify
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ hostvars[groups['server'][0]]['token'] }} {{ extra_agent_args }}
EnvironmentFile=-/etc/default/%N
EnvironmentFile=-/etc/sysconfig/%N
EnvironmentFile=-/etc/systemd/system/k3s.service.env
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead
@ -19,6 +23,7 @@ TasksMax=infinity
TimeoutStartSec=0
Restart=always
RestartSec=5s
[Install]
WantedBy=multi-user.target
ExecStartPre=/bin/sh -xc '! /usr/bin/systemctl is-enabled --quiet nm-cloud-setup.service'
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_agent_args }}

View File

@ -0,0 +1,4 @@
---
k3s_server_location: "/var/lib/rancher/k3s"
systemd_dir: "/etc/systemd/system"
api_port: 6443

View File

@ -2,50 +2,30 @@
- name: Init first server node
when: ansible_hostname == groups['server'][0]
block:
- name: Start temporary service for HA cluster
ansible.builtin.command:
cmd: >
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
--cluster-init --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args}}
# noqa: jinja[spacing]
creates: "{{ k3s_server_location }}/server/node-token"
when: groups['server'] | length > 1
- name: Start temporary service for single server cluster
ansible.builtin.command:
cmd: >
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
--tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }}
creates: "{{ k3s_server_location }}/server/node-token"
- name: Copy K3s service file [Single]
when: groups['server'] | length == 1
ansible.builtin.template:
src: "k3s-single.service.j2"
dest: "{{ systemd_dir }}/k3s.service"
owner: root
group: root
mode: 0644
- name: Wait for node-token
ansible.builtin.wait_for:
path: "{{ k3s_server_location }}/server/node-token"
- name: Copy K3s service file [HA]
when: groups['server'] | length > 1
ansible.builtin.template:
src: "k3s-cluster-init.service.j2"
dest: "{{ systemd_dir }}/k3s.service"
owner: root
group: root
mode: 0644
- name: Register node-token file access mode
ansible.builtin.stat:
path: "{{ k3s_server_location }}/server/node-token"
register: p
- name: Change file access node-token
ansible.builtin.file:
path: "{{ k3s_server_location }}/server/node-token"
mode: "g+rx,o+rx"
- name: Read node-token from server
ansible.builtin.slurp:
path: "{{ k3s_server_location }}/server/node-token"
register: node_token
- name: Store server node-token
ansible.builtin.set_fact:
token: "{{ node_token.content | b64decode | regex_replace('\n', '') }}"
- name: Restore node-token file access
ansible.builtin.file:
path: "{{ k3s_server_location }}/server/node-token"
mode: "{{ p.stat.mode }}"
- name: Enable and check K3s service
ansible.builtin.systemd:
name: k3s
daemon_reload: true
state: started
enabled: true
- name: Create directory .kube
ansible.builtin.file:
@ -54,6 +34,10 @@
owner: "{{ ansible_user }}"
mode: "u=rwx,g=rx,o="
- name: Pause to allow server startup
ansible.builtin.pause:
seconds: 10
- name: Copy config file to user home directory
ansible.builtin.copy:
src: /etc/rancher/k3s/k3s.yaml
@ -76,17 +60,28 @@
flat: true
- name: Start other server if any and verify status
when:
- (groups['server'] | length) > 1
- ansible_hostname != groups['server'][0]
block:
- name: Init additonal server nodes
ansible.builtin.command:
cmd: >
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
--token "{{ hostvars[groups['server'][0]]['token'] }}" --server https://{{ api_endpoint }}:{{ api_port }}
--tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }}
creates: "{{ k3s_server_location }}/server/node-token"
when: ansible_hostname != groups['server'][0]
- name: Copy K3s service file [HA]
when: groups['server'] | length > 1
ansible.builtin.template:
src: "k3s-ha.service.j2"
dest: "{{ systemd_dir }}/k3s.service"
owner: root
group: root
mode: 0644
- name: Enable and check K3s service
ansible.builtin.systemd:
name: k3s
daemon_reload: true
state: started
enabled: true
- name: Verify that all server nodes joined
when: (groups['server'] | length) > 1
ansible.builtin.command:
cmd: >
k3s kubectl get nodes -l "node-role.kubernetes.io/control-plane=true" -o=jsonpath="{.items[*].metadata.name}"
@ -95,28 +90,6 @@
retries: 20
delay: 10
changed_when: false
always:
- name: Kill the temporary init service
ansible.builtin.systemd:
name: k3s-init
state: stopped
failed_when: false
- name: Copy K3s service file
ansible.builtin.template:
src: "k3s-server.service.j2"
dest: "{{ systemd_dir }}/k3s-server.service"
owner: root
group: root
mode: 0644
register: k3s_service
- name: Enable and check K3s service
ansible.builtin.systemd:
name: k3s-server
daemon_reload: true
state: restarted
enabled: true
- name: Create symlinks
ansible.builtin.file:

View File

@ -0,0 +1,28 @@
[Unit]
Description=Lightweight Kubernetes
Documentation=https://k3s.io
Wants=network-online.target
After=network-online.target
[Install]
WantedBy=multi-user.target
[Service]
Type=notify
EnvironmentFile=-/etc/default/%N
EnvironmentFile=-/etc/sysconfig/%N
EnvironmentFile=-/etc/systemd/system/k3s.service.env
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNOFILE=1048576
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
TimeoutStartSec=0
Restart=always
RestartSec=5s
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s server --cluster-init --data-dir {{ k3s_server_location }} --token {{ token }} {{ extra_server_args }}

View File

@ -0,0 +1,28 @@
[Unit]
Description=Lightweight Kubernetes
Documentation=https://k3s.io
Wants=network-online.target
After=network-online.target
[Install]
WantedBy=multi-user.target
[Service]
Type=notify
EnvironmentFile=-/etc/default/%N
EnvironmentFile=-/etc/sysconfig/%N
EnvironmentFile=-/etc/systemd/system/k3s.service.env
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNOFILE=1048576
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
TimeoutStartSec=0
Restart=always
RestartSec=5s
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_server_args }}

View File

@ -1,13 +1,17 @@
[Unit]
Description=Lightweight Kubernetes
Documentation=https://k3s.io
Wants=network-online.target
After=network-online.target
[Install]
WantedBy=multi-user.target
[Service]
Type=notify
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args }}
EnvironmentFile=-/etc/default/%N
EnvironmentFile=-/etc/sysconfig/%N
EnvironmentFile=-/etc/systemd/system/k3s.service.env
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead
@ -19,6 +23,6 @@ TasksMax=infinity
TimeoutStartSec=0
Restart=always
RestartSec=5s
[Install]
WantedBy=multi-user.target
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} --token {{ token }} {{ extra_server_args }}

View File

@ -4,6 +4,12 @@
state: disabled
when: ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat']
- name: Install Dependent Ubuntu Packages
when: ansible_distribution in ['Ubuntu']
ansible.builtin.apt:
name: policycoreutils # Used by install script to restore SELinux context
update_cache: yes
- name: Enable IPv4 forwarding
ansible.posix.sysctl:
name: net.ipv4.ip_forward
@ -53,16 +59,12 @@
validate: 'visudo -cf %s'
when: ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat']
- name: Make k3s directory
ansible.builtin.file:
path: "/var/lib/rancher"
mode: 0755
state: directory
- name: Create symlink
ansible.builtin.file:
dest: /var/lib/rancher/k3s
src: "{{ k3s_server_location }}"
force: true
state: link
when: k3s_server_location != "/var/lib/rancher/k3s"
when:
- k3s_server_location is defined
- k3s_server_location != "/var/lib/rancher/k3s"

View File

@ -34,7 +34,7 @@
ansible_facts.lsb.description|default("") is match("Debian") )
- name: Set detected_distribution to ArchLinux (ARM64)
set_fact:
ansible.builtin.set_fact:
detected_distribution: Archlinux
when:
- ansible_facts.architecture is search("aarch64")

View File

@ -1,14 +1,15 @@
---
- name: Enable cgroup via boot commandline if not already enabled for Archlinux
lineinfile:
ansible.builtin.lineinfile:
path: /boot/boot.txt
search_string: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}"
line: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}" cgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory
register: kernel_cmdline_cgroup
- name: Create
shell: ./mkscr
ansible.builtin.command: ./mkscr
args:
chdir: /boot
notify: reboot
when: kernel_cmdline_cgroup.changed
# This task only runs after the boot arguments were edited (see `when` below),
# so running ./mkscr always counts as a change. It must report "changed",
# otherwise the `reboot` handler it notifies is never triggered.
changed_when: true
when: kernel_cmdline_cgroup.changed # noqa: no-handler