RKE1 部署随记

部署 RKE1

前期准备

# Download the RKE1 binary. -f makes curl exit non-zero on an HTTP error
# instead of saving the error page under the binary's name.
curl -fLO "https://github.com/rancher/rke/releases/download/v1.5.12/rke_linux-amd64"

# Put it on PATH and mark it executable.
mv rke_linux-amd64 /usr/local/bin/rke && chmod +x /usr/local/bin/rke

# Install Docker (run this on every node). With -f nothing is piped into sh
# when the download fails; -L follows redirects, -sS hides the progress meter
# but still prints errors.
curl -fsSL https://releases.rancher.com/install-docker/20.10.sh | sh

生成配置

# Write the RKE cluster definition to ./cluster.yml.
# - The heredoc delimiter is quoted so the shell copies the YAML verbatim.
# - Nesting matters: each node is one list item under `nodes` and carries its
#   own `role` list; `url`/`is_default` belong to the registry entry and
#   `plugin` belongs to `network`.
# - The comment inside the file notes that older RKE1 releases do not accept
#   RSA private keys, which is why an ed25519 key is configured.
cat <<'EOF' > cluster.yml
# 旧版本 rke1 私钥类型不支持 rsa,需要选择 ed25519
ssh_key_path: /root/.ssh/id_ed25519
nodes:
  - address: 172.16.0.106
    hostname_override: rke1-server-0
    internal_address: 172.16.0.106
    user: root
    role:
      - controlplane
      - etcd
      - worker
  - address: 172.16.0.105
    hostname_override: rke1-server-1
    internal_address: 172.16.0.105
    user: root
    role:
      - controlplane
      - etcd
      - worker
  - address: 172.16.0.104
    hostname_override: rke1-server-2
    internal_address: 172.16.0.104
    user: root
    role:
      - controlplane
      - etcd
      - worker
private_registries:
  - url: registry.cn-hangzhou.aliyuncs.com
    is_default: true
kubernetes_version: "v1.20.15-rancher2-2"
network:
  plugin: calico
EOF

安装 RKE1

# Bring the cluster up using the cluster.yml in the current directory.
rke up --config cluster.yml

方便后续运维配置

# Copy the kubectl binary out of the kube-apiserver container onto the host.
docker cp kube-apiserver:usr/local/bin/kubectl /usr/local/bin/kubectl

# Load kubectl bash completion in future shells.
echo "source <(kubectl completion bash)" >> ~/.bashrc

# -p: do not fail when ~/.kube already exists (e.g. on a re-run).
mkdir -p ~/.kube

# Make kube_config_cluster.yml the default kubeconfig.
# NOTE(review): presumably this file is written next to cluster.yml by
# `rke up`; run this from that directory.
mv kube_config_cluster.yml ~/.kube/config

# Rebuild the admin kubeconfig from the cluster state stored in kube-system.
# The four commands below are ALTERNATIVES: every one of them overwrites
# ~/.kube/config, so run only the one that matches your version and tooling.
# Each pipeline extracts the "kube-admin" config from the full-cluster-state
# object and rewrites its `server:` line to https://127.0.0.1:6443.
# Reference: https://www.suse.com/support/kb/doc/?id=000020018

# Rancher 2.7.14+/Rancher 2.8.5+, RKE 1.4.19+/RKE 1.5.10+
# Reads the full-cluster-state *secret*, hence the extra `base64 -d` step.
kubectl --kubeconfig $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl/kubecfg-kube-node.yaml get secrets -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_" > ~/.kube/config

# Without jq command
# Same pipeline, but kubectl/jq/sed run inside a container started from a
# local image selected by the hyperkube source label; only docker is needed
# on the host.
docker run --rm --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > ~/.kube/config

# Earlier versions of Rancher and RKE
# Reads the full-cluster-state *configmap*; no base64 decoding step.
kubectl --kubeconfig $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_" > ~/.kube/config

# Without jq command
# Container-based variant of the configmap command.
# NOTE(review): this image filter uses the label value ".../hyperkube.git"
# while the secret-based container command above uses ".../hyperkube" —
# confirm which label your local images actually carry.
docker run --rm --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube.git) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > ~/.kube/config

# Install Helm v3.17.1 through the rancher.cn mirror. -f keeps an HTTP error
# page from being piped into bash; -L follows redirects; -sS hides the
# progress meter but still prints errors.
curl -fsSL https://rancher-mirror.rancher.cn/helm/get-helm-3.sh | INSTALL_HELM_MIRROR=cn bash -s -- --version v3.17.1

# Load helm bash completion in future shells.
echo "source <(helm completion bash)" >> ~/.bashrc

常见问题

如果是 CentOS 或 RHEL 系统，默认不允许使用 root 用户进行安装，报错信息如下：

WARN[0000] Failed to set up SSH tunneling for host [x.x.x.x]: Can’t retrieve Docker Info ,Failed to dial to /var/run/docker.sock: ssh: rejected: administratively prohibited (open failed)

需要准备其他用户：

# Create the "rancher" group and user, then add the user to the docker group.
# Each step runs only if the previous one succeeded.
groupadd rancher \
  && useradd -g rancher rancher \
  && usermod -aG docker rancher

如果出现下面的错误，通常是因为 ssh_key_path 指定的私钥与目标主机或用户名不匹配，可以检查节点上对应用户的 ~/.ssh/authorized_keys 文件是否正确：

WARN[0000] Failed to set up SSH tunneling for host [x.x.x.x]: Can't retrieve Docker Info: error during connect: Get "http://%2Fvar%2Frun%2Fdocker.sock/v1.24/info": Unable to access node with address [x.x.x.x:22] using SSH. Please check if you are able to SSH to the node using the specified SSH Private Key and if you have configured the correct SSH username. Error: ssh: handshake failed: ssh: unable to authenticate, attempted methods [none publickey], no supported methods remain

如果出现下面的错误：

WARN[0000] Failed to set up SSH tunneling for host [x.x.x.x]: Can't retrieve Docker Info: error during connect: Get http://%2Fvar%2Frun%2Fdocker.sock/v1.24/info: Unable to access the service on /var/run/docker.sock. The service might be still starting up. Error: ssh: rejected: connect failed (open failed) 

需要在 /etc/ssh/sshd_config 文件中添加以下内容（修改后需重启 sshd 服务才能生效）：

# Allow TCP forwarding over SSH connections to this node.
AllowTcpForwarding yes

清理 iptables 规则

# Flush all rules, delete user-defined chains and zero the counters, first in
# the default (filter) table and then in the nat table; finally restart the
# kube-proxy container. The chain stops at the first command that fails.
iptables --flush \
  && iptables --delete-chain \
  && iptables --zero \
  && iptables --table nat --flush \
  && iptables --table nat --delete-chain \
  && iptables --table nat --zero \
  && docker restart kube-proxy

清理节点

#!/bin/bash

# systemd unit names to shut down, one per line.
KUBE_SVC='
kubelet
kube-scheduler
kube-proxy
kube-controller-manager
kube-apiserver
'

for unit in ${KUBE_SVC}; do
  # Stop the unit only when systemd reports it as active.
  case "$(systemctl is-active "${unit}")" in
    active) systemctl stop "${unit}" ;;
  esac

  # Disable start-on-boot only when the unit is reported as enabled.
  case "$(systemctl is-enabled "${unit}")" in
    enabled) systemctl disable "${unit}" ;;
  esac
done

# Stop every container. `xargs -r` skips the docker call entirely when the
# list is empty, so a node without containers no longer triggers docker's
# "requires at least 1 argument" error.
docker ps -aq | xargs -r docker stop

# Remove every container.
docker ps -aq | xargs -r docker rm -f

# Remove every volume.
docker volume ls -q | xargs -r docker volume rm

# Unmount the tmpfs mounts that live under /var/lib/kubelet.
# `mount` prints "<source> on <mountpoint> type <fstype> ...", so field 3 is
# the mount point.
while IFS= read -r mount_point; do
  umount "${mount_point}"
done < <(mount | awk '/tmpfs/ && /\/var\/lib\/kubelet/ { print $3 }')

# Unmount the two top-level directories as well, but only when they really
# are mount points; otherwise umount just reports "not mounted".
for mount_point in /var/lib/kubelet /var/lib/rancher; do
  if mountpoint -q "${mount_point}"; then
    umount "${mount_point}"
  fi
done

# Back up the cluster state directories by renaming them with a timestamp
# suffix. The timestamp is taken once so all backups share the same suffix,
# and directories that do not exist on this node are skipped instead of
# producing an mv error.
backup_stamp=$(date +"%Y%m%d%H%M")
for state_dir in /etc/kubernetes /var/lib/etcd /var/lib/rancher /opt/rke; do
  if [[ -e "${state_dir}" ]]; then
    mv -- "${state_dir}" "${state_dir}-bak-${backup_stamp}"
  fi
done

# Delete leftover directories; rm -rf ignores paths that do not exist.
leftover_paths=(
  /etc/ceph
  /etc/cni
  /opt/cni
  /run/secrets/kubernetes.io
  /run/calico
  /run/flannel
  /var/lib/calico
  /var/lib/cni
  /var/lib/kubelet
  /var/log/containers
  /var/log/kube-audit
  /var/log/pods
  /var/run/calico
  /usr/libexec/kubernetes
)
rm -rf "${leftover_paths[@]}"

# Delete every network interface that is not on the keep list.
#
# Names are matched by prefix with `case`. The previous version grepped the
# first three characters of each name, as an unanchored regex, against the
# whole keep list, so unrelated names could match by substring. Interfaces
# with predictable NIC names (enp*/eno*/enx*) were also handed to
# `ip link delete`; they are now kept alongside eth*/ens*.
for iface_path in /sys/class/net/*; do
  # Guard against an unmatched glob (the literal pattern is left in place).
  [[ -e "${iface_path}" ]] || continue
  net_inter=${iface_path##*/}

  case "${net_inter}" in
    lo | docker0 | eth* | ens* | enp* | eno* | enx* | bond*)
      # Loopback, the Docker bridge, NICs and bonds (including the
      # bonding_masters control file) are left alone.
      continue
      ;;
  esac

  ip link delete "${net_inter}"
done

# Kill processes that still hold one of the ports listed below.
port_list='
80
443
6443
2376
2379
2380
8472
9099
10250
10254
'

for port in $port_list; do
  # Match the port only at the end of the Local Address column ($4).
  # A plain `grep $port` over the whole netstat line also hits 8080 when
  # looking for 80, as well as PIDs and IP addresses that merely contain the
  # digits, and would kill unrelated processes.
  # The PID/Program column is located by shape ("<digits>/...") because UDP
  # rows usually have an empty State column, which shifts the field number.
  mapfile -t port_pids < <(
    netstat -atlnup | awk -v port="${port}" '
      $1 ~ /^(tcp|udp)/ && $4 ~ (":" port "$") {
        for (i = 6; i <= NF; i++) {
          if ($i ~ /^[0-9]+\//) {
            split($i, owner, "/")
            print owner[1]
            break
          }
        }
      }' | sort -un
  )
  if (( ${#port_pids[@]} > 0 )); then
    kill -9 "${port_pids[@]}"
  fi
done

# Kill every remaining process whose command line mentions "kube".
# pgrep never reports itself; this script's own PID is filtered out too, so a
# script whose file name contains "kube" does not kill itself half-way.
mapfile -t kube_pids < <(pgrep -f kube | grep -vx -- "$$")

if (( ${#kube_pids[@]} > 0 )); then
  kill -9 "${kube_pids[@]}"
fi

# Flush the iptables tables.
## Caution: if this node carries custom iptables configuration, run the
## commands below with care.
sudo iptables -F
sudo iptables -t nat -F
sudo iptables -t filter -F
sudo iptables -t nat -X
sudo iptables -t filter -X
systemctl restart docker

# Reboot the node.
reboot
Author

Warner Chen

Posted on

2024-09-05

Updated on

2025-04-08

Licensed under

You need to set install_url to use ShareThis. Please set it in _config.yml.
You forgot to set the business or currency_code for Paypal. Please set it in _config.yml.

Comments

You forgot to set the shortname for Disqus. Please set it in _config.yml.