Add scale thresholds to split etcd and k8s-masters

Also adds calico-rr group if there are standalone etcd nodes.
Now if there are 50 or more nodes, 3 etcd nodes will be standalone.
If there are 200 or more nodes, 2 kube-masters will be standalone.
If thresholds are exceeded, kube-node group cannot add nodes that
belong to etcd or kube-master groups (according to above statements).
This commit is contained in:
Matthew Mosesohn 2017-01-11 18:15:04 +03:00
parent 5420fa942e
commit f742fc3dd1
6 changed files with 96 additions and 62 deletions

View file

@ -40,7 +40,8 @@ import os
import re
import sys
ROLES = ['kube-master', 'all', 'k8s-cluster:children', 'kube-node', 'etcd']
ROLES = ['all', 'kube-master', 'kube-node', 'etcd', 'k8s-cluster:children',
'calico-rr']
PROTECTED_NAMES = ROLES
AVAILABLE_COMMANDS = ['help', 'print_cfg', 'print_ips', 'load']
_boolean_states = {'1': True, 'yes': True, 'true': True, 'on': True,
@ -51,10 +52,18 @@ def get_var_as_bool(name, default):
value = os.environ.get(name, '')
return _boolean_states.get(value.lower(), default)
# Configurable as shell vars start
CONFIG_FILE = os.environ.get("CONFIG_FILE", "./inventory.cfg")
# Reconfigures cluster distribution at scale
SCALE_THRESHOLD = int(os.environ.get("SCALE_THRESHOLD", 50))
MASSIVE_SCALE_THRESHOLD = int(os.environ.get("SCALE_THRESHOLD", 200))
DEBUG = get_var_as_bool("DEBUG", True)
HOST_PREFIX = os.environ.get("HOST_PREFIX", "node")
# Configurable as shell vars end
class KargoInventory(object):
@ -74,11 +83,16 @@ class KargoInventory(object):
if changed_hosts:
self.hosts = self.build_hostnames(changed_hosts)
self.purge_invalid_hosts(self.hosts.keys(), PROTECTED_NAMES)
self.set_kube_master(list(self.hosts.keys())[:2])
self.set_all(self.hosts)
self.set_k8s_cluster()
self.set_kube_node(self.hosts.keys())
self.set_etcd(list(self.hosts.keys())[:3])
if len(self.hosts) >= SCALE_THRESHOLD:
self.set_kube_master(list(self.hosts.keys())[3:5])
else:
self.set_kube_master(list(self.hosts.keys())[:2])
self.set_kube_node(self.hosts.keys())
if len(self.hosts) >= SCALE_THRESHOLD:
self.set_calico_rr(list(self.hosts.keys())[:3])
else: # Show help if no options
self.show_help()
sys.exit(0)
@ -205,8 +219,32 @@ class KargoInventory(object):
self.add_host_to_group('k8s-cluster:children', 'kube-node')
self.add_host_to_group('k8s-cluster:children', 'kube-master')
def set_calico_rr(self, hosts):
for host in hosts:
if host in self.config.items('kube-master'):
self.debug("Not adding {0} to calico-rr group because it "
"conflicts with kube-master group".format(host))
continue
if host in self.config.items('kube-node'):
self.debug("Not adding {0} to calico-rr group because it "
"conflicts with kube-node group".format(host))
continue
self.add_host_to_group('calico-rr', host)
def set_kube_node(self, hosts):
for host in hosts:
if len(self.config['all']) >= SCALE_THRESHOLD:
if self.config.has_option('etcd', host):
self.debug("Not adding {0} to kube-node group because of "
"scale deployment and host is in etcd "
"group.".format(host))
continue
if len(self.config['all']) >= MASSIVE_SCALE_THRESHOLD:
if self.config.has_option('kube-master', host):
self.debug("Not adding {0} to kube-node group because of "
"scale deployment and host is in kube-master "
"group.".format(host))
continue
self.add_host_to_group('kube-node', host)
def set_etcd(self, hosts):
@ -275,7 +313,15 @@ print_ips - Write a space-delimited list of IPs from "all" group
Advanced usage:
Add another host after initial creation: inventory.py 10.10.1.5
Delete a host: inventory.py -10.10.1.3
Delete a host by id: inventory.py -node1'''
Delete a host by id: inventory.py -node1
Configurable env vars:
DEBUG Enable debug printing. Default: True
CONFIG_FILE File to write config to Default: ./inventory.cfg
HOST_PREFIX Host prefix for generated hosts. Default: node
SCALE_THRESHOLD Separate ETCD role if # of nodes >= 50
MASSIVE_SCALE_THRESHOLD Separate K8s master and ETCD if # of nodes >= 200
'''
print(help_text)
def print_config(self):

View file

@ -1,48 +0,0 @@
---
- src: https://gitlab.com/kubespray-ansibl8s/k8s-common.git
path: roles/apps
scm: git
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-dashboard.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-kubedns.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-elasticsearch.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-redis.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-memcached.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-postgres.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-pgbouncer.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-heapster.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-influxdb.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-kubedash.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-kube-logstash.git
# path: roles/apps
# scm: git

View file

@ -210,3 +210,31 @@ class TestInventory(unittest.TestCase):
self.inv.set_etcd([host])
self.assertTrue(host in self.inv.config[group])
def test_scale_scenario_one(self):
num_nodes = 50
hosts = OrderedDict()
for hostid in range(1, num_nodes+1):
hosts["node" + str(hostid)] = ""
self.inv.set_all(hosts)
self.inv.set_etcd(hosts.keys()[0:3])
self.inv.set_kube_master(hosts.keys()[0:2])
self.inv.set_kube_node(hosts.keys())
for h in range(3):
self.assertFalse(hosts.keys()[h] in self.inv.config['kube-node'])
def test_scale_scenario_two(self):
num_nodes = 500
hosts = OrderedDict()
for hostid in range(1, num_nodes+1):
hosts["node" + str(hostid)] = ""
self.inv.set_all(hosts)
self.inv.set_etcd(hosts.keys()[0:3])
self.inv.set_kube_master(hosts.keys()[3:5])
self.inv.set_kube_node(hosts.keys())
for h in range(5):
self.assertFalse(hosts.keys()[h] in self.inv.config['kube-node'])

View file

@ -11,7 +11,7 @@ deps =
-r{toxinidir}/test-requirements.txt
setenv = VIRTUAL_ENV={envdir}
passenv = http_proxy HTTP_PROXY https_proxy HTTPS_PROXY no_proxy NO_PROXY
commands = py.test -vv #{posargs:./tests}
commands = pytest -vv #{posargs:./tests}
[testenv:pep8]
usedevelop = False

View file

@ -23,19 +23,17 @@ Building your own inventory
Ansible inventory can be stored in 3 formats: YAML, JSON, or inifile. There is
an example inventory located
[here](https://github.com/kubernetes-incubator/kargo/blob/master/inventory/inventory.example):
[here](https://github.com/kubernetes-incubator/kargo/blob/master/inventory/inventory.example).
```
cp -r inventory my_inventory
cp my_inventory/inventory.example my_inventory/inventory.cfg
# edit the inventory file as needed
```
Or you can use an
You can use an
[inventory generator](https://github.com/kubernetes-incubator/kargo/blob/master/contrib/inventory_builder/inventory.py)
to create or modify an Ansible inventory. Currently, it is limited in
functionality and is only use for making a basic Kargo cluster, but it does
support creating large clusters. For example:
support creating large clusters. It now supports
separated ETCD and Kubernetes master roles from node role if the size exceeds a
certain threshold. Run inventory.py help for more information.
Example inventory generator usage:
```
cp -r inventory my_inventory

View file

@ -27,5 +27,15 @@ For a large scaled deployments, consider the following configuration changes:
end up with the 'm' skipped for docker as well. This is required as docker does not
understand k8s units well.
* Add calico-rr nodes if you are deploying with Calico or Canal. Nodes recover
from host/network interruption much quicker with calico-rr. Note that
calico-rr role must be on a host without kube-master or kube-node role (but
etcd role is okay).
* Check out the
[Inventory](https://github.com/kubernetes-incubator/kargo/blob/master/docs/getting-started.md#building-your-own-inventory)
section of the Getting started guide for tips on creating a large scale
Ansible inventory.
For example, when deploying 200 nodes, you may want to run ansible with
``--forks=50``, ``--timeout=600`` and define the ``retry_stagger: 60``.