Add scale thresholds to split etcd and k8s-masters

Also adds calico-rr group if there are standalone etcd nodes.
Now if there are 50 or more nodes, 3 etcd nodes will be standalone.
If there are 200 or more nodes, 2 kube-masters will be standalone.
Once a threshold is reached, the kube-node group will not accept nodes that
belong to the etcd or kube-master groups (per the thresholds stated above).
This commit is contained in:
Matthew Mosesohn 2017-01-11 18:15:04 +03:00
parent d4c9d9f7f5
commit c96fa2f4fc
6 changed files with 96 additions and 62 deletions

View file

@ -40,7 +40,8 @@ import os
import re import re
import sys import sys
ROLES = ['kube-master', 'all', 'k8s-cluster:children', 'kube-node', 'etcd'] ROLES = ['all', 'kube-master', 'kube-node', 'etcd', 'k8s-cluster:children',
'calico-rr']
PROTECTED_NAMES = ROLES PROTECTED_NAMES = ROLES
AVAILABLE_COMMANDS = ['help', 'print_cfg', 'print_ips', 'load'] AVAILABLE_COMMANDS = ['help', 'print_cfg', 'print_ips', 'load']
_boolean_states = {'1': True, 'yes': True, 'true': True, 'on': True, _boolean_states = {'1': True, 'yes': True, 'true': True, 'on': True,
@ -51,10 +52,18 @@ def get_var_as_bool(name, default):
value = os.environ.get(name, '') value = os.environ.get(name, '')
return _boolean_states.get(value.lower(), default) return _boolean_states.get(value.lower(), default)
# Configurable as shell vars start
CONFIG_FILE = os.environ.get("CONFIG_FILE", "./inventory.cfg") CONFIG_FILE = os.environ.get("CONFIG_FILE", "./inventory.cfg")
# Node-count thresholds that reconfigure cluster role distribution at scale.
# Each one is read from its own environment variable and must parse as an
# integer; the literal is the default used when the variable is unset.
SCALE_THRESHOLD = int(os.environ.get("SCALE_THRESHOLD", 50))
# Read from MASSIVE_SCALE_THRESHOLD (not SCALE_THRESHOLD) so that the two
# limits can be tuned independently, as the help text advertises.
MASSIVE_SCALE_THRESHOLD = int(os.environ.get("MASSIVE_SCALE_THRESHOLD", 200))
DEBUG = get_var_as_bool("DEBUG", True) DEBUG = get_var_as_bool("DEBUG", True)
HOST_PREFIX = os.environ.get("HOST_PREFIX", "node") HOST_PREFIX = os.environ.get("HOST_PREFIX", "node")
# Configurable as shell vars end
class KargoInventory(object): class KargoInventory(object):
@ -74,11 +83,16 @@ class KargoInventory(object):
if changed_hosts: if changed_hosts:
self.hosts = self.build_hostnames(changed_hosts) self.hosts = self.build_hostnames(changed_hosts)
self.purge_invalid_hosts(self.hosts.keys(), PROTECTED_NAMES) self.purge_invalid_hosts(self.hosts.keys(), PROTECTED_NAMES)
self.set_kube_master(list(self.hosts.keys())[:2])
self.set_all(self.hosts) self.set_all(self.hosts)
self.set_k8s_cluster() self.set_k8s_cluster()
self.set_kube_node(self.hosts.keys())
self.set_etcd(list(self.hosts.keys())[:3]) self.set_etcd(list(self.hosts.keys())[:3])
if len(self.hosts) >= SCALE_THRESHOLD:
self.set_kube_master(list(self.hosts.keys())[3:5])
else:
self.set_kube_master(list(self.hosts.keys())[:2])
self.set_kube_node(self.hosts.keys())
if len(self.hosts) >= SCALE_THRESHOLD:
self.set_calico_rr(list(self.hosts.keys())[:3])
else: # Show help if no options else: # Show help if no options
self.show_help() self.show_help()
sys.exit(0) sys.exit(0)
@ -205,8 +219,32 @@ class KargoInventory(object):
self.add_host_to_group('k8s-cluster:children', 'kube-node') self.add_host_to_group('k8s-cluster:children', 'kube-node')
self.add_host_to_group('k8s-cluster:children', 'kube-master') self.add_host_to_group('k8s-cluster:children', 'kube-master')
def set_calico_rr(self, hosts):
    """Add hosts to the calico-rr group, skipping conflicting hosts.

    A host that is already listed in the kube-master or kube-node group
    is not added; a debug message is emitted for it instead.

    :param hosts: iterable of host names to consider.
    """
    for host in hosts:
        # has_option() checks the option (host) name directly; the
        # previous items() call yields (name, value) pairs, which a bare
        # host string can never equal, so the guard never triggered.
        if self.config.has_option('kube-master', host):
            self.debug("Not adding {0} to calico-rr group because it "
                       "conflicts with kube-master group".format(host))
            continue
        if self.config.has_option('kube-node', host):
            self.debug("Not adding {0} to calico-rr group because it "
                       "conflicts with kube-node group".format(host))
            continue
        self.add_host_to_group('calico-rr', host)
def set_kube_node(self, hosts):
    """Add hosts to the kube-node group, honouring the scale thresholds.

    When the 'all' group holds at least SCALE_THRESHOLD entries, hosts
    present in the etcd group are skipped. When it holds at least
    MASSIVE_SCALE_THRESHOLD entries, hosts present in the kube-master
    group are skipped as well. Every skip is reported through debug().

    :param hosts: iterable of host names to consider.
    """
    for candidate in hosts:
        split_etcd = len(self.config['all']) >= SCALE_THRESHOLD
        if split_etcd and self.config.has_option('etcd', candidate):
            self.debug("Not adding {0} to kube-node group because of "
                       "scale deployment and host is in etcd "
                       "group.".format(candidate))
            continue

        split_master = len(self.config['all']) >= MASSIVE_SCALE_THRESHOLD
        if split_master and self.config.has_option('kube-master',
                                                   candidate):
            self.debug("Not adding {0} to kube-node group because of "
                       "scale deployment and host is in kube-master "
                       "group.".format(candidate))
            continue

        self.add_host_to_group('kube-node', candidate)
def set_etcd(self, hosts): def set_etcd(self, hosts):
@ -275,7 +313,15 @@ print_ips - Write a space-delimited list of IPs from "all" group
Advanced usage: Advanced usage:
Add another host after initial creation: inventory.py 10.10.1.5 Add another host after initial creation: inventory.py 10.10.1.5
Delete a host: inventory.py -10.10.1.3 Delete a host: inventory.py -10.10.1.3
Delete a host by id: inventory.py -node1''' Delete a host by id: inventory.py -node1
Configurable env vars:
DEBUG Enable debug printing. Default: True
CONFIG_FILE File to write config to Default: ./inventory.cfg
HOST_PREFIX Host prefix for generated hosts. Default: node
SCALE_THRESHOLD Separate ETCD role if # of nodes >= 50
MASSIVE_SCALE_THRESHOLD Separate K8s master and ETCD if # of nodes >= 200
'''
print(help_text) print(help_text)
def print_config(self): def print_config(self):

View file

@ -1,48 +0,0 @@
---
- src: https://gitlab.com/kubespray-ansibl8s/k8s-common.git
path: roles/apps
scm: git
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-dashboard.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-kubedns.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-elasticsearch.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-redis.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-memcached.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-postgres.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-pgbouncer.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-heapster.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-influxdb.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-kubedash.git
# path: roles/apps
# scm: git
#
#- src: https://gitlab.com/kubespray-ansibl8s/k8s-kube-logstash.git
# path: roles/apps
# scm: git

View file

@ -210,3 +210,31 @@ class TestInventory(unittest.TestCase):
self.inv.set_etcd([host]) self.inv.set_etcd([host])
self.assertTrue(host in self.inv.config[group]) self.assertTrue(host in self.inv.config[group])
def test_scale_scenario_one(self):
    """With 50 hosts, the three etcd hosts must be kept out of kube-node."""
    num_nodes = 50
    hosts = OrderedDict()
    for hostid in range(1, num_nodes+1):
        hosts["node" + str(hostid)] = ""

    # Materialise the keys once: dict views cannot be sliced or indexed
    # on Python 3, and OrderedDict keeps insertion order.
    hostnames = list(hosts.keys())

    self.inv.set_all(hosts)
    self.inv.set_etcd(hostnames[0:3])
    self.inv.set_kube_master(hostnames[0:2])
    self.inv.set_kube_node(hostnames)
    for h in range(3):
        self.assertFalse(hostnames[h] in self.inv.config['kube-node'])
def test_scale_scenario_two(self):
    """With 500 hosts, etcd and kube-master hosts stay out of kube-node."""
    num_nodes = 500
    hosts = OrderedDict()
    for hostid in range(1, num_nodes+1):
        hosts["node" + str(hostid)] = ""

    # Materialise the keys once: dict views cannot be sliced or indexed
    # on Python 3, and OrderedDict keeps insertion order.
    hostnames = list(hosts.keys())

    self.inv.set_all(hosts)
    self.inv.set_etcd(hostnames[0:3])
    self.inv.set_kube_master(hostnames[3:5])
    self.inv.set_kube_node(hostnames)
    # The first three hosts are etcd, the next two are kube-master.
    for h in range(5):
        self.assertFalse(hostnames[h] in self.inv.config['kube-node'])

View file

@ -11,7 +11,7 @@ deps =
-r{toxinidir}/test-requirements.txt -r{toxinidir}/test-requirements.txt
setenv = VIRTUAL_ENV={envdir} setenv = VIRTUAL_ENV={envdir}
passenv = http_proxy HTTP_PROXY https_proxy HTTPS_PROXY no_proxy NO_PROXY passenv = http_proxy HTTP_PROXY https_proxy HTTPS_PROXY no_proxy NO_PROXY
commands = py.test -vv #{posargs:./tests} commands = pytest -vv #{posargs:./tests}
[testenv:pep8] [testenv:pep8]
usedevelop = False usedevelop = False

View file

@ -23,19 +23,17 @@ Building your own inventory
Ansible inventory can be stored in 3 formats: YAML, JSON, or inifile. There is Ansible inventory can be stored in 3 formats: YAML, JSON, or inifile. There is
an example inventory located an example inventory located
[here](https://github.com/kubernetes-incubator/kargo/blob/master/inventory/inventory.example): [here](https://github.com/kubernetes-incubator/kargo/blob/master/inventory/inventory.example).
``` You can use an
cp -r inventory my_inventory
cp my_inventory/inventory.example my_inventory/inventory.cfg
# edit the inventory file as needed
```
Or you can use an
[inventory generator](https://github.com/kubernetes-incubator/kargo/blob/master/contrib/inventory_builder/inventory.py) [inventory generator](https://github.com/kubernetes-incubator/kargo/blob/master/contrib/inventory_builder/inventory.py)
to create or modify an Ansible inventory. Currently, it is limited in to create or modify an Ansible inventory. Currently, it is limited in
functionality and is only used for making a basic Kargo cluster, but it does functionality and is only used for making a basic Kargo cluster, but it does
support creating large clusters. For example: support creating large clusters. It now supports
separating the etcd and Kubernetes master roles from the node role once the
cluster size reaches a certain threshold. Run `inventory.py help` for more information.
Example inventory generator usage:
``` ```
cp -r inventory my_inventory cp -r inventory my_inventory

View file

@ -27,5 +27,15 @@ For a large scaled deployments, consider the following configuration changes:
end up with the 'm' skipped for docker as well. This is required as docker does not end up with the 'm' skipped for docker as well. This is required as docker does not
understand k8s units well. understand k8s units well.
* Add calico-rr nodes if you are deploying with Calico or Canal. Nodes recover
from host/network interruptions much more quickly with calico-rr. Note that
the calico-rr role must be on a host without the kube-master or kube-node role
(but the etcd role is okay).
* Check out the
[Inventory](https://github.com/kubernetes-incubator/kargo/blob/master/docs/getting-started.md#building-your-own-inventory)
section of the Getting started guide for tips on creating a large scale
Ansible inventory.
For example, when deploying 200 nodes, you may want to run ansible with For example, when deploying 200 nodes, you may want to run ansible with
``--forks=50``, ``--timeout=600`` and define the ``retry_stagger: 60``. ``--forks=50``, ``--timeout=600`` and define the ``retry_stagger: 60``.