From c96fa2f4fc783b2169532d270de6dfebef88fffd Mon Sep 17 00:00:00 2001 From: Matthew Mosesohn Date: Wed, 11 Jan 2017 18:15:04 +0300 Subject: [PATCH] Add scale thresholds to split etcd and k8s-masters Also adds calico-rr group if there are standalone etcd nodes. Now if there are 50 or more nodes, 3 etcd nodes will be standalone. If there are 200 or more nodes, 2 kube-masters will be standalone. If thresholds are exceeded, kube-node group cannot add nodes that belong to etcd or kube-master groups (according to above statements). --- contrib/inventory_builder/inventory.py | 54 +++++++++++++++++-- contrib/inventory_builder/requirements.yml | 48 ----------------- .../inventory_builder/tests/test_inventory.py | 28 ++++++++++ contrib/inventory_builder/tox.ini | 2 +- docs/getting-started.md | 16 +++--- docs/large-deployments.md | 10 ++++ 6 files changed, 96 insertions(+), 62 deletions(-) delete mode 100644 contrib/inventory_builder/requirements.yml diff --git a/contrib/inventory_builder/inventory.py b/contrib/inventory_builder/inventory.py index 2f02375c6..7e0a89f09 100644 --- a/contrib/inventory_builder/inventory.py +++ b/contrib/inventory_builder/inventory.py @@ -40,7 +40,8 @@ import os import re import sys -ROLES = ['kube-master', 'all', 'k8s-cluster:children', 'kube-node', 'etcd'] +ROLES = ['all', 'kube-master', 'kube-node', 'etcd', 'k8s-cluster:children', + 'calico-rr'] PROTECTED_NAMES = ROLES AVAILABLE_COMMANDS = ['help', 'print_cfg', 'print_ips', 'load'] _boolean_states = {'1': True, 'yes': True, 'true': True, 'on': True, @@ -51,10 +52,18 @@ def get_var_as_bool(name, default): value = os.environ.get(name, '') return _boolean_states.get(value.lower(), default) +# Configurable as shell vars start + CONFIG_FILE = os.environ.get("CONFIG_FILE", "./inventory.cfg") +# Reconfigures cluster distribution at scale +SCALE_THRESHOLD = int(os.environ.get("SCALE_THRESHOLD", 50)) +MASSIVE_SCALE_THRESHOLD = int(os.environ.get("MASSIVE_SCALE_THRESHOLD", 200)) + DEBUG = 
get_var_as_bool("DEBUG", True) HOST_PREFIX = os.environ.get("HOST_PREFIX", "node") +# Configurable as shell vars end + class KargoInventory(object): @@ -74,11 +83,16 @@ class KargoInventory(object): if changed_hosts: self.hosts = self.build_hostnames(changed_hosts) self.purge_invalid_hosts(self.hosts.keys(), PROTECTED_NAMES) - self.set_kube_master(list(self.hosts.keys())[:2]) self.set_all(self.hosts) self.set_k8s_cluster() - self.set_kube_node(self.hosts.keys()) self.set_etcd(list(self.hosts.keys())[:3]) + if len(self.hosts) >= SCALE_THRESHOLD: + self.set_kube_master(list(self.hosts.keys())[3:5]) + else: + self.set_kube_master(list(self.hosts.keys())[:2]) + self.set_kube_node(self.hosts.keys()) + if len(self.hosts) >= SCALE_THRESHOLD: + self.set_calico_rr(list(self.hosts.keys())[:3]) else: # Show help if no options self.show_help() sys.exit(0) @@ -205,8 +219,32 @@ class KargoInventory(object): self.add_host_to_group('k8s-cluster:children', 'kube-node') self.add_host_to_group('k8s-cluster:children', 'kube-master') + def set_calico_rr(self, hosts): + for host in hosts: + if self.config.has_option('kube-master', host): + self.debug("Not adding {0} to calico-rr group because it " + "conflicts with kube-master group".format(host)) + continue + if self.config.has_option('kube-node', host): + self.debug("Not adding {0} to calico-rr group because it " + "conflicts with kube-node group".format(host)) + continue + self.add_host_to_group('calico-rr', host) + def set_kube_node(self, hosts): for host in hosts: + if len(self.config['all']) >= SCALE_THRESHOLD: + if self.config.has_option('etcd', host): + self.debug("Not adding {0} to kube-node group because of " + "scale deployment and host is in etcd " + "group.".format(host)) + continue + if len(self.config['all']) >= MASSIVE_SCALE_THRESHOLD: + if self.config.has_option('kube-master', host): + self.debug("Not adding {0} to kube-node group because of " + "scale deployment and host is in kube-master " + "group.".format(host)) + 
continue self.add_host_to_group('kube-node', host) def set_etcd(self, hosts): @@ -275,7 +313,15 @@ print_ips - Write a space-delimited list of IPs from "all" group Advanced usage: Add another host after initial creation: inventory.py 10.10.1.5 Delete a host: inventory.py -10.10.1.3 -Delete a host by id: inventory.py -node1''' +Delete a host by id: inventory.py -node1 + +Configurable env vars: +DEBUG Enable debug printing. Default: True +CONFIG_FILE File to write config to Default: ./inventory.cfg +HOST_PREFIX Host prefix for generated hosts. Default: node +SCALE_THRESHOLD Separate ETCD role if # of nodes >= 50 +MASSIVE_SCALE_THRESHOLD Separate K8s master and ETCD if # of nodes >= 200 +''' print(help_text) def print_config(self): diff --git a/contrib/inventory_builder/requirements.yml b/contrib/inventory_builder/requirements.yml deleted file mode 100644 index 5e405b1ed..000000000 --- a/contrib/inventory_builder/requirements.yml +++ /dev/null @@ -1,48 +0,0 @@ ---- -- src: https://gitlab.com/kubespray-ansibl8s/k8s-common.git - path: roles/apps - scm: git - -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-dashboard.git -# path: roles/apps -# scm: git -# -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-kubedns.git -# path: roles/apps -# scm: git -# -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-elasticsearch.git -# path: roles/apps -# scm: git -# -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-redis.git -# path: roles/apps -# scm: git -# -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-memcached.git -# path: roles/apps -# scm: git -# -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-postgres.git -# path: roles/apps -# scm: git -# -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-pgbouncer.git -# path: roles/apps -# scm: git -# -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-heapster.git -# path: roles/apps -# scm: git -# -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-influxdb.git -# path: roles/apps -# scm: git -# -#- src: 
https://gitlab.com/kubespray-ansibl8s/k8s-kubedash.git -# path: roles/apps -# scm: git -# -#- src: https://gitlab.com/kubespray-ansibl8s/k8s-kube-logstash.git -# path: roles/apps -# scm: git diff --git a/contrib/inventory_builder/tests/test_inventory.py b/contrib/inventory_builder/tests/test_inventory.py index 681883772..ad393079d 100644 --- a/contrib/inventory_builder/tests/test_inventory.py +++ b/contrib/inventory_builder/tests/test_inventory.py @@ -210,3 +210,31 @@ class TestInventory(unittest.TestCase): self.inv.set_etcd([host]) self.assertTrue(host in self.inv.config[group]) + + def test_scale_scenario_one(self): + num_nodes = 50 + hosts = OrderedDict() + + for hostid in range(1, num_nodes+1): + hosts["node" + str(hostid)] = "" + + self.inv.set_all(hosts) + self.inv.set_etcd(hosts.keys()[0:3]) + self.inv.set_kube_master(hosts.keys()[0:2]) + self.inv.set_kube_node(hosts.keys()) + for h in range(3): + self.assertFalse(hosts.keys()[h] in self.inv.config['kube-node']) + + def test_scale_scenario_two(self): + num_nodes = 500 + hosts = OrderedDict() + + for hostid in range(1, num_nodes+1): + hosts["node" + str(hostid)] = "" + + self.inv.set_all(hosts) + self.inv.set_etcd(hosts.keys()[0:3]) + self.inv.set_kube_master(hosts.keys()[3:5]) + self.inv.set_kube_node(hosts.keys()) + for h in range(5): + self.assertFalse(hosts.keys()[h] in self.inv.config['kube-node']) diff --git a/contrib/inventory_builder/tox.ini b/contrib/inventory_builder/tox.ini index 8ca254295..ae675f76b 100644 --- a/contrib/inventory_builder/tox.ini +++ b/contrib/inventory_builder/tox.ini @@ -11,7 +11,7 @@ deps = -r{toxinidir}/test-requirements.txt setenv = VIRTUAL_ENV={envdir} passenv = http_proxy HTTP_PROXY https_proxy HTTPS_PROXY no_proxy NO_PROXY -commands = py.test -vv #{posargs:./tests} +commands = pytest -vv #{posargs:./tests} [testenv:pep8] usedevelop = False diff --git a/docs/getting-started.md b/docs/getting-started.md index 906f78c07..8958d609d 100644 --- a/docs/getting-started.md +++ 
b/docs/getting-started.md @@ -23,19 +23,17 @@ Building your own inventory Ansible inventory can be stored in 3 formats: YAML, JSON, or inifile. There is an example inventory located -[here](https://github.com/kubernetes-incubator/kargo/blob/master/inventory/inventory.example): +[here](https://github.com/kubernetes-incubator/kargo/blob/master/inventory/inventory.example). -``` -cp -r inventory my_inventory -cp my_inventory/inventory.example my_inventory/inventory.cfg -# edit the inventory file as needed -``` - -Or you can use an +You can use an [inventory generator](https://github.com/kubernetes-incubator/kargo/blob/master/contrib/inventory_builder/inventory.py) to create or modify an Ansible inventory. Currently, it is limited in functionality and is only use for making a basic Kargo cluster, but it does -support creating large clusters. For example: +support creating large clusters. It now supports +separating the ETCD and Kubernetes master roles from the node role if the +cluster size exceeds a certain threshold. Run `inventory.py help` for more information. + +Example inventory generator usage: ``` cp -r inventory my_inventory diff --git a/docs/large-deployments.md b/docs/large-deployments.md index 20bc7fefd..721064942 100644 --- a/docs/large-deployments.md +++ b/docs/large-deployments.md @@ -27,5 +27,15 @@ For a large scaled deployments, consider the following configuration changes: end up with the 'm' skipped for docker as well. This is required as docker does not understand k8s units well. +* Add calico-rr nodes if you are deploying with Calico or Canal. Nodes recover + from host/network interruption much more quickly with calico-rr. Note that the + calico-rr role must be on a host without the kube-master or kube-node role + (but the etcd role is okay). + +* Check out the + [Inventory](https://github.com/kubernetes-incubator/kargo/blob/master/docs/getting-started.md#building-your-own-inventory) + section of the Getting started guide for tips on creating a large-scale + Ansible inventory. 
+ For example, when deploying 200 nodes, you may want to run ansible with ``--forks=50``, ``--timeout=600`` and define the ``retry_stagger: 60``.