Merge pull request #1020 from mattymo/synthscale

Add synthetic scale deployment mode
This commit is contained in:
Matthew Mosesohn 2017-02-22 19:15:46 +03:00 committed by GitHub
commit 8cbf3fe5f8
11 changed files with 90 additions and 34 deletions

1
.gitignore vendored
View file

@ -1,6 +1,7 @@
.vagrant
*.retry
inventory/vagrant_ansible_inventory
inventory/group_vars/fake_hosts.yml
temp
.idea
.tox

View file

@ -124,6 +124,7 @@ before_script:
-e local_release_dir=${PWD}/downloads
-e resolvconf_mode=${RESOLVCONF_MODE}
-e vault_deployment_type=${VAULT_DEPLOYMENT}
--limit "all:!fake_hosts"
cluster.yml
# Repeat deployment if testing upgrade
@ -150,18 +151,19 @@ before_script:
-e resolvconf_mode=${RESOLVCONF_MODE}
-e weave_cpu_requests=${WEAVE_CPU_LIMIT}
-e weave_cpu_limit=${WEAVE_CPU_LIMIT}
--limit "all:!fake_hosts"
$PLAYBOOK;
fi
# Tests Cases
## Test Master API
- ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/010_check-apiserver.yml $LOG_LEVEL
- ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root --limit "all:!fake_hosts" tests/testcases/010_check-apiserver.yml $LOG_LEVEL
## Ping the between 2 pod
- ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL
- ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root --limit "all:!fake_hosts" tests/testcases/030_check-network.yml $LOG_LEVEL
## Advanced DNS checks
- ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/040_check-network-adv.yml $LOG_LEVEL
- ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml $LOG_LEVEL
## Idempotency checks 1/5 (repeat deployment)
- >
@ -178,6 +180,7 @@ before_script:
-e local_release_dir=${PWD}/downloads
-e etcd_deployment_type=${ETCD_DEPLOYMENT}
-e kubelet_deployment_type=${KUBELET_DEPLOYMENT}
--limit "all:!fake_hosts"
cluster.yml;
fi
@ -186,6 +189,7 @@ before_script:
if [ "${IDEMPOT_CHECK}" = "true" ]; then
ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH}
-u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root
--limit "all:!fake_hosts"
tests/testcases/040_check-network-adv.yml $LOG_LEVEL;
fi
@ -197,7 +201,8 @@ before_script:
--private-key=${HOME}/.ssh/id_rsa
-e bootstrap_os=${BOOTSTRAP_OS}
-e ansible_python_interpreter=${PYPATH}
-e reset_confirmation=yes
-e reset_confirmation=yes
--limit "all:!fake_hosts"
reset.yml;
fi
@ -216,6 +221,7 @@ before_script:
-e local_release_dir=${PWD}/downloads
-e etcd_deployment_type=${ETCD_DEPLOYMENT}
-e kubelet_deployment_type=${KUBELET_DEPLOYMENT}
--limit "all:!fake_hosts"
cluster.yml;
fi
@ -224,6 +230,7 @@ before_script:
if [ "${IDEMPOT_CHECK}" = "true" ]; then
ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH}
-u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root
--limit "all:!fake_hosts"
tests/testcases/040_check-network-adv.yml $LOG_LEVEL;
fi
@ -244,7 +251,7 @@ before_script:
.coreos_calico_sep_variables: &coreos_calico_sep_variables
# stage: deploy-gce-part1
KUBE_NETWORK_PLUGIN: calico
CLOUD_IMAGE: coreos-stable-1235-6-0-v20170111
CLOUD_IMAGE: coreos-stable-1235-6-0-v20170111
CLOUD_REGION: us-west1-b
CLUSTER_MODE: separate
BOOTSTRAP_OS: coreos
@ -287,8 +294,8 @@ before_script:
CLOUD_REGION: us-east1-b
CLUSTER_MODE: default
BOOTSTRAP_OS: coreos
RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12
IDEMPOT_CHECK: "true"
RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12
.rhel7_canal_sep_variables: &rhel7_canal_sep_variables
# stage: deploy-gce-special
@ -310,16 +317,17 @@ before_script:
KUBE_NETWORK_PLUGIN: calico
CLOUD_IMAGE: centos-7
CLOUD_REGION: europe-west1-b
CLUSTER_MODE: ha
CLUSTER_MODE: ha-scale
IDEMPOT_CHECK: "true"
.coreos_alpha_weave_ha_variables: &coreos_alpha_weave_ha_variables
# stage: deploy-gce-special
KUBE_NETWORK_PLUGIN: weave
CLOUD_IMAGE: coreos-alpha
CLOUD_IMAGE: coreos-alpha-1325-0-0-v20170216
CLOUD_REGION: us-west1-a
CLUSTER_MODE: ha
CLUSTER_MODE: ha-scale
BOOTSTRAP_OS: coreos
RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12
.ubuntu_rkt_sep_variables: &ubuntu_rkt_sep_variables
# stage: deploy-gce-part1

View file

@ -4,25 +4,40 @@ Travis CI test matrix
GCE instances
-------------
Here is the test matrix for the Travis CI gates:
Here is the test matrix for the CI gates:
| Network plugin| OS type| GCE region| Nodes layout|
|-------------------------|-------------------------|-------------------------|-------------------------|
| canal| debian-8-kubespray| asia-east1-a| ha|
| canal| debian-8-kubespray| asia-east1-a| ha-scale|
| calico| debian-8-kubespray| europe-west1-c| default|
| flannel| centos-7| asia-northeast1-c| default|
| calico| centos-7| us-central1-b| ha|
| weave| rhel-7| us-east1-c| default|
| canal| coreos-stable| us-west1-b| default|
| canal| coreos-stable| us-west1-b| ha-scale|
| canal| rhel-7| asia-northeast1-b| separate|
| weave| ubuntu-1604-xenial| europe-west1-d| separate|
| calico| coreos-stable| us-central1-f| separate|
Where the nodes layout `default` is a non-HA two nodes setup with the separate `kube-node`
and the `etcd` group merged with the `kube-master`. The `separate` layout is when
there is only node of each type, which is a kube master, compute and etcd cluster member.
And the `ha` layout stands for a two etcd nodes, two masters and a single worker node,
partially intersecting though.
Node Layouts
------------
There are four node layout types: `default`, `separate`, `ha`, and `scale`.
`default` is a non-HA two nodes setup with one separate `kube-node`
and the `etcd` group merged with the `kube-master`.
`separate` layout is when there is only node of each type, which includes
a kube-master, kube-node, and etcd cluster member.
`ha` layout consists of two etcd nodes, two masters and a single worker node,
with role intersection.
`scale` layout can be combined with above layouts. It includes 200 fake hosts
in the Ansible inventory. This helps test TLS certificate generation at scale
to prevent regressions and profile certain long-running tasks. These nodes are
never actually deployed, but certificates are generated for them.
Note, the canal network plugin deploys flannel as well plus calico policy controller.
@ -40,15 +55,15 @@ GCE instances
| Stage| Network plugin| OS type| GCE region| Nodes layout
|--------------------|--------------------|--------------------|--------------------|--------------------|
| part1| calico| coreos-stable| us-west1-b| separated|
| part1| calico| coreos-stable| us-west1-b| separate|
| part1| canal| debian-8-kubespray| us-east1-b| ha|
| part1| weave| rhel-7| europe-west1-b| default|
| part2| flannel| centos-7| us-west1-a| default|
| part2| calico| debian-8-kubespray| us-central1-b| default|
| part2| canal| coreos-stable| us-east1-b| default|
| special| canal| rhel-7| us-east1-b| separated|
| special| weave| ubuntu-1604-xenial| us-central1-b| separated|
| special| calico| centos-7| europe-west1-b| ha|
| special| weave| coreos-alpha| us-west1-a| ha|
| special| canal| rhel-7| us-east1-b| separate|
| special| weave| ubuntu-1604-xenial| us-central1-b| default|
| special| calico| centos-7| europe-west1-b| ha-scale|
| special| weave| coreos-alpha| us-west1-a| ha-scale|
The "Stage" means a build step of the build pipeline. The steps are ordered as `part1->part2->special`.

View file

@ -10,6 +10,7 @@
- name: Register docker images info
raw: >-
{{ docker_bin_dir }}/docker images -q | xargs {{ docker_bin_dir }}/docker inspect -f "{{ '{{' }} .RepoTags {{ '}}' }},{{ '{{' }} .RepoDigests {{ '}}' }}"
no_log: true
register: docker_images_raw
failed_when: false
check_mode: no
@ -17,6 +18,7 @@
- set_fact:
docker_images: "{{docker_images_raw.stdout|regex_replace('\\[|\\]|\\n]','')|regex_replace('\\s',',')}}"
no_log: true
when: not download_always_pull|bool
- set_fact:

View file

@ -2,6 +2,7 @@
- name: "Check_certs | check if all certs have already been generated on first master"
stat:
path: "{{ etcd_cert_dir }}/{{ item }}"
get_md5: no
delegate_to: "{{groups['etcd'][0]}}"
register: etcdcert_master
run_once: true

View file

@ -87,6 +87,7 @@
args:
executable: /bin/bash
register: etcd_master_cert_data
no_log: true
check_mode: no
delegate_to: "{{groups['etcd'][0]}}"
when: inventory_hostname in groups['etcd'] and sync_certs|default(false) and
@ -98,6 +99,7 @@
args:
executable: /bin/bash
register: etcd_node_cert_data
no_log: true
check_mode: no
delegate_to: "{{groups['etcd'][0]}}"
when: (('calico-rr' in groups and inventory_hostname in groups['calico-rr']) or
@ -109,6 +111,7 @@
shell: "base64 -d <<< '{{etcd_master_cert_data.stdout|quote}}' | tar xz -C {{ etcd_cert_dir }}"
args:
executable: /bin/bash
no_log: true
changed_when: false
when: inventory_hostname in groups['etcd'] and sync_certs|default(false) and
inventory_hostname != groups['etcd'][0]

View file

@ -80,6 +80,7 @@
shell: "tar cfz - -C {{ kube_cert_dir }} -T /dev/stdin <<< {{ my_master_certs|join(' ') }} {{ all_node_certs|join(' ') }} | base64 --wrap=0"
args:
executable: /bin/bash
no_log: true
register: master_cert_data
check_mode: no
delegate_to: "{{groups['kube-master'][0]}}"
@ -90,6 +91,7 @@
shell: "tar cfz - -C {{ kube_cert_dir }} -T /dev/stdin <<< {{ my_node_certs|join(' ') }} | base64 --wrap=0"
args:
executable: /bin/bash
no_log: true
register: node_cert_data
check_mode: no
delegate_to: "{{groups['kube-master'][0]}}"
@ -116,6 +118,7 @@
- name: Gen_certs | Unpack certs on masters
shell: "base64 -d < {{ cert_tempfile.stdout }} | tar xz -C {{ kube_cert_dir }}"
no_log: true
changed_when: false
check_mode: no
when: inventory_hostname in groups['kube-master'] and sync_certs|default(false) and
@ -133,6 +136,7 @@
shell: "base64 -d <<< '{{node_cert_data.stdout|quote}}' | tar xz -C {{ kube_cert_dir }}"
args:
executable: /bin/bash
no_log: true
changed_when: false
check_mode: no
when: inventory_hostname in groups['kube-node'] and

View file

@ -13,7 +13,7 @@
- set_fact:
instance_names: >-
{%- if mode in ['separate', 'ha'] -%}
{%- if mode in ['separate', 'separate-scale', 'ha', 'ha-scale'] -%}
k8s-{{test_name}}-1,k8s-{{test_name}}-2,k8s-{{test_name}}-3
{%- else -%}
k8s-{{test_name}}-1,k8s-{{test_name}}-2
@ -39,6 +39,18 @@
src: ../templates/inventory-gce.j2
dest: "{{ inventory_path }}"
- name: Make group_vars directory
file:
path: "{{ inventory_path|dirname }}/group_vars"
state: directory
when: mode in ['scale', 'separate-scale', 'ha-scale']
- name: Template fake hosts group vars
template:
src: ../templates/fake_hosts.yml.j2
dest: "{{ inventory_path|dirname }}/group_vars/fake_hosts.yml"
when: mode in ['scale', 'separate-scale', 'ha-scale']
- name: Wait for SSH to come up
wait_for: host={{item.public_ip}} port=22 delay=10 timeout=180 state=started
with_items: "{{gce.instance_data}}"

View file

@ -0,0 +1,3 @@
ansible_default_ipv4:
address: 255.255.255.255
ansible_hostname: "{{ '{{' }}inventory_hostname}}"

View file

@ -2,12 +2,11 @@
{% set node2 = gce.instance_data[1].name %}
{{node1}} ansible_ssh_host={{gce.instance_data[0].public_ip}}
{{node2}} ansible_ssh_host={{gce.instance_data[1].public_ip}}
{% if mode is defined and mode in ["separate", "ha"] %}
{% if mode is defined and mode in ["ha", "ha-scale", "separate", "separate-scale"] %}
{% set node3 = gce.instance_data[2].name %}
{{node3}} ansible_ssh_host={{gce.instance_data[2].public_ip}}
{% endif %}
{% if mode is defined and mode == "separate" %}
{% if mode is defined and mode in ["separate", "separate-scale"] %}
[kube-master]
{{node1}}
@ -19,7 +18,7 @@
[vault]
{{node3}}
{% elif mode is defined and mode == "ha" %}
{% elif mode is defined and mode in ["ha", "ha-scale"] %}
[kube-master]
{{node1}}
{{node2}}
@ -51,3 +50,12 @@
[k8s-cluster:children]
kube-node
kube-master
{% if mode is defined and mode in ["scale", "separate-scale", "ha-scale"] %}
[fake_hosts]
fake_scale_host[1:200]
[kube-node:children]
fake_hosts
{% endif %}

View file

@ -35,31 +35,30 @@
run_once: true
delegate_to: "{{groups['kube-master'][0]}}"
register: nca_pod
until: "{{ nca_pod.stdout_lines|length }} >= {{ groups['kube-node']|length * 2 }}"
until: "{{ nca_pod.stdout_lines|length }} >= {{ groups['kube-node']|intersect(play_hosts)|length * 2 }}"
retries: 3
delay: 10
- name: Get netchecker agents
uri: url=http://localhost:{{netchecker_port}}/api/v1/agents/ return_content=yes
run_once: true
delegate_to: "{{groups['kube-node'][0]}}"
delegate_to: "{{groups['kube-master'][0]}}"
register: agents
retries: 6
retries: 18
delay: "{{ agent_report_interval }}"
until: "{{ agents.content|length > 0 and
agents.content[0] == '{' and
agents.content|from_json|length >= groups['kube-node']|length * 2 }}"
agents.content|from_json|length >= groups['kube-node']|intersect(play_hosts)|length * 2 }}"
failed_when: false
no_log: true
- debug: var=agents.content|from_json
failed_when: not agents|success and not agents.content=='{}'
delegate_to: "{{groups['kube-node'][0]}}"
run_once: true
- name: Check netchecker status
uri: url=http://localhost:{{netchecker_port}}/api/v1/connectivity_check status_code=200 return_content=yes
delegate_to: "{{groups['kube-node'][0]}}"
delegate_to: "{{groups['kube-master'][0]}}"
run_once: true
register: result
retries: 3
@ -70,9 +69,9 @@
- debug: var=result.content|from_json
failed_when: not result|success
delegate_to: "{{groups['kube-node'][0]}}"
run_once: true
when: not agents.content=='{}'
delegate_to: "{{groups['kube-master'][0]}}"
- debug: msg="Cannot get reports from agents, consider as PASSING"
run_once: true