Merge pull request #1020 from mattymo/synthscale

Add synthetic scale deployment mode
2017-02-22 19:15:46 +03:00 · 2017-02-22 19:15:46 +03:00 · 8cbf3fe5f8
commit 8cbf3fe5f8
parent 02137f8cee d821448e2f
11 changed files with 90 additions and 34 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
 .vagrant
 *.retry
 inventory/vagrant_ansible_inventory
+inventory/group_vars/fake_hosts.yml
 temp
 .idea
 .tox
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -124,6 +124,7 @@ before_script:
      -e local_release_dir=${PWD}/downloads
      -e resolvconf_mode=${RESOLVCONF_MODE}
      -e vault_deployment_type=${VAULT_DEPLOYMENT}
+      --limit "all:!fake_hosts"
      cluster.yml

    # Repeat deployment if testing upgrade
@ -150,18 +151,19 @@ before_script:
      -e resolvconf_mode=${RESOLVCONF_MODE} 
      -e weave_cpu_requests=${WEAVE_CPU_LIMIT} 
      -e weave_cpu_limit=${WEAVE_CPU_LIMIT} 
+      --limit "all:!fake_hosts" 
      $PLAYBOOK; 
      fi

    # Tests Cases
    ## Test Master API
-    - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/010_check-apiserver.yml $LOG_LEVEL
+    - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root --limit "all:!fake_hosts" tests/testcases/010_check-apiserver.yml $LOG_LEVEL

    ## Ping between 2 pods
-    - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL
+    - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root --limit "all:!fake_hosts" tests/testcases/030_check-network.yml $LOG_LEVEL

    ## Advanced DNS checks
-    - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/040_check-network-adv.yml $LOG_LEVEL
+    - ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root --limit "all:!fake_hosts" tests/testcases/040_check-network-adv.yml $LOG_LEVEL

    ## Idempotency checks 1/5 (repeat deployment)
    - >
@ -178,6 +180,7 @@ before_script:
      -e local_release_dir=${PWD}/downloads 
      -e etcd_deployment_type=${ETCD_DEPLOYMENT} 
      -e kubelet_deployment_type=${KUBELET_DEPLOYMENT} 
+      --limit "all:!fake_hosts" 
      cluster.yml;
      fi

@ -186,6 +189,7 @@ before_script:
      if [ "${IDEMPOT_CHECK}" = "true" ]; then
      ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} 
      -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root 
+      --limit "all:!fake_hosts" 
      tests/testcases/040_check-network-adv.yml $LOG_LEVEL;
      fi

@ -197,7 +201,8 @@ before_script:
      --private-key=${HOME}/.ssh/id_rsa 
      -e bootstrap_os=${BOOTSTRAP_OS} 
      -e ansible_python_interpreter=${PYPATH} 
-      -e reset_confirmation=yes
+      -e reset_confirmation=yes 
+      --limit "all:!fake_hosts"
      reset.yml;
      fi

@ -216,6 +221,7 @@ before_script:
      -e local_release_dir=${PWD}/downloads 
      -e etcd_deployment_type=${ETCD_DEPLOYMENT} 
      -e kubelet_deployment_type=${KUBELET_DEPLOYMENT} 
+      --limit "all:!fake_hosts" 
      cluster.yml;
      fi

@ -224,6 +230,7 @@ before_script:
      if [ "${IDEMPOT_CHECK}" = "true" ]; then
      ansible-playbook -i inventory/inventory.ini -e ansible_python_interpreter=${PYPATH} 
      -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root 
+      --limit "all:!fake_hosts" 
      tests/testcases/040_check-network-adv.yml $LOG_LEVEL;
      fi

@ -244,7 +251,7 @@ before_script:
 .coreos_calico_sep_variables: &coreos_calico_sep_variables
 # stage: deploy-gce-part1
  KUBE_NETWORK_PLUGIN: calico
-  CLOUD_IMAGE: coreos-stable-1235-6-0-v20170111        
+  CLOUD_IMAGE: coreos-stable-1235-6-0-v20170111
  CLOUD_REGION: us-west1-b
  CLUSTER_MODE: separate
  BOOTSTRAP_OS: coreos
@ -287,8 +294,8 @@ before_script:
  CLOUD_REGION: us-east1-b
  CLUSTER_MODE: default
  BOOTSTRAP_OS: coreos
-  RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12
  IDEMPOT_CHECK: "true"
+  RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12

 .rhel7_canal_sep_variables: &rhel7_canal_sep_variables
 # stage: deploy-gce-special
@ -310,16 +317,17 @@ before_script:
  KUBE_NETWORK_PLUGIN: calico
  CLOUD_IMAGE: centos-7
  CLOUD_REGION: europe-west1-b
-  CLUSTER_MODE: ha
+  CLUSTER_MODE: ha-scale
  IDEMPOT_CHECK: "true"

 .coreos_alpha_weave_ha_variables: &coreos_alpha_weave_ha_variables
 # stage: deploy-gce-special
  KUBE_NETWORK_PLUGIN: weave
-  CLOUD_IMAGE: coreos-alpha
+  CLOUD_IMAGE: coreos-alpha-1325-0-0-v20170216
  CLOUD_REGION: us-west1-a
-  CLUSTER_MODE: ha
+  CLUSTER_MODE: ha-scale
  BOOTSTRAP_OS: coreos
+  RESOLVCONF_MODE: host_resolvconf # This is required as long as the CoreOS stable channel uses docker < 1.12 (NOTE(review): this job uses the alpha image; confirm it still applies)

 .ubuntu_rkt_sep_variables: &ubuntu_rkt_sep_variables
 # stage: deploy-gce-part1
--- a/docs/test_cases.md
+++ b/docs/test_cases.md
@ -4,25 +4,40 @@ Travis CI test matrix
 GCE instances
 -------------

-Here is the test matrix for the Travis CI gates:
+Here is the test matrix for the CI gates:

 |           Network plugin|                  OS type|               GCE region|             Nodes layout|
 |-------------------------|-------------------------|-------------------------|-------------------------|
-|                    canal|       debian-8-kubespray|             asia-east1-a|                       ha|
+|                    canal|       debian-8-kubespray|             asia-east1-a|                 ha-scale|
 |                   calico|       debian-8-kubespray|           europe-west1-c|                  default|
 |                  flannel|                 centos-7|        asia-northeast1-c|                  default|
 |                   calico|                 centos-7|            us-central1-b|                       ha|
 |                    weave|                   rhel-7|               us-east1-c|                  default|
-|                    canal|            coreos-stable|               us-west1-b|                  default|
+|                    canal|            coreos-stable|               us-west1-b|                 ha-scale|
 |                    canal|                   rhel-7|        asia-northeast1-b|                 separate|
 |                    weave|       ubuntu-1604-xenial|           europe-west1-d|                 separate|
 |                   calico|            coreos-stable|            us-central1-f|                 separate|

-Where the nodes layout `default` is a non-HA two nodes setup with the separate `kube-node`
-and the `etcd` group merged with the `kube-master`. The `separate` layout is when
-there is only node of each type, which is a kube master, compute and etcd cluster member.
-And the `ha` layout stands for a two etcd nodes, two masters and a single worker node,
-partially intersecting though.
+
+Node Layouts
+------------
+
+There are four node layout types: `default`, `separate`, `ha`, and `scale`.
+
+
+`default` is a non-HA two-node setup with one separate `kube-node`
+and the `etcd` group merged with the `kube-master`.
+
+`separate` layout is when there is only one node of each type, which includes
+ a kube-master, kube-node, and etcd cluster member.
+
+`ha` layout consists of two etcd nodes, two masters and a single worker node,
+with role intersection.
+
+`scale` layout can be combined with the above layouts (e.g. `ha-scale`). It adds
+200 fake hosts to the Ansible inventory. This helps test TLS certificate
+generation at scale to prevent regressions and to profile certain long-running
+tasks. These nodes are never actually deployed, but certificates are generated for them.

 Note, the canal network plugin deploys flannel as well plus calico policy controller.

@ -40,15 +55,15 @@ GCE instances

 |               Stage|      Network plugin|             OS type|          GCE region|        Nodes layout
 |--------------------|--------------------|--------------------|--------------------|--------------------|
-|               part1|              calico|       coreos-stable|          us-west1-b|           separated|
+|               part1|              calico|       coreos-stable|          us-west1-b|            separate|
 |               part1|               canal|  debian-8-kubespray|          us-east1-b|                  ha|
 |               part1|               weave|              rhel-7|      europe-west1-b|             default|
 |               part2|             flannel|            centos-7|          us-west1-a|             default|
 |               part2|              calico|  debian-8-kubespray|       us-central1-b|             default|
 |               part2|               canal|       coreos-stable|          us-east1-b|             default|
-|             special|               canal|              rhel-7|          us-east1-b|           separated|
-|             special|               weave|  ubuntu-1604-xenial|       us-central1-b|           separated|
-|             special|              calico|            centos-7|      europe-west1-b|                  ha|
-|             special|               weave|        coreos-alpha|          us-west1-a|                  ha|
+|             special|               canal|              rhel-7|          us-east1-b|            separate|
+|             special|               weave|  ubuntu-1604-xenial|       us-central1-b|             default|
+|             special|              calico|            centos-7|      europe-west1-b|            ha-scale|
+|             special|               weave|        coreos-alpha|          us-west1-a|            ha-scale|

 The "Stage" means a build step of the build pipeline. The steps are ordered as `part1->part2->special`.
--- a/roles/download/tasks/set_docker_image_facts.yml
+++ b/roles/download/tasks/set_docker_image_facts.yml
@ -10,6 +10,7 @@
 - name: Register docker images info
  raw: >-
    {{ docker_bin_dir }}/docker images -q | xargs {{ docker_bin_dir }}/docker inspect -f "{{ '{{' }} .RepoTags {{ '}}' }},{{ '{{' }} .RepoDigests {{ '}}' }}"
+  no_log: true
  register: docker_images_raw
  failed_when: false
  check_mode: no
@ -17,6 +18,7 @@

 - set_fact:
    docker_images: "{{docker_images_raw.stdout|regex_replace('\\[|\\]|\\n]','')|regex_replace('\\s',',')}}"
+  no_log: true
  when: not download_always_pull|bool

 - set_fact:
--- a/roles/etcd/tasks/check_certs.yml
+++ b/roles/etcd/tasks/check_certs.yml
@ -2,6 +2,7 @@
 - name: "Check_certs | check if all certs have already been generated on first master"
  stat:
    path: "{{ etcd_cert_dir }}/{{ item }}"
+    get_md5: no
  delegate_to: "{{groups['etcd'][0]}}"
  register: etcdcert_master
  run_once: true
--- a/roles/etcd/tasks/gen_certs_script.yml
+++ b/roles/etcd/tasks/gen_certs_script.yml
@ -87,6 +87,7 @@
  args:
    executable: /bin/bash
  register: etcd_master_cert_data
+  no_log: true
  check_mode: no
  delegate_to: "{{groups['etcd'][0]}}"
  when: inventory_hostname in groups['etcd'] and sync_certs|default(false) and
@ -98,6 +99,7 @@
  args:
    executable: /bin/bash
  register: etcd_node_cert_data
+  no_log: true
  check_mode: no
  delegate_to: "{{groups['etcd'][0]}}"
  when: (('calico-rr' in groups and inventory_hostname in groups['calico-rr']) or
@ -109,6 +111,7 @@
  shell: "base64 -d <<< '{{etcd_master_cert_data.stdout|quote}}' | tar xz -C {{ etcd_cert_dir }}"
  args:
    executable: /bin/bash
+  no_log: true
  changed_when: false
  when: inventory_hostname in groups['etcd'] and sync_certs|default(false) and
        inventory_hostname != groups['etcd'][0]
--- a/roles/kubernetes/secrets/tasks/gen_certs_script.yml
+++ b/roles/kubernetes/secrets/tasks/gen_certs_script.yml
@ -80,6 +80,7 @@
  shell: "tar cfz - -C {{ kube_cert_dir }} -T /dev/stdin <<< {{ my_master_certs|join(' ') }} {{ all_node_certs|join(' ') }} | base64 --wrap=0"
  args:
    executable: /bin/bash
+  no_log: true
  register: master_cert_data
  check_mode: no
  delegate_to: "{{groups['kube-master'][0]}}"
@ -90,6 +91,7 @@
  shell: "tar cfz - -C {{ kube_cert_dir }} -T /dev/stdin <<< {{ my_node_certs|join(' ') }} | base64 --wrap=0"
  args:
    executable: /bin/bash
+  no_log: true
  register: node_cert_data
  check_mode: no
  delegate_to: "{{groups['kube-master'][0]}}"
@ -116,6 +118,7 @@

 - name: Gen_certs | Unpack certs on masters
  shell: "base64 -d < {{ cert_tempfile.stdout }} | tar xz -C {{ kube_cert_dir }}"
+  no_log: true
  changed_when: false
  check_mode: no
  when: inventory_hostname in groups['kube-master'] and sync_certs|default(false) and
@ -133,6 +136,7 @@
  shell: "base64 -d <<< '{{node_cert_data.stdout|quote}}' | tar xz -C {{ kube_cert_dir }}"
  args:
    executable: /bin/bash
+  no_log: true
  changed_when: false
  check_mode: no
  when: inventory_hostname in groups['kube-node'] and
--- a/tests/cloud_playbooks/create-gce.yml
+++ b/tests/cloud_playbooks/create-gce.yml
@ -13,7 +13,7 @@

    - set_fact:
        instance_names: >-
-          {%- if mode in ['separate', 'ha'] -%}
+          {%- if mode in ['separate', 'separate-scale', 'ha', 'ha-scale'] -%}
          k8s-{{test_name}}-1,k8s-{{test_name}}-2,k8s-{{test_name}}-3
          {%- else -%}
          k8s-{{test_name}}-1,k8s-{{test_name}}-2
@ -39,6 +39,18 @@
        src: ../templates/inventory-gce.j2
        dest: "{{ inventory_path }}"

+    - name: Make group_vars directory
+      file:
+        path: "{{ inventory_path|dirname }}/group_vars"
+        state: directory
+      when: mode in ['scale', 'separate-scale', 'ha-scale']
+
+    - name: Template fake hosts group vars
+      template:
+        src: ../templates/fake_hosts.yml.j2
+        dest: "{{ inventory_path|dirname }}/group_vars/fake_hosts.yml"
+      when: mode in ['scale', 'separate-scale', 'ha-scale']
+
    - name: Wait for SSH to come up
      wait_for: host={{item.public_ip}} port=22 delay=10 timeout=180 state=started
      with_items: "{{gce.instance_data}}"
--- a/tests/templates/fake_hosts.yml.j2
+++ b/tests/templates/fake_hosts.yml.j2
@ -0,0 +1,3 @@
+ansible_default_ipv4:
+  address: 255.255.255.255
+ansible_hostname: "{{ '{{' }}inventory_hostname}}"
--- a/tests/templates/inventory-gce.j2
+++ b/tests/templates/inventory-gce.j2
@ -2,12 +2,11 @@
 {% set node2 = gce.instance_data[1].name %}
 {{node1}} ansible_ssh_host={{gce.instance_data[0].public_ip}}
 {{node2}} ansible_ssh_host={{gce.instance_data[1].public_ip}}
-{% if mode is defined and mode in ["separate", "ha"] %}
+{% if mode is defined and mode in ["ha", "ha-scale", "separate", "separate-scale"] %}
 {% set node3 = gce.instance_data[2].name %}
 {{node3}} ansible_ssh_host={{gce.instance_data[2].public_ip}}
 {% endif %}
-
-{% if mode is defined and mode == "separate" %}
+{% if mode is defined and mode in ["separate", "separate-scale"] %}
 [kube-master]
 {{node1}}

@ -19,7 +18,7 @@

 [vault]
 {{node3}}
-{% elif mode is defined and mode == "ha" %}
+{% elif mode is defined and mode in ["ha", "ha-scale"] %}
 [kube-master]
 {{node1}}
 {{node2}}
@ -51,3 +50,12 @@
 [k8s-cluster:children]
 kube-node
 kube-master
+
+{% if mode is defined and mode in ["scale", "separate-scale", "ha-scale"] %}
+[fake_hosts]
+fake_scale_host[1:200]
+
+[kube-node:children]
+fake_hosts
+{% endif %}
+
--- a/tests/testcases/040_check-network-adv.yml
+++ b/tests/testcases/040_check-network-adv.yml
@ -35,31 +35,30 @@
      run_once: true
      delegate_to: "{{groups['kube-master'][0]}}"
      register: nca_pod
-      until: "{{ nca_pod.stdout_lines|length }} >= {{ groups['kube-node']|length * 2 }}"
+      until: "{{ nca_pod.stdout_lines|length }} >= {{ groups['kube-node']|intersect(play_hosts)|length * 2 }}"
      retries: 3
      delay: 10

    - name: Get netchecker agents
      uri: url=http://localhost:{{netchecker_port}}/api/v1/agents/ return_content=yes
      run_once: true
-      delegate_to: "{{groups['kube-node'][0]}}"
+      delegate_to: "{{groups['kube-master'][0]}}"
      register: agents
-      retries: 6
+      retries: 18
      delay: "{{ agent_report_interval }}"
      until: "{{ agents.content|length > 0 and
        agents.content[0] == '{' and
-        agents.content|from_json|length >= groups['kube-node']|length * 2 }}"
+        agents.content|from_json|length >= groups['kube-node']|intersect(play_hosts)|length * 2 }}"
      failed_when: false
      no_log: true

    - debug: var=agents.content|from_json
      failed_when: not agents|success and not agents.content=='{}'
-      delegate_to: "{{groups['kube-node'][0]}}"
      run_once: true

    - name: Check netchecker status
      uri: url=http://localhost:{{netchecker_port}}/api/v1/connectivity_check status_code=200 return_content=yes
-      delegate_to: "{{groups['kube-node'][0]}}"
+      delegate_to: "{{groups['kube-master'][0]}}"
      run_once: true
      register: result
      retries: 3
@ -70,9 +69,9 @@

    - debug: var=result.content|from_json
      failed_when: not result|success
-      delegate_to: "{{groups['kube-node'][0]}}"
      run_once: true
      when: not agents.content=='{}'
+      delegate_to: "{{groups['kube-master'][0]}}"

    - debug: msg="Cannot get reports from agents, consider as PASSING"
      run_once: true