Fix random failure in debug: var=result.content|from_json (#4094)
* Fix random failure in debug: var=result.content|from_json
* netchecker agents are deployed on all k8s-cluster group members
* reducing limits/requests is not enough, switching to n1-standard-2
* gce_centos7 needs more cpu
This commit is contained in:
parent
3e52f1a4e9
commit
61d88b8db2
4 changed files with 57 additions and 8 deletions
|
@ -1,7 +1,7 @@
|
|||
# Instance settings
|
||||
cloud_image_family: centos-7
|
||||
cloud_region: us-central1-c
|
||||
cloud_machine_type: "n1-standard-1"
|
||||
cloud_machine_type: "n1-standard-2"
|
||||
mode: ha
|
||||
|
||||
# Deployment settings
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# Instance settings
|
||||
cloud_image_family: centos-7
|
||||
cloud_region: us-central1-c
|
||||
cloud_machine_type: "n1-standard-1"
|
||||
cloud_machine_type: "n1-standard-2"
|
||||
mode: ha
|
||||
|
||||
# Deployment settings
|
||||
|
@ -15,7 +15,7 @@ deploy_netchecker: true
|
|||
dns_min_replicas: 1
|
||||
cloud_provider: gce
|
||||
kube_encrypt_secret_data: true
|
||||
ingress_nginx_enabled: true
|
||||
#ingress_nginx_enabled: true
|
||||
cert_manager_enabled: true
|
||||
metrics_server_enabled: true
|
||||
kube_token_auth: true
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# Instance settings
|
||||
cloud_image_family: centos-7
|
||||
cloud_region: us-central1-c
|
||||
cloud_machine_type: "n1-standard-1"
|
||||
cloud_machine_type: "n1-standard-2"
|
||||
mode: default
|
||||
|
||||
# Deployment settings
|
||||
|
|
|
@ -37,9 +37,23 @@
|
|||
run_once: true
|
||||
delegate_to: "{{groups['kube-master'][0]}}"
|
||||
register: nca_pod
|
||||
until: nca_pod.stdout_lines|length >= groups['kube-node']|intersect(play_hosts)|length * 2
|
||||
until: nca_pod.stdout_lines|length >= groups['k8s-cluster']|intersect(play_hosts)|length * 2
|
||||
retries: 3
|
||||
delay: 10
|
||||
failed_when: false
|
||||
|
||||
- command: "{{ bin_dir }}/kubectl -n {{netcheck_namespace}} describe pod -l app={{ item }}"
|
||||
run_once: true
|
||||
delegate_to: "{{groups['kube-master'][0]}}"
|
||||
no_log: false
|
||||
with_items:
|
||||
- netchecker-agent
|
||||
- netchecker-agent-hostnet
|
||||
when: not nca_pod is success
|
||||
|
||||
- debug: var=nca_pod.stdout_lines
|
||||
failed_when: not nca_pod is success
|
||||
run_once: true
|
||||
|
||||
- name: Get netchecker agents
|
||||
uri: url=http://{{ ansible_default_ipv4.address }}:{{netchecker_port}}/api/v1/agents/ return_content=yes
|
||||
|
@ -50,7 +64,7 @@
|
|||
delay: "{{ agent_report_interval }}"
|
||||
until: agents.content|length > 0 and
|
||||
agents.content[0] == '{' and
|
||||
agents.content|from_json|length >= groups['kube-node']|intersect(play_hosts)|length * 2
|
||||
agents.content|from_json|length >= groups['k8s-cluster']|intersect(play_hosts)|length * 2
|
||||
failed_when: false
|
||||
no_log: true
|
||||
|
||||
|
@ -65,16 +79,51 @@
|
|||
register: result
|
||||
retries: 3
|
||||
delay: "{{ agent_report_interval }}"
|
||||
until: result.content|length > 0 and
|
||||
result.content[0] == '{'
|
||||
no_log: true
|
||||
failed_when: false
|
||||
when:
|
||||
- agents.content != '{}'
|
||||
|
||||
- debug: var=ncs_pod
|
||||
run_once: true
|
||||
when: not result is success
|
||||
|
||||
- command: "{{ bin_dir }}/kubectl -n kube-system logs -l k8s-app=kube-proxy"
|
||||
run_once: true
|
||||
when: not result is success
|
||||
delegate_to: "{{groups['kube-master'][0]}}"
|
||||
no_log: false
|
||||
|
||||
- command: "{{ bin_dir }}/kubectl -n kube-system logs -l k8s-app={{item}} --all-containers"
|
||||
run_once: true
|
||||
when: not result is success
|
||||
delegate_to: "{{groups['kube-master'][0]}}"
|
||||
no_log: false
|
||||
with_items:
|
||||
- kube-router
|
||||
- flannel
|
||||
- contiv-ovs
|
||||
- contiv-netplugin
|
||||
- contiv-netmaster
|
||||
- canal-node
|
||||
- calico-node
|
||||
- cilium
|
||||
|
||||
- debug: var=result.content|from_json
|
||||
failed_when: not result is success
|
||||
run_once: true
|
||||
when: not agents.content == '{}'
|
||||
delegate_to: "{{groups['kube-master'][0]}}"
|
||||
when:
|
||||
- not agents.content == '{}'
|
||||
- result.content[0] == '{'
|
||||
|
||||
- debug: var=result
|
||||
failed_when: not result is success
|
||||
run_once: true
|
||||
when:
|
||||
- not agents.content == '{}'
|
||||
- result.content[0] != '{'
|
||||
|
||||
- debug: msg="Cannot get reports from agents, consider as PASSING"
|
||||
run_once: true
|
||||
|
|
Loading…
Reference in a new issue