From 9f6c2fd32a2c44341249e17b851299170e3fda74 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Wed, 9 Nov 2016 14:15:27 +0100 Subject: [PATCH] Label k8s apps, adjust collect/upload info steps - Drop debugs from collect-info playbook - Drop sudo from collect-info step and add target dir var (required for travis jobs) - Label all k8s apps, including static manifests - Add logs for K8s apps to be collected as well - Fix upload to GCS as a public-read tarball Signed-off-by: Bogdan Dobrelya --- .travis.yml | 11 ++-- .../manifests/kube-apiserver.manifest.j2 | 2 + .../kube-controller-manager.manifest.j2 | 2 + .../manifests/kube-scheduler.manifest.j2 | 2 + .../manifests/kube-proxy.manifest.j2 | 2 + .../manifests/nginx-proxy.manifest.j2 | 2 + scripts/collect-info.yaml | 49 ++++++++++++--- tests/cloud_playbooks/templates/boto.j2 | 11 ++++ .../gcs_life.json.j2} | 2 +- tests/cloud_playbooks/upload-logs-gcs.yml | 62 +++++++++++-------- 10 files changed, 106 insertions(+), 39 deletions(-) create mode 100644 tests/cloud_playbooks/templates/boto.j2 rename tests/cloud_playbooks/{files/gcs_life.json => templates/gcs_life.json.j2} (59%) diff --git a/.travis.yml b/.travis.yml index 3bbb46d1c..e2a9f9f07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -103,11 +103,11 @@ env: before_install: # Install Ansible. 
- - pip install --user boto -U - pip install --user ansible - pip install --user netaddr # W/A https://github.com/ansible/ansible-modules-core/issues/5196#issuecomment-253766186 - pip install --user apache-libcloud==0.20.1 + - pip install --user boto==2.9.0 -U cache: - directories: @@ -149,16 +149,19 @@ script: - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root tests/testcases/030_check-network.yml $LOG_LEVEL after_failure: - - $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root scripts/collect-info.yaml + - > + $HOME/.local/bin/ansible-playbook -i inventory/inventory.ini -u $SSH_USER + -e ansible_ssh_user=$SSH_USER $SSH_ARGS -b --become-user=root -e dir=$HOME + scripts/collect-info.yaml - > $HOME/.local/bin/ansible-playbook tests/cloud_playbooks/upload-logs-gcs.yml -i "localhost," -c local - -e test_id=${TEST_ID} -e kube_network_plugin=${KUBE_NETWORK_PLUGIN} + -e gce_project_id=${GCE_PROJECT_ID} -e gs_key=${GS_ACCESS_KEY_ID} -e gs_skey=${GS_SECRET_ACCESS_KEY} -e ostype=${CLOUD_IMAGE} -e commit=${TRAVIS_COMMIT} - -e pr=${TRAVIS_PULL_REQUEST} + -e dir=${HOME} after_script: - > diff --git a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 index 4100e8a34..a6718f9e5 100644 --- a/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-apiserver namespace: kube-system + labels: + k8s-app: kube-apiserver spec: hostNetwork: true containers: diff --git a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 index 3a9e1ef1b..a528f361e 100644 --- 
a/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-controller-manager namespace: kube-system + labels: + k8s-app: kube-controller spec: hostNetwork: true containers: diff --git a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 index 024ddbfaa..15a705937 100644 --- a/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 +++ b/roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-scheduler namespace: kube-system + labels: + k8s-app: kube-scheduler spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 index 7abffe053..86d1e6f9e 100644 --- a/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: kube-proxy namespace: kube-system + labels: + k8s-app: kube-proxy spec: hostNetwork: true containers: diff --git a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 index 50e054268..8e5dfcc11 100644 --- a/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 +++ b/roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2 @@ -3,6 +3,8 @@ kind: Pod metadata: name: nginx-proxy namespace: kube-system + labels: + k8s-app: kube-nginx spec: hostNetwork: true containers: diff --git a/scripts/collect-info.yaml b/scripts/collect-info.yaml index 685b8b787..0ba47866e 100644 --- a/scripts/collect-info.yaml +++ b/scripts/collect-info.yaml @@ -1,10 +1,9 @@ --- - hosts: all - become: true + become: false 
gather_facts: no vars: - debug: false commands: - name: timedate_info cmd: timedatectl status @@ -26,6 +25,37 @@ cmd: journalctl -p err --utc --no-pager - name: etcd_info cmd: etcdctl --debug cluster-health + - name: weave_info + cmd: weave report | jq "." + - name: weave_logs + cmd: docker logs weave > weave.log + - name: kubedns_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kubedns -o name`; + do kubectl logs \$i --namespace kube-system kubedns > kubedns.log; done" + - name: apiserver_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-apiserver -o name`; + do kubectl logs \$i --namespace kube-system > kube-apiserver.log; done" + - name: controller_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-controller -o name`; + do kubectl logs \$i --namespace kube-system > kube-controller.log; done" + - name: scheduler_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-scheduler -o name`; + do kubectl logs \$i --namespace kube-system > kube-scheduler.log; done" + - name: proxy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-proxy -o name`; + do kubectl logs \$i --namespace kube-system > kube-proxy.log; done" + - name: nginx_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=kube-nginx -o name`; + do kubectl logs \$i --namespace kube-system > kube-nginx.log; done" + - name: flannel_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l app=flannel -o name`; + do kubectl logs \$i --namespace kube-system flannel-container > flannel.log; done" + - name: canal_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=canal-node -o name`; + do kubectl logs \$i --namespace kube-system flannel > flannel.log; done" + - name: calico_policy_logs + cmd: sh -c "for i in `kubectl get pods --all-namespaces -l k8s-app=calico-policy -o name`; + do kubectl logs \$i --namespace kube-system 
calico-policy-controller > calico-policy-controller.log; done" logs: - /var/log/syslog @@ -38,6 +68,15 @@ - /var/log/calico/bird6/current - /var/log/calico/felix/current - /var/log/calico/confd/current + - weave.log + - kubedns.log + - kube-apiserver.log + - kube-controller.log + - kube-scheduler.log + - kube-proxy.log + - kube-nginx.log + - flannel.log + - calico-policy-controller.log tasks: - name: Storing commands output @@ -47,10 +86,6 @@ with_items: "{{commands}}" no_log: True - - debug: var=item - with_items: "{{output.results}}" - when: debug - - name: Fetch results fetch: src={{ item.name }} dest=/tmp/collect-info/commands with_items: "{{commands}}" @@ -60,7 +95,7 @@ with_items: "{{logs}}" - name: Pack results and logs - local_action: shell GZIP=-9 tar --remove-files -cvzf logs.tar.gz -C /tmp collect-info + local_action: shell GZIP=-9 tar --remove-files -cvzf {{dir|default(".")}}/logs.tar.gz -C /tmp collect-info run_once: true - name: Clean up collected command outputs diff --git a/tests/cloud_playbooks/templates/boto.j2 b/tests/cloud_playbooks/templates/boto.j2 new file mode 100644 index 000000000..660f1a0a3 --- /dev/null +++ b/tests/cloud_playbooks/templates/boto.j2 @@ -0,0 +1,11 @@ +[Credentials] +gs_access_key_id = {{ gs_key }} +gs_secret_access_key = {{ gs_skey }} +[Boto] +https_validate_certificates = True +[GoogleCompute] +[GSUtil] +default_project_id = {{ gce_project_id }} +content_language = en +default_api_version = 2 +[OAuth2] diff --git a/tests/cloud_playbooks/files/gcs_life.json b/tests/cloud_playbooks/templates/gcs_life.json.j2 similarity index 59% rename from tests/cloud_playbooks/files/gcs_life.json rename to tests/cloud_playbooks/templates/gcs_life.json.j2 index eaab30b4f..a666c8fef 100644 --- a/tests/cloud_playbooks/files/gcs_life.json +++ b/tests/cloud_playbooks/templates/gcs_life.json.j2 @@ -3,7 +3,7 @@ [ { "action": {"type": "Delete"}, - "condition": {"age": 2} + "condition": {"age": {{expire_days}}} } ] } diff --git 
a/tests/cloud_playbooks/upload-logs-gcs.yml b/tests/cloud_playbooks/upload-logs-gcs.yml index 12013798d..80d651ba4 100644 --- a/tests/cloud_playbooks/upload-logs-gcs.yml +++ b/tests/cloud_playbooks/upload-logs-gcs.yml @@ -3,65 +3,73 @@ become: false gather_facts: no + vars: + expire_days: 2 + tasks: - name: Generate uniq bucket name prefix - shell: date +%s | sha256sum | base64 | head -c 32 + shell: date +%Y%m%d register: out - name: replace_test_id set_fact: - test_name: "kargo-{{ commit }}-{{ pr }}-{{ out.stdout|lower }}-{{ test_id | regex_replace('\\.', '-') }}" + test_name: "kargo-ci-{{ out.stdout }}" + + - set_fact: + file_name: "{{ostype}}-{{kube_network_plugin}}-{{commit}}-logs.tar.gz" - name: Create a bucket gc_storage: bucket: "{{ test_name }}" mode: create - permission: private + permission: public-read gs_access_key: "{{ gs_key }}" gs_secret_key: "{{ gs_skey }}" no_log: True + - name: Create a lifecycle template for the bucket + template: + src: gcs_life.json.j2 + dest: "{{dir}}/gcs_life.json" + + - name: Create a boto config to access GCS + template: + src: boto.j2 + dest: "{{dir}}/.boto" + no_log: True + - name: Download gsutil cp installer get_url: url: https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash - dest: /tmp/gcp-installer.sh + dest: "{{dir}}/gcp-installer.sh" - name: Get gsutil tool - script: /tmp/gcp-installer.sh + script: "{{dir}}/gcp-installer.sh" environment: CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + CLOUDSDK_INSTALL_DIR: "{{dir}}" no_log: True - - - name: Create a lifecycle template for the bucket - file: src=gcs_life.json path=/tmp/gcs_life.json - - - name: Hack the boto config for GCS access keys - lineinfile: - dest: .boto - line: "gs_access_key_id = {{ gs_key }}" - regexp: "^#gs_access_key_id = .*$" - no_log: True - - - name: Hack the boto config for GCS secret access keys - lineinfile: - dest: .boto - line: "gs_secret_access_key = {{ gs_skey }}" - regexp: "^#gs_secret_access_key = .*$" - no_log: True + 
ignore_errors: true - name: Apply the lifecycle rules - shell: bash google-cloud-sdk/bin/gsutil lifecycle set /tmp/gcs_life.json gs://{{ test_name }} + command: "{{dir}}/google-cloud-sdk/bin/gsutil lifecycle set {{dir}}/gcs_life.json gs://{{test_name}}" environment: - BOTO_CONFIG: .boto + BOTO_CONFIG: "{{dir}}/.boto" + no_log: True - name: Upload collected diagnostic info gc_storage: bucket: "{{ test_name }}" mode: put - permission: private - object: "build-{{ ostype }}-{{ kube_network_plugin }}-logs.tar.gz" - src: logs.tar.gz + permission: public-read + object: "{{ file_name }}" + src: "{{dir}}/logs.tar.gz" headers: '{"Content-Encoding": "x-gzip"}' gs_access_key: "{{ gs_key }}" gs_secret_key: "{{ gs_skey }}" + expiration: "{{ (expire_days | int) * 24 * 60 * 60 }}" ignore_errors: true + no_log: True + + - debug: + msg: "A public url https://storage.googleapis.com/{{test_name}}/{{file_name}}"