Monitor gitlab-ci deployment
Solve: #96 (closed)
The aim of this MR is to provide a quick view of the Sylva deployment status. The idea is to generate JUnit XML reports to track the status of the Sylva units and upload them as test results in GitLab.
The XML reports are generated via the units-report.py script. The script parses the Flux dump of the management cluster to generate the report.
Each unit will generate a testcase where:
- name is based on the deployment job name + unit name
- status is extracted from the ready status (OK if the ready status == True, otherwise it is considered FAILED)
- time is calculated as the difference between the creationTimestamp and the lastTransitionTime of the ready status
Example of xml report:
<?xml version="1.0" ?>
<testsuites disabled="0" errors="0" failures="14" tests="20" time="1730.0">
<testsuite disabled="0" errors="0" failures="14" name="kubeadm-capd:sylva-unit-validation" skipped="0" tests="20" time="1730.0">
<testcase name="kubeadm-capd:cabpk" time="192.000000" classname="sylva-unit-validation">
<failure type="failure" message="cabpk unit ready status was: Unknown"/>
</testcase>
<testcase name="kubeadm-capd:calico" time="17.000000" classname="sylva-unit-validation"/>
<testcase name="kubeadm-capd:capd" time="190.000000" classname="sylva-unit-validation">
<failure type="failure" message="capd unit ready status was: Unknown"/>
</testcase>
<testcase name="kubeadm-capd:capi" time="196.000000" classname="sylva-unit-validation">
<failure type="failure" message="capi unit ready status was: Unknown"/>
</testcase>
<testcase name="kubeadm-capd:cert-manager" time="118.000000" classname="sylva-unit-validation"/>
<testcase name="kubeadm-capd:first-login-rancher" time="1.000000" classname="sylva-unit-validation">
<failure type="failure" message="first-login-rancher unit ready status was: False"/>
</testcase>
<testcase name="kubeadm-capd:flux-system" time="11.000000" classname="sylva-unit-validation"/>
<testcase name="kubeadm-capd:ingress-nginx" time="1.000000" classname="sylva-unit-validation">
<failure type="failure" message="ingress-nginx unit ready status was: False"/>
</testcase>
<testcase name="kubeadm-capd:k8s-gateway" time="1.000000" classname="sylva-unit-validation">
<failure type="failure" message="k8s-gateway unit ready status was: False"/>
</testcase>
<testcase name="kubeadm-capd:local-path-provisioner" time="32.000000" classname="sylva-unit-validation"/>
<testcase name="kubeadm-capd:metallb" time="233.000000" classname="sylva-unit-validation">
<failure type="failure" message="metallb unit ready status was: Unknown"/>
</testcase>
<testcase name="kubeadm-capd:metallb-config" time="17.000000" classname="sylva-unit-validation">
<failure type="failure" message="metallb-config unit ready status was: False"/>
</testcase>
<testcase name="kubeadm-capd:namespace-defs" time="12.000000" classname="sylva-unit-validation"/>
<testcase name="kubeadm-capd:rancher" time="183.000000" classname="sylva-unit-validation">
<failure type="failure" message="rancher unit ready status was: False"/>
</testcase>
<testcase name="kubeadm-capd:rancher-webhook-service" time="11.000000" classname="sylva-unit-validation">
<failure type="failure" message="rancher-webhook-service unit ready status was: False"/>
</testcase>
<testcase name="kubeadm-capd:sylva-ca" time="233.000000" classname="sylva-unit-validation">
<failure type="failure" message="sylva-ca unit ready status was: False"/>
</testcase>
<testcase name="kubeadm-capd:sylva-ca-certs" time="187.000000" classname="sylva-unit-validation">
<failure type="failure" message="sylva-ca-certs unit ready status was: False"/>
</testcase>
<testcase name="kubeadm-capd:sylva-units-status" time="1.000000" classname="sylva-unit-validation">
<failure type="failure" message="sylva-units-status unit ready status was: False"/>
</testcase>
<testcase name="kubeadm-capd:trivy-operator" time="93.000000" classname="sylva-unit-validation"/>
<testcase name="kubeadm-capd:vault" time="1.000000" classname="sylva-unit-validation">
<failure type="failure" message="vault unit ready status was: False"/>
</testcase>
</testsuite>
</testsuites>
To avoid the creation of dedicated jobs to run the script for each kind of deployment, I run it in the exit_trap. This is something that we will probably revisit later if we use this method to do more complex tests.
Edited by Loic Nicolle