Commit 31316213 authored by John Jarvis's avatar John Jarvis Committed by Victor Lopez

Adding oncall report generator.

parent 0a5c757b
.settings.yaml
vendor
......@@ -19,9 +19,15 @@
revision = "c35b3f712f72376e21b645514ec3635e63fe0cd2"
version = "v0.6.2"
[[projects]]
branch = "v2"
name = "gopkg.in/yaml.v2"
packages = ["."]
revision = "eb3733d160e74a9c7e442f435eb3bea458e1d19f"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "307de7d0d5ecdf343165706091689604c18dc85d1f3b8e24c2e8994615163cd1"
inputs-digest = "fd6b96c78b52a2948a330675c47e1673bbbc437fad9e7df8d888fb4dcbcad5d7"
solver-name = "gps-cdcl"
solver-version = 1
# Oncall Robot Assistant
This is a utility for helping to automate on-call reporting
tasks. Its main purpose is to help generate the weekly reports.
# How to use
* `go get -d gitlab.com/gl-infra/oncall-robot-assistant`
* Install dep https://github.com/golang/dep#setup
* `dep init`
* `go build`
* `go install`
* `./on-call-robot-assistant`
This will create an issue and print its link. Modify the issue as you see fit,
move it into the infrastructure project, and add the "oncall report" label.
package oncall
package config
import (
"encoding/json"
"errors"
"fmt"
"io"
......@@ -11,35 +10,41 @@ import (
"gopkg.in/yaml.v2"
)
// PagerDutyConfig keeps the configuration for the pagerduty service
type PagerDutyConfig struct {
Token string `yaml:"token"`
Schedules []string `yaml:"schedules"`
PrimarySchedule string `yaml:"primary"`
SecondarySchedule string `yaml:"secondary"`
// WeeklyOps is one entry of the `weekly_ops` list in the YAML
// configuration: a named graph and the link it is rendered from.
type WeeklyOps struct {
	// Name is the label of the graph (YAML key `name`).
	Name string `yaml:"name"`
	// Url is the graph link (YAML key `url`).
	Url string `yaml:"url"`
}
// RealConfig the real deal
type RealConfig struct {
PagerDuty PagerDutyConfig `yaml:"pagerduty"`
GitLabToken string `yaml:"gitlab-token"`
// PagerDutySchedule is one entry of the `schedules` list in the
// `pagerduty` section of the YAML configuration.
type PagerDutySchedule struct {
	// Name is the label of the schedule (YAML key `name`).
	Name string `yaml:"name"`
	// Id is the identifier of the schedule (YAML key `id`).
	Id string `yaml:"id"`
}
// PagerDuty maps the `pagerduty` section of the YAML configuration.
type PagerDuty struct {
	// Token is the PagerDuty API token (YAML key `token`).
	Token string `yaml:"token"`
	// ServiceId is the PagerDuty service identifier (YAML key `service_id`).
	ServiceId string `yaml:"service_id"`
	// Schedules lists the schedules to report on (YAML key `schedules`).
	Schedules []PagerDutySchedule `yaml:"schedules"`
	// PrimarySchedule is read from the YAML key `primary`.
	PrimarySchedule string `yaml:"primary"`
	// SecondarySchedule is read from the YAML key `secondary`.
	SecondarySchedule string `yaml:"secondary"`
}
// GitLab maps the `gitlab` section of the YAML configuration.
//
// The struct tags must be written without a space after the colon. A tag
// such as `yaml: "token"` is not a valid key:"value" pair, so it is ignored
// and the decoder falls back to the lower-cased field name. The keys below
// are the ones that fallback produced (`token` and `id`), now stated
// explicitly.
type GitLab struct {
	// Token is the GitLab API token (YAML key `token`).
	Token string `yaml:"token"`
	// Id is the numeric GitLab project id (YAML key `id`).
	Id int `yaml:"id"`
}
type Config struct {
GitLabToken string `json:"gitlab_token"`
PDToken string `json:"pd_token"`
OncallPriID string `json:"oncall_pri_id"`
OncallSecID string `json:"oncall_sec_id"`
OncallShadowID string `json:"oncall_shadow_id"`
OncallStartTime string `json:"oncall_start_time"`
OncallEndTime string `json:"oncall_end_time"`
Milestone string `yaml:"milestone"`
PagerDuty PagerDuty `yaml:"pagerduty"`
ProjectId int `yaml:"project_id"`
GitLab GitLab `yaml:"gitlab"`
DayOffset int `yaml:"day_offset"`
OncallLabel string `yaml:"oncall_label"`
WeeklyOps []WeeklyOps `yaml:"weekly_ops"`
}
// Parses the yaml configuration from a reader
func ParseConfig(r io.Reader) (RealConfig, error) {
func ParseConfig(r io.Reader) (Config, error) {
configBytes, err := ioutil.ReadAll(r)
config := RealConfig{}
config := Config{}
if err != nil {
return config, fmt.Errorf("could not read configuration: %s", err)
}
......@@ -51,16 +56,16 @@ func ParseConfig(r io.Reader) (RealConfig, error) {
}
// readConfig will attempt to read the different configuration
// parameters from a JSON formatted file.
// parameters from a yaml formatted file.
func ReadConfig(f string) (*Config, error) {
var cfg Config
if _, err := os.Stat(f); os.IsNotExist(err) {
return nil, errors.New(err.Error())
}
content, err := ioutil.ReadFile(f)
if err != nil {
if content, err := ioutil.ReadFile(f); err != nil {
return nil, errors.New(err.Error())
} else {
yaml.Unmarshal(content, &cfg)
}
var cfg Config
json.Unmarshal(content, &cfg)
return &cfg, nil
}
......@@ -3,64 +3,25 @@ package main
import (
"flag"
"fmt"
"gitlab.com/gl-infra/oncall-robot-assistant/config"
oncall "gitlab.com/gl-infra/oncall-robot-assistant/oncall"
"log"
"time"
"github.com/xanzy/go-gitlab"
"gitlab.com/gl-infra/oncall-robot-assistant/oncall"
)
func main() {
cfgFile := flag.String("config", "./.settings.json", "the configuration file")
cfgFile := flag.String("config", "./.settings.yaml", "the configuration file")
flag.Parse()
config, err := oncall.ReadConfig(*cfgFile)
if err != nil {
log.Fatalln(err)
}
// get current primary
primaryNow, err := oncall.GetOncallPerson(config, "primary", "current")
config, err := config.ReadConfig(*cfgFile)
if err != nil {
log.Fatalln(err)
}
// get current secondary
secondaryNow, err := oncall.GetOncallPerson(config, "secondary", "current")
if err != nil {
log.Fatalln(err)
}
fmt.Printf("pri oncall now: %s\n", primaryNow)
fmt.Printf("sec oncall now: %s\n", secondaryNow)
// get next primary
primaryNext, err := oncall.GetOncallPerson(config, "primary", "next")
if err != nil {
log.Fatalln(err)
}
// get next secondary
secondaryNext, err := oncall.GetOncallPerson(config, "secondary", "next")
if err != nil {
log.Fatalln(err)
}
fmt.Printf("pri oncall next: %s\n", primaryNext)
fmt.Printf("sec oncall next: %s\n", secondaryNext)
git := gitlab.NewClient(nil, config.GitLabToken)
labels := []string{"oncall"}
opt := &gitlab.ListProjectIssuesOptions{Labels: labels}
issues, _, err := git.Issues.ListProjectIssues(1304532, opt)
fmt.Println("Last week oncall issues:")
for _, p := range issues {
if time.Now().Sub(*p.CreatedAt).Seconds()/86400 <= 7 {
created := p.CreatedAt.Format(time.RFC822)
if p.Assignee.Username == "" {
p.Assignee.Username = "unassigned"
}
fmt.Printf("[%s:%s %s] %s (%s)\n", p.Assignee.Username, p.State, created, p.Title, p.WebURL)
}
}
desc := oncall.GenerateTemplate(config)
title := "OnCall report for period: " +
time.Now().UTC().AddDate(0, 0, -7).Format("2006-01-02") +
" - " + time.Now().UTC().Format("2006-01-02")
fmt.Println("Creating issue ...")
issue := oncall.CreateReportIssue(config, title, desc.String())
fmt.Println("Created issue ", issue.WebURL)
}
package oncall
import (
"fmt"
"os"
"reflect"
"strings"
"testing"
)
// TestReadConfig checks that ReadConfig returns an error for a file that does
// not exist and that the error text is the one produced by the stat call.
func TestReadConfig(t *testing.T) {
	filename := "invalid-cfg.json"
	_, err := ReadConfig(filename)
	if err == nil {
		// A nil error means the missing file was silently accepted.
		t.Fatalf("expected an error reading missing configuration file %s, got nil", filename)
	}
	want := "stat invalid-cfg.json: no such file or directory"
	if got := err.Error(); got != want {
		t.Fatalf("wrong configuration error, got %s; expected %s", got, want)
	}
}
// TestParseConfigWrong feeds ParseConfig a document that is not a valid
// configuration and verifies the exact error text that comes back.
func TestParseConfigWrong(t *testing.T) {
	const expected = "could not parse configuration yaml: yaml: unmarshal errors:\n line 1: cannot unmarshal !!str `invalid...` into oncall.RealConfig"

	input := strings.NewReader("invalid yaml")
	_, err := ParseConfig(input)
	if got := fmt.Sprint(err); got == expected {
		return
	}
	t.Fatalf("unexpected error, got %s", err)
}
// TestParseConfigCorrectly parses the settings fixture and checks the GitLab
// token, the PagerDuty token and the list of schedules that were read.
func TestParseConfigCorrectly(t *testing.T) {
	f, err := os.Open("../test-fixtures/settings.yaml")
	if err != nil {
		t.Fatalf("failed to open the configuration file: %s", err)
	}
	// Register the close only once the file is known to be open.
	defer f.Close()

	c, err := ParseConfig(f)
	if err != nil {
		t.Fatalf("failed to parse the configuration: %s", err)
	}
	assertEquals(t, "gitlab token", "ZZXX", c.GitLabToken)
	assertEquals(t, "pagerduty token", "XXYY", c.PagerDuty.Token)
	assertEquals(t, "schedules", []string{"PrimaryID", "SecondaryID"}, c.PagerDuty.Schedules)
}
// assertEquals aborts the test when actual is not deeply equal to expected.
// name identifies the compared value in the failure message.
func assertEquals(t *testing.T, name string, expected, actual interface{}) {
	if reflect.DeepEqual(expected, actual) {
		return
	}
	t.Fatalf("%s is not as expected: %+v; got %+v", name, expected, actual)
}
package oncall
import (
"fmt"
gitlab "github.com/xanzy/go-gitlab"
config "gitlab.com/gl-infra/oncall-robot-assistant/config"
"time"
)
// CreateReportIssue creates a GitLab issue with the given title and
// description in the project config.ProjectId, authenticating with
// config.GitLab.Token, and returns the created issue.
//
// It panics when the GitLab API call fails.
func CreateReportIssue(config *config.Config, title string, report string) *gitlab.Issue {
	client := gitlab.NewClient(nil, config.GitLab.Token)
	request := gitlab.CreateIssueOptions{
		Title:       &title,
		Description: &report,
	}
	created, _, err := client.Issues.CreateIssue(config.ProjectId, &request)
	if err != nil {
		panic(err)
	}
	return created
}
// GetIssuesClosedDuringMilestone lists the issues of project config.GitLab.Id
// that carry the on-call label, are closed, belong to config.Milestone and
// were created within the last config.DayOffset days (UTC).
//
// It panics when the GitLab API call fails.
// NOTE(review): only the result of a single ListProjectIssues call is
// returned; whether that covers all pages depends on the client — confirm.
func GetIssuesClosedDuringMilestone(config *config.Config) []*gitlab.Issue {
	client := gitlab.NewClient(nil, config.GitLab.Token)
	closed := "closed"
	since := time.Now().UTC().AddDate(0, 0, -config.DayOffset)
	query := gitlab.ListProjectIssuesOptions{
		Labels:       []string{config.OncallLabel},
		State:        &closed,
		Milestone:    &config.Milestone,
		CreatedAfter: &since,
	}
	found, _, err := client.Issues.ListProjectIssues(config.GitLab.Id, &query)
	if err != nil {
		panic(err)
	}
	return found
}
// GetIssuesOpenedDuringShift lists the issues of project config.GitLab.Id
// that carry the on-call label and were created within the last
// config.DayOffset days (UTC), whatever their state.
//
// It panics when the GitLab API call fails.
func GetIssuesOpenedDuringShift(config *config.Config) []*gitlab.Issue {
	client := gitlab.NewClient(nil, config.GitLab.Token)
	since := time.Now().UTC().AddDate(0, 0, -config.DayOffset)
	query := gitlab.ListProjectIssuesOptions{
		Labels:       []string{config.OncallLabel},
		CreatedAfter: &since,
	}
	found, _, err := client.Issues.ListProjectIssues(config.GitLab.Id, &query)
	if err != nil {
		panic(err)
	}
	return found
}
// GetIssuesOpenAll lists the issues of project config.GitLab.Id that carry
// the on-call label and are in the "opened" state, with no date filter.
//
// It panics when the GitLab API call fails.
func GetIssuesOpenAll(config *config.Config) []*gitlab.Issue {
	client := gitlab.NewClient(nil, config.GitLab.Token)
	opened := "opened"
	query := gitlab.ListProjectIssuesOptions{
		Labels: []string{config.OncallLabel},
		State:  &opened,
	}
	found, _, err := client.Issues.ListProjectIssues(config.GitLab.Id, &query)
	if err != nil {
		panic(err)
	}
	return found
}
// FilterIssuesByLabel returns, in their original order, the issues whose
// label list contains label. The result is nil when nothing matches.
func FilterIssuesByLabel(label string, issues []*gitlab.Issue) (filtered_issues []*gitlab.Issue) {
	for i := range issues {
		candidate := issues[i]
		if !stringInSlice(label, candidate.Labels) {
			continue
		}
		filtered_issues = append(filtered_issues, candidate)
	}
	return filtered_issues
}
// FilterIssuesByState returns, in their original order, the issues whose
// State equals state. The result is nil when nothing matches.
//
// NOTE(review): every inspected issue is also printed to standard output
// (the requested state followed by the issue labels); this looks like a
// leftover debugging aid — confirm before relying on clean output.
func FilterIssuesByState(state string, issues []*gitlab.Issue) (filtered_issues []*gitlab.Issue) {
	for i := range issues {
		candidate := issues[i]
		fmt.Println(state, candidate.Labels)
		if candidate.State != state {
			continue
		}
		filtered_issues = append(filtered_issues, candidate)
	}
	return filtered_issues
}
// stringInSlice reports whether a occurs in list. The comparison is exact
// (case-sensitive); a nil or empty list never contains anything.
func stringInSlice(a string, list []string) bool {
	for i := 0; i < len(list); i++ {
		if list[i] == a {
			return true
		}
	}
	return false
}
package oncall
import (
"fmt"
"time"
pagerduty "github.com/PagerDuty/go-pagerduty"
)
// PagerDuty carries the API token from which PagerDuty clients are built.
type PagerDuty struct {
	token string
}

// NewPagerDuty builds a PagerDuty value holding the given API token.
func NewPagerDuty(token string) PagerDuty {
	var pd PagerDuty
	pd.token = token
	return pd
}
// ListOncallPeople returns the on-call entries of the schedule ScheduleID in
// a window that runs from `days` days before today to `days` days after
// today.
//
// The window opens at 04:00:00Z on the first day and closes at 16:00:00Z on
// the last day. Both dates are derived from a single reading of the clock in
// UTC, so the "Z" suffix appended to them is accurate whatever the local
// time zone of the machine is.
//
// An error is returned when the PagerDuty API call fails.
func (pd PagerDuty) ListOncallPeople(ScheduleID string, days int) ([]pagerduty.OnCall, error) {
	const dayLayout = "2006-01-02"

	// Read the clock once so both bounds are computed from the same instant.
	now := time.Now().UTC()
	span := 24 * time.Duration(days) * time.Hour

	client := pagerduty.NewClient(pd.token)
	resp, err := client.ListOnCalls(pagerduty.ListOnCallOptions{
		ScheduleIDs: []string{ScheduleID},
		Since:       fmt.Sprintf("%sT04:00:00Z", now.Add(-span).Format(dayLayout)),
		Until:       fmt.Sprintf("%sT16:00:00Z", now.Add(span).Format(dayLayout)),
	})
	if err != nil {
		return nil, fmt.Errorf("could not list oncall people for the next week: %s", err)
	}
	return resp.OnCalls, nil
}
// GetOncallPerson returns the name of the first user PagerDuty reports as
// on call for the requested shift and period.
//
// shift selects the schedule: "primary" maps to c.OncallPriID and
// "secondary" to c.OncallSecID; any other value is passed to PagerDuty
// unchanged as the schedule identifier. period "current" queries today;
// any other value queries 5 days ahead.
//
// An error is returned when the API call fails or when it yields no user.
func GetOncallPerson(c *Config, shift string, period string) (string, error) {
	// The current period needs no offset; every other period looks ahead.
	// NOTE(review): an earlier comment described the look-ahead as 1 day
	// (run the day before the rotation changes) while the code uses 5 —
	// confirm which one is intended.
	offset := 5
	if period == "current" {
		offset = 0
	}

	// Translate the shift name into its schedule identifier.
	switch shift {
	case "primary":
		shift = c.OncallPriID
	case "secondary":
		shift = c.OncallSecID
	}

	var options pagerduty.ListOnCallUsersOptions
	options.Since, options.Until = getOncallDates(c, offset)

	client := pagerduty.NewClient(c.PDToken)
	oncall, err := client.ListOnCallUsers(shift, options)
	if err != nil {
		return "", err
	}
	// Guard against an empty answer instead of indexing past the end.
	if len(oncall) == 0 {
		return "", fmt.Errorf("no oncall user found for schedule %s between %s and %s",
			shift, options.Since, options.Until)
	}
	return oncall[0].Name, nil
}
// getOncallDates returns the start and end timestamps of the on-call shift
// that falls `offset` days from now, formatted as <date>T<time>Z.
//
// The date is today's local date shifted by offset*24h; the times are
// c.OncallStartTime and c.OncallEndTime. The clock is read once so that
// start and end always share the same date, even when the call happens
// right around midnight.
func getOncallDates(c *Config, offset int) (start string, end string) {
	day := time.Now().Add(24 * time.Duration(offset) * time.Hour).Format("2006-01-02")
	start = fmt.Sprintf("%sT%sZ", day, c.OncallStartTime)
	end = fmt.Sprintf("%sT%sZ", day, c.OncallEndTime)
	return start, end
}
package oncall
import (
"fmt"
"os"
"testing"
"time"
)
// TestOncallDates verifies that getOncallDates combines the date found
// `offset` days from now with the configured shift start and end times.
func TestOncallDates(t *testing.T) {
	c := &Config{
		OncallStartTime: "04:00:00",
		OncallEndTime:   "16:00:00",
	}

	// stamp renders the expected timestamp for the day `days` days ahead.
	stamp := func(days int, clock string) string {
		day := time.Now().Add(time.Duration(days) * 24 * time.Hour).Format("2006-01-02")
		return fmt.Sprintf("%sT%sZ", day, clock)
	}

	type expectation struct {
		offset int
		start  string
		end    string
	}
	var cases []expectation
	for _, offset := range []int{0, 1, 7} {
		cases = append(cases, expectation{
			offset: offset,
			start:  stamp(offset, c.OncallStartTime),
			end:    stamp(offset, c.OncallEndTime),
		})
	}

	for _, tc := range cases {
		name := fmt.Sprintf("With %d offset", tc.offset)
		t.Run(name, func(t *testing.T) {
			gotStart, gotEnd := getOncallDates(c, tc.offset)
			if gotStart != tc.start {
				t.Fatalf("incorrect start, expected %s; got %s", tc.start, gotStart)
			}
			if gotEnd != tc.end {
				t.Fatalf("incorrect end, expected %s; got %s", tc.end, gotEnd)
			}
		})
	}
}
// TestPagerDutyStuff lists the on-call people of the schedule named by the
// PD_PRIMARY_SCHEDULE environment variable, using the API key found in
// PD_API_KEY, and logs every entry.
//
// The test talks to the real PagerDuty API, so it is skipped when either
// variable is unset instead of failing on machines without credentials.
func TestPagerDutyStuff(t *testing.T) {
	apiKey := os.Getenv("PD_API_KEY")
	schedule := os.Getenv("PD_PRIMARY_SCHEDULE")
	if apiKey == "" || schedule == "" {
		t.Skip("PD_API_KEY and PD_PRIMARY_SCHEDULE must be set to run this test")
	}

	pd := NewPagerDuty(apiKey)
	people, err := pd.ListOncallPeople(schedule, 7)
	if err != nil {
		t.Fatalf("could not list the oncall people: %s", err)
	}
	for _, o := range people {
		t.Logf("On Call from %s to %s for %s: %s", o.Start, o.End, o.Schedule.Summary, o.User.Summary)
	}
}
package oncall
import (
pagerduty "github.com/PagerDuty/go-pagerduty"
config "gitlab.com/gl-infra/oncall-robot-assistant/config"
"time"
)
// PagerDuty stores a PagerDuty API token.
type PagerDuty struct {
	token string
}

// NewPagerDuty builds a PagerDuty value holding the given API token.
func NewPagerDuty(token string) PagerDuty {
	var pd PagerDuty
	pd.token = token
	return pd
}
// GetOncallPersons returns the users PagerDuty reports as on call for the
// given schedule between config.DayOffset days ago and now (UTC).
//
// It panics when the PagerDuty API call fails.
func GetOncallPersons(config *config.Config, schedule string) []pagerduty.User {
	window := pagerduty.ListOnCallUsersOptions{
		Since: nowPdDateWithOffset(-config.DayOffset),
		Until: nowPdDate(),
	}
	client := pagerduty.NewClient(config.PagerDuty.Token)
	users, err := client.ListOnCallUsers(schedule, window)
	if err != nil {
		panic(err)
	}
	return users
}
// GetOncallIncidents returns the incidents of the service
// config.PagerDuty.ServiceId reported since config.DayOffset days ago (UTC).
//
// It panics when the PagerDuty API call fails.
// NOTE(review): only the incidents of a single ListIncidents response are
// returned; whether more pages can exist depends on the client — confirm.
func GetOncallIncidents(config *config.Config) []pagerduty.Incident {
	client := pagerduty.NewClient(config.PagerDuty.Token)
	query := pagerduty.ListIncidentsOptions{
		ServiceIDs: []string{config.PagerDuty.ServiceId},
		Since:      nowPdDateWithOffset(-config.DayOffset),
	}
	resp, err := client.ListIncidents(query)
	if err != nil {
		panic(err)
	}
	return resp.Incidents
}
// nowPdDate formats the current UTC time as YYYY-MM-DDTHH:MM:SSZ.
func nowPdDate() string {
	const layout = "2006-01-02T15:04:05Z"
	utcNow := time.Now().UTC()
	return utcNow.Format(layout)
}
// nowPdDateWithOffset formats the current UTC time shifted by offset days
// (negative values go back in time) as YYYY-MM-DDTHH:MM:SSZ.
func nowPdDateWithOffset(offset int) string {
	const layout = "2006-01-02T15:04:05Z"
	shifted := time.Now().UTC().AddDate(0, 0, offset)
	return shifted.Format(layout)
}
package oncall
import (
"bytes"
"fmt"
config "gitlab.com/gl-infra/oncall-robot-assistant/config"
"log"
"os"
"path"
"text/template"
"time"
)
// WeeklyOpsGraph is one graph entry handed to the report template.
type WeeklyOpsGraph struct {
	// Name is the label of the graph.
	Name string
	// Url is the link of the graph.
	Url string
}
// OnCallTeamMember pairs a schedule with a user for the report template.
type OnCallTeamMember struct {
	// Schedule is the name of the schedule.
	Schedule string
	// User is the name of the user.
	User string
}
// IssueStats holds the counters reported for one set of on-call related
// issues.
type IssueStats struct {
	// Count is the total number of issues in the set.
	Count int
	// AccessRequest is the number of access-request issues in the set.
	AccessRequest int
	// Critical is the number of critical issues in the set.
	Critical int
}
// Issue is the summary of one issue handed to the report template.
type Issue struct {
	// Summary is the title of the issue.
	Summary string
	// Url is the link of the issue.
	Url string
	// CreatedAt is the creation time, already formatted as text.
	CreatedAt string
	// Assignee is the user name of the assignee.
	Assignee string
}
// Incident is the summary of one incident handed to the report template.
type Incident struct {
	// Summary is the short description of the incident.
	Summary string
	// Url is the link of the incident.
	Url string
	// CreatedAt is the creation time of the incident, as text.
	CreatedAt string
}
// TemplateData is the value the on-call report template is executed with.
type TemplateData struct {
	// WeeklyOpsGraphs lists the graphs to embed in the report.
	WeeklyOpsGraphs []WeeklyOpsGraph
	// OnCallTeamMembers lists the schedule/user pairs of the report.
	OnCallTeamMembers []OnCallTeamMember
	// Incidents lists the incidents of the report.
	Incidents []Incident
	// Issues lists the issues of the report.
	Issues []Issue
	// IncidentCount is the number of incidents.
	IncidentCount int
	// IssuesOpenAll holds the counters of all open issues.
	IssuesOpenAll IssueStats
	// IssuesOpenedDuringShift holds the counters of the issues opened
	// during the shift.
	IssuesOpenedDuringShift IssueStats
	// IssuesClosedDuringMilestone holds the counters of the issues closed
	// during the milestone.
	IssuesClosedDuringMilestone IssueStats
}
// GenerateTemplate gathers the data of the on-call report and renders it
// with the template templates/on-call-report.tmpl, which is looked up in the
// directory of the running executable. The rendered text is returned.
//
// The report data consists of:
//   - the users on call for every schedule in config.PagerDuty.Schedules;
//   - the incidents returned by GetOncallIncidents and their count;
//   - counters (total, "access request" label, "critical" label) for the
//     issues opened during the shift, closed during the milestone and
//     currently open;
//   - the list of currently open issues;
//   - one graph per config.WeeklyOps entry, with `from`/`to` parameters
//     (milliseconds since the epoch) covering the last config.DayOffset days
//     appended to its URL.
//
// Failing to locate the executable, to parse the template or to render it
// terminates the program through log.Fatal, so a broken or missing template
// is reported instead of causing a nil-pointer panic or an empty report.
func GenerateTemplate(config *config.Config) *bytes.Buffer {
	templateData := TemplateData{}

	// On-call people, schedule by schedule.
	for _, schd := range config.PagerDuty.Schedules {
		for _, u := range GetOncallPersons(config, schd.Id) {
			templateData.OnCallTeamMembers = append(templateData.OnCallTeamMembers, OnCallTeamMember{
				Schedule: schd.Name,
				User:     u.Name,
			})
		}
	}

	// Incidents and their count.
	incidents := GetOncallIncidents(config)
	templateData.IncidentCount = len(incidents)
	for _, p := range incidents {
		templateData.Incidents = append(templateData.Incidents, Incident{
			Summary:   p.Summary,
			Url:       p.HTMLURL,
			CreatedAt: p.CreatedAt,
		})
	}

	oncallIssues := GetIssuesOpenedDuringShift(config)
	closedIssues := GetIssuesClosedDuringMilestone(config)
	allOpenIssues := GetIssuesOpenAll(config)

	// Issue stats.
	templateData.IssuesOpenedDuringShift.Count = len(oncallIssues)
	templateData.IssuesOpenedDuringShift.AccessRequest = len(FilterIssuesByLabel("access request", oncallIssues))
	templateData.IssuesOpenedDuringShift.Critical = len(FilterIssuesByLabel("critical", oncallIssues))
	templateData.IssuesClosedDuringMilestone.Count = len(closedIssues)
	templateData.IssuesClosedDuringMilestone.AccessRequest = len(FilterIssuesByLabel("access request", closedIssues))
	templateData.IssuesClosedDuringMilestone.Critical = len(FilterIssuesByLabel("critical", closedIssues))
	templateData.IssuesOpenAll.Count = len(allOpenIssues)
	templateData.IssuesOpenAll.AccessRequest = len(FilterIssuesByLabel("access request", allOpenIssues))
	templateData.IssuesOpenAll.Critical = len(FilterIssuesByLabel("critical", allOpenIssues))

	// List of open issues. The placeholder for a missing assignee is kept in
	// a local variable so the issue returned by the API is left untouched.
	for _, p := range allOpenIssues {
		assignee := p.Assignee.Username
		if assignee == "" {
			assignee = "unassigned"
		}
		templateData.Issues = append(templateData.Issues, Issue{
			Summary:   p.Title,
			Url:       p.WebURL,
			CreatedAt: p.CreatedAt.Format(time.RFC822),
			Assignee:  assignee,
		})
	}

	// Weekly ops graphs. The time window is computed once so every graph
	// covers exactly the same period.
	millisTo := time.Now().UnixNano() / 1000000
	millisFrom := millisTo - (86400000 * int64(config.DayOffset))
	for _, graph := range config.WeeklyOps {
		templateData.WeeklyOpsGraphs = append(templateData.WeeklyOpsGraphs, WeeklyOpsGraph{
			Name: graph.Name,
			Url:  graph.Url + fmt.Sprintf("&from=%d&to=%d", millisFrom, millisTo),
		})
	}

	// The template lives next to the executable, not in the working directory.
	ex, err := os.Executable()
	if err != nil {
		log.Fatal(err)
	}
	dir := path.Dir(ex)
	tmpl, err := template.New("on-call-report.tmpl").
		ParseFiles(path.Join(dir, "./templates/on-call-report.tmpl"))
	if err != nil {
		log.Fatal(err)
	}

	var desc bytes.Buffer
	if err := tmpl.Execute(&desc, templateData); err != nil {
		log.Fatal(err)
	}
	return &desc
}
{
"gitlab_token": "your_gitlab_api_token",
"pd_token": "your_pagerduty_api_token",
"oncall_pri_id": "XXXXXXX",
"oncall_sec_id": "YYYYYYY",
"oncall_shadow_id": "ZZZZZZZ",
"oncall_start_time": "04:00:00",
"oncall_end_time": "16:00:00"
}
\ No newline at end of file
# Copy this file to .settings.yaml
# and change the pagerduty and gitlab
# tokens
# Number of days to look back for the
# oncall report
day_offset: 7
# Project ID of the oncall-robot-assistant
# which is where the report is staged as
# an issue
project_id: 3950829
# Label used for oncall
oncall_label: oncall
# Milestone name for counting closed oncall
# issues
milestone: WoW
# Pagerduty configuration
pagerduty:
token: **************
service_id: PATDFCE
schedules:
- name: AMA
id: PKN8L5Q
- name: EU
id: PWDTHYI
primary:
secondary:
# GitLab API configuration
gitlab:
token: ***************
# Project id of the infrastructure project
id: 1304532
# To add weekly ops graphs:
# * click "share" on the graph you want to add
# * copy the "direct link rendered image link"
# * remove the from/to url params, these will be generated
weekly_ops:
- name: API CPU
url: https://performance.gitlab.net/render/dashboard-solo/db/fleet-overview?refresh=5m&orgId=1&panelId=40&var-environment=prd&width=1000&height=500&tz=UTC
- name: Web CPU
url: https://performance.gitlab.net/render/dashboard-solo/db/fleet-overview?refresh=5m&panelId=39&orgId=1&var-environment=prd&width=1000&height=500&tz=UTC
- name: Git CPU
url: https://performance.gitlab.net/render/dashboard-solo/db/fleet-overview?refresh=5m&orgId=1&var-environment=prd&panelId=41&width=1000&height=500&tz=UTC
- name: Sidekiq CPU