Create issues for teams to audit their database tables
With Table ownership information (#183 - closed) closed, we should ask teams to self audit the tables that were either manually or automatically assigned to them.
I've got a script (below) that creates the issues, I plan on running it to create them as soon as the check for the tables is done.
Script
#!/usr/bin/env ruby
known_tables = {}
require 'yaml'
require 'gitlab'
require 'optparse'
require 'pry'
options = {
dry_run: true,
www_path: '/Users/alexives/repos/www-gitlab-com'
}
OptionParser.new do |opts|
opts.banner = "Usage: feature_categories [options]"
opts.on("-d", "--[no-]dry-run", "Just print out the issues instead of creating them") do |d|
options[:dry_run] = d
end
opts.on("-l", "--live", "Publish issues instead of printing them") do
options[:dry_run] = false
end
opts.on("-g", "--group=GROUP", "Limit to GROUP") do |g|
options[:group] = g
end
opts.on("-w", "--www-path", "Path to the www-gitlab-com repo") do |p|
options[:www_path] = p
end
opts.on('-c', '--check-tables', 'check tables that were marked as owned in the spreadsheet') do
options[:check_tables] = true
end
opts.on('--groups', 'list the groups names') do
options[:print_groups] = true
end
end.parse!
Gitlab.endpoint = "https://gitlab.com/api/v4"
Gitlab.private_token = ""
exceptions = ['jihu']
files = Dir['db/docs/*.yml'].map do |file|
[file, YAML.load_file(file)]
end.to_h
stages = YAML.load_file("#{options[:www_path]}/data/stages.yml")['stages']
categories = stages.values.flat_map do |stage|
stage['groups'].values.flat_map do |group|
group['categories'].map do |category|
[category, group]
end
end
end.to_h
categories_to_table = categories.map do |category, group|
[category, files.values.select{|t| t['feature_categories'].include?(category) }.map{|t| t['table_name']}]
end.to_h
# puts categories_to_table['authentication_and_authorization']
groups = stages.values.flat_map do |stage|
stage['groups'].map do |name, group|
group['stage_name'] = stage['display_name']
group
end
end
limited_groups = groups
limited_groups = groups.select{|g| g['name'] == options[:group]} if options[:group]
if options[:print_groups]
puts groups.map{ |g| g['name'] }.join("\n")
exit 0
end
limited_groups.each do |group|
tables = group['categories'].flat_map{|c| categories_to_table[c].map{|t| "| :x: | #{t} | #{c} |" }}.join("\n")
title = "Database Dictionary Audit for #{group['stage_name']}:#{group['name']}"
description = <<EOF
Recently the database team has started documenting all database tables in `db/doc`. As an MVP, we've assigned
each table to a feature category. We attempted to automate this, and then made rough guesses on the ones that
couldn't be automatically identified. In order to make sure these are accurate, I'm asking every team to take
a look at the tables associated with feature categories assoicated with their stages.
What we need from you? Look through the list of tables and fix any mistaken labels and update the description
field to help folks know what purpose the table serves.
## Process (per table)
1. Are the feature categories correct?
- Keep in mind that tables can have multiple categories. Look at [the list](https://about.gitlab.com/handbook/product/categories/#categories-a-z) and if there are any other categories it belongs to, add them.
- If a table has no category that seems to be a perfect match, assign it to the closest match. Tables _must_ have feature categories.
- If the table is assigned to a feature category for a different group, open a merge request changing the category and assign it to the EM for the group
responsible for that category.
2. Update the description of the table to give anybody interacting with it context around the purpose and submit a merge request with the
information.
## Tables
| Reviewed? | Table | Feature Category |
| --------- | ----- | ---------------- |
#{tables}
## Why is this effort happening?
There have been many times that issues occur in production, but the team doesn't have the information it needs
to identify subject matter experts who can help resolve things quickly. This in turn can delay the time it takes
to find and fix pressing issues.
/label ~"Category:Database" ~"Database tooling"
/assign @alexives
/epic gitlab-org&7842
EOF
if tables == ""
puts "#{group['stage_name']}:#{group['name']} had no tables assigned, skipping"
next
end
if options[:check_tables]
group['categories'].flat_map{|c| categories_to_table[c]}.each do |t|
next if known_tables[t].nil?
unless known_tables[t].include?(group['name'])
puts "Possible mismatch for #{t} and #{group['stage_name']}:#{group['name']}, should be a category for #{known_tables[t]}"
end
end
elsif options[:dry_run]
puts title
puts
puts description
else
Gitlab.create_issue('gitlab-org/database-team/team-tasks', title, {description: description})
end
end