Commit 59e4ba51 authored by Vasilii Iakliushin's avatar Vasilii Iakliushin 🔵

Linter for handbook links and anchors

Contributes to
#6695

Adds a new linter to check if links and anchors point to existing
pages and sections.
parent f8f2a259
......@@ -57,9 +57,11 @@ group :development, :test do
gem 'html-proofer', '~> 3.10.2'
gem 'docopt', '~> 0.6.1'
gem 'rspec', '~> 3.5', require: false
gem 'mdl', '0.6.0', require: false
gem 'rubocop', '~> 0.59.0', require: false
gem 'capybara', '~> 3.31.0'
gem 'capybara-screenshot', '~> 1.0.22'
gem 'selenium-webdriver', '~> 3.142.7'
gem 'webmock', '~> 3.5.1'
gem 'pry-byebug', '~> 3.9.0'
end
......@@ -16,6 +16,7 @@ GEM
execjs
backports (3.11.4)
builder (3.2.3)
byebug (11.1.3)
capybara (3.31.0)
addressable
mini_mime (>= 0.1.3)
......@@ -28,6 +29,7 @@ GEM
capybara (>= 1.0, < 4)
launchy
childprocess (3.0.0)
coderay (1.1.3)
coffee-script (2.4.1)
coffee-script-source
execjs
......@@ -111,8 +113,13 @@ GEM
listen (3.0.8)
rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7)
mdl (0.6.0)
kramdown (~> 1.12, >= 1.12.0)
mixlib-cli (~> 2.1, >= 2.1.1)
mixlib-config (~> 2.2, >= 2.2.1)
memoist (0.16.0)
mercenary (0.3.6)
method_source (1.0.0)
middleman (4.3.3)
coffee-script (~> 2.2)
haml (>= 4.0.5)
......@@ -170,6 +177,9 @@ GEM
mini_racer (0.2.6)
libv8 (>= 6.9.411)
minitest (5.11.3)
mixlib-cli (2.1.6)
mixlib-config (2.2.18)
tomlrb
multi_xml (0.6.0)
multipart-post (2.0.0)
nokogiri (1.10.9)
......@@ -187,6 +197,12 @@ GEM
parser (2.6.0.0)
ast (~> 2.4.0)
powerpack (0.1.2)
pry (0.13.1)
coderay (~> 1.1)
method_source (~> 1.0)
pry-byebug (3.9.0)
byebug (~> 11.0)
pry (~> 0.13.0)
public_suffix (4.0.3)
rack (2.2.2)
rack-livereload (0.3.17)
......@@ -239,6 +255,7 @@ GEM
thor (0.20.3)
thread_safe (0.3.6)
tilt (2.0.9)
tomlrb (1.3.0)
typhoeus (1.3.1)
ethon (>= 0.9.0)
tzinfo (1.2.5)
......@@ -274,6 +291,7 @@ DEPENDENCIES
html-proofer (~> 3.10.2)
kramdown (~> 1.10)
linter
mdl (= 0.6.0)
middleman (~> 4.3)
middleman-autoprefixer (~> 2.7.1)
middleman-blog (~> 4.0)
......@@ -283,6 +301,7 @@ DEPENDENCIES
mini_racer (~> 0.2.6)
nokogiri (>= 1.10.9)
openssl (~> 2.1.2)
pry-byebug (~> 3.9.0)
rack (~> 2.2.2)
rake (~> 12.3)
rspec (~> 3.5)
......
require 'colorize'
module Lint
class HandbookLinkLinter
# Directory that link paths are resolved against: two levels above this
# file's directory, then `sites/handbook/source/`.
HANDBOOK_DIRECTORY = File.join(File.expand_path(File.join('..', '..'), __dir__), 'sites/handbook/source/')

# Link directories that are never checked (compared for exact equality by
# `exception?`).
EXCEPTIONS = %w[
  /handbook/engineering/projects
  /handbook/product/product-categories
  /handbook/business-ops/order-processing
  /handbook/marketing/product-marketing/competitive/compete
  /handbook/hiring/charts/sales
].freeze

# Link directories that are currently skipped (compared as prefixes by
# `temporary_ignore?`).
# TODO: delete entries from this list and fix detected failures
TEMPORARY_IGNORE = %w[
  /handbook/acquisition-process
  /handbook/business-ops
  /handbook/customer-success
  /handbook/engineering/development
  /handbook/engineering/frontend/configure
  /handbook/engineering/infrastructure
  /handbook/engineering/ops
  /handbook/engineering/quality/guidelines/debugging-qa-test-failures
  /handbook/engineering/security
  /handbook/engineering/ux
  /handbook/finance
  /handbook/handbook-usage
  /handbook/hiring
  /handbook/incentives
  /handbook/internal-audit/sarbanes-oxley
  /handbook/legal/global-compliance
  /handbook/marketing/blog
  /handbook/marketing/campaigns
  /handbook/marketing/community-relations
  /handbook/marketing/corporate-marketing
  /handbook/marketing/events
  /handbook/marketing/growth-marketing
  /handbook/marketing/marketing-operations
  /handbook/marketing/product-marketing
  /handbook/marketing/revenue-marketing
  /handbook/marketing/technical-evangelism
  /handbook/on-call
  /handbook/paid-time-off
  /handbook/people-group
  /handbook/resellers
  /handbook/sales
  /handbook/security
  /handbook/source/handbook/product-development-flow
  /handbook/spending-company-money
  /handbook/support
  /handbook/tax/performance-indicators
  /handbook/tools-and-tips
  /handbook/total-rewards/benefits/benefits-survey
  /handbook/underperformance
  /handbook/use-cases
  /handbook/values
].freeze
# A reported failure: the location of the offending element plus a
# human-readable description.
#
# The object stands in for a plain line number: adding a number to it prints
# the description and yields the shifted line number.
# NOTE(review): presumably markdownlint adds an offset to every value returned
# by a rule's `check` block, which is what triggers `+` — confirm against the
# mdl version in use.
class LinterError
  # @param line [Integer] location reported for the failing element
  # @param description [String, nil] message printed when `+` is called
  def initialize(line, description: nil)
    @line = line
    @description = description
  end

  # Prints two blank lines followed by the description, then returns the
  # stored line shifted by `other`.
  #
  # The receiver is left untouched, so calling `+` repeatedly always offsets
  # from the original line instead of accumulating earlier offsets.
  #
  # @param other [Integer] offset to add to the stored line
  # @return [Integer] the stored line plus `other`
  def +(other)
    puts
    puts
    puts @description
    @line + other
  end
end
attr_reader :errors
# Stores the link elements to inspect and starts with an empty error list.
#
# @param links [Array] link elements; `check` reads `attr['href']` and
#   `options[:location]` from each of them
def initialize(links)
  @errors = []
  @links = links
end
# Validates every link and appends a LinterError to `errors` for each link
# whose target directory, file or anchor cannot be found.
#
# Links are skipped without an error when their directory is a redirect, an
# exception or temporarily ignored, and the anchor check is skipped for
# unsupported files.
#
# NOTE(review): the file lookup is a substring match (`include?`), so
# `index.html` matches `index.html.md`, but it also matches any other entry
# whose name merely contains the basename.
def check
  links.each do |element|
    attributes = extract_link_attributes(element.attr['href'])
    target_path = attributes[:path]
    relative_dirname = File.dirname(target_path)
    partial_filename = File.basename(target_path)
    absolute_dirname = File.join(HANDBOOK_DIRECTORY, relative_dirname)

    skipped = redirect?(relative_dirname) ||
              exception?(relative_dirname) ||
              temporary_ignore?(relative_dirname)
    next if skipped

    # Records one failure for the current element.
    report = lambda do |message|
      errors << LinterError.new(element.options[:location], description: message.red)
    end

    unless File.directory?(absolute_dirname)
      report.call("Cannot find directory #{absolute_dirname}")
      next
    end

    filename = Dir.children(absolute_dirname).find { |entry| entry.include?(partial_filename) }
    if filename.nil?
      report.call("Cannot find #{partial_filename} in #{absolute_dirname}")
      next
    end

    file_path = File.join(absolute_dirname, filename)
    unless File.file?(file_path)
      report.call("#{file_path} does not exist")
      next
    end

    next if unsupported_file?(file_path)
    next if correct_anchor?(file_path, attributes[:anchor])

    report.call("Anchor #{attributes[:anchor]} cannot be found in #{file_path}")
  end
end
private
attr_reader :links
# Tells whether `anchor` is among the anchors AnchorExtractor reports for the
# file. A link without an anchor is always considered correct.
#
# @param path_to_file [String] file the link points to
# @param anchor [String, nil] fragment taken from the link
# @return [Boolean]
def correct_anchor?(path_to_file, anchor)
  if anchor
    AnchorExtractor.instance.fetch(path_to_file).include?(anchor)
  else
    true
  end
end
# Tells whether `source` is related to a known redirect source: either a
# redirect starts with `source`, or `source` starts with a redirect.
#
# @param source [String] directory part of the link
# @return [Boolean]
def redirect?(source)
  redirects.each do |redirect|
    return true if redirect.start_with?(source) || source.start_with?(redirect)
  end
  false
end
# Tells whether `source` is exactly one of the EXCEPTIONS entries.
#
# @param source [String] directory part of the link
# @return [Boolean]
def exception?(source)
  EXCEPTIONS.any? { |exempt| exempt == source }
end
# Tells whether `source` starts with any of the TEMPORARY_IGNORE entries.
# The comparison is a plain string prefix, so `/handbook/sales` also covers
# `/handbook/salesforce`.
#
# @param source [String] directory part of the link
# @return [Boolean]
def temporary_ignore?(source)
  source.start_with?(*TEMPORARY_IGNORE)
end
# Tells whether the path ends with `.erb` or `.haml`; `check` does not verify
# anchors for such files.
#
# @param file_path [String]
# @return [Boolean]
def unsupported_file?(file_path)
  %w[.erb .haml].any? { |extension| file_path.end_with?(extension) }
end
# Splits a link into the path of the page it targets and its anchor.
#
# - The text after the first `#` (up to a second `#`, if any) is the anchor;
#   it is nil when the link has no fragment or an empty one.
# - A query string (`?...`) is dropped, because it is not part of the path
#   that is later looked up on disk.
# - `index.html` is appended when the path denotes a directory: it ends with
#   `/` (even if the last segment contains a dot, e.g. `/releases/v1.2/`) or
#   it has no file extension.
#
# @param href [String] link target, e.g. `/handbook/values/#credit`
# @return [Hash] `{ path: String, anchor: String or nil }`
def extract_link_attributes(href)
  location, anchor = href.split('#')
  path = location.sub(/\?.*\z/m, '')
  path = File.join(path, 'index.html') if path.end_with?('/') || File.extname(path).empty?

  { path: path, anchor: anchor }
end
# Returns the unique `sources` values of all entries in `data/redirects.yml`
# (path relative to the current working directory). The file is read on the
# first call and the result is memoized.
#
# @return [Array] flattened, de-duplicated redirect sources
def redirects
  return @redirects if @redirects

  entries = YAML.load_file('data/redirects.yml')
  sources = entries.map { |entry| entry['sources'] }
  @redirects = sources.flatten.uniq
end
end
# Collects the anchors available in a Markdown file: the ids of its headers
# (explicit, or generated from the header text) and the explicit ids of its
# list items. Results are memoized per file path in the singleton instance.
class AnchorExtractor
  include Singleton

  # Matches every character that is not a word character, a dash or a space.
  PUNCTUATION_REGEXP = /[^\p{Word}\- ]/u

  def initialize
    @anchors_for_file = {}
    super
  end

  # Returns the anchors of the file, parsing it only the first time the path
  # is requested.
  #
  # @param filepath [String] path of the Markdown file
  # @return [Array] anchors found in the file
  def fetch(filepath)
    anchors_for_file.fetch(filepath) do
      anchors_for_file[filepath] = extract_anchors(filepath)
    end
  end

  private

  attr_reader :anchors_for_file

  # Parses the file and lists header anchors followed by list-item ids.
  def extract_anchors(filepath)
    doc = MarkdownLint::Doc.new(File.read(filepath))
    seen = {}

    header_anchors = doc.find_type_elements(:header).map do |header|
      next header.attr['id'] if header.attr.key?('id')

      string_to_anchor(doc.extract_text(header).first, seen)
    end
    list_item_ids = doc.find_type_elements(:li).map { |li| li.attr['id'] }.compact

    header_anchors + list_item_ids
  end

  # Returns the text of the first `[text](target)` link in the string, or the
  # string itself when it contains no such link.
  def extract_text_from_link(string)
    string[/\[(.+?)\]\(.+\)/, 1] || string
  end

  # Turns header text into an anchor: lower-cased, punctuation removed,
  # leading non-word characters dropped, spaces replaced with dashes.
  # `used_ids` counts anchors already produced; a repeated anchor gets the
  # suffix `-1`, `-2`, ...
  def string_to_anchor(string, used_ids)
    text = extract_text_from_link(string).strip.downcase
    text = text.gsub(PUNCTUATION_REGEXP, '') # remove punctuation
    text = text.gsub(/^\W+/, '')
    slug = text.tr(' ', '-') # replace spaces with dash

    return "#{slug}-#{used_ids[slug] += 1}" if used_ids.key?(slug)

    used_ids[slug] = 0
    slug
  end
end
end
# Rule "LINK": runs the handbook link linter over every link of the document
# whose href starts with `/handbook` and returns the linter's errors.
# NOTE(review): `rule`, `tags` and `check` look like the markdownlint (mdl)
# rule DSL — confirm against the mdl version in use.
rule "LINK", "Link linter failure" do
  tags :links

  check do |doc|
    anchors = doc.find_type_elements(:a)
    handbook_links = anchors.select { |anchor| anchor.attr['href'].start_with?('/handbook') }

    Lint::HandbookLinkLinter.new(handbook_links).tap(&:check).errors
  end
end
require 'time'
require 'yaml'
require 'colorize'
require 'mdl'
require_relative '../devops_tool.rb'
require_relative '../redirect.rb'
require_relative '../team.rb'
......@@ -8,6 +9,7 @@ require_relative '../team.rb'
desc 'Run all lint tasks'
task lint: ['lint:scss',
'lint:yamllint',
'lint:handbook_links',
'lint:devops_tools:categories',
'lint:events:event_type',
'lint:events:date',
......@@ -59,6 +61,21 @@ namespace :lint do
end
end
# Runs the LINK rule from lib/lint/handbook_link_linter.rb over every
# Markdown file below sites/handbook/source/handbook.
#
# A SystemExit raised during the run is swallowed when it signals success, so
# the remaining tasks keep running; otherwise a hint is printed and the exit
# is re-raised.
desc 'Lint Handbook links'
task :handbook_links do
  puts ''
  puts '=> Lint Handbook links'

  markdown_files = Dir['sites/handbook/source/handbook/**/*.md']
  mdl_arguments = ['-u', 'lib/lint/handbook_link_linter.rb', '-r', 'LINK'] + markdown_files

  begin
    MarkdownLint.run(mdl_arguments)
  rescue SystemExit => exit_signal
    next if exit_signal.success?

    puts 'How-to fix the problem: https://about.gitlab.com/handbook/about/support/#handbook-links-and-anchors'.yellow
    raise exit_signal
  end
end
namespace :devops_tools do
desc "Ensure every devops tool has at least one valid category"
task :categories do
......
......@@ -14,11 +14,11 @@ title: "Data Program Level 2 Reference Solution"
`This page contains forward-looking content and may not accurately reflect current-state or planned feature sets or capabilities.`
Public companies need to reliably and predictably share key financial, customer, and growth metrics as well as analyze lead-to-cash and product idea-to-adoption processes to continually improve business performance. These activities are supported by capabilities defined in **Level 2** of the [Data Capability Model](/handbook/source/handbook/business-ops/data-team/direction/). To provide a realistic example and to serve as a reference for future development, this page presents the [Level 2 Data Solution](/handbook/business-ops/data-team/direction/#data-capability-model) for 'Product Geolocation Analysis'.
Public companies need to reliably and predictably share key financial, customer, and growth metrics as well as analyze lead-to-cash and product idea-to-adoption processes to continually improve business performance. These activities are supported by capabilities defined in **Level 2** of the [Data Capability Model](/handbook/business-ops/data-team/direction/index.html). To provide a realistic example and to serve as a reference for future development, this page presents the [Level 2 Data Solution](/handbook/business-ops/data-team/direction/#data-capability-model) for 'Product Geolocation Analysis'.
## Solution Overview - Product Geolocation Analysis
Understanding where your product is used around the world is an important step towards developing a more complete understanding of your customers, your product's global reach, and related location-aware insights.
Understanding where your product is used around the world is an important step towards developing a more complete understanding of your customers, your product's global reach, and related location-aware insights.
This data solution delivers three [Self-Service Data](/handbook/business-ops/data-team/direction/self-service/) capabilities:
1. Dashboard Viewer - a new SiSense dashboard to visualize GitLab deployments by Country, Territory, Month, and Year (Self-Service Dashboard)
......@@ -27,7 +27,7 @@ This data solution delivers three [Self-Service Data](/handbook/business-ops/dat
From a Data Platform perspective, the solution delivers:
1. an extension to the Enterprise Dimensional Model for GeoLocation analysis
1. a new `dim_country` table
1. a new `dim_country` table
1. testing and data validation extensions to the Data Pipeline Health dashboard
1. ERDs, dbt models, and related platform components
......@@ -68,18 +68,18 @@ Finally, this is the long-term automated solution for several ad-hoc issues comp
### Self-Service Dashboard Viewer
| Dashboard | Purpose |
| ----- | ----- |
| Dashboard | Purpose |
| ----- | ----- |
| Worldwide Product Growth | Visualize the adoption of GitLab by country, region, and time. |
| Data Health Dashboard for Geolocation Data | Data Health of the Geolocation data used to support this solution. |
| Data Health Dashboard for Geolocation Data | Data Health of the Geolocation data used to support this solution. |
### Self-Service Dashboard Developer
| Data Space | Description |
| ----- | ----- |
| Data Space | Description |
| ----- | ----- |
| Global | Contains a data model containing a 1-1 relationship with the Product Geolocation Analysis model detailed below |
### Self-Service SQL Analysis
### Self-Service SQL Analysis
#### Key Fields and Business Logic
......@@ -89,8 +89,8 @@ Finally, this is the long-term automated solution for several ad-hoc issues comp
#### Entity Relationship Diagrams
| Diagram/Entity | Grain | Purpose | Keywords |
| ----- | ----- | ----- | ----- |
| Diagram/Entity | Grain | Purpose | Keywords |
| ----- | ----- | ----- | ----- |
| [Product Geolocation Analysis](https://app.lucidchart.com/documents/view/be5f5dc8-8ad5-4586-af53-93ff5e00f720/0_0) | Activity By Day | Dimensions and Facts that can be used to analyze GitLab usage by country, territory, and time. | dim_date, dim_country, fct_country_activity_by_day |
| dim_date | Day | Central dimension for all dates. | |
| dim_country | ISO_Country | Central dimension for all countries and territories, sourced from ISO-3166 and GitLab Sales Territories | |
......@@ -105,20 +105,20 @@ SELECT
dc.country_name AS country_name,
SUM(f.num_page_views) AS number_of_page_views
FROM fct_country_activity_by_day f
JOIN dim_country dc
ON f.country_key = f.country_key
JOIN dim_country dc
ON f.country_key = f.country_key
WHERE dc.reporting_region = 'NORAM'
```
##### Top 100 namespaces by country in 2020
```sql
SELECT
SELECT
dc.country_name AS country_name,
f.namespace_key AS namespace_key,
SUM(f.num_page_views) AS number_of_total_page_views
FROM fct_country_activity_by_day f
JOIN dim_country dc
ON f.country_key = f.country_key
JOIN dim_country dc
ON f.country_key = f.country_key
WHERE dd.year = 2020
GROUP BY dc.country_name, f.namespace_key
PARTITION BY dc.country_name LIMIT 100
......@@ -136,7 +136,7 @@ See [dbt documentation](https://www.getdbt.com/) for a complete lineage graph.
graph LR
A[usage_ping-14dRR] --> 1{ETL}
B[raw.snowplow-24hRR] --> 1{ETL}
1 --> C[analytics.snowplow_unnested_events_90-24hRR]
1 --> C[analytics.snowplow_unnested_events_90-24hRR]
C --> 2{ETL}
F[edm.dim_date-static] --> 2{ETL}
G[edm.dim_user-24hRR] --> 2{ETL}
......
......@@ -65,7 +65,7 @@ The action items created from the “Renewal Review” meeting should be incorpo
For greater predictability across the customer base for renewal, upselling, downgrade, and churn, tracking Opportunity Health and Risks is incorporated into Gainsight. This process creates tighter collaboration with the overall CRO Organization and the rest of GitLab.
The [Renewal CTA](/handbook/source/handbook/customer-success/tam/renewals#timeframe) will include a step for the TAM to update the Opportunity information within Gainsight. The TAM can and should update the Opportunity Health and Risks throughout the renewal when something changes or because of new information.
The [Renewal CTA](#timeframe) will include a step for the TAM to update the Opportunity information within Gainsight. The TAM can and should update the Opportunity Health and Risks throughout the renewal when something changes or because of new information.
To access this information, TAMs should either:
* On the Renewal CTA, click the “1” next to “Call to Action” and select “New GS Opportunity”, OR
......@@ -80,15 +80,15 @@ This can then be discussed with the [Account Team](/handbook/customer-success/ac
Similar to Account Health, the TAM will set the health of the Opportunity. Nuances exist between Opportunities and Account, such as if a customer is happy with GitLab (Account) but is considering downgrading their renewal (Opportunity), or if there are two distinct Opportunities. However, the [Health assessment guidelines](/handbook/customer-success/tam/health-score-triage/#health-assessment-guidelines) are similar. Opportunity Health should be thought of in this way:
* Green - very likely to renew at or above current ARR
* Green - very likely to renew at or above current ARR
* Yellow - some risk of downgrade or churn
* Red - high probability of downgrade or churn
##### Risk Reason
* AzureDevOps - competitive risk due to MSFT
* Competition - risk of loss to a competitor
* Competition - risk of loss to a competitor
* Customer Sentiment - customer is unhappy with GitLab
* Lack of adoption - customer has not deeply adopted GitLab
* Lack of adoption - customer has not deeply adopted GitLab
* Lack of budget - customer is unlikely to have budgetary coverage for maintaining the current GitLab license
* Lack of engagement - customer is nonresponsive or rarely engages
* Loss of sponsor or champion - our key contact(s) left the company
......@@ -104,4 +104,3 @@ Similar to Account Health, the TAM will set the health of the Opportunity. Nuanc
##### TAM Notes
This is where the TAM can enter notes on this specific opportunity; the notes are saved to the Opportunity record.
......@@ -60,7 +60,7 @@ In this stage, the PM establishes new work that they would like to see in the Re
##### Problem validation
The PM tries to establish that the issue describes a problem with the Release feature that is customer-relevant (internal and external),
or has some other clear merit (e.g. a technical improvement in the backend). The PM will draw on other group members, customers and the UX organization where necessary.
or has some other clear merit (e.g. a technical improvement in the backend). The PM will draw on other group members, customers and the UX organization where necessary.
If the problem is validated, the issue is moved forward to the next stage, otherwise it is closed with a short explanation on _why_ the problem will not be considered. This explanation should also contain a link [to the category strategy](/handbook/product/product-processes/#category-direction).
##### Solution validation
......@@ -68,7 +68,7 @@ If the problem is validated, the issue is moved forward to the next stage, other
Once the problem is validated, the PM will consider creating an Epic if [the scope of the solution is large enough](/handbook/product/product-processes/#epics-for-a-single-iteration).
They will then reach out to the Engineering Manager (EM) for a technical contact in the team. The EM assigns [ownership](#epic-ownership) to an engineer. The engineer will work with the PM to determine a technical solution for the problem.
If designs are required, the PM will add the `workflow::design` label and collaborate with UX to create the user stories required to solve the customer problem. Story mapping is a recommended way to visualize the user stories of a new feature in a holistic way. The story map can even be annotated to indicate which user stories need to be included in each release increment. More information on this process is noted in the [Product Development Workflow](/handbook/product-development-flow/#validation-phase-3-solution-validation). In some situations, the PM and UX may decide the issue is ready for `cicd::active` and will move the issue to `workflow::scheduling` for EM review.
If designs are required, the PM will add the `workflow::design` label and collaborate with UX to create the user stories required to solve the customer problem. Story mapping is a recommended way to visualize the user stories of a new feature in a holistic way. The story map can even be annotated to indicate which user stories need to be included in each release increment. More information on this process is noted in the [Product Development Workflow](/handbook/product-development-flow/#validation-phase-4-solution-validation). In some situations, the PM and UX may decide the issue is ready for `cicd::active` and will move the issue to `workflow::scheduling` for EM review.
Occasionally, a proof-of-concept (POC) is necessary to determine a feasible technical path. When one is required, the PM
will create a POC issue that contains the context of the research to be conducted along with the goals of the POC. This
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment