Commit ec097ac1 authored by Douwe Maan's avatar Douwe Maan

Merge branch 'ee-30272-bvl-reject-more-namespaces' into 'master'

Port of "Reject more reserved paths" to EE

See merge request !1778
parents d368100f 7cb36159
Pipeline #8000823 passed with stages
in 148 minutes and 1 second
......@@ -78,6 +78,9 @@ module CacheMarkdownField
def cached_html_up_to_date?(markdown_field)
html_field = cached_markdown_fields.html_field(markdown_field)
cached = !cached_html_for(markdown_field).nil? && !__send__(markdown_field).nil?
return false unless cached
markdown_changed = attribute_changed?(markdown_field) || false
html_changed = attribute_changed?(html_field) || false
......
......@@ -34,7 +34,7 @@ class Namespace < ActiveRecord::Base
validates :path,
presence: true,
length: { maximum: 255 },
namespace: true
dynamic_path: true
validate :nesting_level_allowed
......@@ -225,6 +225,10 @@ class Namespace < ActiveRecord::Base
Project.inside_path(full_path)
end
def has_parent?
parent.present?
end
private
def repository_storage_paths
......
......@@ -205,13 +205,14 @@ class Project < ActiveRecord::Base
message: Gitlab::Regex.project_name_regex_message }
validates :path,
presence: true,
project_path: true,
dynamic_path: true,
length: { maximum: 255 },
format: { with: Gitlab::Regex.project_path_regex,
message: Gitlab::Regex.project_path_regex_message }
message: Gitlab::Regex.project_path_regex_message },
uniqueness: { scope: :namespace_id }
validates :namespace, presence: true
validates :name, uniqueness: { scope: :namespace_id }
validates :path, uniqueness: { scope: :namespace_id }
validates :import_url, addressable_url: true, if: :external_import?
validates :import_url, importable_url: true, if: [:external_import?, :import_url_changed?]
validates :star_count, numericality: { greater_than_or_equal_to: 0 }
......
......@@ -128,7 +128,7 @@ class User < ActiveRecord::Base
presence: true,
numericality: { greater_than_or_equal_to: 0, less_than_or_equal_to: Gitlab::Database::MAX_INT_VALUE }
validates :username,
namespace: true,
dynamic_path: true,
presence: true,
uniqueness: { case_sensitive: false }
......
# DynamicPathValidator
#
# Custom validator for GitLab path values.
# These paths are assigned to `Namespace` (& `Group` as a subclass) & `Project`
#
# Values are checked for formatting and exclusion from a list of reserved path
# names.
class DynamicPathValidator < ActiveModel::EachValidator
# All routes that appear on the top level must be listed here.
# This will make sure that groups cannot be created with these names
# as these routes would be masked by the paths already in place.
#
# Example:
# /api/api-project
#
# the path `api` shouldn't be allowed because it would be masked by `api/*`
#
TOP_LEVEL_ROUTES = %w[
-
.well-known
abuse_reports
admin
all
api
assets
autocomplete
ci
dashboard
explore
files
groups
health_check
help
hooks
import
invites
issues
jwt
koding
member
merge_requests
new
notes
notification_settings
oauth
profile
projects
public
repository
robots.txt
s
search
sent_notifications
services
snippets
teams
u
unicorn_test
unsubscribes
uploads
users
].freeze
# This list should contain all words following `/*namespace_id/:project_id` in
# routes that contain a second wildcard.
#
# Example:
# /*namespace_id/:project_id/badges/*ref/build
#
# If `badges` was allowed as a project/group name, we would not be able to access the
# `badges` route for those projects:
#
# Consider a namespace with path `foo/bar` and a project called `badges`.
# The route to the build badge would then be `/foo/bar/badges/badges/master/build.svg`
#
# When accessing this path the route would be matched to the `badges` path
# with the following params:
# - namespace_id: `foo`
# - project_id: `bar`
# - ref: `badges/master`
#
# Failing to find the project, this would result in a 404.
#
# By rejecting `badges` the router can _count_ on the fact that `badges` will
# be preceded by the `namespace/project`.
WILDCARD_ROUTES = %w[
badges
blame
blob
builds
commits
create
create_dir
edit
environments/folders
files
find_file
gitlab-lfs/objects
info/lfs/objects
new
preview
raw
refs
tree
update
wikis
].freeze
# These are all the paths that follow `/groups/*id/ or `/groups/*group_id`
# We need to reject these because we have a `/groups/*id` page that is the same
# as the `/*id`.
#
# If we would allow a subgroup to be created with the name `activity` then
# this group would not be accessible through `/groups/parent/activity` since
# this would map to the activity-page of it's parent.
GROUP_ROUTES = %w[
activity
avatar
edit
group_members
issues
labels
merge_requests
milestones
projects
subgroups
analytics
audit_events
hooks
ldap
ldap_group_links
notification_setting
pipeline_quota
].freeze
CHILD_ROUTES = (WILDCARD_ROUTES | GROUP_ROUTES).freeze
def self.without_reserved_wildcard_paths_regex
@without_reserved_wildcard_paths_regex ||= regex_excluding_child_paths(WILDCARD_ROUTES)
end
def self.without_reserved_child_paths_regex
@without_reserved_child_paths_regex ||= regex_excluding_child_paths(CHILD_ROUTES)
end
# This is used to validate a full path.
# It doesn't match paths
# - Starting with one of the top level words
# - Containing one of the child level words in the middle of a path
def self.regex_excluding_child_paths(child_routes)
reserved_top_level_words = Regexp.union(TOP_LEVEL_ROUTES)
not_starting_in_reserved_word = %r{\A/?(?!(#{reserved_top_level_words})(/|\z))}
reserved_child_level_words = Regexp.union(child_routes)
not_containing_reserved_child = %r{(?!\S+/(#{reserved_child_level_words})(/|\z))}
%r{#{not_starting_in_reserved_word}
#{not_containing_reserved_child}
#{Gitlab::Regex.full_namespace_regex}}x
end
def self.valid?(path)
path =~ Gitlab::Regex.full_namespace_regex && !full_path_reserved?(path)
end
def self.full_path_reserved?(path)
path = path.to_s.downcase
_project_part, namespace_parts = path.reverse.split('/', 2).map(&:reverse)
wildcard_reserved?(path) || child_reserved?(namespace_parts)
end
def self.child_reserved?(path)
return false unless path
path !~ without_reserved_child_paths_regex
end
def self.wildcard_reserved?(path)
return false unless path
path !~ without_reserved_wildcard_paths_regex
end
delegate :full_path_reserved?,
:child_reserved?,
to: :class
def path_reserved_for_record?(record, value)
full_path = record.respond_to?(:full_path) ? record.full_path : value
# For group paths the entire path cannot contain a reserved child word
# The path doesn't contain the last `_project_part` so we need to validate
# if the entire path.
# Example:
# A *group* with full path `parent/activity` is reserved.
# A *project* with full path `parent/activity` is allowed.
if record.is_a? Group
child_reserved?(full_path)
else
full_path_reserved?(full_path)
end
end
def validate_each(record, attribute, value)
unless value =~ Gitlab::Regex.namespace_regex
record.errors.add(attribute, Gitlab::Regex.namespace_regex_message)
return
end
if path_reserved_for_record?(record, value)
record.errors.add(attribute, "#{value} is a reserved name")
end
end
end
# NamespaceValidator
#
# Custom validator for GitLab namespace values.
#
# Values are checked for formatting and exclusion from a list of reserved path
# names.
class NamespaceValidator < ActiveModel::EachValidator
RESERVED = %w[
.well-known
admin
all
assets
ci
dashboard
files
groups
help
hooks
issues
merge_requests
new
notes
profile
projects
public
repository
robots.txt
s
search
services
snippets
teams
u
unsubscribes
users
].freeze
WILDCARD_ROUTES = %w[tree commits wikis new edit create update logs_tree
preview blob blame raw files create_dir find_file
artifacts graphs refs badges].freeze
STRICT_RESERVED = (RESERVED + WILDCARD_ROUTES).freeze
def self.valid?(value)
!reserved?(value) && follow_format?(value)
end
def self.reserved?(value, strict: false)
if strict
STRICT_RESERVED.include?(value)
else
RESERVED.include?(value)
end
end
def self.follow_format?(value)
value =~ Gitlab::Regex.namespace_regex
end
delegate :reserved?, :follow_format?, to: :class
def validate_each(record, attribute, value)
unless follow_format?(value)
record.errors.add(attribute, Gitlab::Regex.namespace_regex_message)
end
strict = record.is_a?(Group) && record.parent_id
if reserved?(value, strict: strict)
record.errors.add(attribute, "#{value} is a reserved name")
end
end
end
# ProjectPathValidator
#
# Custom validator for GitLab project path values.
#
# Values are checked for formatting and exclusion from a list of reserved path
# names.
class ProjectPathValidator < ActiveModel::EachValidator
# All project routes with wildcard argument must be listed here.
# Otherwise it can lead to routing issues when route considered as project name.
#
# Example:
# /group/project/tree/deploy_keys
#
# without tree as reserved name routing can match 'group/project' as group name,
# 'tree' as project name and 'deploy_keys' as route.
#
RESERVED = (NamespaceValidator::STRICT_RESERVED -
%w[dashboard help ci admin search notes services assets profile public]).freeze
def self.valid?(value)
!reserved?(value)
end
def self.reserved?(value)
RESERVED.include?(value)
end
delegate :reserved?, to: :class
def validate_each(record, attribute, value)
if reserved?(value)
record.errors.add(attribute, "#{value} is a reserved name")
end
end
end
---
title: Improve validation of namespace & project paths
merge_request: 10413
author:
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class RenameReservedDynamicPaths < ActiveRecord::Migration
include Gitlab::Database::RenameReservedPathsMigration::V1
DOWNTIME = false
disable_ddl_transaction!
DISALLOWED_ROOT_PATHS = %w[
-
abuse_reports
api
autocomplete
explore
health_check
import
invites
jwt
koding
member
notification_settings
oauth
sent_notifications
unicorn_test
uploads
users
]
DISALLOWED_WILDCARD_PATHS = %w[
environments/folders
gitlab-lfs/objects
info/lfs/objects
]
DISSALLOWED_GROUP_PATHS = %w[
activity
avatar
group_members
labels
milestones
subgroups
analytics
audit_events
hooks
ldap
ldap_group_links
notification_setting
pipeline_quota
]
def up
rename_root_paths(DISALLOWED_ROOT_PATHS)
rename_wildcard_paths(DISALLOWED_WILDCARD_PATHS)
rename_child_paths(DISSALLOWED_GROUP_PATHS)
end
def down
# nothing to do
end
end
......@@ -270,3 +270,28 @@ end
When doing so be sure to explicitly set the model's table name so it's not
derived from the class name or namespace.
### Renaming reserved paths
When a new route for projects is introduced that could conflict with any
existing records. The path for this records should be renamed, and the
related data should be moved on disk.
Since we had to do this a few times already, there are now some helpers to help
with this.
To use this you can include `Gitlab::Database::RenameReservedPathsMigration::V1`
in your migration. This will provide 3 methods which you can pass one or more
paths that need to be rejected.
**`rename_root_paths`**: This will rename the path of all _namespaces_ with the
given name that don't have a `parent_id`.
**`rename_child_paths`**: This will rename the path of all _namespaces_ with the
given name that have a `parent_id`.
**`rename_wildcard_paths`**: This will rename the path of all _projects_, and all
_namespaces_ that have a `project_id`.
The `path` column for these rows will be renamed to their previous value followed
by an integer. For example: `users` would turn into `users0`
......@@ -2,16 +2,8 @@ class GroupUrlConstrainer
def matches?(request)
id = request.params[:id]
return false unless valid?(id)
return false unless DynamicPathValidator.valid?(id)
Group.find_by_full_path(id).present?
end
private
def valid?(id)
id.split('/').all? do |namespace|
NamespaceValidator.valid?(namespace)
end
end
end
......@@ -4,9 +4,7 @@ class ProjectUrlConstrainer
project_path = request.params[:project_id] || request.params[:id]
full_path = namespace_path + '/' + project_path
unless ProjectPathValidator.valid?(project_path)
return false
end
return false unless DynamicPathValidator.valid?(full_path)
Project.find_by_full_path(full_path).present?
end
......
......@@ -498,6 +498,29 @@ module Gitlab
columns(table).find { |column| column.name == name }
end
# This will replace the first occurance of a string in a column with
# the replacement
# On postgresql we can use `regexp_replace` for that.
# On mysql we find the location of the pattern, and overwrite it
# with the replacement
def replace_sql(column, pattern, replacement)
quoted_pattern = Arel::Nodes::Quoted.new(pattern.to_s)
quoted_replacement = Arel::Nodes::Quoted.new(replacement.to_s)
if Database.mysql?
locate = Arel::Nodes::NamedFunction.
new('locate', [quoted_pattern, column])
insert_in_place = Arel::Nodes::NamedFunction.
new('insert', [column, locate, pattern.size, quoted_replacement])
Arel::Nodes::SqlLiteral.new(insert_in_place.to_sql)
else
replace = Arel::Nodes::NamedFunction.
new("regexp_replace", [column, quoted_pattern, quoted_replacement])
Arel::Nodes::SqlLiteral.new(replace.to_sql)
end
end
end
end
end
# This module can be included in migrations to make it easier to rename paths
# of `Namespace` & `Project` models certain paths would become `reserved`.
#
# If the way things are stored on the filesystem related to namespaces and
# projects ever changes. Don't update this module, or anything nested in `V1`,
# since it needs to keep functioning for all migrations using it using the state
# that the data is in at the time. Instead, create a `V2` module that implements
# the new way of reserving paths.
module Gitlab
module Database
module RenameReservedPathsMigration
module V1
def self.included(kls)
kls.include(MigrationHelpers)
end
def rename_wildcard_paths(one_or_more_paths)
rename_child_paths(one_or_more_paths)
paths = Array(one_or_more_paths)
RenameProjects.new(paths, self).rename_projects
end
def rename_child_paths(one_or_more_paths)
paths = Array(one_or_more_paths)
RenameNamespaces.new(paths, self).rename_namespaces(type: :child)
end
def rename_root_paths(paths)
paths = Array(paths)
RenameNamespaces.new(paths, self).rename_namespaces(type: :top_level)
end
end
end
end
end
module Gitlab
module Database
module RenameReservedPathsMigration
module V1
module MigrationClasses
module Routable
def full_path
if route && route.path.present?
@full_path ||= route.path
else
update_route if persisted?
build_full_path
end
end
def build_full_path
if parent && path
parent.full_path + '/' + path
else
path
end
end
def update_route
prepare_route
route.save
end
def prepare_route
route || build_route(source: self)
route.path = build_full_path
@full_path = nil
end
end
class Namespace < ActiveRecord::Base
include MigrationClasses::Routable
self.table_name = 'namespaces'
belongs_to :parent,
class_name: "#{MigrationClasses.name}::Namespace"
has_one :route, as: :source
has_many :children,
class_name: "#{MigrationClasses.name}::Namespace",
foreign_key: :parent_id
# Overridden to have the correct `source_type` for the `route` relation
def self.name
'Namespace'
end
end
class Route < ActiveRecord::Base
self.table_name = 'routes'
belongs_to :source, polymorphic: true
end
class Project < ActiveRecord::Base
include MigrationClasses::Routable
has_one :route, as: :source
self.table_name = 'projects'
def repository_storage_path
Gitlab.config.repositories.storages[repository_storage]['path']
end
# Overridden to have the correct `source_type` for the `route` relation
def self.name
'Project'
end
end
end
end
end
end
end
module Gitlab
module Database
module RenameReservedPathsMigration
module V1
class RenameBase
attr_reader :paths, :migration
delegate :update_column_in_batches,
:replace_sql,
to: :migration
def initialize(paths, migration)
@paths = paths
@migration = migration
end
def path_patterns
@path_patterns ||= paths.map { |path| "%#{path}" }
end
def rename_path_for_routable(routable)
old_path = routable.path
old_full_path = routable.full_path
# Only remove the last occurrence of the path name to get the parent namespace path
namespace_path = remove_last_occurrence(old_full_path, old_path)
new_path = rename_path(namespace_path, old_path)
new_full_path = join_routable_path(namespace_path, new_path)
# skips callbacks & validations
routable.class.where(id: routable).
update_all(path: new_path)
rename_routes(old_full_path, new_full_path)
[old_full_path, new_full_path]
end
def rename_routes(old_full_path, new_full_path)
replace_statement = replace_sql(Route.arel_table[:path],
old_full_path,
new_full_path)
update_column_in_batches(:routes, :path, replace_statement) do |table, query|
query.where(MigrationClasses::Route.arel_table[:path].matches("#{old_full_path}%"))
end
end
def rename_path(namespace_path, path_was)
counter = 0