path_regex.rb 8.76 KB
Newer Older
1 2
# frozen_string_literal: true

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
module Gitlab
  module PathRegex
    extend self

    # All routes that appear on the top level must be listed here.
    # This makes sure that groups cannot be created with these names,
    # as their routes would be masked by the paths already in place.
    #
    # Example:
    #   /api/api-project
    #
    #   The path `api` must not be allowed as a group name because it would be
    #   masked by `api/*`.
    #
    TOP_LEVEL_ROUTES = %w[
      -
      .well-known
      404.html
      422.html
      500.html
      502.html
      503.html
      abuse_reports
      admin
      api
      apple-touch-icon-precomposed.png
      apple-touch-icon.png
      assets
      autocomplete
      dashboard
      deploy.html
      explore
      favicon.ico
      favicon.png
      files
      groups
      health_check
      help
      import
      invites
      jwt
      login
      oauth
      profile
      projects
      public
      robots.txt
      s
      search
      sent_notifications
      sitemap.xml
      sitemap.xml.gz
      slash-command-logo.png
      snippets
      unsubscribes
      uploads
      users
      v2
    ].freeze

62 63 64 65
    # NOTE: Do not add new items to this list unless necessary as this will
    # cause conflicts with existing namespaced routes for groups or projects.
    # See https://docs.gitlab.com/ee/development/routing.html#project-routes
    #
    # This list should contain all words following `/*namespace_id/:project_id` in
    # routes that contain a second wildcard.
    #
    # Example:
    #   /*namespace_id/:project_id/badges/*ref/build
    #
    # If `badges` was allowed as a project/group name, we would not be able to access the
    # `badges` route for those projects.
    #
    # Consider a namespace with path `foo/bar` and a project called `badges`.
    # The route to the build badge would then be `/foo/bar/badges/badges/master/build.svg`
    #
    # When accessing this path the route would be matched to the `badges` path
    # with the following params:
    #   - namespace_id: `foo`
    #   - project_id: `bar`
    #   - ref: `badges/master`
    #
    # Failing to find the project, this would result in a 404.
    #
    # By rejecting `badges` the router can _count_ on the fact that `badges` will
    # be preceded by the `namespace/project`.
    PROJECT_WILDCARD_ROUTES = %w[
      -
      badges
      blame
      blob
      builds
      commits
      create
      create_dir
      edit
      environments/folders
      files
      find_file
      gitlab-lfs/objects
      info/lfs/objects
      new
      preview
      raw
      refs
      tree
      update
      wikis
    ].freeze

112 113 114 115
    # NOTE: Do not add new items to this list unless necessary as this will
    # cause conflicts with existing namespaced routes for groups or projects.
    # See https://docs.gitlab.com/ee/development/routing.html#group-routes
    #
    # These are all the paths that follow `/groups/*id/` or `/groups/*group_id`.
    # We need to reject these because we have a `/groups/*id` page that is the same
    # as the `/*id`.
    #
    # If we allowed a subgroup to be created with the name `activity`, then
    # this group would not be accessible through `/groups/parent/activity` since
    # this would map to the activity page of its parent.
    GROUP_ROUTES = %w[
      -
    ].freeze

    # Words that may not be used as a project path: exactly the project wildcard routes.
    ILLEGAL_PROJECT_PATH_WORDS = PROJECT_WILDCARD_ROUTES
    # Words that may not be used as a (sub)group path: the project wildcard routes
    # followed by the group routes, with duplicates dropped and order preserved.
    ILLEGAL_GROUP_PATH_WORDS = [*PROJECT_WILDCARD_ROUTES, *GROUP_ROUTES].uniq.freeze

    # The namespace regex is used in JavaScript to validate usernames in the "Register" form. However, JavaScript
    # does not support the negative lookbehind assertion (?<!) that disallows usernames ending in `.git` and `.atom`.
    # Since this is a non-trivial problem to solve in JavaScript (it would heavily complicate the regex, require
    # modifying view code to allow non-regex validations, etc.), `NAMESPACE_FORMAT_REGEX_JS` serves as a
    # JavaScript-compatible version of `NAMESPACE_FORMAT_REGEX`, with the negative lookbehind assertion removed.
    # This means that client-side validation will pass for usernames ending in `.atom` and `.git`, but these
    # will be caught by the server-side validation.
136 137 138
    # Character class allowed as the first character of a path segment.
    PATH_START_CHAR = '[a-zA-Z0-9_\.]'
    # Pattern source for a path segment: a start character followed by any number of
    # letters, digits, '_', '-' or '.'.
    PATH_REGEX_STR = [PATH_START_CHAR, '[a-zA-Z0-9_\-\.]*'].join
    # Lookbehind-free namespace pattern source: either a segment that does not end
    # in '.', or a single letter, digit or '_'.
    NAMESPACE_FORMAT_REGEX_JS = [PATH_REGEX_STR, '[a-zA-Z0-9_\-]|[a-zA-Z0-9_]'].join
139 140 141 142

    # Zero-width negative lookbehind: fails at any position immediately preceded by
    # `.git` or `.atom`. Appended to the patterns below to forbid those suffixes.
    NO_SUFFIX_REGEX = /(?<!\.git|\.atom)/.freeze
    # Unanchored namespace path pattern: the JS-compatible source wrapped in a
    # non-capturing group, followed by the suffix restriction.
    NAMESPACE_FORMAT_REGEX = /(?:#{NAMESPACE_FORMAT_REGEX_JS})#{NO_SUFFIX_REGEX}/.freeze
    # Unanchored project path pattern: PATH_REGEX_STR followed by the suffix restriction.
    PROJECT_PATH_FORMAT_REGEX = /(?:#{PATH_REGEX_STR})#{NO_SUFFIX_REGEX}/.freeze
143
    # Unanchored pattern for a full (nested) namespace path: between zero and
    # Namespace::NUMBER_OF_ANCESTORS_ALLOWED `segment/` prefixes followed by one final
    # segment, each segment matching NAMESPACE_FORMAT_REGEX. `Namespace` is defined
    # outside this file.
    FULL_NAMESPACE_FORMAT_REGEX = %r{(#{NAMESPACE_FORMAT_REGEX}/){,#{Namespace::NUMBER_OF_ANCESTORS_ALLOWED}}#{NAMESPACE_FORMAT_REGEX}}.freeze
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185

    # Memoized, unanchored pattern for a root namespace segment: a
    # NAMESPACE_FORMAT_REGEX match that is not one of TOP_LEVEL_ROUTES
    # (compared case-insensitively) immediately followed by `/`.
    def root_namespace_route_regex
      return @root_namespace_route_regex if @root_namespace_route_regex

      reserved = Regexp.new(Regexp.union(TOP_LEVEL_ROUTES).source, Regexp::IGNORECASE)
      pattern = %r{
        (?!(#{reserved})/)
        #{NAMESPACE_FORMAT_REGEX}
      }x

      @root_namespace_route_regex = single_line_regexp(pattern)
    end

    # Memoized, unanchored pattern for a full namespace path: a root namespace
    # segment followed by any number of `/segment` parts, where no nested segment
    # may be one of ILLEGAL_GROUP_PATH_WORDS (case-insensitive) followed by `/`.
    def full_namespace_route_regex
      return @full_namespace_route_regex if @full_namespace_route_regex

      reserved = Regexp.new(Regexp.union(ILLEGAL_GROUP_PATH_WORDS).source, Regexp::IGNORECASE)
      pattern = %r{
        #{root_namespace_route_regex}
        (?:
          /
          (?!#{reserved}/)
          #{NAMESPACE_FORMAT_REGEX}
        )*
      }x

      @full_namespace_route_regex = single_line_regexp(pattern)
    end

    # Memoized, unanchored pattern for a project path segment: a
    # PROJECT_PATH_FORMAT_REGEX match that is not one of ILLEGAL_PROJECT_PATH_WORDS
    # (case-insensitive) immediately followed by `/`.
    def project_route_regex
      return @project_route_regex if @project_route_regex

      reserved = Regexp.new(Regexp.union(ILLEGAL_PROJECT_PATH_WORDS).source, Regexp::IGNORECASE)
      pattern = %r{
        (?!(#{reserved})/)
        #{PROJECT_PATH_FORMAT_REGEX}
      }x

      @project_route_regex = single_line_regexp(pattern)
    end

    # Memoized, frozen pattern: the project route pattern followed by a literal `.git`.
    def project_git_route_regex
      return @project_git_route_regex if @project_git_route_regex

      project = project_route_regex
      @project_git_route_regex = %r{#{project}\.git}.freeze
    end

186 187 188 189
    # Memoized, frozen pattern: a path segment (PATH_REGEX_STR) followed by a literal `.wiki`.
    def project_wiki_git_route_regex
      return @project_wiki_git_route_regex if @project_wiki_git_route_regex

      @project_wiki_git_route_regex = %r{#{PATH_REGEX_STR}\.wiki}.freeze
    end

190 191 192 193 194 195 196 197
    # Memoized pattern anchored to the whole string: a full namespace path
    # followed by a trailing `/`.
    def full_namespace_path_regex
      return @full_namespace_path_regex if @full_namespace_path_regex

      namespace = full_namespace_route_regex
      @full_namespace_path_regex = %r{\A#{namespace}/\z}
    end

    # Memoized pattern anchored to the whole string: `<full namespace>/<project>/`
    # (note the required trailing `/`).
    def full_project_path_regex
      return @full_project_path_regex if @full_project_path_regex

      namespace = full_namespace_route_regex
      project = project_route_regex
      @full_project_path_regex = %r{\A#{namespace}/#{project}/\z}
    end

Tiago Botelho's avatar
Tiago Botelho committed
198 199 200 201
    # Memoized pattern anchored to the whole string for `[/]<namespace>/<project>.git`.
    # Exposes the named captures `namespace_path` and `project_path`.
    def full_project_git_path_regex
      return @full_project_git_path_regex if @full_project_git_path_regex

      namespace = full_namespace_route_regex
      project = project_route_regex
      @full_project_git_path_regex = %r{\A\/?(?<namespace_path>#{namespace})\/(?<project_path>#{project})\.git\z}
    end

202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
    # Memoized, frozen NAMESPACE_FORMAT_REGEX anchored to the whole string.
    def namespace_format_regex
      return @namespace_format_regex if @namespace_format_regex

      @namespace_format_regex = %r{\A#{NAMESPACE_FORMAT_REGEX}\z}.freeze
    end

    # Human-readable description of the namespace path format rules.
    #
    # @return [String]
    def namespace_format_message
      # No line continuation after the last literal: a trailing `\` would splice
      # the string onto the `end` keyword's line.
      "can contain only letters, digits, '_', '-' and '.'. " \
      "Cannot start with '-' or end in '.', '.git' or '.atom'."
    end

    # Memoized, frozen PROJECT_PATH_FORMAT_REGEX anchored to the whole string.
    def project_path_format_regex
      return @project_path_format_regex if @project_path_format_regex

      @project_path_format_regex = %r{\A#{PROJECT_PATH_FORMAT_REGEX}\z}.freeze
    end

    # Human-readable description of the project path format rules.
    #
    # @return [String]
    def project_path_format_message
      # No line continuation after the last literal: a trailing `\` would splice
      # the string onto the `end` keyword's line.
      "can contain only letters, digits, '_', '-' and '.'. " \
      "Cannot start with '-', end in '.git' or end in '.atom'"
    end

    # Memoized, frozen, unanchored pattern capturing an archive format extension:
    # zip, tar, the gzip forms (tar.gz, tgz, gz) and the bzip2 forms
    # (tar.bz2, tbz, tbz2, tb2, bz2).
    def archive_formats_regex
      return @archive_formats_regex if @archive_formats_regex

      @archive_formats_regex = %r{(zip|tar|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)}.freeze
    end

    # Memoized, unanchored pattern for a valid git reference name. The result is
    # passed through single_line_regexp, which strips the `(?# ...)` comments and
    # whitespace and returns a frozen regex.
    #
    # Structure of the pattern:
    #   - a negative lookahead rejecting a leading `/` and, anywhere later in the
    #     text, `/.`, `..`, `//`, `@{` or a backslash;
    #   - one or more characters other than \000-\040, \177, `~`, `^`, `:`, `?`,
    #     `*` and `[`;
    #   - negative lookbehinds rejecting a match ending in `.lock`, `/` or `.`.
    #
    # The "rule #N" notes inside the pattern refer to the numbered rules of
    # git-check-ref-format (linked below).
    def git_reference_regex
      # Valid git ref regex, see:
      # https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

      @git_reference_regex ||= single_line_regexp %r{
        (?!
           (?# doesn't begins with)
           \/|                    (?# rule #6)
           (?# doesn't contain)
           .*(?:
              [\/.]\.|            (?# rule #1,3)
              \/\/|               (?# rule #6)
              @\{|                (?# rule #8)
              \\                  (?# rule #9)
           )
        )
        [^\000-\040\177~^:?*\[]+  (?# rule #4-5)
        (?# doesn't end with)
        (?<!\.lock)               (?# rule #1)
        (?<![\/.])                (?# rule #6-7)
      }x
    end

248 249 250 251 252 253 254 255
    # Pattern anchored to the whole string that matches either a personal snippet
    # repository path (`snippets/<digits>`) or a project snippet repository path
    # (`<namespace>/<project>/snippets/<digits>`).
    #
    # Memoized like the other public accessors in this module, so the regex is
    # compiled once instead of on every call.
    #
    # @return [Regexp] frozen regex
    def full_snippets_repository_path_regex
      @full_snippets_repository_path_regex ||=
        %r{\A(#{personal_snippet_repository_path_regex}|#{project_snippet_repository_path_regex})\z}.freeze
    end

    # Unanchored pattern matching either the personal snippets path (`snippets`)
    # or a project snippets path (`<namespace>/<project>/snippets`).
    #
    # Memoized like the other public accessors in this module, so the regex is
    # compiled once instead of on every call.
    #
    # @return [Regexp] frozen regex
    def personal_and_project_snippets_path_regex
      @personal_and_project_snippets_path_regex ||=
        %r{#{personal_snippet_path_regex}|#{project_snippet_path_regex}}.freeze
    end

256 257
    private

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
    # Unanchored pattern for the literal path segment `snippets`.
    def personal_snippet_path_regex
      %r{snippets}
    end

    # Unanchored pattern for a personal snippet repository path: the personal
    # snippets path followed by `/` and one or more digits.
    def personal_snippet_repository_path_regex
      snippets = personal_snippet_path_regex
      %r{#{snippets}/\d+}
    end

    # Unanchored pattern for a project snippets path:
    # `<full namespace>/<project>/snippets`.
    def project_snippet_path_regex
      namespace = full_namespace_route_regex
      project = project_route_regex
      %r{#{namespace}/#{project}/snippets}
    end

    # Unanchored pattern for a project snippet repository path: the project
    # snippets path followed by `/` and one or more digits.
    def project_snippet_repository_path_regex
      snippets = project_snippet_path_regex
      %r{#{snippets}/\d+}
    end

274 275
    # Turns a multiline extended regexp into a single line one,
    # because `rake routes` breaks on multiline regexes.
    #
    # Non-empty `(?# ...)` comment groups and all whitespace are removed from the
    # source, and the EXTENDED flag is cleared on the result; every other option
    # of the input is kept. Intended for regexes written with the `x` flag, where
    # whitespace carries no meaning.
    #
    # @param regex [Regexp] the (extended) regex to flatten
    # @return [Regexp] a frozen regex with the flattened source
    def single_line_regexp(regex)
      flattened = regex.source.gsub(/\(\?#.+?\)/, '').gsub(/\s+/, '')

      # Mask the flag off rather than XOR-ing it: XOR would switch EXTENDED *on*
      # for an input that did not have it set.
      Regexp.new(flattened, regex.options & ~Regexp::EXTENDED).freeze
    end
  end
end
281

282
# NOTE(review): `prepend_if_ee` is defined outside this file; presumably it prepends
# the named EE module only when Enterprise Edition code is available — confirm.
Gitlab::PathRegex.prepend_if_ee('EE::Gitlab::PathRegex')