Commit 3ef4f74b authored by Markus Koller's avatar Markus Koller 🔴

Add more storage statistics

This adds counters for build artifacts and LFS objects, and moves
the preexisting repository_size and commit_count from the projects
table into a new project_statistics table.

The counters are displayed in the administration area for projects
and groups, and also available through the API for admins (on */all)
and normal users (on */owned)

The statistics are updated through ProjectCacheWorker, which can now
do more granular updates with the new :statistics argument.
parent 6fd58ee4
class Admin::GroupsController < Admin::ApplicationController
before_action :group, only: [:edit, :show, :update, :destroy, :project_update, :members_update]
before_action :group, only: [:edit, :update, :destroy, :project_update, :members_update]
def index
@groups = Group.all
@groups = Group.with_statistics
@groups = @groups.sort(@sort = params[:sort])
@groups = @groups.search(params[:name]) if params[:name].present?
@groups = @groups.page(params[:page])
end
def show
@group = Group.with_statistics.find_by_full_path(params[:id])
@members = @group.members.order("access_level DESC").page(params[:members_page])
@requesters = AccessRequestsFinder.new(@group).execute(current_user)
@projects = @group.projects.page(params[:projects_page])
@projects = @group.projects.with_statistics.page(params[:projects_page])
end
def new
......
......@@ -3,7 +3,7 @@ class Admin::ProjectsController < Admin::ApplicationController
before_action :group, only: [:show, :transfer]
def index
@projects = Project.all
@projects = Project.with_statistics
@projects = @projects.in_namespace(params[:namespace_id]) if params[:namespace_id].present?
@projects = @projects.where(visibility_level: params[:visibility_level]) if params[:visibility_level].present?
@projects = @projects.with_push if params[:with_push].present?
......
......@@ -75,7 +75,7 @@ class GroupsController < Groups::ApplicationController
end
def projects
@projects = @group.projects.page(params[:page])
@projects = @group.projects.with_statistics.page(params[:page])
end
def update
......
......@@ -246,11 +246,6 @@ module ProjectsHelper
end
end
def repository_size(project = @project)
size_in_bytes = project.repository_size * 1.megabyte
number_to_human_size(size_in_bytes, delimiter: ',', precision: 2)
end
def default_url_to_repo(project = @project)
case default_clone_protocol
when 'ssh'
......
......@@ -11,6 +11,7 @@ module SortingHelper
sort_value_due_date_soon => sort_title_due_date_soon,
sort_value_due_date_later => sort_title_due_date_later,
sort_value_largest_repo => sort_title_largest_repo,
sort_value_largest_group => sort_title_largest_group,
sort_value_recently_signin => sort_title_recently_signin,
sort_value_oldest_signin => sort_title_oldest_signin,
sort_value_downvotes => sort_title_downvotes,
......@@ -92,6 +93,10 @@ module SortingHelper
'Largest repository'
end
def sort_title_largest_group
'Largest group'
end
def sort_title_recently_signin
'Recent sign in'
end
......@@ -193,7 +198,11 @@ module SortingHelper
end
def sort_value_largest_repo
'repository_size_desc'
'storage_size_desc'
end
def sort_value_largest_group
'storage_size_desc'
end
def sort_value_recently_signin
......
module StorageHelper
def storage_counter(size_in_bytes)
precision = size_in_bytes < 1.megabyte ? 0 : 1
number_to_human_size(size_in_bytes, delimiter: ',', precision: precision, significant: false)
end
end
......@@ -43,6 +43,8 @@ module Ci
before_destroy { project }
after_create :execute_hooks
after_save :update_project_statistics, if: :artifacts_size_changed?
after_destroy :update_project_statistics
class << self
def first_pending
......@@ -584,5 +586,9 @@ module Ci
Ci::MaskSecret.mask!(trace, token)
trace
end
def update_project_statistics
ProjectCacheWorker.perform_async(project_id, [], [:build_artifacts_size])
end
end
end
......@@ -48,7 +48,13 @@ class Group < Namespace
end
def sort(method)
order_by(method)
if method == 'storage_size_desc'
# storage_size is a virtual column so we need to
# pass a string to avoid AR adding the table name
reorder('storage_size DESC, namespaces.id DESC')
else
order_by(method)
end
end
def reference_prefix
......
......@@ -5,4 +5,13 @@ class LfsObjectsProject < ActiveRecord::Base
validates :lfs_object_id, presence: true
validates :lfs_object_id, uniqueness: { scope: [:project_id], message: "already exists in project" }
validates :project_id, presence: true
after_create :update_project_statistics
after_destroy :update_project_statistics
private
def update_project_statistics
ProjectCacheWorker.perform_async(project_id, [], [:lfs_objects_size])
end
end
......@@ -9,6 +9,7 @@ class Namespace < ActiveRecord::Base
cache_markdown_field :description, pipeline: :description
has_many :projects, dependent: :destroy
has_many :project_statistics
belongs_to :owner, class_name: "User"
belongs_to :parent, class_name: "Namespace"
......@@ -38,6 +39,18 @@ class Namespace < ActiveRecord::Base
scope :root, -> { where('type IS NULL') }
scope :with_statistics, -> do
joins('LEFT JOIN project_statistics ps ON ps.namespace_id = namespaces.id')
.group('namespaces.id')
.select(
'namespaces.*',
'COALESCE(SUM(ps.storage_size), 0) AS storage_size',
'COALESCE(SUM(ps.repository_size), 0) AS repository_size',
'COALESCE(SUM(ps.lfs_objects_size), 0) AS lfs_objects_size',
'COALESCE(SUM(ps.build_artifacts_size), 0) AS build_artifacts_size',
)
end
class << self
def by_path(path)
find_by('lower(path) = :value', value: path.downcase)
......
......@@ -44,6 +44,7 @@ class Project < ActiveRecord::Base
after_create :ensure_dir_exist
after_create :create_project_feature, unless: :project_feature
after_save :ensure_dir_exist, if: :namespace_id_changed?
after_save :update_project_statistics, if: :namespace_id_changed?
# set last_activity_at to the same as created_at
after_create :set_last_activity_at
......@@ -151,6 +152,7 @@ class Project < ActiveRecord::Base
has_one :import_data, dependent: :destroy, class_name: "ProjectImportData"
has_one :project_feature, dependent: :destroy
has_one :statistics, class_name: 'ProjectStatistics', dependent: :delete
has_many :commit_statuses, dependent: :destroy, foreign_key: :gl_project_id
has_many :pipelines, dependent: :destroy, class_name: 'Ci::Pipeline', foreign_key: :gl_project_id
......@@ -220,6 +222,7 @@ class Project < ActiveRecord::Base
scope :with_push, -> { joins(:events).where('events.action = ?', Event::PUSHED) }
scope :with_project_feature, -> { joins('LEFT JOIN project_features ON projects.id = project_features.project_id') }
scope :with_statistics, -> { includes(:statistics) }
# "enabled" here means "not disabled". It includes private features!
scope :with_feature_enabled, ->(feature) {
......@@ -332,8 +335,10 @@ class Project < ActiveRecord::Base
end
def sort(method)
if method == 'repository_size_desc'
reorder(repository_size: :desc, id: :desc)
if method == 'storage_size_desc'
# storage_size is a joined column so we need to
# pass a string to avoid AR adding the table name
reorder('project_statistics.storage_size DESC, projects.id DESC')
else
order_by(method)
end
......@@ -1036,14 +1041,6 @@ class Project < ActiveRecord::Base
forked? && project == forked_from_project
end
def update_repository_size
update_attribute(:repository_size, repository.size)
end
def update_commit_count
update_attribute(:commit_count, repository.commit_count)
end
def forks_count
forks.count
end
......@@ -1322,4 +1319,9 @@ class Project < ActiveRecord::Base
def full_path_changed?
path_changed? || namespace_id_changed?
end
def update_project_statistics
stats = statistics || build_statistics
stats.update(namespace_id: namespace_id)
end
end
class ProjectStatistics < ActiveRecord::Base
belongs_to :project
belongs_to :namespace
before_save :update_storage_size
STORAGE_COLUMNS = [:repository_size, :lfs_objects_size, :build_artifacts_size]
STATISTICS_COLUMNS = [:commit_count] + STORAGE_COLUMNS
def total_repository_size
repository_size + lfs_objects_size
end
def refresh!(only: nil)
STATISTICS_COLUMNS.each do |column, generator|
if only.blank? || only.include?(column)
public_send("update_#{column}")
end
end
save!
end
def update_commit_count
self.commit_count = project.repository.commit_count
end
def update_repository_size
self.repository_size = project.repository.size
end
def update_lfs_objects_size
self.lfs_objects_size = project.lfs_objects.sum(:size)
end
def update_build_artifacts_size
self.build_artifacts_size = project.builds.sum(:artifacts_size)
end
def update_storage_size
self.storage_size = STORAGE_COLUMNS.sum(&method(:read_attribute))
end
end
......@@ -74,7 +74,7 @@ class GitPushService < BaseService
types = []
end
ProjectCacheWorker.perform_async(@project.id, types)
ProjectCacheWorker.perform_async(@project.id, types, [:commit_count, :repository_size])
end
protected
......
......@@ -12,7 +12,7 @@ class GitTagPushService < BaseService
project.execute_hooks(@push_data.dup, :tag_push_hooks)
project.execute_services(@push_data.dup, :tag_push_hooks)
Ci::CreatePipelineService.new(project, current_user, @push_data).execute
ProjectCacheWorker.perform_async(project.id)
ProjectCacheWorker.perform_async(project.id, [], [:commit_count, :repository_size])
true
end
......
......@@ -5,6 +5,9 @@
= link_to 'Edit', admin_group_edit_path(group), id: "edit_#{dom_id(group)}", class: 'btn'
= link_to 'Delete', [:admin, group], data: { confirm: "Are you sure you want to remove #{group.name}?" }, method: :delete, class: 'btn btn-remove'
.stats
%span.badge
= storage_counter(group.storage_size)
%span
= icon('bookmark')
= number_with_delimiter(group.projects.count)
......
......@@ -27,6 +27,8 @@
= sort_title_recently_updated
= link_to admin_groups_path(sort: sort_value_oldest_updated, name: project_name) do
= sort_title_oldest_updated
= link_to admin_groups_path(sort: sort_value_largest_group, name: project_name) do
= sort_title_largest_group
= link_to new_admin_group_path, class: "btn btn-new" do
New Group
%ul.content-list
......
......@@ -38,6 +38,18 @@
%strong
= @group.created_at.to_s(:medium)
%li
%span.light Storage:
%strong= storage_counter(@group.storage_size)
(
= storage_counter(@group.repository_size)
repositories,
= storage_counter(@group.build_artifacts_size)
build artifacts,
= storage_counter(@group.lfs_objects_size)
LFS
)
%li
%span.light Group Git LFS status:
%strong
......@@ -55,8 +67,8 @@
%li
%strong
= link_to project.name_with_namespace, [:admin, project.namespace.becomes(Namespace), project]
%span.label.label-gray
= repository_size(project)
%span.badge
= storage_counter(project.statistics.storage_size)
%span.pull-right.light
%span.monospace= project.path_with_namespace + ".git"
.panel-footer
......@@ -73,8 +85,8 @@
%li
%strong
= link_to project.name_with_namespace, [:admin, project.namespace.becomes(Namespace), project]
%span.label.label-gray
= repository_size(project)
%span.badge
= storage_counter(project.statistics.storage_size)
%span.pull-right.light
%span.monospace= project.path_with_namespace + ".git"
......
......@@ -69,8 +69,8 @@
.controls
- if project.archived
%span.label.label-warning archived
%span.label.label-gray
= repository_size(project)
%span.badge
= storage_counter(project.statistics.storage_size)
= link_to 'Edit', edit_namespace_project_path(project.namespace, project), id: "edit_#{dom_id(project)}", class: "btn"
= link_to 'Delete', [project.namespace.becomes(Namespace), project], data: { confirm: remove_project_message(project) }, method: :delete, class: "btn btn-remove"
.title
......
......@@ -65,9 +65,16 @@
= @project.repository.path_to_repo
%li
%span.light Size
%strong
= repository_size(@project)
%span.light Storage:
%strong= storage_counter(@project.statistics.storage_size)
(
= storage_counter(@project.statistics.repository_size)
repository,
= storage_counter(@project.statistics.build_artifacts_size)
build artifacts,
= storage_counter(@project.statistics.lfs_objects_size)
LFS
)
%li
%span.light last commit:
......
......@@ -18,8 +18,8 @@
.pull-right
- if project.archived
%span.label.label-warning archived
%span.label.label-gray
= repository_size(project)
%span.badge
= storage_counter(project.statistics.storage_size)
= link_to 'Members', namespace_project_project_members_path(project.namespace, project), id: "edit_#{dom_id(project)}", class: "btn btn-sm"
= link_to 'Edit', edit_namespace_project_path(project.namespace, project), id: "edit_#{dom_id(project)}", class: "btn btn-sm"
= link_to 'Remove', project, data: { confirm: remove_project_message(project)}, method: :delete, class: "btn btn-sm btn-remove"
......
......@@ -17,10 +17,10 @@
%ul.nav
%li
= link_to project_files_path(@project) do
Files (#{repository_size})
Files (#{storage_counter(@project.statistics.total_repository_size)})
%li
= link_to namespace_project_commits_path(@project.namespace, @project, current_ref) do
#{'Commit'.pluralize(@project.commit_count)} (#{number_with_delimiter(@project.commit_count)})
#{'Commit'.pluralize(@project.statistics.commit_count)} (#{number_with_delimiter(@project.statistics.commit_count)})
%li
= link_to namespace_project_branches_path(@project.namespace, @project) do
#{'Branch'.pluralize(@repository.branch_count)} (#{number_with_delimiter(@repository.branch_count)})
......
......@@ -6,26 +6,27 @@ class ProjectCacheWorker
LEASE_TIMEOUT = 15.minutes.to_i
# project_id - The ID of the project for which to flush the cache.
# refresh - An Array containing extra types of data to refresh such as
# `:readme` to flush the README and `:changelog` to flush the
# CHANGELOG.
def perform(project_id, refresh = [])
# files - An Array containing extra types of files to refresh such as
# `:readme` to flush the README and `:changelog` to flush the
# CHANGELOG.
# statistics - An Array containing columns from ProjectStatistics to
# refresh, if empty all columns will be refreshed
def perform(project_id, files = [], statistics = [])
project = Project.find_by(id: project_id)
return unless project && project.repository.exists?
update_repository_size(project)
project.update_commit_count
update_statistics(project, statistics.map(&:to_sym))
project.repository.refresh_method_caches(refresh.map(&:to_sym))
project.repository.refresh_method_caches(files.map(&:to_sym))
end
def update_repository_size(project)
return unless try_obtain_lease_for(project.id, :update_repository_size)
def update_statistics(project, statistics = [])
return unless try_obtain_lease_for(project.id, :update_statistics)
Rails.logger.info("Updating repository size for project #{project.id}")
Rails.logger.info("Updating statistics for project #{project.id}")
project.update_repository_size
project.statistics.refresh!(only: statistics)
end
private
......
---
title: Add more storage statistics
merge_request: 7754
author: Markus Koller
......@@ -10,5 +10,5 @@
# end
#
ActiveSupport::Inflector.inflections do |inflect|
inflect.uncountable %w(award_emoji)
inflect.uncountable %w(award_emoji project_statistics)
end
class CreateProjectStatistics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
# use bigint columns to support values >2GB
counter_column = { limit: 8, null: false, default: 0 }
create_table :project_statistics do |t|
t.references :project, null: false, index: { unique: true }, foreign_key: { on_delete: :cascade }
t.references :namespace, null: false, index: true
t.integer :commit_count, counter_column
t.integer :storage_size, counter_column
t.integer :repository_size, counter_column
t.integer :lfs_objects_size, counter_column
t.integer :build_artifacts_size, counter_column
end
end
end
class MigrateProjectStatistics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = true
DOWNTIME_REASON = 'Removes two columns from the projects table'
def up
# convert repository_size in float (megabytes) to integer (bytes),
# initialize total storage_size with repository_size
execute <<-EOF
INSERT INTO project_statistics (project_id, namespace_id, commit_count, storage_size, repository_size)
SELECT id, namespace_id, commit_count, (repository_size * 1024 * 1024), (repository_size * 1024 * 1024) FROM projects
EOF
remove_column :projects, :repository_size
remove_column :projects, :commit_count
end
def down
add_column_with_default :projects, :repository_size, :float, default: 0.0
add_column_with_default :projects, :commit_count, :integer, default: 0
end
end
......@@ -901,6 +901,19 @@ ActiveRecord::Schema.define(version: 20161220141214) do
add_index "project_import_data", ["project_id"], name: "index_project_import_data_on_project_id", using: :btree
create_table "project_statistics", force: :cascade do |t|
t.integer "project_id", null: false
t.integer "namespace_id", null: false
t.integer "commit_count", limit: 8, default: 0, null: false
t.integer "storage_size", limit: 8, default: 0, null: false
t.integer "repository_size", limit: 8, default: 0, null: false
t.integer "lfs_objects_size", limit: 8, default: 0, null: false
t.integer "build_artifacts_size", limit: 8, default: 0, null: false
end
add_index "project_statistics", ["namespace_id"], name: "index_project_statistics_on_namespace_id", using: :btree
add_index "project_statistics", ["project_id"], name: "index_project_statistics_on_project_id", unique: true, using: :btree
create_table "projects", force: :cascade do |t|
t.string "name"
t.string "path"
......@@ -915,11 +928,9 @@ ActiveRecord::Schema.define(version: 20161220141214) do
t.boolean "archived", default: false, null: false
t.string "avatar"
t.string "import_status"
t.float "repository_size", default: 0.0
t.integer "star_count", default: 0, null: false
t.string "import_type"
t.string "import_source"
t.integer "commit_count", default: 0
t.text "import_error"
t.integer "ci_id"
t.boolean "shared_runners_enabled", default: true, null: false
......@@ -1288,6 +1299,7 @@ ActiveRecord::Schema.define(version: 20161220141214) do
add_foreign_key "personal_access_tokens", "users"
add_foreign_key "project_authorizations", "projects", on_delete: :cascade
add_foreign_key "project_authorizations", "users", on_delete: :cascade
add_foreign_key "project_statistics", "projects", on_delete: :cascade
add_foreign_key "protected_branch_merge_access_levels", "protected_branches"
add_foreign_key "protected_branch_push_access_levels", "protected_branches"
add_foreign_key "subscriptions", "projects", on_delete: :cascade
......
......@@ -88,3 +88,9 @@ artifacts through the [Admin area settings](../user/admin_area/settings/continuo
[reconfigure gitlab]: restart_gitlab.md "How to restart GitLab"
[restart gitlab]: restart_gitlab.md "How to restart GitLab"
## Storage statistics
You can see the total storage used for build artifacts on groups and projects
in the administration area, as well as through the [groups](../api/groups.md)
and [projects APIs](../api/projects.md).
......@@ -13,6 +13,7 @@ Parameters:
| `search` | string | no | Return list of authorized groups matching the search criteria |
| `order_by` | string | no | Order groups by `name` or `path`. Default is `name` |
| `sort` | string | no | Order groups in `asc` or `desc` order. Default is `asc` |
| `statistics` | boolean | no | Include group statistics (admins only) |
```
GET /groups
......@@ -31,7 +32,6 @@ GET /groups
You can search for groups by name or path, see below.
=======
## List owned groups
Get a list of groups which are owned by the authenticated user.
......@@ -40,6 +40,12 @@ Get a list of groups which are owned by the authenticated user.
GET /groups/owned
```
Parameters:
| Attribute | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `statistics` | boolean | no | Include group statistics |
## List a group's projects
Get a list of projects in this group.
......
......@@ -307,6 +307,8 @@ Parameters:
| `order_by` | string | no | Return projects ordered by `id`, `name`, `path`, `created_at`, `updated_at`, or `last_activity_at` fields. Default is `created_at` |
| `sort` | string | no | Return projects sorted in `asc` or `desc` order. Default is `desc` |
| `search` | string | no | Return list of authorized projects matching the search criteria |
| `simple` | boolean | no | Return only the ID, URL, name, and path of each project |
| `statistics` | boolean | no | Include project statistics |
### List starred projects
......@@ -325,6 +327,7 @@ Parameters:
| `order_by` | string | no | Return projects ordered by `id`, `name`, `path`, `created_at`, `updated_at`, or `last_activity_at` fields. Default is `created_at` |
| `sort` | string | no | Return projects sorted in `asc` or `desc` order. Default is `desc` |
| `search` | string | no | Return list of authorized projects matching the search criteria |
| `simple` | boolean | no | Return only the ID, URL, name, and path of each project |
### List ALL projects
......@@ -343,6 +346,7 @@ Parameters:
| `order_by` | string | no | Return projects ordered by `id`, `name`, `path`, `created_at`, `updated_at`, or `last_activity_at` fields. Default is `created_at` |
| `sort` | string | no | Return projects sorted in `asc` or `desc` order. Default is `desc` |
| `search` | string | no | Return list of authorized projects matching the search criteria |
| `statistics` | boolean | no | Include project statistics |
### Get single project
......
......@@ -40,6 +40,12 @@ In `config/gitlab.yml`:
storage_path: /mnt/storage/lfs-objects
```
## Storage statistics
You can see the total storage used for LFS objects on groups and projects
in the administration area, as well as through the [groups](../api/groups.md)
and [projects APIs](../api/projects.md).
## Known limitations
* Currently, storing GitLab Git LFS objects on a non-local storage (like S3 buckets)
......@@ -47,3 +53,5 @@ In `config/gitlab.yml`:
* Currently, removing LFS objects from GitLab Git LFS storage is not supported
* LFS authentications via SSH was added with GitLab 8.12
* Only compatible with the GitLFS client versions 1.1.0 and up, or 1.0.2.
* The storage statistics currently count each LFS object multiple times for
every project linking to it
......@@ -101,6 +101,16 @@ module API
expose :only_allow_merge_if_build_succeeds
expose :request_access_enabled
expose :only_allow_merge_if_all_discussions_are_resolved
expose :statistics, using: 'API::Entities::ProjectStatistics', if: :statistics
end
class ProjectStatistics < Grape::Entity
expose :commit_count
expose :storage_size
expose :repository_size
expose :lfs_objects_size
expose :build_artifacts_size
end
class Member < UserBasic
......@@ -127,6 +137,15 @@ module API
expose :avatar_url
expose :web_url
expose :request_access_enabled
expose :statistics, if: :statistics do
with_options format_with: -> (value) { value.to_i } do
expose :storage_size
expose :repository_size
expose :lfs_objects_size
expose :build_artifacts_size
end
end
end
class GroupDetail < Group
......
......@@ -11,6 +11,20 @@ module API
optional :lfs_enabled, type: Boolean, desc: 'Enable/disable LFS for the projects in this group'
optional :request_access_enabled, type: Boolean, desc: 'Allow users to request member access'
end
params :statistics_params do
optional :statistics, type: Boolean, default: false, desc: 'Include project statistics'
end
def present_groups(groups, options = {})
options = options.reverse_merge(
with: Entities::Group,
current_user: current_user,
)
groups = groups.with_statistics if options[:statistics]
present paginate(groups), options
end
end
resource :groups do
......@@ -18,6 +32,7 @@ module API
success Entities::Group
end
params do
use :statistics_params
optional :skip_groups, type: Array[Integer], desc: 'Array of group ids to exclude from list'
optional :all_available, type: Boolean, desc: 'Show all group that you have access to'
optional :search, type: String, desc: 'Search for a specific group'
......@@ -38,7 +53,7 @@ module API
groups = groups.where.not(id: params[:skip_groups]) if params[:skip_groups].present?
groups = groups.reorder(params[:order_by] => params[:sort])
present paginate(groups), with: Entities::Group, current_user: current_user
present_groups groups, statistics: params[:statistics] && current_user.is_admin?
end
desc 'Get list of owned groups for authenticated user' do
......@@ -46,10 +61,10 @@ module API
end
params do
use :pagination
use :statistics_params