Skip to content
Snippets Groups Projects
Unverified Commit 0eff75fa authored by Nick Thomas's avatar Nick Thomas
Browse files

Cache branch and tag names as Redis sets

This allows us to check inclusion for the *_exists? methods without
downloading the full list of branch names, which is over 100KiB in size
for gitlab-ce at the moment.
parent 648e5a94
No related branches found
No related tags found
No related merge requests found
......@@ -239,13 +239,13 @@ def ref_names
def branch_exists?(branch_name)
return false unless raw_repository
branch_names.include?(branch_name)
branch_names_include?(branch_name)
end
def tag_exists?(tag_name)
return false unless raw_repository
tag_names.include?(tag_name)
tag_names_include?(tag_name)
end
def ref_exists?(ref)
......@@ -561,10 +561,10 @@ def commit_count_for_ref(ref)
end
delegate :branch_names, to: :raw_repository
cache_method :branch_names, fallback: []
cache_method_as_redis_set :branch_names, fallback: []
delegate :tag_names, to: :raw_repository
cache_method :tag_names, fallback: []
cache_method_as_redis_set :tag_names, fallback: []
delegate :branch_count, :tag_count, :has_visible_content?, to: :raw_repository
cache_method :branch_count, fallback: 0
......@@ -1126,6 +1126,10 @@ def cache
@cache ||= Gitlab::RepositoryCache.new(self)
end
def redis_set_cache
@redis_set_cache ||= Gitlab::RepositorySetCache.new(self)
end
def request_store_cache
@request_store_cache ||= Gitlab::RepositoryCache.new(self, backend: Gitlab::SafeRequestStore)
end
......
---
title: Cache branch and tag names as Redis sets
merge_request: 30476
author:
type: performance
......@@ -23,6 +23,37 @@ def cache_method(name, fallback: nil)
end
end
# Caches and strongly memoizes the method as a Redis Set.
#
# This only works for methods that do not take any arguments. The method
# should return an Array of Strings to be cached.
#
# In addition to overriding the named method, a "name_include?" method is
# defined. This uses the "SISMEMBER" query to efficiently check membership
# without needing to load the entire set into memory.
#
# name - The name of the method to be cached.
# fallback - A value to fall back to if the repository does not exist, or
# in case of a Git error. Defaults to nil.
def cache_method_as_redis_set(name, fallback: nil)
uncached_name = alias_uncached_method(name)
define_method(name) do
cache_method_output_as_redis_set(name, fallback: fallback) do
__send__(uncached_name) # rubocop:disable GitlabSecurity/PublicSend
end
end
define_method("#{name}_include?") do |value|
# If the cache isn't populated, we can't rely on it
return redis_set_cache.include?(name, value) if redis_set_cache.exist?(name)
# Since we have to pull all branch names to populate the cache, use
# the data we already have to answer the query just this once
__send__(name).include?(value) # rubocop:disable GitlabSecurity/PublicSend
end
end
# Caches truthy values from the method. All values are strongly memoized,
# and cached in RequestStore.
#
......@@ -84,6 +115,11 @@ def cache
raise NotImplementedError
end
# RepositorySetCache to be used. Should be overridden by the including class
def redis_set_cache
raise NotImplementedError
end
# List of cached methods. Should be overridden by the including class
def cached_methods
raise NotImplementedError
......@@ -100,6 +136,18 @@ def cache_method_output(name, fallback: nil, &block)
end
end
# Caches and strongly memoizes the supplied block as a Redis Set. The result
# will be provided as a sorted array.
#
# name - The name of the method to be cached.
# fallback - A value to fall back to if the repository does not exist, or
# in case of a Git error. Defaults to nil.
def cache_method_output_as_redis_set(name, fallback: nil, &block)
memoize_method_output(name, fallback: fallback) do
redis_set_cache.fetch(name, &block).sort
end
end
# Caches truthy values from the supplied block. All values are strongly
# memoized, and cached in RequestStore.
#
......@@ -154,6 +202,7 @@ def expire_method_caches(methods)
clear_memoization(memoizable_name(name))
end
expire_redis_set_method_caches(methods)
expire_request_store_method_caches(methods)
end
......@@ -169,6 +218,10 @@ def expire_request_store_method_caches(methods)
end
end
def expire_redis_set_method_caches(methods)
methods.each { |name| redis_set_cache.expire(name) }
end
# All cached repository methods depend on the existence of a Git repository,
# so if the repository doesn't exist, we already know not to call it.
def fallback_early?(method_name)
......
# frozen_string_literal: true
# Interface to the Redis-backed cache store for keys that use a Redis set
module Gitlab
class RepositorySetCache
attr_reader :repository, :namespace, :expires_in
def initialize(repository, extra_namespace: nil, expires_in: 2.weeks)
@repository = repository
@namespace = "#{repository.full_path}:#{repository.project.id}"
@namespace = "#{@namespace}:#{extra_namespace}" if extra_namespace
@expires_in = expires_in
end
def cache_key(type)
[type, namespace, 'set'].join(':')
end
def expire(key)
with { |redis| redis.del(cache_key(key)) }
end
def exist?(key)
with { |redis| redis.exists(cache_key(key)) }
end
def read(key)
with { |redis| redis.smembers(cache_key(key)) }
end
def write(key, value)
full_key = cache_key(key)
with do |redis|
redis.multi do
redis.del(full_key)
# Splitting into groups of 1000 prevents us from creating a too-long
# Redis command
value.in_groups_of(1000, false) { |subset| redis.sadd(full_key, subset) }
redis.expire(full_key, expires_in)
end
end
value
end
def fetch(key, &block)
if exist?(key)
read(key)
else
write(key, yield)
end
end
def include?(key, value)
with { |redis| redis.sismember(cache_key(key), value) }
end
private
def with(&blk)
Gitlab::Redis::Cache.with(&blk) # rubocop:disable CodeReuse/ActiveRecord
end
end
end
......@@ -4,6 +4,7 @@
let(:project) { create(:project, :repository) }
let(:repository) { project.repository }
let(:cache) { repository.send(:cache) }
let(:redis_set_cache) { repository.send(:redis_set_cache) }
describe '#cache_method_output', :use_clean_rails_memory_store_caching do
let(:fallback) { 10 }
......@@ -206,9 +207,11 @@
describe '#expire_method_caches' do
it 'expires the caches of the given methods' do
expect(cache).to receive(:expire).with(:rendered_readme)
expect(cache).to receive(:expire).with(:gitignore)
expect(cache).to receive(:expire).with(:branch_names)
expect(redis_set_cache).to receive(:expire).with(:rendered_readme)
expect(redis_set_cache).to receive(:expire).with(:branch_names)
repository.expire_method_caches(%i(rendered_readme gitignore))
repository.expire_method_caches(%i(rendered_readme branch_names))
end
it 'does not expire caches for non-existent methods' do
......
require 'spec_helper'
describe Gitlab::RepositorySetCache, :clean_gitlab_redis_cache do
let(:project) { create(:project) }
let(:repository) { project.repository }
let(:namespace) { "#{repository.full_path}:#{project.id}" }
let(:cache) { described_class.new(repository) }
describe '#cache_key' do
subject { cache.cache_key(:foo) }
it 'includes the namespace' do
is_expected.to eq("foo:#{namespace}:set")
end
context 'with a given namespace' do
let(:extra_namespace) { 'my:data' }
let(:cache) { described_class.new(repository, extra_namespace: extra_namespace) }
it 'includes the full namespace' do
is_expected.to eq("foo:#{namespace}:#{extra_namespace}:set")
end
end
end
describe '#expire' do
it 'expires the given key from the cache' do
cache.write(:foo, ['value'])
expect(cache.read(:foo)).to contain_exactly('value')
expect(cache.expire(:foo)).to eq(1)
expect(cache.read(:foo)).to be_empty
end
end
describe '#exist?' do
it 'checks whether the key exists' do
expect(cache.exist?(:foo)).to be(false)
cache.write(:foo, ['value'])
expect(cache.exist?(:foo)).to be(true)
end
end
describe '#fetch' do
let(:blk) { -> { ['block value'] } }
subject { cache.fetch(:foo, &blk) }
it 'fetches the key from the cache when filled' do
cache.write(:foo, ['value'])
is_expected.to contain_exactly('value')
end
it 'writes the value of the provided block when empty' do
cache.expire(:foo)
is_expected.to contain_exactly('block value')
expect(cache.read(:foo)).to contain_exactly('block value')
end
end
describe '#include?' do
it 'checks inclusion in the Redis set' do
cache.write(:foo, ['value'])
expect(cache.include?(:foo, 'value')).to be(true)
expect(cache.include?(:foo, 'bar')).to be(false)
end
end
end
......@@ -1223,36 +1223,66 @@ def create_commit_with_invalid_utf8_path
end
describe '#branch_exists?' do
it 'uses branch_names' do
allow(repository).to receive(:branch_names).and_return(['foobar'])
let(:branch) { repository.root_ref }
expect(repository.branch_exists?('foobar')).to eq(true)
expect(repository.branch_exists?('master')).to eq(false)
subject { repository.branch_exists?(branch) }
it 'delegates to branch_names when the cache is empty' do
repository.expire_branches_cache
expect(repository).to receive(:branch_names).and_call_original
is_expected.to eq(true)
end
it 'uses redis set caching when the cache is filled' do
repository.branch_names # ensure the branch name cache is filled
expect(repository)
.to receive(:branch_names_include?)
.with(branch)
.and_call_original
is_expected.to eq(true)
end
end
describe '#tag_exists?' do
it 'uses tag_names' do
allow(repository).to receive(:tag_names).and_return(['foobar'])
let(:tag) { repository.tags.first.name }
subject { repository.tag_exists?(tag) }
it 'delegates to tag_names when the cache is empty' do
repository.expire_tags_cache
expect(repository).to receive(:tag_names).and_call_original
is_expected.to eq(true)
end
it 'uses redis set caching when the cache is filled' do
repository.tag_names # ensure the tag name cache is filled
expect(repository)
.to receive(:tag_names_include?)
.with(tag)
.and_call_original
expect(repository.tag_exists?('foobar')).to eq(true)
expect(repository.tag_exists?('master')).to eq(false)
is_expected.to eq(true)
end
end
describe '#branch_names', :use_clean_rails_memory_store_caching do
describe '#branch_names', :clean_gitlab_redis_cache do
let(:fake_branch_names) { ['foobar'] }
it 'gets cached across Repository instances' do
allow(repository.raw_repository).to receive(:branch_names).once.and_return(fake_branch_names)
expect(repository.branch_names).to eq(fake_branch_names)
expect(repository.branch_names).to match_array(fake_branch_names)
fresh_repository = Project.find(project.id).repository
expect(fresh_repository.object_id).not_to eq(repository.object_id)
expect(fresh_repository.raw_repository).not_to receive(:branch_names)
expect(fresh_repository.branch_names).to eq(fake_branch_names)
expect(fresh_repository.branch_names).to match_array(fake_branch_names)
end
end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment