Commit ee401867 authored by Jacob Vosmaer's avatar Jacob Vosmaer

Give Sidekiq time to recover from '25 of 25 busy'

This change allows Sidekiq to have short periods with 25/25 workers busy
before Monit comes in and restarts Sidekiq.
parent 3bf02af2
......@@ -199,7 +199,8 @@ default['gitlab']['monitrc']['sidekiq'] = {
:mem_threshold => "225", # in MB
:mem_cycles_number => "10",
:restart_number => "5", # Number of consecutive restarts before alerting.
:restart_cycles_number => "5" # Number of cycles to monitor for consecutive restarts.
:restart_cycles_number => "5", # Number of cycles to monitor for consecutive restarts.
:max_workers_timeout => "60" # Number of consecutive seconds that Sidekiq may report 25/25 workers busy
}
default['gitlab']['monitrc']['unicorn'] = {
......
......@@ -25,9 +25,12 @@ monit_monitrc "sidekiq" do
})
end
file "/usr/local/bin/sidekiq_load_ok" do
content "#!/bin/sh\nexec #{gitlab['path']}/script/background_jobs load_ok\n"
# Render the Sidekiq load-check script used by Monit.
template "/usr/local/bin/sidekiq_load_ok" do
  # The rendered file must be executable.
  mode 0755
  # Values exposed to the template as @gitlab_root and @timeout.
  variables({
    gitlab_root: gitlab['path'],
    timeout: sidekiq['max_workers_timeout']
  })
end
unicorn = monitrc['unicorn']
......
#!/bin/bash
# GitLab Sidekiq load check for Monit.
#
# If Sidekiq reports that all its workers are busy for a prolonged period of
# time, we may have a situation where the queue is stuck. This can be resolved
# by letting Monit restart Sidekiq.
#
# This script exits immediately with status 0 if Sidekiq still has workers
# available. If all workers are busy, we repeatedly check again after 1
# second. As soon as Sidekiq has workers available again, we exit with status
# 0. If Sidekiq keeps reporting all its workers are busy after $timeout
# seconds, we exit with status 1 to signal to Monit that a restart is needed.

# Number of one-second polling rounds; filled in from the template's
# `timeout` variable when the file is rendered.
timeout=<%= @timeout %>

# Terminate the whole script with status 0 as soon as
# `script/background_jobs load_ok` succeeds; otherwise return to the caller.
function exit_if_load_ok
{
  if <%= @gitlab_root %>/script/background_jobs load_ok ; then
    exit 0
  fi
}

counter=0
while [[ $counter -lt $timeout ]]; do
  # Use POSIX-style $(( )) arithmetic; the old $[ ] form is deprecated in Bash.
  counter=$((counter + 1))
  exit_if_load_ok
  sleep 1
done

# At this point, Sidekiq has had 25/25 workers busy for $timeout seconds;
# this is not OK.
exit 1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment