Add Arkose anomaly detection module
This MR introduces Arkose anomaly detection by evaluating the current bucket’s token verification rate against a rolling baseline sourced from the Redis vrate stream. When the current rate deviates significantly from historical behavior, the system can surface an anomaly signal that downstream fail-open logic will use in future MRs.
NOTE: This MR must be merged after https://gitlab.com/gitlab-org/gitlab/-/merge_requests/211653
Reference
https://gitlab.com/gitlab-org/gitlab/-/work_items/578300
Verification Script
# rails c
# Meant to be pasted into a Rails console. Enables the feature flag with the :instance
# actor, then copies the ArkoseFailOpen constants used below into short local names.
Feature.enable(:track_arkose_token_verification_results, :instance)
mod = AntiAbuse::IdentityVerification::ArkoseFailOpen
# Bucket length; judging by the constant names, once in hours and once in seconds.
bucket_hours = mod::BUCKET_DURATION_HOURS
bucket_secs = mod::BUCKET_DURATION_SECONDS
# NOTE(review): min_attempts is not read again anywhere in the visible script; the seeded
# totals below (400 and 420 attempts) presumably exceed it — confirm.
min_attempts = mod::MIN_ATTEMPTS_FOR_EVALUATION
# Redis key of the vrate stream, plus the prefixes that get a bucket id appended to
# form the per-bucket success/failure counter keys.
stream_key = mod::VERIFICATION_RATE_STREAM_KEY
success_prefix = mod::COUNTER_SUCCESS_KEY_PREFIX
failure_prefix = mod::COUNTER_FAILURE_KEY_PREFIX
# Builds the identifier of the bucket that contains +at+.
#
# @param at [#to_date, #hour] the moment to classify
# @param bucket_hours [Integer] length of one bucket, in hours
# @return [String] "<YYYYMMDD>-<slot>", where slot is at.hour / bucket_hours
def bucket_id(at:, bucket_hours:)
  day_stamp = at.to_date.strftime('%Y%m%d')
  slot = at.hour / bucket_hours
  [day_stamp, slot].join('-')
end
# Fetches the newest entry of a Redis stream.
#
# Asks for the stream in reverse order ('+' down to '-') limited to a single
# entry, and returns that entry.
#
# @param stream_key [String] Redis key of the stream to inspect
# @return [Object, nil] the first element of the XREVRANGE reply, or nil when
#   the reply is nil or empty
def last_vrate_entry(stream_key)
  Gitlab::Redis::SharedState.with do |redis|
    newest_first = redis.xrevrange(stream_key, '+', '-', count: 1)
    newest_first&.first
  end
end
# Work out which bucket "now minus one bucket duration" falls into, and derive that
# bucket's success/failure counter keys from the module's key prefixes.
now = Time.zone.now
prev_time = now - bucket_secs
prev_id = bucket_id(at: prev_time, bucket_hours: bucket_hours)
succ_key = "#{success_prefix}#{prev_id}"
fail_key = "#{failure_prefix}#{prev_id}"
# Echo every key this run is going to touch.
puts "\nUsing previous bucket id: #{prev_id}"
puts " success key: #{succ_key}"
puts " failure key: #{fail_key}"
puts " stream key: #{stream_key}"
# 0) Clean slate
# Deletes the whole vrate stream together with both counters of the previous bucket;
# anything already stored under these keys is lost, so run this against disposable data only.
Gitlab::Redis::SharedState.with do |r|
r.del(stream_key, succ_key, fail_key)
end
# 1) Seed baseline vrates in the stream (normal historical behavior)
# Ten hand-picked rates between 87% and 93% stand in for the "normal" history.
baseline_vrates = [90.0, 92.0, 88.0, 91.0, 89.0, 93.0, 87.0, 90.5, 89.5, 92.5]

# Append one stream entry per rate, labelled baseline-0, baseline-1, ...
Gitlab::Redis::SharedState.with do |redis|
  baseline_vrates.each.with_index do |rate, position|
    entry = { 'bucket' => "baseline-#{position}", 'vrate' => rate.to_s }
    redis.xadd(stream_key, entry)
  end
end

# Read the full stream back and report its size and newest entry.
Gitlab::Redis::SharedState.with do |redis|
  seeded = redis.xrange(stream_key, '-', '+')
  puts "\nSeeded vrate stream with baseline entries:"
  puts " count=#{seeded.size}"
  puts " last=#{seeded.last.inspect}"
end
# 2) Non-anomalous bucket: vrate ~90% should be recorded
normal_success = 360
normal_failure = 40
# seed counts to be "not an anomaly"
# Both counters are written with a TTL of bucket_secs.
Gitlab::Redis::SharedState.with do |r|
r.setex(succ_key, bucket_secs, normal_success)
r.setex(fail_key, bucket_secs, normal_failure)
end
# 360 / (360 + 40) = 90%. The value is only printed below; it is never compared
# against what ends up in the stream.
expected_normal_vrate = (100.0 * normal_success.to_f / (normal_success + normal_failure)).round(6)
puts "\n[Normal] previous bucket #{prev_id}:"
puts " success=#{normal_success} failure=#{normal_failure} expected_vrate=#{expected_normal_vrate}%"
# Snapshot the newest stream entry, run the evaluation, snapshot again: a different
# newest entry means something was appended.
before = last_vrate_entry(stream_key)
# Invoked through send — presumably because evaluate_previous_window! is not public; confirm.
mod.send(:evaluate_previous_window!)
after = last_vrate_entry(stream_key)
# abort prints the message and exits the process, so a failure here also ends the console session.
if after == before
abort "\n❌ Expected a new vrate entry to be appended for normal bucket"
end
# The entry is unpacked as a two-element pair; only the second element (the fields) is shown.
_, fields = after
puts "\n[Normal] last vrate entry after evaluation:"
puts " fields=#{fields.inspect}"
# 3) Anomalous bucket: very low vrate should NOT be recorded
# clear counts
# Removes the counters seeded for the normal case; the stream itself is left untouched.
Gitlab::Redis::SharedState.with do |r|
r.del(succ_key, fail_key)
end
anomaly_success = 10
anomaly_failure = 410
# seed counts to throw anomaly
# Same previous-bucket keys as before, again with a TTL of bucket_secs.
Gitlab::Redis::SharedState.with do |r|
r.setex(succ_key, bucket_secs, anomaly_success)
r.setex(fail_key, bucket_secs, anomaly_failure)
end
# 10 / (10 + 410) is about 2.38%, far below the ~90% entries already in the stream.
# As above, the value is only printed.
expected_anomaly_vrate = (100.0 * anomaly_success.to_f / (anomaly_success + anomaly_failure)).round(6)
puts "\n[Anomalous] previous bucket #{prev_id}:"
puts " success=#{anomaly_success} failure=#{anomaly_failure} expected_vrate=#{expected_anomaly_vrate}%"
# Same snapshot / evaluate / snapshot comparison as the normal case, with the opposite
# expectation: the newest entry must be unchanged.
before = last_vrate_entry(stream_key)
mod.send(:evaluate_previous_window!)
after = last_vrate_entry(stream_key)
# Unlike the normal case, a failure here is only reported with puts; the script does not abort.
if after == before
puts "\n✅ No new vrate entry was recorded for anomalous bucket (as expected)"
else
puts "\n❌ Anomalous vrate was appended unexpectedly"
puts "before: #{before.inspect}"
puts "after: #{after.inspect}"
end
Edited by Matthew MacRae-Bovell