Add Arkose anomaly detection module

This MR introduces Arkose anomaly detection by evaluating the current bucket’s token verification rate against a rolling baseline sourced from the Redis vrate stream. When the current rate deviates significantly from historical behavior, the system can surface an anomaly signal that downstream fail-open logic will use in future MRs.

NOTE: This MR depends on, and must be merged after, https://gitlab.com/gitlab-org/gitlab/-/merge_requests/211653

Reference

https://gitlab.com/gitlab-org/gitlab/-/work_items/578300

Verification Script

# Run everything below inside a Rails console (`rails c`).

# Enable the feature flag for the whole instance before exercising the module.
Feature.enable(:track_arkose_token_verification_results, :instance)

# Short alias for the module under test.
mod = AntiAbuse::IdentityVerification::ArkoseFailOpen

# Copy the module's constants into locals; the rest of the script uses them to
# build bucket ids, Redis key names, TTLs and the stream key.
bucket_hours   = mod::BUCKET_DURATION_HOURS
bucket_secs    = mod::BUCKET_DURATION_SECONDS
min_attempts   = mod::MIN_ATTEMPTS_FOR_EVALUATION # NOTE(review): not referenced again in this script
stream_key     = mod::VERIFICATION_RATE_STREAM_KEY
success_prefix = mod::COUNTER_SUCCESS_KEY_PREFIX
failure_prefix = mod::COUNTER_FAILURE_KEY_PREFIX

# Builds a bucket identifier of the form "YYYYMMDD-N" for the given time.
#
# @param at [Time] the moment to place into a bucket (must respond to
#   `to_date` and `hour`)
# @param bucket_hours [Integer] width of one bucket in hours
# @return [String] the date stamp followed by the integer-divided hour slot
def bucket_id(at:, bucket_hours:)
  day_stamp = at.to_date.strftime('%Y%m%d')
  slot = at.hour / bucket_hours

  "#{day_stamp}-#{slot}"
end

# Fetches the newest entry of a Redis stream through the shared-state
# connection.
#
# @param stream_key [String] key of the stream to read
# @return [Object, nil] the first element of the XREVRANGE result limited to
#   one entry, or nil when that result is nil or empty
def last_vrate_entry(stream_key)
  Gitlab::Redis::SharedState.with do |redis|
    # Reverse range from newest ('+') to oldest ('-'), capped at one entry.
    newest = redis.xrevrange(stream_key, '+', '-', count: 1)
    newest&.first
  end
end

# Work out the id of the bucket that contains the instant exactly one bucket
# duration before now, then derive that bucket's success/failure counter keys.
now = Time.zone.now
prev_time = now - bucket_secs
prev_id = bucket_id(at: prev_time, bucket_hours: bucket_hours)

succ_key, fail_key = [success_prefix, failure_prefix].map { |prefix| "#{prefix}#{prev_id}" }

# Echo the keys this run will touch so the operator can inspect them in Redis.
puts(
  "\nUsing previous bucket id: #{prev_id}",
  "  success key: #{succ_key}",
  "  failure key: #{fail_key}",
  "  stream key:  #{stream_key}"
)


# 0) Clean slate
#
# Remove the vrate stream and both counters for the previous bucket so that
# earlier runs cannot influence this one.
# NOTE(review): this is a single multi-key DEL; confirm it is permitted by any
# Redis Cluster cross-slot validation active in your environment.

Gitlab::Redis::SharedState.with do |r|
  r.del(stream_key, succ_key, fail_key)
end


# 1) Seed baseline vrates in the stream (normal historical behavior)
#
# Ten sample rates between 87.0 and 93.0, used as the history the module
# compares against.

baseline_vrates = [90.0, 92.0, 88.0, 91.0, 89.0, 93.0, 87.0, 90.5, 89.5, 92.5]

# Append one stream entry per sample; each entry carries a synthetic bucket
# label ("baseline-<index>") and the rate as a string.
Gitlab::Redis::SharedState.with do |r|
  baseline_vrates.each_with_index do |vr, idx|
    r.xadd(
      stream_key,
      { 'bucket' => "baseline-#{idx}", 'vrate' => vr.to_s }
    )
  end
end

# Read the whole stream back and print its size and newest entry as a sanity
# check that the seeding worked.
Gitlab::Redis::SharedState.with do |r|
  entries = r.xrange(stream_key, '-', '+')
  puts "\nSeeded vrate stream with baseline entries:"
  puts "  count=#{entries.size}"
  puts "  last=#{entries.last.inspect}"
end


# 2) Non-anomalous bucket: vrate ~90% should be recorded

# 360 successes out of 400 attempts gives a 90% verification rate.
normal_success = 360
normal_failure =  40

# seed counts to be "not an anomaly"
# Both counters are written with a TTL of one bucket duration.
Gitlab::Redis::SharedState.with do |r|
  r.setex(succ_key, bucket_secs, normal_success)
  r.setex(fail_key, bucket_secs, normal_failure)
end

# Success percentage rounded to six decimals (90.0 here). It is only printed
# for manual comparison; the script does not assert it against the stored entry.
expected_normal_vrate = (100.0 * normal_success.to_f / (normal_success + normal_failure)).round(6)

puts "\n[Normal] previous bucket #{prev_id}:"
puts "  success=#{normal_success} failure=#{normal_failure} expected_vrate=#{expected_normal_vrate}%"

# Snapshot the newest stream entry so a new append can be detected afterwards.
before = last_vrate_entry(stream_key)

# `send` is used so the call works even if the method is not public.
mod.send(:evaluate_previous_window!)

after = last_vrate_entry(stream_key)

# An unchanged newest entry means nothing was appended. `abort` terminates the
# process, so it also ends the console session.
if after == before
  abort "\n❌ Expected a new vrate entry to be appended for normal bucket"
end

# Each entry is destructured as a pair; the second element is printed as the
# entry's fields.
_, fields = after
puts "\n[Normal] last vrate entry after evaluation:"
puts "  fields=#{fields.inspect}"


# 3) Anomalous bucket: very low vrate should NOT be recorded

# clear counts
# Drop the counters left over from the normal-bucket step.
Gitlab::Redis::SharedState.with do |r|
  r.del(succ_key, fail_key)
end

# 10 successes out of 420 attempts gives a rate of roughly 2.38%, far below
# the seeded baseline values.
anomaly_success = 10
anomaly_failure = 410

# seed counts to throw anomaly
# Same keys and TTL as the normal-bucket step, with the skewed counts.
Gitlab::Redis::SharedState.with do |r|
  r.setex(succ_key, bucket_secs, anomaly_success)
  r.setex(fail_key, bucket_secs, anomaly_failure)
end

# Success percentage rounded to six decimals (2.380952 here); printed only.
expected_anomaly_vrate = (100.0 * anomaly_success.to_f / (anomaly_success + anomaly_failure)).round(6)

puts "\n[Anomalous] previous bucket #{prev_id}:"
puts "  success=#{anomaly_success} failure=#{anomaly_failure} expected_vrate=#{expected_anomaly_vrate}%"

# Snapshot the newest stream entry; it should be unchanged after evaluation.
before = last_vrate_entry(stream_key)

# `send` is used so the call works even if the method is not public.
mod.send(:evaluate_previous_window!)

after = last_vrate_entry(stream_key)

# Unlike the normal-bucket step, an unchanged newest entry is the passing
# outcome here. A failure is reported with both snapshots but does not abort.
if after == before
  puts "\n✅ No new vrate entry was recorded for anomalous bucket (as expected)"
else
  puts "\n❌ Anomalous vrate was appended unexpectedly"
  puts "before: #{before.inspect}"
  puts "after:  #{after.inspect}"
end
Edited by Matthew MacRae-Bovell

Merge request reports

Loading