Verified commit 6a34ec47 authored by Lucas Charles

Add WafAnomalySummary service

Adds a service for fetching aggregate WAF stats from Elasticsearch.

This work contributes to #14707 by exposing summary statistics to
populate the new Threat Monitoring dashboard.
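
For context, a minimal usage sketch (the environment lookup is illustrative;
the parameters and returned hash mirror the service's initializer and
#execute below):

  environment = Environment.find_by(name: 'production') # illustrative lookup

  summary = Security::WafAnomalySummaryService.new(
    environment: environment,
    interval: 'hour',          # defaults to 'day'
    from: 7.days.ago.iso8601,  # defaults to 30 days ago
    to: Time.zone.now.iso8601
  ).execute

  # => { total_traffic: 3, anomalous_traffic: 0.33,
  #      history: { nominal: [...], anomalous: [...] },
  #      interval: 'hour', from: ..., to: ..., status: :success }
  # Returns nil when no Elasticsearch client is available.
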
---
title: Add WAF Anomaly Summary service
merge_request: 19789
author:
type: added
# frozen_string_literal: true

module Security
  # Service for fetching summary statistics from Elasticsearch.
  # Queries ES and retrieves both total nginx requests & modsec violations
  #
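  # Returns a hash with :total_traffic, :anomalous_traffic, :history
  # (:nominal/:anomalous histograms), :interval, :from, :to and :status,
  # or nil when no Elasticsearch client is available.
  #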
  class WafAnomalySummaryService < ::BaseService
    GITLAB_MANAGED_APPS_NAMESPACE = 'gitlab-managed-apps'
    INGRESS_CONTAINER_NAME = 'nginx-ingress-controller'
    MODSEC_LOG_CONTAINER_NAME = 'modsecurity-log'

    def initialize(environment:, interval: 'day', from: 30.days.ago.iso8601, to: Time.zone.now.iso8601)
      @environment = environment
      @interval = interval
      @from = from
      @to = to
    end

    def execute
      return if elasticsearch_client.nil?

      aggregate_results = elasticsearch_client.msearch(body: body)
      nginx_results, modsec_results = aggregate_results["responses"]

      nginx_total_requests = (nginx_results.dig("hits", "total") || 0).to_f
      modsec_total_requests = (modsec_results.dig("hits", "total") || 0).to_f
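      # Anomalous traffic is the fraction of total requests that triggered a
      # ModSecurity violation, e.g. 1 violation out of 3 requests => 0.33.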
      anomalous_traffic_count = nginx_total_requests.zero? ? 0 : (modsec_total_requests / nginx_total_requests).round(2)

      {
        total_traffic: nginx_total_requests.round,
        anomalous_traffic: anomalous_traffic_count,
        history: {
          nominal: histogram_from(nginx_results),
          anomalous: histogram_from(modsec_results)
        },
        interval: @interval,
        from: @from,
        to: @to,
        status: :success
      }
    end
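
    # Multi-search (msearch) body: alternating header/search pairs, so the
    # responses arrive in the same order they are defined here (nginx request
    # totals first, then modsec violations).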
    def body
      aggregation = aggregations(@interval)

      [
        {}, # wildcard index
        {
          query: nginx_requests_query,
          aggs: aggregation,
          size: 0 # no docs needed, only counts
        },
        {}, # wildcard index
        {
          query: modsec_requests_query,
          aggs: aggregation,
          size: 0 # no docs needed, only counts
        }
      ]
    end

    private

    def modsec_requests_query
      {
        bool: {
          must: [
            {
              range: {
                "@timestamp".to_sym => {
                  gte: @from,
                  lte: @to
                }
              }
            },
            {
              match_phrase: {
                "kubernetes.container.name" => {
                  query: MODSEC_LOG_CONTAINER_NAME
                }
              }
            },
            {
              match_phrase: {
                "kubernetes.namespace" => {
                  query: GITLAB_MANAGED_APPS_NAMESPACE
                }
              }
            }
          ]
        }
      }
    end

    def nginx_requests_query
      {
        bool: {
          must: [
            {
              range: {
                "@timestamp".to_sym => {
                  gte: @from,
                  lte: @to
                }
              }
            },
            {
              match: {
                message: {
                  query: environment_proxy_upstream_name
                }
              }
            },
            {
              match_phrase: {
                "kubernetes.container.name" => {
                  query: INGRESS_CONTAINER_NAME
                }
              }
            },
            {
              match_phrase: {
                "kubernetes.namespace" => {
                  query: GITLAB_MANAGED_APPS_NAMESPACE
                }
              }
            }
          ]
        }
      }
    end

    def aggregations(interval)
      {
        counts: {
          date_histogram: {
            field: "@timestamp",
            interval: interval
          }
        }
      }
    end
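
    # Maps Elasticsearch date_histogram buckets into { timestamp => count }
    # pairs, e.g. { "2019-12-04T23:00:00.000Z" => 1 }.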
    def histogram_from(results)
      buckets = results.dig("aggregations", "counts", "buckets") || []

      buckets.map { |bucket| { bucket["key_as_string"] => bucket["doc_count"] } }
    end

    # Derive proxy upstream name to filter nginx log by environment
    # See https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/log-format/
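    # The derived value has the form "<deployment namespace>-<environment slug>",
    # e.g. "my-namespace-review-feature" (illustrative values).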
    def environment_proxy_upstream_name
      "%s-%s" % [@environment.deployment_namespace, @environment.slug]
    end
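
    # nil when the environment's cluster has no Elastic Stack application with
    # an Elasticsearch client, in which case #execute returns early.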
    def elasticsearch_client
      @client ||= @environment.deployment_platform.cluster.application_elastic_stack&.elasticsearch_client
    end
  end
end

# frozen_string_literal: true

require 'spec_helper'

describe Security::WafAnomalySummaryService do
  let(:environment) { create(:environment, :with_review_app) }
  let!(:cluster) do
    create(:cluster, :provided_by_gcp, environment_scope: '*', projects: [environment.project])
  end
  let(:es_client) { double(Elasticsearch::Client) }

  let(:empty_response) do
    {
      "took" => 40,
      "timed_out" => false,
      "_shards" => { "total" => 11, "successful" => 11, "skipped" => 0, "failed" => 0 },
      "hits" => { "total" => 0, "max_score" => 0.0, "hits" => [] },
      "aggregations" => {
        "counts" => {
          "buckets" => []
        }
      },
      "status" => 200
    }
  end

  let(:nginx_response) do
    empty_response.deep_merge(
      "hits" => { "total" => 3 },
      "aggregations" => {
        "counts" => {
          "buckets" => [
            { "key_as_string" => "2019-12-04T23:00:00.000Z", "key" => 1575500400000, "doc_count" => 1 },
            { "key_as_string" => "2019-12-05T00:00:00.000Z", "key" => 1575504000000, "doc_count" => 0 },
            { "key_as_string" => "2019-12-05T01:00:00.000Z", "key" => 1575507600000, "doc_count" => 0 },
            { "key_as_string" => "2019-12-05T08:00:00.000Z", "key" => 1575532800000, "doc_count" => 2 }
          ]
        }
      }
    )
  end

  let(:modsec_response) do
    empty_response.deep_merge(
      "hits" => { "total" => 1 },
      "aggregations" => {
        "counts" => {
          "buckets" => [
            { "key_as_string" => "2019-12-04T23:00:00.000Z", "key" => 1575500400000, "doc_count" => 0 },
            { "key_as_string" => "2019-12-05T00:00:00.000Z", "key" => 1575504000000, "doc_count" => 0 },
            { "key_as_string" => "2019-12-05T01:00:00.000Z", "key" => 1575507600000, "doc_count" => 0 },
            { "key_as_string" => "2019-12-05T08:00:00.000Z", "key" => 1575532800000, "doc_count" => 1 }
          ]
        }
      }
    )
  end

  subject { described_class.new(environment: environment) }

  describe '#execute' do
    context 'without elastic_stack' do
      it 'returns no results' do
        expect(subject.execute).to be_nil
      end
    end

    context 'with default histogram' do
      using RSpec::Parameterized::TableSyntax

      where(:case_name, :nginx_results, :modsec_results, :total_traffic, :anomalous_traffic) do
        'no requests'     | -> { empty_response } | -> { empty_response }  | 0 | 0.0
        'no violations'   | -> { nginx_response } | -> { empty_response }  | 3 | 0.0
        'with violations' | -> { nginx_response } | -> { modsec_response } | 3 | 0.33
      end

      with_them do
        before do
          allow(es_client).to receive(:msearch) do
            { "responses" => [nginx_results.call, modsec_results.call] }
          end

          allow(environment.deployment_platform.cluster).to receive_message_chain(
            :application_elastic_stack, :elasticsearch_client
          ) { es_client }
        end

        it 'returns results' do
          results = subject.execute

          expect(results.fetch(:status)).to eq :success
          expect(results.fetch(:interval)).to eq 'day'
          expect(results.fetch(:total_traffic)).to eq total_traffic
          expect(results.fetch(:anomalous_traffic)).to eq anomalous_traffic
        end
      end
    end
  end

  describe '#body' do
    context 'with time window' do
      it 'passes time frame to Elasticsearch' do
        from = 1.day.ago
        to = Time.now

        subject = described_class.new(
          environment: environment,
          from: from,
          to: to
        )

        expect(
          subject.body.dig(1, :query, :bool, :must, 0, :range, :@timestamp, :gte)
        ).to eq from
        expect(
          subject.body.dig(1, :query, :bool, :must, 0, :range, :@timestamp, :lte)
        ).to eq to
        expect(
          subject.body.dig(3, :query, :bool, :must, 0, :range, :@timestamp, :gte)
        ).to eq from
        expect(
          subject.body.dig(3, :query, :bool, :must, 0, :range, :@timestamp, :lte)
        ).to eq to
      end
    end

    context 'with interval' do
      it 'passes interval to Elasticsearch' do
        interval = 'hour'

        subject = described_class.new(
          environment: environment,
          interval: interval
        )

        expect(
          subject.body.dig(1, :aggs, :counts, :date_histogram, :interval)
        ).to eq interval
        expect(
          subject.body.dig(3, :aggs, :counts, :date_histogram, :interval)
        ).to eq interval
      end
    end
  end
end