Skip to content
Commits on Source (38)
......@@ -19,8 +19,8 @@ variables:
# and pushed into https://hub.docker.com/repository/docker/gitlab/gitlab-agent-ci-image
BUILD_IMAGE_NAME: "gitlab/gitlab-agent-ci-image"
# must use image digest to invalidate cache if image is updated.
BUILD_IMAGE_SHA: "latest@sha256:69c88bd673b5b99aad4ffc933fbf357fd24322d85501a3238c35b61ad7404f9e"
FIPS_BUILD_IMAGE_SHA: "latest-fips@sha256:d5668d3f6dd9f5b1be74ad39f8ee4578ebf6501f6f7cb4db9dc3cbd1a94702b4"
BUILD_IMAGE_SHA: "latest@sha256:0ec4c2cd4b5aa692ef300f553f264317b03e5e1e367378e66ebf50bd9951ef75"
FIPS_BUILD_IMAGE_SHA: "latest-fips@sha256:8ef56807eb3803fdc467fc8c166f1aea6e7fa2e47ddb9ed77d228c83bc0f7821"
SECURE_ANALYZERS_PREFIX: "registry.gitlab.com/gitlab-org/security-products/analyzers"
CS_MAJOR_VERSION: 2
DOCKER_VERSION: "24.0.2" # https://hub.docker.com/_/docker
......@@ -75,6 +75,8 @@ test:
extends: .bazel_build
services:
- redis
tags:
- saas-linux-large-amd64
script:
- echo 'test --test_env=REDIS_URL=redis://redis:6379' >> .bazelrc
- make test-ci
......@@ -106,12 +108,16 @@ test-fips:
verify:
stage: test
extends: .bazel_build
tags:
- saas-linux-large-amd64
script:
- make verify-ci
lint:
image: golangci/golangci-lint:v1.52.2-alpine
stage: test
tags:
- saas-linux-large-amd64
script:
# Write the code coverage report to gl-code-quality-report.json
# and print linting issues to stdout in the format: path/to/file:line description
......
golang 1.19.9
golang 1.19.10
......@@ -52,7 +52,7 @@ regenerate-proto: internal-regenerate-proto fmt update-bazel
.PHONY: internal-regenerate-mocks
internal-regenerate-mocks:
go generate -x -v \
PATH="${PATH}:$(shell pwd)/build" go generate -x -v \
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/cmd/agentk/agentkapp" \
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/cmd/kas/kasapp" \
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/internal/module/modagent" \
......@@ -203,7 +203,8 @@ release-commit:
# Set TARGET_DIRECTORY variable to the target directory before running this target
.PHONY: gdk-install
gdk-install:
bazel run //cmd/kas:extract_kas_race -- '$(TARGET_DIRECTORY)'
bazel run //cmd/kas:extract_kas_race
mv 'cmd/kas/kas_race' '$(TARGET_DIRECTORY)'
# Set TARGET_DIRECTORY variable to the target directory before running this target
# Optional: set GIT_TAG and GIT_COMMIT variables to supply those values manually.
......
......@@ -168,7 +168,7 @@ bazel_skylib_workspace()
go_rules_dependencies()
go_register_toolchains(
version = "1.19.9",
version = "1.19.10",
)
gazelle_dependencies()
......
......@@ -4,12 +4,6 @@ exports_files([
"buildozer_commands.txt",
])
# Runnable wrapper around the copy_absolute.sh script. Visibility is public so that targets
# declared in other packages can refer to this binary by its label.
sh_binary(
name = "copy_absolute",
srcs = ["copy_absolute.sh"],
visibility = ["//visibility:public"],
)
multirun(
name = "extract_generated_proto",
commands = [
......
load("@com_github_ash2k_bazel_tools//multirun:def.bzl", "command")
load("@io_bazel_rules_go//go:def.bzl", "go_test")
# copy_absolute declares a public, runnable `command` target called `name` that invokes
# //build:copy_absolute.
#
# Arguments baked into the target:
#   1. "$(rootpaths <label>)" - the output paths of `label` (also added to `data`);
#   2. `file_to_copy`         - the file name to pick out of those outputs.
# The target directory is NOT part of the target definition: it must be supplied as an
# additional command line argument when the target is run.
def copy_absolute(name, label, file_to_copy):
    rootpaths_argument = "$(rootpaths %s)" % label
    command(
        name = name,
        command = "//build:copy_absolute",
        data = [label],
        arguments = [rootpaths_argument, file_to_copy],
        visibility = ["//visibility:public"],
    )
# go_custom_test is a macro around go_test that sets size="small" and race="on" if these
# arguments are not set explicitly.
def go_custom_test(size = "small", race = "on", **kwargs):
......
#!/usr/bin/env bash

set -e -o pipefail

# This is a helper script that allows copying files, generated by a bazel rule, into a directory, specified
# by an absolute path.
#
# Positional arguments (exactly three are required):
#   $1  source files: candidate paths, space-separated inside ONE argument
#   $2  file to copy: base name of the candidate that should be copied
#   $3  absolute target directory: where the selected file is copied to
#
# Exit status:
#   0  the first candidate whose base name equals $2 was copied and made writable
#   1  wrong number of arguments, or no candidate matched $2
#   other non-zero values come from a failing cp/chmod (because of `set -e`)

if (( $# != 3 )); then
  echo 'Not enough or too many command line arguments' >&2
  exit 1
fi

source_files="$1"
file_to_copy="$2"
absolute_target_directory="$3"

# Don't want to double quote because $(rootpaths //label) in build.bzl expands into a single argument which
# contains space-separated file names.
# shellcheck disable=SC2068
for file in $source_files
do
  name=$(basename "$file")
  if [[ "$name" == "$file_to_copy" ]]
  then
    to="$absolute_target_directory/$name"
    cp "$file" "$to"
    # Make the copy writable. Presumably needed because the source file is read-only
    # and cp carries the mode over - TODO confirm.
    chmod +w "$to"
    # Only the first match is copied.
    exit 0
  fi
done

# Reaching this point means nothing was copied. Report it instead of exiting successfully,
# otherwise the caller would believe the file has been extracted.
echo "File '$file_to_copy' was not found among: $source_files" >&2
exit 1
#!/usr/bin/env bash
# Thin wrapper that runs mockgen via `go run` at an explicitly pinned module version and
# forwards all command line arguments to it unchanged. `exec` replaces this shell process
# with the `go` process.
# Make sure version matches go.mod
exec go run github.com/golang/mock/mockgen@v1.7.0-rc.1.0.20220812172401-5b455625bd2c "$@"
......@@ -2365,8 +2365,8 @@ def go_repositories():
name = "com_github_redis_rueidis",
build_file_proto_mode = "disable_global",
importpath = "github.com/redis/rueidis",
sum = "h1:ak3sMqeleRDefAdV9Erj/Lzc78hC69bYkcqozglX0j8=",
version = "v1.0.6-go1.19",
sum = "h1:Vh6aUUcqZNcBsIMu8IytoOvc5lAxsztL75pj82JFxzw=",
version = "v1.0.7-go1.19",
)
go_repository(
name = "com_github_robfig_cron_v3",
......
package agentkapp
//go:generate go run github.com/golang/mock/mockgen -destination "mock_for_test.go" -package "agentkapp" "gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/cmd/agentk/agentkapp" "Runner,LeaderElector"
//go:generate mockgen.sh -destination "mock_for_test.go" -package "agentkapp" "gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/cmd/agentk/agentkapp" "Runner,LeaderElector"
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("@aspect_bazel_lib//lib:write_source_files.bzl", "write_source_file")
load("//cmd:cmd.bzl", "define_command_targets")
load("//build:build.bzl", "copy_absolute")
go_library(
name = "kas_lib",
......@@ -20,14 +20,22 @@ define_command_targets(
binary_embed = [":kas_lib"],
)
copy_absolute(
write_source_file(
name = "extract_kas",
file_to_copy = "kas",
label = ":kas",
diff_test = False,
executable = True,
in_file = ":kas",
out_file = "kas",
tags = ["manual"],
visibility = ["//visibility:public"],
)
copy_absolute(
write_source_file(
name = "extract_kas_race",
file_to_copy = "kas_race",
label = ":kas_race",
diff_test = False,
executable = True,
in_file = ":kas_race",
out_file = "kas_race",
tags = ["manual"],
visibility = ["//visibility:public"],
)
......@@ -120,7 +120,6 @@ go_library(
"@org_golang_google_protobuf//reflect/protoreflect",
"@org_golang_google_protobuf//runtime/protoimpl",
"@org_golang_google_protobuf//types/known/anypb",
"@org_golang_x_net//http2",
"@org_golang_x_time//rate",
"@org_uber_go_zap//:zap",
"@org_uber_go_zap//zapcore",
......
......@@ -8,10 +8,12 @@ import (
"github.com/ash2k/stager"
grpc_validator "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/validator"
"github.com/prometheus/client_golang/prometheus"
"github.com/redis/rueidis"
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/internal/module/modserver"
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/internal/module/observability"
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/internal/tool/grpctool"
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/internal/tool/httpz"
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/internal/tool/logz"
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/internal/tool/redistool"
"gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/internal/tool/tlstool"
......@@ -21,7 +23,6 @@ import (
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
"golang.org/x/net/http2"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/keepalive"
......@@ -29,7 +30,8 @@ import (
)
const (
defaultMaxMessageSize = 10 * 1024 * 1024
defaultMaxMessageSize = 10 * 1024 * 1024
agentConnectionRateExceededMetricName = "agent_server_rate_exceeded_total"
)
type agentServer struct {
......@@ -43,16 +45,26 @@ type agentServer struct {
func newAgentServer(log *zap.Logger, cfg *kascfg.ConfigurationFile, tp trace.TracerProvider,
redisClient rueidis.Client, ssh stats.Handler, factory modserver.AgentRpcApiFactory,
probeRegistry *observability.ProbeRegistry, streamProm grpc.StreamServerInterceptor, unaryProm grpc.UnaryServerInterceptor) (*agentServer, error) {
probeRegistry *observability.ProbeRegistry, reg *prometheus.Registry, streamProm grpc.StreamServerInterceptor,
unaryProm grpc.UnaryServerInterceptor) (*agentServer, error) {
listenCfg := cfg.Agent.Listen
tlsConfig, err := tlstool.MaybeDefaultServerTLSConfig(listenCfg.CertificateFile, listenCfg.KeyFile)
if err != nil {
return nil, err
}
rateExceededCounter := prometheus.NewCounter(prometheus.CounterOpts{
Name: agentConnectionRateExceededMetricName,
Help: "The total number of times configured rate limit of new agent connections was exceeded",
})
err = reg.Register(rateExceededCounter)
if err != nil {
return nil, err
}
agentConnectionLimiter := redistool.NewTokenLimiter(
redisClient,
cfg.Redis.KeyPrefix+":agent_limit",
uint64(listenCfg.ConnectionsPerTokenPerMinute),
rateExceededCounter,
func(ctx context.Context) redistool.RpcApi {
return &tokenLimiterApi{
rpcApi: modserver.AgentRpcApiFromContext(ctx),
......@@ -108,7 +120,7 @@ func (s *agentServer) Start(stage stager.Stage) {
var err error
if s.listenCfg.Websocket { // Explicitly handle TLS for a WebSocket server
if s.tlsConfig != nil {
s.tlsConfig.NextProtos = []string{http2.NextProtoTLS, "http/1.1"} // h2 for gRPC, http/1.1 for WebSocket
s.tlsConfig.NextProtos = []string{httpz.TLSNextProtoH2, httpz.TLSNextProtoH1} // h2 for gRPC, http/1.1 for WebSocket
lis, err = tls.Listen(*s.listenCfg.Network, s.listenCfg.Address, s.tlsConfig)
} else {
lis, err = net.Listen(*s.listenCfg.Network, s.listenCfg.Address)
......
......@@ -164,7 +164,7 @@ func (a *ConfiguredApp) Run(ctx context.Context) (retErr error) {
// Server for handling agentk requests
agentSrv, err := newAgentServer(a.Log, a.Configuration, tp, redisClient, ssh, agentRpcApiFactory, probeRegistry, // nolint: contextcheck
streamProm, unaryProm)
reg, streamProm, unaryProm)
if err != nil {
return fmt.Errorf("agent server: %w", err)
}
......
......@@ -28,7 +28,7 @@ const (
defaultAgentListenNetwork = "tcp"
defaultAgentListenAddress = "127.0.0.1:8150"
defaultAgentListenConnectionsPerTokenPerMinute = 10000
defaultAgentListenConnectionsPerTokenPerMinute = 40000
defaultAgentListenMaxConnectionAge = 30 * time.Minute
defaultGitalyGlobalApiRefillRate = 30.0
......
package kasapp
//go:generate go run github.com/golang/mock/mockgen -destination "mock_for_test.go" -package "kasapp" "gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/cmd/kas/kasapp" "SentryHub"
//go:generate mockgen.sh -destination "mock_for_test.go" -package "kasapp" "gitlab.com/gitlab-org/cluster-integration/gitlab-agent/v16/cmd/kas/kasapp" "SentryHub"
......@@ -119,7 +119,7 @@ func (f *tunnelFinder) Find(ctx context.Context) (readyTunnel, error) {
if f.tryNextKas(kasUrls) { // nolint: contextcheck
// Connected to an instance.
needToTryNewKas = false
t = time.NewTimer(tryNewKasInterval)
t.Reset(tryNewKasInterval)
} else {
// Couldn't find a kas instance we haven't connected to already.
needToTryNewKas = true
......@@ -145,7 +145,7 @@ func (f *tunnelFinder) Find(ctx context.Context) (readyTunnel, error) {
f.stopAllConnectionAttempts()
return readyTunnel{}, ctx.Err()
case <-f.noTunnel:
t.Stop()
stopAndDrain(t)
tryNextKasWhenTimerNotRunning()
case kasUrls = <-kasUrlsC:
if !needToTryNewKas {
......@@ -154,8 +154,8 @@ func (f *tunnelFinder) Find(ctx context.Context) (readyTunnel, error) {
if f.tryNextKas(kasUrls) { // nolint: contextcheck
// Connected to a new kas instance.
needToTryNewKas = false
t.Stop()
t = time.NewTimer(tryNewKasInterval)
stopAndDrain(t)
t.Reset(tryNewKasInterval)
}
case <-t.C:
tryNextKasWhenTimerNotRunning()
......@@ -299,3 +299,12 @@ func (f *tunnelFinder) stopAllConnectionAttempts() {
c.cancel()
}
}
func stopAndDrain(t *time.Timer) {
if !t.Stop() {
select {
case <-t.C:
default:
}
}
}
......@@ -105,12 +105,6 @@ func (a *serverApi) hub() (SentryHub, string) {
return a.Hub, ""
}
// OnGitPushEvent runs the given callback function for a received Git push event.
// The Git push event may come from any GitLab project and as such it's up to the
// callback to filter out the events that it's interested in.
// This particular implementation registers an unbuffered channel for the callback
// which receives the actual event from a redis subscription.
// This is mainly to unblock the redis subscription from the callback execution.
func (a *serverApi) OnGitPushEvent(ctx context.Context, callback modserver.GitPushEventCallback) {
ch := make(chan *modserver.Project)
a.gitPushEventSubscriptions.add(ch)
......@@ -172,24 +166,17 @@ func (a *serverApi) subscribeGitPushEvent(ctx context.Context) {
// dispatchGitPushEvent dispatches the given `project` which is the message of the Git push event
// to all registered subscriptions registered by OnGitPushEvent.
// This particular implementation will drop events per registered callback if their
// registered channel is blocked, e.g. when the callback is too slow to handle the produced events.
// This is suboptimal, but will decouple and unblock the redis subscription from callback function's performance.
func (a *serverApi) dispatchGitPushEvent(ctx context.Context, project *modserver.Project) {
done := ctx.Done()
a.gitPushEventSubscriptions.mu.Lock()
defer a.gitPushEventSubscriptions.mu.Unlock()
done := ctx.Done()
for _, ch := range a.gitPushEventSubscriptions.chs {
select {
case <-done:
return
case ch <- project:
default:
// NOTE: if for whatever reason the subscriber isn't able to keep up with the events,
// we just drop them for now.
a.log.Debug("Dropping Git push event", logz.ProjectId(project.FullPath))
continue
}
}
}
......
# Operational Container Scanning
## Essential readings
- [Operational container scanning](https://docs.gitlab.com/ee/user/clusters/agent/vulnerabilities.html) docs
- gitlab-agent [high-level-architecture](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/master/doc/architecture.md#high-level-architecture)
- [developing](./developing.md)
## Running OCS locally
- OCS code resides in [starboard_vulnerability](../internal/module/starboard_vulnerability/)
1. Follow the `running-kas-and-agentk-locally` steps in [developing.md](developing.md#running-kas-and-agentk-locally) to get agentk running locally
- For Step 3: `Setup kas in GDK`, also follow steps in [(Optional) Deploy the GitLab Agent (agentk) with k3d](https://gitlab.com/gitlab-org/gitlab-development-kit/-/blob/main/doc/howto/kubernetes_agent.md#optional-deploy-the-gitlab-agent-agentk-with-k3d)
- k3d enables you to create multiple clusters in your dev machine
- Note down the `namespace` that was created in Step 5. You will need it in the next step.
- You don't usually need to start kas locally unless you make changes to the [agent config](../pkg/agentcfg/agentcfg.proto)
- Optional but good to set up [debugging locally](./developing.md#debugging-locally) for productivity
1. k8s rbac for OCS
- The [helm chart](../build/deployment/gitlab-agent/), that users use to install gitlab-agent, handles the creation of a `service account`, `gitlab-agent namespace` and granting of `cluster-admin` role.
- OCS requires a `cluster-admin` role to get all the workloads in the cluster.
- You can use this yaml file to create the access needed.
- `kubectl apply -f <path_to_yaml>`
```yaml
apiVersion: v1
kind: Namespace
metadata:
name: gitlab-agent
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: gitlab-agent
namespace: gitlab-agent
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: gitlab-agent-cluster-admin
roleRef:
name: cluster-admin
kind: ClusterRole
apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
name: gitlab-agent
namespace: gitlab-agent
```
## Scanning workflow
1. Configuration changes
- `kas` notifies `gitlab agent` whenever there's an agent configuration change.
- When OCS starts, a [security policies worker](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/355eb39eb06679a0a417e31f59a7481b91dfe93a/internal/module/starboard_vulnerability/agent/module.go#L31) will constantly poll for changes to the scan execution policy.
- A change in config will [stop any currently running `Scan Job` and apply the new configuration](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/e06d93c3add33d865a12237910cfc63c17048520/internal/module/starboard_vulnerability/agent/module.go#L42-67).
2. When the cron schedule is reached
1. A [`Scan Job`](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/2453f2fde531372a278d6c9483e6522c9d0c90ca/internal/module/starboard_vulnerability/agent/scanner.go#L73-77) is started.
1. The [`Scan Job` creates a `Trivy Scanner pod`](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/2453f2fde531372a278d6c9483e6522c9d0c90ca/internal/module/starboard_vulnerability/agent/scanner.go#L113) for each namespace specified in the configuration.
1. The `Trivy Scanner Pod` scans all the pods in the namespace, excluding managed workloads such as pods managed by replicasets.
- This ensures no duplicate vulnerabilities are reported since the replicaset image is similar to the managed pod image.
1. Once the scan completes, the `Trivy Scanner Pod` will [exit with PodSucceeded status](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/2453f2fde531372a278d6c9483e6522c9d0c90ca/internal/module/starboard_vulnerability/agent/scanner.go#L216-239). This will trigger the `Scan Job` to retrieve the logs from the `Trivy Scanner Pod`.
1. `Scan Job` then parses the logs to retrieve the vulnerabilities and transmits them to GitLab via API requests.
```mermaid
graph
subgraph Gitlab
GL1["kubernetes agent server (kas)"]
GL2["rest API"]
GL1-->GL2
end
GL1-->|config changes|GA1
subgraph Kubernetes Cluster
subgraph "gitlab agent namespace"
GA1[gitlab agent]
GA2[Scan Job]
GA3["default ns scanner pod"]
GA4["kubesystem ns scanner pod"]
GA1-->|api requests|GL1
GA1-->|when cron schedule reached\nstart scanner job|GA2
GA2-->|"Create\nTrivy Scanner Pod\nto scan\ndefault namespace(ns)"|GA3
GA3-->|"Retrieve logs"|GA2
GA2-->|"Create\nTrivy Scanner Pod\nto scan\nkubesystem namespace(ns)"|GA4
GA4-->|"Retrieve logs"|GA2
GA2-->|Transmit\nvulnerabilities|GA1
end
subgraph "default namespace"
DN1[Pod]
DN2[Managed Pod]
DN3[Replicaset]
DN3-.manages.-DN2
end
GA3-->|scans image|DN1
GA3-->|scans image|DN3
subgraph "kubesystem namespace"
KSN1[Pod]
KSN2[Managed Pod]
KSN3[Replicaset]
KSN3-.manages.-KSN2
end
GA4-->|scans image|KSN1
GA4-->|scans image|KSN3
end
```
......@@ -13,6 +13,7 @@ require (
github.com/getsentry/sentry-go v0.21.0
github.com/go-logr/zapr v1.2.4
github.com/golang-jwt/jwt/v5 v5.0.0
// Make sure to update build/mockgen.sh when updating
github.com/golang/mock v1.7.0-rc.1.0.20220812172401-5b455625bd2c
github.com/google/go-cmp v0.5.9
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.0-rc.0
......@@ -21,7 +22,7 @@ require (
github.com/imdario/mergo v0.3.16
github.com/piotrkowalczuk/promgrpc/v4 v4.1.0
github.com/prometheus/client_golang v1.15.1
github.com/redis/rueidis v1.0.6-go1.19
github.com/redis/rueidis v1.0.7-go1.19
github.com/robfig/cron/v3 v3.0.1
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.8.3
......@@ -34,7 +35,7 @@ require (
go.opentelemetry.io/otel/sdk v1.16.0
go.opentelemetry.io/otel/trace v1.16.0
go.uber.org/zap v1.24.0
golang.org/x/net v0.10.0
golang.org/x/net v0.10.0 // indirect
golang.org/x/sync v0.2.0
golang.org/x/time v0.3.0
google.golang.org/api v0.124.0
......