Intermittent auth failures in dependency proxy
Example: https://gitlab.com/gitlab-org/gitlab/-/jobs/1323069858
Running with gitlab-runner 13.12.0-rc1 (b21d5c5b)
on docker-auto-scale-com 1d6b581d
feature flags: FF_USE_FASTZIP:true, FF_GITLAB_REGISTRY_HELPER_IMAGE:true, FF_SKIP_DOCKER_MACHINE_PROVISION_ON_CREATION_FAILURE:true
Resolving secrets 00:00
Preparing the "docker+machine" executor 00:19
Using Docker executor with image gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine ...
Authenticating with credentials from job payload (GitLab Registry)
Pulling docker image gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine ...
WARNING: Failed to pull image with policy "always": Error response from daemon: unauthorized: authentication required (manager.go:205:0s)
ERROR: Preparation failed: failed to pull image "gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine" with specified policies [always]: Error response from daemon: unauthorized: authentication required (manager.go:205:0s)
Will be retried in 3s ...
Using Docker executor with image gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine ...
Authenticating with credentials from job payload (GitLab Registry)
Pulling docker image gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine ...
WARNING: Failed to pull image with policy "always": Error response from daemon: unauthorized: authentication required (manager.go:205:0s)
ERROR: Preparation failed: failed to pull image "gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine" with specified policies [always]: Error response from daemon: unauthorized: authentication required (manager.go:205:0s)
Will be retried in 3s ...
Using Docker executor with image gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine ...
Authenticating with credentials from job payload (GitLab Registry)
Pulling docker image gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine ...
WARNING: Failed to pull image with policy "always": Error response from daemon: unauthorized: authentication required (manager.go:205:0s)
ERROR: Preparation failed: failed to pull image "gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine" with specified policies [always]: Error response from daemon: unauthorized: authentication required (manager.go:205:0s)
Will be retried in 3s ...
ERROR: Job failed (system failure): failed to pull image "gitlab.com:443/gitlab-org/dependency_proxy/containers/ruby:2.7-alpine" with specified policies [always]: Error response from daemon: unauthorized: authentication required (manager.go:205:0s)
One thing interesting from the logs (https://log.gprd.gitlab.net/goto/463e1ba5e0128a4f4823b9df06d1c958). There are two log messages. The first one shows the status code being 0, which usually happens when authentication fails (more details in https://github.com/roidrage/lograge/issues/67#issuecomment-601604344):
{
"_index": "pubsub-rails-inf-gprd-005287",
"_type": "_doc",
"_id": "0ihH5XkBp7mLvmpCgGPw",
"_version": 1,
"_score": null,
"_source": {
"@timestamp": "2021-06-07T07:02:18.657Z",
"json": {
"path": "/v2/gitlab-org/dependency_proxy/containers/ruby/manifests/2.7-alpine",
"method": "GET",
"db_primary_wal_count": 0,
"db_cached_count": 5,
"params": [
{
"key": "group_id",
"value": "gitlab-org"
},
{
"key": "image",
"value": "ruby"
},
{
"key": "tag",
"value": "2.7-alpine"
}
],
"db_replica_wal_count": 0,
"db_primary_cached_count": 0,
"correlation_id": "01F7JME2ECA6E5T09DJZXSV4EY",
"hostname": "web-11-sv-gprd",
"db_write_count": 2,
"mem_bytes": 806704,
"cpu_s": 0.027842,
"redis_duration_s": 0.001067,
"format": "json",
"db_duration_s": 0.00319,
"action": "manifest",
"redis_cache_calls": 3,
"mem_total_bytes": 1321184,
"cf_request_id": "0a86e021a50000ec1552956000000001",
"tier": "sv",
"rack_attack_redis_duration_s": 0.0006354004144668579,
"mem_objects": 12862,
"status": 0,
"redis_cache_write_bytes": 224,
"db_count": 12,
"fqdn": "web-11-sv-gprd.c.gitlab-production.internal",
"redis_shared_state_calls": 3,
"view_duration_s": 0,
"db_primary_count": 4,
"redis_cache_read_bytes": 217,
"mem_mallocs": 2335,
"stage": "main",
"tag": "rails.production",
"redis_shared_state_read_bytes": 3,
"redis_read_bytes": 220,
"db_replica_count": 8,
"ua": "docker/19.03.9 go/go1.13.5 git-commit/9d98839 kernel/5.4.49+ os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)",
"environment": "gprd",
"db_replica_duration_s": 0.002,
"redis_shared_state_write_bytes": 288,
"duration_s": 0.0167,
"db_primary_duration_s": 0.002,
"redis_cache_duration_s": 0.000577,
"time": "2021-06-07T07:01:48.653Z",
"queue_duration_s": 0.011785,
"redis_write_bytes": 512,
"cf_ray": "65b802e2adc1ec15-ATL",
"db_replica_cached_count": 5,
"redis_calls": 6,
"type": "web",
"remote_ip": "35.196.76.72",
"controller": "Groups::DependencyProxyForContainersController",
"rack_attack_redis_count": 1,
"redis_shared_state_duration_s": 0.00049,
"shard": "default"
},
"type": "pubsubbeat-pubsub-rails-inf-gprd-5f5d4bcd57-8ddsd",
"publish_time": "2021-06-07T07:02:18.546Z",
"host": {
"name": "pubsubbeat-pubsub-rails-inf-gprd-5f5d4bcd57-8ddsd"
}
},
"fields": {
"@timestamp": [
"2021-06-07T07:02:18.657Z"
],
"publish_time": [
"2021-06-07T07:02:18.546Z"
],
"json.time": [
"2021-06-07T07:01:48.653Z"
],
"json.remote_ip": [
"35.196.76.72"
]
},
"highlight": {
"json.correlation_id": [
"@kibana-highlighted-field@01F7JME2ECA6E5T09DJZXSV4EY@/kibana-highlighted-field@"
]
},
"sort": [
1623049308653
]
The other thing to note is that shortly before this message appeared, the load balancer message showed up:
{
"_index": "pubsub-rails-inf-gprd-005287",
"_type": "_doc",
"_id": "zyhH5XkBp7mLvmpCgGPw",
"_version": 1,
"_score": null,
"_source": {
"@timestamp": "2021-06-07T07:02:18.657Z",
"json": {
"message": "Host is online after replica status check",
"fqdn": "web-11-sv-gprd.c.gitlab-production.internal",
"db_port": 6433,
"time": "2021-06-07T07:01:48.639Z",
"severity": "INFO",
"tier": "sv",
"shard": "default",
"event": "host_online",
"db_host": "10.220.20.103",
"stage": "main",
"tag": "rails.database_load_balancing",
"correlation_id": "01F7JME2ECA6E5T09DJZXSV4EY",
"environment": "gprd",
"hostname": "web-11-sv-gprd",
"type": "web"
},
"type": "pubsubbeat-pubsub-rails-inf-gprd-5f5d4bcd57-8ddsd",
"publish_time": "2021-06-07T07:02:18.546Z",
"host": {
"name": "pubsubbeat-pubsub-rails-inf-gprd-5f5d4bcd57-8ddsd"
}
},
"fields": {
"@timestamp": [
"2021-06-07T07:02:18.657Z"
],
"publish_time": [
"2021-06-07T07:02:18.546Z"
],
"json.time": [
"2021-06-07T07:01:48.639Z"
]
},
"highlight": {
"json.correlation_id": [
"@kibana-highlighted-field@01F7JME2ECA6E5T09DJZXSV4EY@/kibana-highlighted-field@"
]
},
"sort": [
1623049308639
]
}