Commit 97c4287e authored by Suleimi Ahmed
Merge branch 'test-skip-post-deploy' into 'master'

Draft: Test skip post deploy

See merge request !2115



Merged-by: Suleimi Ahmed <sahmed@gitlab.com>
parents 835224e6 1d4f66db
Pipeline #1689415228 failed
......@@ -5,6 +5,7 @@ include:
- local: .gitlab/ci/release.yml
- local: .gitlab/ci/document.yml
- local: .gitlab/ci/child_jobs.yml
- local: .gitlab/ci/migrate.yml
- component: ${CI_SERVER_FQDN}/gitlab-org/components/danger-review/danger-review@2.0.0
inputs:
job_allow_failure: true
......@@ -39,6 +40,7 @@ stages:
- validate
- test
- integration
- migrate
- release
- child jobs
- document
......
# This pipeline is designed to verify the migration continuity between the self-managed version in the `REGISTRY_SELF_MANAGED_RELEASE_VERSION` variable and the current development version.
# This is useful for ensuring a smooth upgrade experience between the current development version and the current self-managed version.
# While not a complete solution, this pipeline is a crucial first step in identifying potential upgrade issues, as discussed in https://gitlab.com/gitlab-org/container-registry/-/issues/1516.
# NOTE: The `REGISTRY_SELF_MANAGED_RELEASE_VERSION` variable must be updated manually to match the registry version of the latest GitLab release whenever a new schema migration is added to the registry.
.base_migration_config:
variables:
REGISTRY_SELF_MANAGED_RELEASE_VERSION: "v4.16.0-gitlab"
DOCKER_HOST: tcp://docker:2375
POSTGRES_DB: registry_dev
POSTGRES_USER: registry
POSTGRES_PASSWORD: apassword
REGISTRY_DATABASE_HOST: db
REGISTRY_OLD_IMAGE: registry.gitlab.com/gitlab-org/build/cng/gitlab-container-registry:${REGISTRY_SELF_MANAGED_RELEASE_VERSION}
# TODO: This job is convoluted because of the nature of dind. Consider building the older registry from source.
migrate_old_registry:
extends: .base_migration_config
image: docker:27.2.0
stage: migrate
tags:
- gitlab-org-docker # For dind runners
services:
- name: docker:27.2.0-dind
alias: docker
- name: postgres:${PG_CURR_VERSION}-alpine
alias: "db"
script:
- echo "Pulling old registry image $REGISTRY_OLD_IMAGE"
- docker pull $REGISTRY_OLD_IMAGE
- echo "Running migrations for old registry version..."
# Attempt to extract the IP address of the PostgreSQL service from /etc/hosts so we can link it to the containers spun up by dind
- POSTGRES_IP=$(awk '{if ($2 == "db") print $1;}' /etc/hosts | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' || true)
# Check if POSTGRES_IP is empty (meaning the first method failed)
- |
if [ -z "$POSTGRES_IP" ]; then
echo "No IP found in /etc/hosts for 'db', attempting to resolve using dig..."
# Install bind-tools to use dig
apk add --no-cache bind-tools
# Use dig to resolve the IP address for db (fallback method)
POSTGRES_IP=$(dig +short db)
# Check if dig was successful in resolving the IP address
if [ -z "$POSTGRES_IP" ]; then
echo "Error: Could not resolve 'db' IP address using dig."
exit 1
else
echo "Resolved database IP address using dig: $POSTGRES_IP"
fi
else
echo "Found PostgreSQL IP in /etc/hosts: $POSTGRES_IP"
fi
- ping -w 2 $POSTGRES_IP
- docker run --rm --add-host="db:$POSTGRES_IP" -v $(pwd)/config/database-filesystem.yml:/config/database-filesystem.yml -e REGISTRY_DATABASE_HOST=${REGISTRY_DATABASE_HOST} --entrypoint \registry $REGISTRY_OLD_IMAGE \database migrate up /config/database-filesystem.yml
- docker run --rm --add-host="db:$POSTGRES_IP" -e PGPASSWORD=$POSTGRES_PASSWORD postgres:${PG_CURR_VERSION}-alpine pg_dump -h $REGISTRY_DATABASE_HOST -U $POSTGRES_USER -d $POSTGRES_DB > db_dump.sql
needs: []
artifacts:
paths:
- db_dump.sql
expire_in: 1 hour
migrate_new_registry:
extends: .base_migration_config
needs: [ "migrate_old_registry"]
image: golang:$GO_VERSION
stage: migrate
services:
- name: postgres:${PG_CURR_VERSION}-alpine
alias: "db"
dependencies:
- migrate_old_registry # Ensure the artifact from this job is available
script:
# Install psql client tools
- apt-get update && apt-get install -y lsb-release
- apt update && apt -y install gnupg2 wget nano
- sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
- curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg
- apt update && apt -y install postgresql-client-${PG_CURR_VERSION}
- echo "Restoring database from dump..."
- PGPASSWORD=$POSTGRES_PASSWORD psql -h ${REGISTRY_DATABASE_HOST} -U $POSTGRES_USER -d $POSTGRES_DB -f db_dump.sql # Load the dump file into PostgreSQL
- make bin/registry
- echo "Running new registry migration..."
- ./bin/registry database migrate up --skip-post-deployment config/database-filesystem.yml
- echo "Dumping the database after migration..."
- PGPASSWORD=$POSTGRES_PASSWORD pg_dump -h db -U $POSTGRES_USER -d $POSTGRES_DB > db_dump_new.sql
artifacts:
paths:
- db_dump_new.sql # Save the new dump file
expire_in: 1 hour
# This job is purely informational and is used to provide users with an overview of the changes introduced between the current development registry version
# and the `REGISTRY_SELF_MANAGED_RELEASE_VERSION`, from the database's perspective.
compare_dumps:
needs: [ "migrate_old_registry", "migrate_new_registry"]
image: alpine:latest
stage: migrate
dependencies:
- migrate_old_registry
- migrate_new_registry
script:
- apk add --no-cache colordiff
- echo "Comparing old and new database dumps..."
- colordiff --fakeexitcode db_dump.sql db_dump_new.sql > db_diff_output.txt
- if [ -s db_diff_output.txt ]; then
echo "Differences found in the database dumps:";
cat db_diff_output.txt;
else
echo "No differences found in the database dumps.";
fi
artifacts:
paths:
- db_diff_output.txt
expire_in: 1 hour
......@@ -35,6 +35,7 @@ Related to <!-- add the issue URL here -->
- MR includes DB changes
- **Do not** include code that depends on the schema migrations in the same commit. Split the MR into two or more.
- [ ] Manually run up and down migrations in a [postgres.ai](https://console.postgres.ai/gitlab/joe-instances/68) production database clone and post a screenshot of the result here.
- [ ] If adding new schema migrations, make sure the `REGISTRY_SELF_MANAGED_RELEASE_VERSION` CI variable in [migrate.yml](../ci/migrate.yml) points to the registry version of the latest self-managed GitLab release.
- [ ] If adding new queries, extract a query plan from [postgres.ai](https://console.postgres.ai/gitlab/joe-instances/68) and post the link here. If changing existing queries, also extract a query plan for the current version for comparison.
- [ ] I do not have access to postgres.ai and have made a comment on this MR asking for these to be run on my behalf.
- [ ] If adding a new background migration, follow the guide for [performance testing new background migrations](../../docs/spec/gitlab/database-background-migrations.md#performance-testing-guide) and add a report/summary to the MR with your analysis.
......
package migrations
import (
migrate "github.com/rubenv/sql-migrate"
)
func init() {
m := &Migration{
PostDeployment: false,
Migration: &migrate.Migration{
Id: "20250205055504_create_media_type_id_convert_to_bigint_column",
Up: []string{
// Add the new column with a default value of 0 and a NOT NULL constraint
"ALTER TABLE manifests ADD COLUMN media_type_id_convert_to_bigint BIGINT NOT NULL DEFAULT 0",
},
Down: []string{
// Remove the column if it exists
"ALTER TABLE manifests DROP COLUMN IF EXISTS media_type_id_convert_to_bigint",
},
},
}
allMigrations = append(allMigrations, m)
}
package migrations
import (
"fmt"
migrate "github.com/rubenv/sql-migrate"
)
func init() {
m := &Migration{
PostDeployment: true,
Migration: &migrate.Migration{
Id: "20250205070732_backfill_manifests_media_type_id_convert_to_bigint_column",
Up: []string{
fmt.Sprintf(
`INSERT INTO batched_background_migrations ("name", "min_value", "max_value", "batch_size", "status", "job_signature_name", "table_name", "column_name")
VALUES ('copy_manifests_media_type_id_column_to_media_type_id_convert_to_bigint_column', 0, 9223372036854775807, 10000, 1, 'copyManifestMediaTypeIDToNewBigIntColumn', '%s', 'id')`,
"public.manifests",
),
},
Down: []string{
`DELETE FROM batched_background_migrations WHERE "name" = 'copy_manifests_media_type_id_column_to_media_type_id_convert_to_bigint_column'`,
},
},
}
allMigrations = append(allMigrations, m)
}
package migrations
import (
"fmt"
migrate "github.com/rubenv/sql-migrate"
)
func init() {
m := &Migration{
PostDeployment: false,
Migration: &migrate.Migration{
Id: "20250226070732_another_test",
Up: []string{
fmt.Sprintf(
`INSERT INTO batched_background_migrations ("name", "min_value", "max_value", "batch_size", "status", "job_signature_name", "table_name", "column_name")
VALUES ('copy_layers_media_type_id_column_to_media_type_id_convert_to_bigint_column', 0, 9223372036854775807, 10000, 1, 'copyLayersMediaTypeIDToNewBigIntColumn', '%s', 'id')`,
"public.layers",
),
},
Down: []string{
`DELETE FROM batched_background_migrations WHERE "name" = 'copy_layers_media_type_id_column_to_media_type_id_convert_to_bigint_column'`,
},
},
}
allMigrations = append(allMigrations, m)
}
......@@ -428,3 +428,98 @@ func (m *MigratorImpl) applyMigration(src *migrate.MemoryMigrationSource, migrat
}
return nil
}
// CanSkipPostDeploy checks whether post-deployment (PD) migrations can be safely skipped.
// It ensures that all unapplied PD migrations are positioned at the end of the migration list.
// If a PD migration appears between non-PD migrations, skipping is unsafe.
// TODO (suleimiahmed): this is a roundabout way of ensuring that users do not harm themselves when skipping PD migrations
// due to the sequential way that PD and non-PD schema migrations are applied. We should revisit this and only rely on
// strictly defined functional dependencies when skipping post-deploy migrations. https://gitlab.com/gitlab-org/container-registry/-/issues/1516.
func (m *MigratorImpl) CanSkipPostDeploy(migrationlimit int) (bool, int, error) {
// retrieve applied migration records from the database.
records, err := migrate.GetMigrationRecords(m.db, dialect)
if err != nil {
return false, 0, fmt.Errorf("skip-post-deployment migration check failed: %w", err)
}
// sort unapplied migrations and classify post-deployment migrations.
sortedMigrations, unappliedPDSet := classifyUnappliedMigrations(migrationlimit, records, m.migrations)
// validate that all post-deployment migrations appear in the correct order.
if failure, valid := validatePostDeployMigrationOrder(sortedMigrations, unappliedPDSet); !valid {
return false, failure.SafeToMigrateLimit, fmt.Errorf("cannot safely skip post-deployment migration: %s", failure.MigrationID)
}
return true, 0, nil
}
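// Illustrative usage sketch only (not part of this change): it mirrors how the
// `database migrate up --skip-post-deployment` command later in this merge
// request consumes the return values, with flag plumbing simplified.
//
//	canSkip, limit, err := m.CanSkipPostDeploy(maxNumMigrations)
//	if !canSkip {
//	    fmt.Printf("WARNING: %v\n", err)
//	    if limit == -1 {
//	        // no non-PD migration can run until pending PD migrations are applied
//	        return errPendingPDMigration
//	    }
//	    maxNumMigrations = limit // cap the run to the safe non-PD prefix
//	}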
// PostDeployOrderFailure represents a failure case where a post-deployment migration is incorrectly ordered.
type PostDeployOrderFailure struct {
MigrationID string // id of the improperly ordered migration
SafeToMigrateLimit int // number of non-PD migrations that can be safely applied before failure
}
// validatePostDeployMigrationOrder ensures that post-deployment migrations are correctly positioned.
// It reports a failure when a non-PD migration appears after an unapplied PD migration.
func validatePostDeployMigrationOrder(sortedMigrations []*migrate.Migration, unappliedPDSet map[string]struct{}) (*PostDeployOrderFailure, bool) {
var (
lastPDID string
safeMigrateLimit int
pdSeen bool
)
for _, migration := range sortedMigrations {
if _, isPostDeploy := unappliedPDSet[migration.Id]; isPostDeploy {
// mark the first occurrence of a post-deployment migration.
lastPDID = migration.Id
pdSeen = true
} else if pdSeen {
// a non-PD migration appearing after a PD migration is a violation.
if safeMigrateLimit == 0 {
safeMigrateLimit = -1 // no safe non-PD migration can be applied.
}
return &PostDeployOrderFailure{
MigrationID: lastPDID,
SafeToMigrateLimit: safeMigrateLimit,
}, false
}
if !pdSeen {
// Only count non-PD migrations before encountering a PD migration.
safeMigrateLimit++
}
}
return nil, true
}
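// Illustrative sketch (not part of this change): how the ordering check behaves
// on two toy sequences with made-up IDs; it mirrors the table-driven tests below.
func Example_postDeployOrdering() {
	pdSet := map[string]struct{}{"PD-002": {}, "PD-003": {}}

	// All pending post-deployment migrations trail the non-PD ones, so they
	// can be safely skipped.
	_, ok := validatePostDeployMigrationOrder(
		[]*migrate.Migration{{Id: "001"}, {Id: "002"}, {Id: "PD-003"}}, pdSet)
	fmt.Println(ok)

	// A non-PD migration ("003") follows the unapplied "PD-002", so skipping is
	// unsafe; only the single migration before "PD-002" can be applied safely.
	failure, ok := validatePostDeployMigrationOrder(
		[]*migrate.Migration{{Id: "001"}, {Id: "PD-002"}, {Id: "003"}}, pdSet)
	fmt.Println(ok, failure.SafeToMigrateLimit)

	// Output:
	// true
	// false 1
}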
// classifyUnappliedMigrations sorts unapplied migrations and identifies post-deployment migrations.
func classifyUnappliedMigrations(migrationlimit int, appliedRecords []*migrate.MigrationRecord, allMigrations []*Migration) ([]*migrate.Migration, map[string]struct{}) {
src := &migrate.MemoryMigrationSource{}
unappliedPDSet := make(map[string]struct{})
unappliedNonPDCount := 0
// identify unapplied migrations
for _, migration := range allMigrations {
if migrationApplied(appliedRecords, migration.Id) {
continue // skip already applied migrations
}
if migration.PostDeployment {
unappliedPDSet[migration.Id] = struct{}{} // track unapplied post-deployment migrations
} else {
unappliedNonPDCount++
}
src.Migrations = append(src.Migrations, migration.Migration)
// stop early if we've already reached the migration limit for non-post-deployment migrations
if migrationlimit != 0 && unappliedNonPDCount >= migrationlimit {
// migrationlimit == 0 means no limit
break
}
}
// sort the migrations by ID
sortedMigrations, _ := src.FindMigrations()
return sortedMigrations, unappliedPDSet
}
package migrations
import (
"testing"
migrate "github.com/rubenv/sql-migrate"
"github.com/stretchr/testify/require"
)
func TestValidatePostDeployMigrationOrder(t *testing.T) {
tests := []struct {
name string
sortedMigrations []*migrate.Migration
unappliedPDSet map[string]struct{}
expectedFailure *PostDeployOrderFailure
expectedValid bool
}{
{
name: "All migrations in correct order",
sortedMigrations: []*migrate.Migration{
{Id: "001"}, {Id: "002"}, {Id: "003"}, {Id: "PD-004"},
},
unappliedPDSet: map[string]struct{}{"PD-004": {}},
expectedFailure: nil,
expectedValid: true,
},
{
name: "Post-deployment migration appears before a non-post-deployment migration",
sortedMigrations: []*migrate.Migration{
{Id: "001"}, {Id: "PD-002"}, {Id: "003"},
},
unappliedPDSet: map[string]struct{}{"PD-002": {}},
expectedFailure: &PostDeployOrderFailure{
MigrationID: "PD-002",
SafeToMigrateLimit: 1,
},
expectedValid: false,
},
{
name: "No post-deployment migrations",
sortedMigrations: []*migrate.Migration{{Id: "001"}, {Id: "002"}, {Id: "003"}},
unappliedPDSet: make(map[string]struct{}),
expectedFailure: nil,
expectedValid: true,
},
{
name: "Only post-deployment migrations",
sortedMigrations: []*migrate.Migration{
{Id: "PD-001"}, {Id: "PD-002"}, {Id: "PD-003"},
},
unappliedPDSet: map[string]struct{}{"PD-001": {}, "PD-002": {}, "PD-003": {}},
expectedFailure: nil,
expectedValid: true,
},
{
name: "Mixed migrations but in correct order",
sortedMigrations: []*migrate.Migration{
{Id: "001"}, {Id: "002"}, {Id: "PD-003"}, {Id: "PD-004"},
},
unappliedPDSet: map[string]struct{}{"PD-003": {}, "PD-004": {}},
expectedFailure: nil,
expectedValid: true,
},
{
name: "Multiple post-deployment migrations with a non-post-deployment migration in between (violation)",
sortedMigrations: []*migrate.Migration{
{Id: "001"}, {Id: "002"}, {Id: "PD-002"}, {Id: "003"}, {Id: "PD-004"},
},
unappliedPDSet: map[string]struct{}{"PD-002": {}, "PD-004": {}},
expectedFailure: &PostDeployOrderFailure{
MigrationID: "PD-002",
SafeToMigrateLimit: 2,
},
expectedValid: false,
},
{
name: "Edge case: Single migration (NPD)",
sortedMigrations: []*migrate.Migration{{Id: "001"}},
unappliedPDSet: make(map[string]struct{}),
expectedFailure: nil,
expectedValid: true,
},
{
name: "Edge case: Single migration (PD)",
sortedMigrations: []*migrate.Migration{{Id: "PD-001"}},
unappliedPDSet: map[string]struct{}{"PD-001": {}},
expectedFailure: nil,
expectedValid: true,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
failure, valid := validatePostDeployMigrationOrder(tc.sortedMigrations, tc.unappliedPDSet)
require.Equal(t, tc.expectedValid, valid)
if !valid {
require.Equal(t, tc.expectedFailure.MigrationID, failure.MigrationID)
require.Equal(t, tc.expectedFailure.SafeToMigrateLimit, failure.SafeToMigrateLimit)
}
})
}
}
func TestClassifyUnappliedMigrations(t *testing.T) {
tests := []struct {
name string
migrationLimit int
appliedRecords []*migrate.MigrationRecord
allMigrations []*Migration
expectedPDSet map[string]struct{}
expectedSorted []string
}{
{
name: "All migrations unapplied, no limit",
migrationLimit: 0,
appliedRecords: make([]*migrate.MigrationRecord, 0),
allMigrations: []*Migration{
{
Migration: &migrate.Migration{Id: "001"},
PostDeployment: false,
},
{
Migration: &migrate.Migration{Id: "PD-002"},
PostDeployment: true,
},
{
Migration: &migrate.Migration{Id: "003"},
PostDeployment: false,
},
},
expectedPDSet: map[string]struct{}{"PD-002": {}},
expectedSorted: []string{"001", "003", "PD-002"},
},
{
name: "Some migrations already applied",
migrationLimit: 0,
appliedRecords: []*migrate.MigrationRecord{
{Id: "001"},
},
allMigrations: []*Migration{
{
Migration: &migrate.Migration{Id: "001"},
PostDeployment: false,
},
{
Migration: &migrate.Migration{Id: "PD-002"},
PostDeployment: true,
},
{
Migration: &migrate.Migration{Id: "003"},
PostDeployment: false,
},
},
expectedPDSet: map[string]struct{}{"PD-002": {}},
expectedSorted: []string{"003", "PD-002"},
},
{
name: "Migration limit reached",
migrationLimit: 1,
appliedRecords: make([]*migrate.MigrationRecord, 0),
allMigrations: []*Migration{
{
Migration: &migrate.Migration{Id: "001"},
PostDeployment: false,
},
{
Migration: &migrate.Migration{Id: "002"},
PostDeployment: false,
},
{
Migration: &migrate.Migration{Id: "003"},
PostDeployment: false,
},
},
expectedPDSet: make(map[string]struct{}),
expectedSorted: []string{"001"}, // stops after first non-PD due to limit
},
{
name: "Migration limit reached PD",
migrationLimit: 1,
appliedRecords: make([]*migrate.MigrationRecord, 0),
allMigrations: []*Migration{
{
Migration: &migrate.Migration{Id: "001"},
PostDeployment: true,
},
{
Migration: &migrate.Migration{Id: "002"},
PostDeployment: false,
},
{
Migration: &migrate.Migration{Id: "003"},
PostDeployment: false,
},
},
expectedPDSet: map[string]struct{}{"001": {}},
expectedSorted: []string{"001", "002"}, // stops after first non-PD due to limit
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
sorted, pdSet := classifyUnappliedMigrations(tc.migrationLimit, tc.appliedRecords, tc.allMigrations)
require.Equal(t, tc.expectedPDSet, pdSet)
// extract sorted migration IDs
var sortedIDs []string
for _, m := range sorted {
sortedIDs = append(sortedIDs, m.Id)
}
require.Equal(t, tc.expectedSorted, sortedIDs)
})
}
}
......@@ -97,7 +97,10 @@ var (
maxBBMJobRetry *int
)
var parallelwalkKey = "parallelwalk"
var (
parallelwalkKey = "parallelwalk"
errPendingPDMigration = errors.New("pending post-deployment migrations must be run to unblock other available migrations")
)
// nullableInt implements spf13/pflag#Value as a custom nullable integer to capture spf13/cobra command flags.
// https://pkg.go.dev/github.com/spf13/pflag?tab=doc#Value
......@@ -255,12 +258,27 @@ var MigrateUpCmd = &cobra.Command{
}
m := migrations.NewMigrator(db, opts...)
var havePendingPostDeploy bool
if skipPostDeployment {
canSkip, acceptableMigrationLimit, err := m.CanSkipPostDeploy(*maxNumMigrations)
if !canSkip {
fmt.Printf("WARNING: %v\n", err)
if acceptableMigrationLimit == -1 { // -1 means there is no safe non-post-deployment migration to run
return errPendingPDMigration
}
*maxNumMigrations = acceptableMigrationLimit
fmt.Printf("Will only apply the first %d migration(s)\n", acceptableMigrationLimit)
havePendingPostDeploy = true
}
}
plan, err := m.UpNPlan(*maxNumMigrations)
if err != nil {
return fmt.Errorf("failed to prepare Up plan: %w", err)
}
if len(plan) > 0 {
_, _ = fmt.Println("The following migrations will be applied:")
_, _ = fmt.Println(strings.Join(plan, "\n"))
}
......@@ -271,6 +289,9 @@ var MigrateUpCmd = &cobra.Command{
return fmt.Errorf("failed to run database migrations: %w", err)
}
fmt.Printf("OK: applied %d migrations and %d background migrations in %.3fs\n", mr.AppliedCount, mr.AppliedBBMCount, time.Since(start).Seconds())
if havePendingPostDeploy {
return errPendingPDMigration
}
}
return nil
},
......