Dynamically mask user-specified values in GitLab Runner log output

Status update: 2025-03-14

A new masking algorithm and proxy-shell (experimental feature) were released in 17.10, and can be enabled in the Runner config:

[[runners]]
  name = "<name>"
  url = "https://gitlab.com"
  token = "<token>"
  executor = "shell"
  proxy_exec = true # false by default
  • It is an experimental feature and hasn't been thoroughly tested yet.

  • This should work with the shell and docker executors.

  • It's unlikely to work with the Kubernetes executor.

  • Another missing item is ::add-mask:: detection if proxy_exec is not enabled. We want to add this as a safety net so that secrets cannot be leaked if proxy_exec is misconfigured. However, this isn't something we have decided to add as yet because it relies on the new masking algorithm and would need to be on by default.

Problem(s) to solve

At least one customer has had repeated issues of secrets exposed in the runner CI job logs. In this customer's case, the use-case scenario that results in a high count of un-masked credentials in the job log is as follows.

  • Whilst GitLab provides masking for CI/CD variables and secrets retrieved from the Hashicorp vault via native secrets support, there are a number of times when we are required to exchange credentials with other services to receive a dynamic token.

  • The tokens are used in subsequent calls in a CI pipeline job.

  • A token value can be printed in plain text if a developer isn't cautious.

Archived proposal

Implement dynamic masking in runner core. See code snippet below. Source file here.

Note: The CI Steps program is planning on implementing a feature set to handle sensitive values. However, the customer needs a solution that can mitigate the risk now for current production pipelines.

Code snippet

  1. .gitlab-ci.yml

# Demo job: prints a random value before and after registering it as a mask.
mask_test:
    tags:
      - masking-runner
    script:
        # Generate a throw-away random value that plays the role of the secret.
        - secret=$(openssl rand -base64 12)
        # Printed before any mask command was emitted for this value.
        - echo $secret
        # Emit the "::mask:: <phrase>" line that the masker scans the output for.
        - |
          echo "::mask:: $secret"
        # Printed after the mask command; presumably shown as [MASKED] in the job log.
        - echo $secret

  2. masker.go

package masker

import (
	"bytes"
	"io"
	"sort"
	"unicode"
)

// mask is the placeholder written downstream in place of every fully matched
// phrase.
var mask = []byte("[MASKED]")

// DynamicMasker is a write-closer that replaces known phrases in the data
// written to it and registers further phrases at runtime from
// "::mask:: <phrase>" lines found in that data.
//
// buffer holds input that has not been forwarded yet (a possibly incomplete
// mask command, or the last few bytes that could still turn into one).
// maskers is the chain of per-phrase maskers, kept sorted from the longest
// phrase to the shortest; data enters the chain at maskers[0]. next is the
// final destination behind the chain. partialMask is not referenced by any
// code visible in this file.
type DynamicMasker struct {
	buffer      []byte
	maskers     []*masker
	next        io.WriteCloser
	partialMask []byte
}

// New returns a DynamicMasker that forwards its masked output to w. Every
// entry of phrases is registered as a phrase to mask from the start; more
// phrases can be registered later through "::mask:: " lines in the written
// data.
func New(w io.WriteCloser, phrases [][]byte) *DynamicMasker {
	dm := new(DynamicMasker)
	dm.next = w
	// Start with an empty buffer that already has room for 1 KiB of input.
	dm.buffer = make([]byte, 0, 1024)

	dm.addMaskers(phrases)

	return dm
}

// Write appends p to the internal buffer and then drains the buffer:
//
//   - data in front of a "::mask:: " command is forwarded to the masker chain;
//   - a complete command line ("::mask:: <phrase>\n") is removed from the
//     output and its phrase is registered as a new phrase to mask;
//   - an incomplete command (no newline yet) stays buffered until more data
//     arrives or Close is called;
//   - when no command is present, everything except the last len(prefix)-1
//     bytes is forwarded, because those bytes may be the start of a command
//     that is completed by the next Write.
//
// A trailing '\r' is stripped from the phrase so CRLF-terminated output
// registers the same phrase as LF-terminated output, and an empty phrase is
// ignored instead of being registered.
//
// It always reports len(p) as written; err is the first error returned by the
// downstream writer, in which case the unsent data stays buffered.
//
// NOTE(review): the command is recognised anywhere in the stream, not only at
// the start of a line. Restricting it to line starts needs state that this
// type does not keep (what the last forwarded byte was).
func (dm *DynamicMasker) Write(p []byte) (n int, err error) {
	prefix := []byte("::mask:: ")
	// Number of trailing bytes that could still be an unfinished prefix.
	holdback := len(prefix) - 1

	n = len(p)
	dm.buffer = append(dm.buffer, p...)

	for {
		idx := bytes.Index(dm.buffer, prefix)
		if idx < 0 {
			// No command in sight: forward all but the possible partial prefix.
			if keepFrom := len(dm.buffer) - holdback; keepFrom > 0 {
				if _, err = dm.writeThrough(dm.buffer[:keepFrom]); err != nil {
					return n, err
				}
				dm.buffer = dm.buffer[keepFrom:]
			}
			return n, nil
		}

		// Forward whatever precedes the command; the buffer then starts at
		// the command itself.
		if idx > 0 {
			if _, err = dm.writeThrough(dm.buffer[:idx]); err != nil {
				return n, err
			}
			dm.buffer = dm.buffer[idx:]
		}

		end := bytes.IndexByte(dm.buffer, '\n')
		if end < 0 {
			// Incomplete command: wait for the rest of the line.
			return n, nil
		}

		phrase := bytes.TrimSuffix(dm.buffer[len(prefix):end], []byte("\r"))
		if len(phrase) > 0 {
			dm.addMaskers([][]byte{phrase})
		}

		// Drop the command line, including its newline, from the output.
		dm.buffer = dm.buffer[end+1:]
	}
}

// writeThrough hands data to the head of the masker chain, or straight to the
// final writer while no phrase has been registered yet.
func (dm *DynamicMasker) writeThrough(data []byte) (int, error) {
	if len(dm.maskers) == 0 {
		return dm.next.Write(data)
	}

	head := dm.maskers[0]
	return head.Write(data)
}

// Close forwards whatever is still buffered (an unfinished mask command or
// the held-back tail of the last Write) and then closes the masker chain, or
// the final writer when no phrase was ever registered.
//
// The downstream writer is closed even when the final flush fails, so a write
// error cannot leave it open. The flush error takes precedence over the close
// error in the returned value.
func (dm *DynamicMasker) Close() error {
	var flushErr error
	if len(dm.buffer) > 0 {
		_, flushErr = dm.writeThrough(dm.buffer)
		// The buffer has been handed over (or is lost for good); never emit
		// it a second time.
		dm.buffer = dm.buffer[:0]
	}

	var closeErr error
	if len(dm.maskers) > 0 {
		// Closing the head closes the whole chain down to dm.next.
		closeErr = dm.maskers[0].Close()
	} else {
		closeErr = dm.next.Close()
	}

	if flushErr != nil {
		return flushErr
	}
	return closeErr
}

// addMaskers registers one masker per new phrase and rebuilds the chain.
//
// Empty phrases are skipped because a masker cannot match an empty phrase.
// Phrases that are already registered are skipped too: a second masker for
// the same phrase could never see it (the first one has already replaced
// it), so it would only lengthen the chain. Each phrase is copied, so later
// changes to the caller's slice do not alter what is masked.
//
// The chain is ordered from the longest phrase to the shortest, with phrases
// of equal length kept in registration order, and the last masker writes to
// dm.next.
func (dm *DynamicMasker) addMaskers(phrases [][]byte) {
	for _, phrase := range phrases {
		if len(phrase) == 0 {
			continue
		}

		known := false
		for _, existing := range dm.maskers {
			if bytes.Equal(existing.phrase, phrase) {
				known = true
				break
			}
		}
		if known {
			continue
		}

		dm.maskers = append(dm.maskers, &masker{phrase: append([]byte(nil), phrase...)})
	}

	// Longest phrase first, so a phrase that contains a shorter one is
	// matched as a whole before the shorter masker sees the data.
	sort.SliceStable(dm.maskers, func(i, j int) bool {
		return len(dm.maskers[i].phrase) > len(dm.maskers[j].phrase)
	})

	// Re-link the chain: every masker feeds the next one, the last feeds the
	// final writer.
	for i, m := range dm.maskers {
		if i+1 < len(dm.maskers) {
			m.next = dm.maskers[i+1]
		} else {
			m.next = dm.next
		}
	}
}
// findMaskCommand returns the value of the first "::mask:: " command in p
// that sits at the start of a line, optionally preceded by whitespace on that
// line. The value runs up to the next newline (or the end of p) and is
// returned with surrounding whitespace trimmed. It returns nil when p holds
// no such command, and also when the command's value is blank.
//
// Occurrences of the prefix that follow non-whitespace text on the same line
// are skipped.
func findMaskCommand(p []byte) []byte {
	prefix := []byte("::mask:: ")

	for from := 0; from < len(p); {
		rel := bytes.Index(p[from:], prefix)
		if rel < 0 {
			return nil
		}
		at := from + rel

		// Everything between the start of the line and the prefix must be
		// whitespace; bytes are classified one by one, not as UTF-8.
		lineStart := bytes.LastIndexByte(p[:at], '\n') + 1
		leadingOK := true
		for _, b := range p[lineStart:at] {
			if !unicode.IsSpace(rune(b)) {
				leadingOK = false
				break
			}
		}
		if !leadingOK {
			// Mid-line occurrence: resume the search just past its first byte.
			from = at + 1
			continue
		}

		value := p[at+len(prefix):]
		if nl := bytes.IndexByte(value, '\n'); nl >= 0 {
			value = value[:nl]
		}
		return bytes.TrimSpace(value)
	}

	return nil
}

// masker replaces every occurrence of a single phrase in the stream written
// to it with the mask placeholder and forwards the result to next.
//
// phrase is the byte sequence to replace. matching is the number of leading
// phrase bytes that have been seen at the end of the data so far and are
// being withheld from next until the match completes or fails; it carries
// over between Write calls. next receives the processed output.
type masker struct {
	phrase   []byte
	matching int
	next     io.WriteCloser
}

// Write forwards p to the next writer with every occurrence of m.phrase
// replaced by the mask placeholder. A phrase may be split over any number of
// Write calls: bytes that match the start of the phrase at the end of p are
// withheld (counted in m.matching) until a later call completes the match or
// rules it out.
//
// When a withheld partial match fails, the withheld bytes are not simply
// released: a later suffix of them may itself be the beginning of an
// occurrence (phrase "aab", writes "aa" then "ab"). The longest such suffix
// that is still compatible with p stays withheld; only the bytes in front of
// it are released.
//
// An empty phrase matches nothing, so the data is forwarded unchanged.
//
// It returns the number of bytes of p consumed and the first error reported
// by the next writer.
//
// NOTE(review): finding the suffix to keep compares phrase prefixes against
// the withheld bytes on every failed partial match, which is quadratic in the
// phrase length in the worst case (highly repetitive phrases).
//
//nolint:gocognit
func (m *masker) Write(p []byte) (n int, err error) {
	if len(p) == 0 {
		return 0, nil
	}

	// Guard for m.phrase[0] below: nothing to look for, pass the data on.
	if len(m.phrase) == 0 {
		return m.next.Write(p)
	}

	// fast path: if the write is "[MASKED]" from an upper-level, don't bother
	// processing it, send it to the next writer. Bytes withheld from earlier
	// writes precede it in the stream, so they are released first to keep
	// the output in order.
	if bytes.Equal(p, mask) {
		if m.matching > 0 {
			pending := m.phrase[:m.matching]
			m.matching = 0
			if _, err = m.next.Write(pending); err != nil {
				return 0, err
			}
		}
		return m.next.Write(p)
	}

	// p[last:n] is data that has been scanned but not forwarded yet.
	var last int
	for n < len(p) {
		// optimization: use the faster IndexByte to jump to the start of a
		// potential phrase and if not found, advance the whole buffer.
		if m.matching == 0 {
			off := bytes.IndexByte(p[n:], m.phrase[0])
			if off < 0 {
				n = len(p)
				break
			}
			n += off
		}

		// how much can be compared now: the rest of the phrase, capped by
		// the rest of p.
		chunk := len(m.phrase) - m.matching
		if rest := len(p) - n; rest < chunk {
			chunk = rest
		}

		if bytes.HasPrefix(p[n:], m.phrase[m.matching:m.matching+chunk]) {
			// forward everything scanned before the match started
			if _, err = m.next.Write(p[last:n]); err != nil {
				return n, err
			}

			m.matching += chunk
			n += chunk
			last = n

			// the whole phrase has been seen: emit the placeholder instead
			if m.matching == len(m.phrase) {
				m.matching = 0
				if _, err = m.next.Write(mask); err != nil {
					return n, err
				}
			}

			continue
		}

		// nothing withheld: p[n] simply does not start the phrase
		if m.matching == 0 {
			n++
			continue
		}

		// A withheld partial match failed at p[n]. Keep the longest proper
		// suffix of the withheld bytes that is also a prefix of the phrase
		// and that p[n:] can still extend; release the bytes in front of it.
		keep := 0
		for j := m.matching - 1; j > 0; j-- {
			if !bytes.Equal(m.phrase[:j], m.phrase[m.matching-j:m.matching]) {
				continue
			}
			want := m.phrase[j:]
			if rest := len(p) - n; len(want) > rest {
				want = want[:rest]
			}
			if bytes.HasPrefix(p[n:], want) {
				keep = j
				break
			}
		}

		released := m.phrase[:m.matching-keep]
		m.matching = keep
		if _, err = m.next.Write(released); err != nil {
			return n, err
		}
		// p[n] is examined again on the next iteration with the new state.
	}

	// any unmatched data is sent to the next writer
	_, err = m.next.Write(p[last:n])

	return n, err
}

// Close releases any bytes still withheld for an unfinished match and then
// closes the next writer. The next writer is closed even if releasing the
// withheld bytes fails; an error from closing takes precedence over an error
// from that final write.
func (m *masker) Close() error {
	var werr error

	switch {
	case m.matching == 0:
		// Nothing withheld. This also covers an empty phrase, for which
		// matching == len(m.phrase) holds trivially and must not produce a
		// placeholder.
	case m.matching == len(m.phrase):
		// this mask is added to avoid a potential undiscovered edge-case:
		// this should be unreachable as we replace full matches immediately in
		// Write().
		_, werr = m.next.Write(mask)
	default:
		_, werr = m.next.Write(m.phrase[:m.matching])
	}
	// The withheld bytes have been handed over; do not emit them again.
	m.matching = 0

	if err := m.next.Close(); err != nil {
		return err
	}

	return werr
}

  3. masker_test.go


//go:build !integration

package masker

import (
	"bytes"
	"strings"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"gitlab.com/gitlab-org/gitlab-runner/common/buildlogger/internal"
)

// TestMasking feeds a table of inputs through a DynamicMasker and compares
// the collected output with the expected text.
//
// Each input is split on '|' and every part is sent as its own Write call, so
// '|' marks a chunk boundary and is never part of the data. values are the
// phrases handed to New (after passing through internal.Unique); lines of the
// form "::mask:: <phrase>" in the input register further phrases while the
// data is being written. The output is checked after Close.
func TestMasking(t *testing.T) {
	tests := []struct {
		input    string
		values   []string
		expected string
	}{
		// Statically configured phrases.
		{
			input:    "empty secrets have no affect",
			values:   []string{""},
			expected: "empty secrets have no affect",
		},
		{
			input:    "no escaping at all",
			expected: "no escaping at all",
		},
		{
			input:    "secrets",
			values:   []string{"secrets"},
			expected: "[MASKED]",
		},
		{
			input:    "secret|s",
			values:   []string{"secrets"},
			expected: "[MASKED]",
		},
		{
			input:    "s|ecrets",
			values:   []string{"secrets"},
			expected: "[MASKED]",
		},
		{
			input:    "secretssecrets",
			values:   []string{"secrets"},
			expected: "[MASKED][MASKED]",
		},
		{
			input:    "ssecrets",
			values:   []string{"secrets"},
			expected: "s[MASKED]",
		},
		{
			input:    "s|secrets",
			values:   []string{"secrets"},
			expected: "s[MASKED]",
		},
		{
			input:    "at the start of the buffer",
			values:   []string{"at"},
			expected: "[MASKED] the start of the buffer",
		},
		{
			input:    "in the middle of the buffer",
			values:   []string{"middle"},
			expected: "in the [MASKED] of the buffer",
		},
		{
			input:    "at the end of the buffer",
			values:   []string{"buffer"},
			expected: "at the end of the [MASKED]",
		},
		{
			input:    "all values are masked",
			values:   []string{"all", "values", "are", "masked"},
			expected: "[MASKED] [MASKED] [MASKED] [MASKED]",
		},
		{
			input:    "prefixed and suffixed: xfoox ybary ffoo barr ffooo bbarr",
			values:   []string{"foo", "bar"},
			expected: "prefixed and suffixed: x[MASKED]x y[MASKED]y f[MASKED] [MASKED]r f[MASKED]o b[MASKED]r",
		},
		{
			input:    "prefix|ed, su|ffi|xed |and split|:| xfo|ox y|bary ffo|o ba|rr ffooo b|barr",
			values:   []string{"foo", "bar"},
			expected: "prefixed, suffixed and split: x[MASKED]x y[MASKED]y f[MASKED] [MASKED]r f[MASKED]o b[MASKED]r",
		},
		{
			input:    "sp|lit al|l val|ues ar|e |mask|ed",
			values:   []string{"split", "all", "values", "are", "masked"},
			expected: "[MASKED] [MASKED] [MASKED] [MASKED] [MASKED]",
		},
		{
			input:    "prefix_mask mask prefix_|mask prefix_ma|sk mas|k",
			values:   []string{"mask", "prefix_mask"},
			expected: "[MASKED] [MASKED] [MASKED] [MASKED] [MASKED]",
		},
		{
			input:    "large secret: " + strings.Repeat("_", 8000) + "|" + strings.Repeat("_", 8000),
			values:   []string{strings.Repeat("_", 8000*2)},
			expected: "large secret: [MASKED]",
		},
		{
			input:    "overlap: this is the en| foobar",
			values:   []string{"this is the end", "en foobar", "en"},
			expected: "overlap: this is the [MASKED]",
		},
		// Phrases registered at runtime: the "::mask:: <phrase>" line is
		// expected to disappear from the output and the phrase to be masked
		// in everything that follows it.
		{
			input:    "This is not masked\n::mask:: secret\nThe secret is masked",
			values:   []string{},
			expected: "This is not masked\nThe [MASKED] is masked",
		},
		{
			input:    "::mask:: foo\n::mask:: bar\nfoo and bar are masked",
			values:   []string{},
			expected: "[MASKED] and [MASKED] are masked",
		},
		{
			input:    "Initial line\n::mask:: dynamic\nBoth dynamic",
			values:   []string{},
			expected: "Initial line\nBoth [MASKED]",
		},
		{
			input:    "::mask:: over\n::mask:: overlap\noverlap is over",
			values:   []string{},
			expected: "[MASKED] is [MASKED]",
		},
		{
			input:    "This is not masked\n::mask:: middle\nThis is middle in the middle",
			values:   []string{},
			expected: "This is not masked\nThis is [MASKED] in the [MASKED]",
		},
		// The registered phrase is the whole rest of the line ("new phrase"),
		// which does not occur in the following text.
		{
			input:    "First line\n::mask:: new phrase\nThe phrase is new",
			values:   []string{},
			expected: "First line\nThe phrase is new",
		},
		// New test case to ensure ::mask:: is not recognized mid-line
		// NOTE(review): the input contains no newline after the mid-line
		// command, so this only checks that unterminated text is passed
		// through at Close; it does not cover a mid-line command that is
		// followed by a newline.
		{
			input:    "This line has ::mask:: phrase but it should not be masked",
			values:   []string{},
			expected: "This line has ::mask:: phrase but it should not be masked",
		},
		// Test case with multiple masks on separate lines
		{
			input:    "Line 1\n::mask:: secret1\nLine 2\n::mask:: secret2\nBoth secret1 and secret2 are masked",
			values:   []string{},
			expected: "Line 1\nLine 2\nBoth [MASKED] and [MASKED] are masked",
		},
		// Mask commands and phrases split across several Write calls.
		{
			input:    "::mask:: fir|st\n::m|ask::| second|\nfirst seco|nd",
			values:   []string{},
			expected: "[MASKED] [MASKED]",
		},
		{
			input:    "::mask::| fir|st\n::m|ask::| second|\nfirst seco|nd",
			values:   []string{},
			expected: "[MASKED] [MASKED]",
		},
		// Matching is case-sensitive: "Secret" is not the registered "secret".
		{
			input:    "No mask\n::mask:: secret\nSecret line\nNo mask",
			values:   []string{},
			expected: "No mask\nSecret line\nNo mask",
		},
	}

	for _, tc := range tests {
		t.Run(tc.input, func(t *testing.T) {
			buf := new(bytes.Buffer)

			m := New(internal.NewNopCloser(buf), internal.Unique(tc.values))

			// Every '|'-separated part becomes one Write call.
			parts := bytes.Split([]byte(tc.input), []byte{'|'})
			for _, part := range parts {
				n, err := m.Write(part)
				require.NoError(t, err)

				assert.Equal(t, len(part), n)
			}

			// Close flushes whatever the masker is still holding back.
			require.NoError(t, m.Close())
			assert.Equal(t, tc.expected, buf.String())
		})
	}
}

Discussion

Pros

  • Adding this code immediately addresses the problem and the implied security risk.

Cons

  • Adding this code to the current runner manager means that you could effectively overload the manager, which will affect all jobs it is executing.
Edited by Darren Eastman