Commit 2f09be2f authored by Zach Rice

Big refactor... adding convert and conforming to other analyzer template

parent eb4c7448
Pipeline #167399705 failed with stages
in 3 minutes and 18 seconds
package main
import (
"sort"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"syscall"
"github.com/otiai10/copy"
log "github.com/sirupsen/logrus"
"github.com/urfave/cli"
"gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing/object"
)
"gitlab.com/gitlab-org/security-products/analyzers/common/v2/issue"
"gitlab.com/gitlab-org/security-products/analyzers/secrets/v2/git"
"gitlab.com/gitlab-org/security-products/analyzers/secrets/v2/scanner"
"gitlab.com/gitlab-org/security-products/analyzers/secrets/v2/scanner/gitleaks"
"gitlab.com/gitlab-org/security-products/analyzers/secrets/v2/scanner/trufflehog"
const (
// pathGitleaks is the program name handed to exec.Command when analyze runs
// Gitleaks.
pathGitleaks = "gitleaks"
// pathGitleaksConfig is the Gitleaks configuration file. analyze passes it to
// Gitleaks with --config, and addEntropyRule appends to it.
pathGitleaksConfig = "/gitleaks.toml"
// entropyRuleTmpl is the fmt template of the rule appended by addEntropyRule.
// Its single %f verb receives the lower entropy bound; the upper bound is
// fixed at 8.0.
entropyRuleTmpl = `
[[rules]]
description = "Generic Secret plus Entropy"
regex = '''(?i)(api_key|apikey|secret|key|api|password|pw)'''
entropies = ["%f-8.0"]
`
)
// addEntropyRule appends a "Generic Secret plus Entropy" rule, rendered from
// entropyRuleTmpl with entropy as the lower bound, to the Gitleaks
// configuration file at pathGitleaksConfig. The file is created if it does not
// exist yet.
//
// It returns the first error met while opening, writing or closing the file.
func addEntropyRule(entropy float64) (err error) {
	// append regex rule to the gitleaks config to maintain old functionality
	rule := fmt.Sprintf(entropyRuleTmpl, entropy)

	f, err := os.OpenFile(pathGitleaksConfig, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close on a file we just wrote to may mean the rule never
		// reached the disk, so report it unless an earlier error wins.
		if closeErr := f.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = f.WriteString(rule)
	return err
}
// analyze runs the tools and produces a report containing issues for each detected secret leak.
func analyze(c *cli.Context, path string) (*issue.Report, error) {
func analyze(c *cli.Context, path string) (io.ReadCloser, error) {
var err error
historic := IsHistoric(c)
// First, check if this scan is a historic/commit range scan or current scan
if !scanner.IsHistoric(c) {
if !historic {
// Place the files in a new git repository, in one single commit.
// This is needed as the tools search the commit history for secret leaks and
// searching into the full history would find leaks that don't exist anymore.
path, err = git.FlattenRepo(path)
path, err = FlattenRepo(path)
if err != nil {
log.Errorf("Couldn't prepare the repository for analysis: %v\n", err)
return nil, err
}
}
// Run the tools.
// TODO: refactor to avoid passing *cli.Context to Run functions.
gitleaksIssues, err := gitleaks.Run(c, path)
if err != nil {
log.Errorf("Gitleaks analysis failed: %v\n", err)
return nil, err
// Create a temporary file. Gitleaks can't output to stdout.
reportName := "gitleaks-*.json"
if historic {
reportName = "historic-gitleaks-*.json"
}
truffleHogIssues, err := trufflehog.Run(c, path)
gitleaksReportFile, err := ioutil.TempFile("", reportName)
if err != nil {
log.Errorf("TruffleHog analysis failed: %v\n", err)
log.Errorf("Couldn't create temporary file: %v\n", err)
return nil, err
}
gitleaksReportFile.Close()
if c.Float64(FlagEntropyLevel) != DefaultEntropy {
if err := addEntropyRule(c.Float64(FlagEntropyLevel)); err != nil {
return nil, err
}
}
// Run Gitleaks on the given path.
cmd := exec.Command(pathGitleaks, "--report", gitleaksReportFile.Name(), "--repo-path", path, "--config", pathGitleaksConfig)
if c.String(CommitTo) != "" && c.String(CommitFrom) != "" {
cmd = exec.Command(pathGitleaks, "--report", gitleaksReportFile.Name(), "--repo-path", path,
"--config", pathGitleaksConfig, "--commit-to", c.String(CommitTo), "--commit-from", c.String(CommitFrom))
}
cmd.Env = os.Environ()
output, err := cmd.CombinedOutput()
log.Debugf("%s\n%s", cmd.String(), output)
// Gitleaks exits with these status codes:
// 0: no leaks
// 1: leaks present
// 2: error encountered
if err == nil {
return nil, nil // no leaks
}
if exitErr, ok := err.(*exec.ExitError); ok {
if exitErr.Sys().(syscall.WaitStatus).ExitStatus() == 1 {
f, err := os.Open(gitleaksReportFile.Name())
if err != nil {
return nil, err
}
return f, nil
}
}
// Merge Gitleaks and TruffleHog issues
issues := append(gitleaksIssues, truffleHogIssues...)
return nil, fmt.Errorf("Couldn't run the gitleaks command: %v\n", err)
}
// Deduplicate Gitleaks and TruffleHog issues, remove entropy issues contained in others and consolidate
// them into a single issue if present on several consecutive lines
issues = consolidateEntropyIssues(cleanEntropyIssues(deduplicate(issues)))
// FlattenRepo flattens the given repository into a new directory containing only one commit and returns its path.
func FlattenRepo(path string) (string, error) {
// Copy the repository to a new directory.
flatPath, err := ioutil.TempDir("", "flat-")
if err != nil {
return "", err
}
if err = copy.Copy(path, flatPath); err != nil {
return "", err
}
// Remove .git
if err = os.RemoveAll(filepath.Join(flatPath, ".git")); err != nil {
return "", err
}
// Init a new repository
r, err := git.PlainInit(flatPath, false)
if err != nil {
return "", err
}
w, err := r.Worktree()
if err != nil {
return "", err
}
// Create a commit containing all the files.
_, err = w.Add(".")
if err != nil {
return "", err
}
_, err = w.Commit("", &git.CommitOptions{
Author: &object.Signature{
Name: "Analyzer",
Email: "Analyzer",
},
})
if err != nil {
return "", err
}
sort.Sort(ByName(issues))
return flatPath, nil
}
// Return the report
if issues == nil {
// We need to initialize the slice for correct JSON marshalling
issues = []issue.Issue{}
// IsHistoric checks if this scan is a historic scan based on ci vars.
func IsHistoric(c *cli.Context) bool {
if (c.String(CommitTo) != "" && c.String(CommitFrom) != "") || c.Bool(FlagHistoricScan) {
return true
}
report := issue.NewReport()
report.Vulnerabilities = issues
return &report, nil
return false
}
package convert
import (
"bufio"
"crypto/sha256"
"encoding/json"
"fmt"
log "github.com/sirupsen/logrus"
"io"
"os"
"path/filepath"
"regexp"
"strings"
"gitlab.com/gitlab-org/security-products/analyzers/common/v2/issue"
"gitlab.com/gitlab-org/security-products/analyzers/secrets/v3/gitleaks"
)
// Secret represents a gitleaks leak, as decoded from one element of the
// Gitleaks JSON report.
type Secret struct {
// Line is the report's "line" field. NOTE(review): presumably the content of
// the offending line rather than its number (it is a string) — confirm.
Line string `json:"line"`
// Offender is the text extractLine searches for in the source file; it is
// also fingerprinted to build the issue's compare key.
Offender string `json:"offender"`
// Rule is the name of the Gitleaks rule that fired; secretToIssue looks it up
// in gitleaks.Rules.
Rule string `json:"rule"`
// Commit is copied into the issue location as the commit SHA.
Commit string `json:"commit"`
// File is the path of the affected file; Convert joins it onto its
// prependPath argument to open the file.
File string `json:"file"`
// Message is the commit message ("commitMessage" in the report).
Message string `json:"commitMessage"`
// Author is copied into the issue location as the commit author.
Author string `json:"author"`
// Date is copied into the issue location as the commit date.
Date string `json:"date"`
}
// PemBegin and PemEnd are the patterns used for matching PEM keys: PemBegin
// recognises the "-----BEGIN" marker and PemEnd the "-----END" marker.
// Both are compiled once, when the package is initialised.
var (
	PemBegin = regexp.MustCompile("-----BEGIN")
	PemEnd   = regexp.MustCompile("-----END")
)
// Convert decodes a Gitleaks JSON report from input and turns every leak into
// an issue of the returned report.
//
// prependPath is joined in front of each leak's file path to open the source
// file and look up the lines the secret spans. That lookup is skipped for
// historic scans, whose issues get a description naming the commit instead.
//
// An error is returned when the report cannot be decoded, when a leak cannot
// be converted, or when a source file cannot be opened or read.
func Convert(input io.Reader, prependPath string) (*issue.Report, error) {
	// This is a hack to check if the scan was historic: the analyzer marks
	// historic scans through the report file name. Only the base name is
	// compared because a temporary file's name includes its directory.
	historic := false
	if reportFile, ok := input.(*os.File); ok {
		historic = strings.HasPrefix(filepath.Base(reportFile.Name()), "historic-gitleaks")
	}

	// Decode JSON report
	var secrets []Secret
	if err := json.NewDecoder(input).Decode(&secrets); err != nil {
		log.Errorf("Couldn't parse the Gitleaks report: %v\n", err)
		return nil, err
	}

	// Non-nil even when empty, so the report marshals to [] rather than null.
	issues := make([]issue.Issue, 0, len(secrets))
	for _, secret := range secrets {
		found, err := secretToIssue(secret)
		if err != nil {
			return nil, err
		}

		if historic {
			// Update with an historic description
			found.Description = fmt.Sprintf("Historic %s secret has been found in commit %s.", secret.Rule, secret.Commit)
		} else {
			// Update the location with a line start and end if not historic
			f, err := os.Open(filepath.Join(prependPath, secret.File))
			if err != nil {
				log.Errorf("Couldn't open source file %s: %v\n", secret.File, err)
				return nil, err
			}
			// extractLine closes f.
			lineStart, lineEnd, err := extractLine(f, secret)
			if err != nil {
				return nil, err
			}
			found.Location.LineStart = lineStart
			found.Location.LineEnd = lineEnd
		}

		issues = append(issues, found)
	}

	report := issue.NewReport()
	report.Vulnerabilities = issues
	return &report, nil
}
// CompareKey returns a string used to establish whether two issues are the same.
func CompareKey(file, fingerprint, ruleID string) string {
if fingerprint != "" {
......@@ -51,3 +135,95 @@ func Identifier(tool, ruleID string) issue.Identifier {
Value: ruleID,
}
}
// extractLine scans f line by line for secret.Offender and returns the
// 1-based numbers of the first and last line the secret spans. f is closed
// before returning.
//
// For an offender that matches PemBegin the scan continues past the matching
// line until a line matching PemEnd is found, and that line becomes lineEnd.
// If no such line exists, lineEnd equals lineStart.
//
// When the offender is not found, both results are 0. When reading fails, both
// results are -1 and the read error is returned.
func extractLine(f *os.File, secret Secret) (lineStart int, lineEnd int, err error) {
	defer f.Close()

	reader := bufio.NewReader(f)
	isPem := PemBegin.MatchString(secret.Offender)

	// Lines are numbered from 1 so that lineStart == 0 can only mean
	// "not found yet".
	for line := 1; ; line++ {
		// ReadString returns the data read so far together with io.EOF, so a
		// last line without a trailing newline still has to be examined.
		text, readErr := reader.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			log.Errorf("Problem while reading source file %s: %v\n", f.Name(), readErr)
			return -1, -1, readErr
		}

		switch {
		case lineStart == 0 && strings.Contains(text, secret.Offender):
			// Found the line
			lineStart, lineEnd = line, line
			if !isPem {
				// No more work to do, this is not a multiline PEM secret.
				return lineStart, lineEnd, nil
			}
		case isPem && lineStart != 0 && PemEnd.MatchString(text):
			// End of the PEM block.
			return lineStart, line, nil
		}

		if readErr == io.EOF {
			return lineStart, lineEnd, nil
		}
	}
}
// secretToIssue builds the issue reported for a single Gitleaks leak.
//
// The issue location carries the file and commit details of the leak; its
// line numbers are set to -1 and left for the caller to fill in. The returned
// error is always nil.
func secretToIssue(secret Secret) (issue.Issue, error) {
	name, description, ruleID := describeRule(secret.Rule)

	return issue.Issue{
		Category: issue.CategorySecretDetection,
		Scanner: issue.Scanner{
			ID:   "gitleaks",
			Name: "Gitleaks",
		},
		Name:        name,
		Message:     name,
		Description: description,
		CompareKey:  CompareKey(secret.File, Fingerprint(secret.Offender), ruleID),
		Severity:    issue.SeverityLevelCritical,
		Confidence:  issue.ConfidenceLevelUnknown,
		Location: issue.Location{
			File:      secret.File,
			LineStart: -1,
			LineEnd:   -1,
			// commit object for issue location
			Commit: &issue.Commit{
				Author:  secret.Author,
				Date:    secret.Date,
				Message: secret.Message,
				Sha:     secret.Commit,
			},
		},
		Identifiers: Identifiers("Gitleaks", ruleID),
	}, nil
}

// describeRule returns the issue name, description and rule ID to report for
// the Gitleaks rule called rule.
func describeRule(rule string) (name, description, ruleID string) {
	if known, ok := gitleaks.Rules[rule]; ok {
		return known.Name, known.Description, rule
	}

	if strings.HasPrefix(rule, "Entropy: ") {
		// A string with high entropy has been detected.
		return "High entropy string",
			"A string with high entropy was found, this could be a secret",
			"Entropy"
	}

	// This is an unknown rule. Warn the user and use default values.
	log.Errorf(
		"No description for Gitleaks rule %s, please open an issue on https://gitlab.com/gitlab-org/gitlab-ee/issues\n",
		rule)
	return fmt.Sprint("Gitleaks rule ", rule),
		fmt.Sprint("Gitleaks rule ", rule, " detected a secret"),
		rule
}
package git
import (
"io/ioutil"
"os"
"path/filepath"
"github.com/otiai10/copy"
"gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing/object"
)
// FlattenRepo flattens the given repository into a new directory containing only one commit and returns its path.
func FlattenRepo(path string) (string, error) {
// Copy the repository to a new directory.
flatPath, err := ioutil.TempDir("", "flat-")
if err != nil {
return "", err
}
if err = copy.Copy(path, flatPath); err != nil {
return "", err
}
// Remove .git
if err = os.RemoveAll(filepath.Join(flatPath, ".git")); err != nil {
return "", err
}
// Init a new repository
r, err := git.PlainInit(flatPath, false)
if err != nil {
return "", err
}
w, err := r.Worktree()
if err != nil {
return "", err
}
// Create a commit containing all the files.
_, err = w.Add(".")
if err != nil {
return "", err
}
_, err = w.Commit("", &git.CommitOptions{
Author: &object.Signature{
Name: "Analyzer",
Email: "Analyzer",
},
})
if err != nil {
return "", err
}
return flatPath, nil
}
package gitleaks
import "fmt"
// Rule holds the human-readable metadata reported for a Gitleaks rule.
type Rule struct {
// Name is the short title of the finding.
Name string
// Description is the longer explanation shown with the finding.
Description string
}
// singleCredDesc returns the standard description for a finding that concerns
// one credential: subject followed by the advice to remove and revoke it.
func singleCredDesc(subject string) string {
	const tmpl = "%s detected; please remove and revoke it if this is a leak."
	return fmt.Sprintf(tmpl, subject)
}
// Rules maps a Gitleaks rule name to the name and description reported for a
// finding produced by that rule. Keys are matched exactly against the rule
// name found in the Gitleaks report.
// See scanner/gitleaks/gitleaks.toml for a list of Gitleaks rules.
var Rules = map[string]Rule{
"AWS": {
Name: "AWS API key",
Description: singleCredDesc("Amazon Web Services API key"),
},
"Facebook": {
Name: "Facebook token",
Description: singleCredDesc("Facebook token"),
},
"Github": {
Name: "GitHub token",
Description: singleCredDesc("GitHub token"),
},
"PGP": {
Name: "PGP private key",
Description: singleCredDesc("PGP private key"),
},
"PKCS8": {
Name: "PKCS8 key",
Description: singleCredDesc("PKCS8 private key"),
},
"RSA": {
Name: "RSA private key",
Description: singleCredDesc("RSA private key"),
},
"Slack token": {
Name: "Slack token",
Description: singleCredDesc("Slack token"),
},
"Stripe": {
Name: "Stripe",
Description: singleCredDesc("Stripe API key"),
},
"SSH": {
Name: "SSH private key",
Description: singleCredDesc("SSH private key"),
},
"Twitter": {
Name: "Twitter key",
Description: singleCredDesc("Twitter key"),
},
"Generic API Key": {
Name: "Generic API Key",
Description: singleCredDesc("Unknown API key"),
},
// This key equals the description of the rule rendered from the entropy rule
// template in the analyzer.
"Generic Secret plus Entropy": {
Name: "Generic Secret plus Entropy",
Description: singleCredDesc("Unknown API key"),
},
"Heroku API Key": {
Name: "Heroku API key",
Description: singleCredDesc("Heroku API key"),
},
"Twilio API Key": {
Name: "Twilio API key",
Description: singleCredDesc("Twilio API key"),
},
"Password in URL": {
Name: "Password in URL",
Description: singleCredDesc("Password in URL"),
},
}
......@@ -2,28 +2,36 @@
package main
import (
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
log "github.com/sirupsen/logrus"
"github.com/urfave/cli"
"os"
"gitlab.com/gitlab-org/security-products/analyzers/common/v2/cacert"
"gitlab.com/gitlab-org/security-products/analyzers/common/v2/command"
"gitlab.com/gitlab-org/security-products/analyzers/common/v2/issue"
"gitlab.com/gitlab-org/security-products/analyzers/common/v2/logutil"
"gitlab.com/gitlab-org/security-products/analyzers/common/v2/pathfilter"
"gitlab.com/gitlab-org/security-products/analyzers/secrets/v2/scanner"
"gitlab.com/gitlab-org/security-products/analyzers/secrets/v2/utils"
"gitlab.com/gitlab-org/security-products/analyzers/secrets/v3/convert"
)
const (
// flagTargetDir is the name of the string flag declared first in runCommand.
flagTargetDir = "target-dir"
// flagArtifactDir names the artifact directory flag.
// NOTE(review): its declaration is not visible in this excerpt — confirm.
flagArtifactDir = "artifact-dir"
// flagExcludedPaths names the flag listing paths to exclude from the output.
// NOTE(review): presumably the flag backed by SAST_EXCLUDED_PATHS — confirm.
flagExcludedPaths = "excluded-paths"
// FlagEntropyLevel is the name of the flag holding the Gitleaks entropy
// level (0-8).
FlagEntropyLevel = "gitleaks-entropy-level"
// CommitFrom is the name of the flag holding the commit a Gitleaks scan
// starts at.
CommitFrom = "commit-from"
// CommitTo is the name of the flag holding the commit a Gitleaks scan ends at.
CommitTo = "commit-to"
// FlagHistoricScan is the name of the flag that enables a historic scan.
FlagHistoricScan = "full-scan"
// DefaultEntropy is 8.0. An extra entropy rule is appended to the Gitleaks
// configuration only when the configured level differs from this value.
DefaultEntropy = 8.0
// Environment variable names.
// NOTE(review): presumably bound to the flags above where those flags are
// declared; the declarations are not visible here — confirm.
envVarEntropy = "SECRET_DETECTION_ENTROPY_LEVEL"
envVarCommitFrom = "SECRET_DETECTION_COMMIT_FROM"
envVarCommitTo = "SECRET_DETECTION_COMMIT_TO"
envVarFullScan = "SECRET_DETECTION_HISTORIC_SCAN"
)
func init() {
......@@ -31,18 +39,6 @@ func init() {
}
// main builds the analyzer's command-line application, registers the command
// returned by runCommand and runs it with the process arguments. A failing
// run is reported through log.Fatal.
func main() {
	app := cli.NewApp()
	app.Name, app.Usage, app.Author = "analyzer", "Secrets analyzer for GitLab SAST", "GitLab"
	app.Commands = []cli.Command{runCommand()}

	err := app.Run(os.Args)
	if err != nil {
		log.Fatal(err)
	}
}
func runCommand() cli.Command {
flags := []cli.Flag{
cli.StringFlag{
Name: flagTargetDir,
......@@ -59,73 +55,42 @@ func runCommand() cli.Command {
EnvVar: "SAST_EXCLUDED_PATHS",
Usage: "Comma-separated list of paths (globs supported) to be excluded from the output.",
},
}
flags = append(flags, cacert.NewFlags()...)
flags = append(flags, scanner.MakeFlags()...)
return cli.Command{
Name: "run",
Aliases: []string{"r"},
Usage: "Run the analyzer on detected project and generate a compatible artifact",
Flags: flags,
Action: func(c *cli.Context) error {
// no args
if c.Args().Present() {
if err := cli.ShowSubcommandHelp(c); err != nil {
return err
}
return errors.New("invalid number of arguments")
}
// parse excluded paths