Skip to content

Optimize query for CI pipelines of merge request

Igor Drozdov requested to merge id-optimize-query-for-ci-pipelines into master

What does this MR do?

When an MR with a large number of commits is opened, the number of pipelines is calculated for the latest 10,000 commits. In order to do that an SQL query is performed to check how many of ci_pipelines belong to these 10,000 commits via source_sha or sha. Here's an example of such query for 20 commits:

SELECT * FROM (
  (SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
    AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
    AND ci_pipelines.source = 10 AND ci_pipelines.merge_request_id = 41385074
    AND ci_pipelines.source_sha IN (
      '73dacea9cf40c50534b676a2031a6e6c71229775', '254df0104abe3f8a8b210c41e9006c7c12d66c2c', 'c7d9fbb3c58f3ca5d1dd719723ae8a171dc93e38',
      '6d3cabd8d1bf88bb9a9f3e2d45ab306ecbbc25a5', '4c96c6a6c70b2cd0936b14dc7dc10074fe520c91', '44dae7a8af974f0b23b992b2394955febbb584d9',
      'c8f8098daaeccb6c11c32ddb080826a671e3b665', '2d42fd8a16ff01b6543d83169d17ba804fd946be', '55b99b83f5dc29d6c953be0e9e23c435d6b3d825',
      '64487732b6ce00fea50177b5d339b9a6d565250a', 'a1da4104a103050eb1f181e6f50a0a13101d9cbf', '4d0c7c9fc20a560093ba15336350dc813ea37a8f',
      'a10767acc07657219e3851a8d96a2d025d8dbec8', 'a50b837c680f99228d64ae4746837bc0b1a518a2', '4781c6dac2da74963e41112537113599ca2d540f',
      '831d60aad41639687c299ffb644d5fe2650935d8', '990c6e83391ed9a3be2f569427d8f1182a764b85', '0418570ad38a97030e6b0100d30ced90eea3b46e',
      'bb472ffd55734193d8495c461694db93e6aa8620', 'c58a96b8cd207efcfa95efb5c32d0332e46a98e3'))
  UNION ALL (
    SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
      AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
      AND ci_pipelines.source = 10 AND ci_pipelines.merge_request_id = 41385074
      AND ci_pipelines.sha IN (
        '73dacea9cf40c50534b676a2031a6e6c71229775', '254df0104abe3f8a8b210c41e9006c7c12d66c2c', 'c7d9fbb3c58f3ca5d1dd719723ae8a171dc93e38',
        '6d3cabd8d1bf88bb9a9f3e2d45ab306ecbbc25a5', '4c96c6a6c70b2cd0936b14dc7dc10074fe520c91', '44dae7a8af974f0b23b992b2394955febbb584d9',
        'c8f8098daaeccb6c11c32ddb080826a671e3b665', '2d42fd8a16ff01b6543d83169d17ba804fd946be', '55b99b83f5dc29d6c953be0e9e23c435d6b3d825',
        '64487732b6ce00fea50177b5d339b9a6d565250a', 'a1da4104a103050eb1f181e6f50a0a13101d9cbf', '4d0c7c9fc20a560093ba15336350dc813ea37a8f',
        'a10767acc07657219e3851a8d96a2d025d8dbec8', 'a50b837c680f99228d64ae4746837bc0b1a518a2', '4781c6dac2da74963e41112537113599ca2d540f',
        '831d60aad41639687c299ffb644d5fe2650935d8', '990c6e83391ed9a3be2f569427d8f1182a764b85', '0418570ad38a97030e6b0100d30ced90eea3b46e',
        'bb472ffd55734193d8495c461694db93e6aa8620', 'c58a96b8cd207efcfa95efb5c32d0332e46a98e3'))
    UNION ALL (SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
      AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
      AND (ci_pipelines.source IN (1, 2, 3, 4, 5, 6, 7, 8, 11, 9) OR ci_pipelines.source IS NULL)
      AND ci_pipelines.ref = master
      AND ci_pipelines.tag = FALSE
      AND ci_pipelines.sha IN(
        '73dacea9cf40c50534b676a2031a6e6c71229775', '254df0104abe3f8a8b210c41e9006c7c12d66c2c', 'c7d9fbb3c58f3ca5d1dd719723ae8a171dc93e38',
        '6d3cabd8d1bf88bb9a9f3e2d45ab306ecbbc25a5', '4c96c6a6c70b2cd0936b14dc7dc10074fe520c91', '44dae7a8af974f0b23b992b2394955febbb584d9',
        'c8f8098daaeccb6c11c32ddb080826a671e3b665', '2d42fd8a16ff01b6543d83169d17ba804fd946be', '55b99b83f5dc29d6c953be0e9e23c435d6b3d825',
        '64487732b6ce00fea50177b5d339b9a6d565250a', 'a1da4104a103050eb1f181e6f50a0a13101d9cbf', '4d0c7c9fc20a560093ba15336350dc813ea37a8f',
        'a10767acc07657219e3851a8d96a2d025d8dbec8', 'a50b837c680f99228d64ae4746837bc0b1a518a2', '4781c6dac2da74963e41112537113599ca2d540f',
        '831d60aad41639687c299ffb644d5fe2650935d8', '990c6e83391ed9a3be2f569427d8f1182a764b85', '0418570ad38a97030e6b0100d30ced90eea3b46e',
        'bb472ffd55734193d8495c461694db93e6aa8620', 'c58a96b8cd207efcfa95efb5c32d0332e46a98e3')
  )
) ci_pipelines

For a larger number of commits, it's obviously much bigger request.

Here's a query plan for 20 commits: https://explain.depesz.com/s/JOWW

This MR introduces the changes which provide a subquery instead of a list of commits:

SELECT * FROM (
  (SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
    AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
    AND ci_pipelines.source = 10 AND ci_pipelines.merge_request_id = 41385074
    AND ci_pipelines.source_sha IN (
      SELECT  merge_request_diff_commits.sha FROM merge_request_diff_commits WHERE merge_request_diff_commits.merge_request_diff_id IN (
        SELECT  merge_request_diffs.id FROM merge_request_diffs WHERE merge_request_diffs.merge_request_id = 41385074 ORDER BY merge_request_diffs.id DESC LIMIT 100) LIMIT 10000))
  UNION ALL (
    SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
      AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
      AND ci_pipelines.source = 10 AND ci_pipelines.merge_request_id = 41385074
      AND ci_pipelines.sha IN (
        SELECT  encode(sha, hex) FROM merge_request_diff_commits WHERE merge_request_diff_commits.merge_request_diff_id IN (
          SELECT  merge_request_diffs.id FROM merge_request_diffs WHERE merge_request_diffs.merge_request_id = 41385074 ORDER BY merge_request_diffs.id DESC LIMIT 100) LIMIT 10000))
  UNION ALL (SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
    AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
    AND (ci_pipelines.source IN (1, 2, 3, 4, 5, 6, 7, 8, 11, 9) OR ci_pipelines.source IS NULL)
    AND ci_pipelines.ref = master AND ci_pipelines.tag = FALSE
    AND ci_pipelines.sha IN (
      SELECT  encode(sha, hex) FROM merge_request_diff_commits WHERE merge_request_diff_commits.merge_request_diff_id IN (
        SELECT  merge_request_diffs.id FROM merge_request_diffs WHERE merge_request_diffs.merge_request_id = 41385074 ORDER BY merge_request_diffs.id DESC LIMIT 100) LIMIT 10000))) ci_pipelines

Here's a query plan for this request: https://explain.depesz.com/s/nKn1

Query which uses WITH for optimization:

WITH merge_request_diff_commits AS
  (SELECT merge_request_diff_commits.*
   FROM merge_request_diff_commits
   WHERE merge_request_diff_commits.merge_request_diff_id IN
       (SELECT merge_request_diffs.id
        FROM merge_request_diffs
        WHERE merge_request_diffs.merge_request_id = 41385074
        ORDER BY merge_request_diffs.id DESC
        LIMIT 100)
   LIMIT 10000)
SELECT ci_pipelines.*
FROM (
        (SELECT ci_pipelines.*
         FROM ci_pipelines
         WHERE ci_pipelines.project_id = 278964
           AND (ci_pipelines.config_source IN (1, 2)
                OR ci_pipelines.config_source IS NULL)
           AND ci_pipelines.source = 10
           AND ci_pipelines.merge_request_id = 41385074
           AND ci_pipelines.source_sha IN
             (SELECT merge_request_diff_commits.sha
              FROM merge_request_diff_commits))
      UNION ALL
        (SELECT ci_pipelines.*
         FROM ci_pipelines
         WHERE ci_pipelines.project_id = 278964
           AND (ci_pipelines.config_source IN (1, 2)
                OR ci_pipelines.config_source IS NULL)
           AND ci_pipelines.source = 10
           AND ci_pipelines.merge_request_id = 41385074
           AND ci_pipelines.sha IN
             (SELECT encode(sha, 'hex')
              FROM merge_request_diff_commits))
      UNION ALL
        (SELECT ci_pipelines.*
         FROM ci_pipelines
         WHERE ci_pipelines.project_id = 278964
           AND (ci_pipelines.config_source IN (1, 2)
                OR ci_pipelines.config_source IS NULL)
           AND (ci_pipelines.source IN (1, 2, 3, 4, 5, 6, 7, 8, 11, 9)
                OR ci_pipelines.source IS NULL)
           AND ci_pipelines.ref = '11-9-stable-ee’'
           AND ci_pipelines.tag = FALSE
           AND ci_pipelines.sha IN
             (SELECT encode(sha, 'hex')
              FROM merge_request_diff_commits))) ci_pipelines
ORDER BY CASE ci_pipelines.source
             WHEN (10) THEN 0
             ELSE 1
         END,
         ci_pipelines.id DESC

Query plan: https://explain.depesz.com/s/sgHr

Query which uses INNER JOIN instead of a subquery:

WITH shas AS
  (SELECT merge_request_diff_commits.sha
   FROM merge_request_diff_commits
   WHERE merge_request_diff_commits.merge_request_diff_id IN
       (SELECT merge_request_diffs.id
        FROM merge_request_diffs
        WHERE merge_request_diffs.merge_request_id = 41385074
        ORDER BY merge_request_diffs.id DESC
        LIMIT 100)
   LIMIT 10000)
SELECT ci_pipelines.*
FROM (
        (SELECT ci_pipelines.*
         FROM ci_pipelines
         INNER JOIN shas ON shas.sha = ci_pipelines.source_sha
         WHERE ci_pipelines.project_id = 278964
           AND (ci_pipelines.config_source IN (1,
                                               2)
                OR ci_pipelines.config_source IS NULL)
           AND ci_pipelines.source = 10
           AND ci_pipelines.merge_request_id = 41385074)
      UNION ALL
        (SELECT ci_pipelines.*
         FROM ci_pipelines
         INNER JOIN shas ON encode(shas.sha, 'hex') = ci_pipelines.sha
         WHERE ci_pipelines.project_id = 278964
           AND (ci_pipelines.config_source IN (1,
                                               2)
                OR ci_pipelines.config_source IS NULL)
           AND ci_pipelines.source = 10
           AND ci_pipelines.merge_request_id = 41385074)
      UNION ALL
        (SELECT ci_pipelines.*
         FROM ci_pipelines
         INNER JOIN shas ON encode(shas.sha, 'hex') = ci_pipelines.sha
         WHERE ci_pipelines.project_id = 278964
           AND (ci_pipelines.config_source IN (1,
                                               2)
                OR ci_pipelines.config_source IS NULL)
           AND (ci_pipelines.source IN (1, 2, 3, 4, 5, 6, 7, 8, 11, 9)
                OR ci_pipelines.source IS NULL)
           AND ci_pipelines.ref = '11-9-stable-ee'
           AND ci_pipelines.tag = FALSE)) ci_pipelines
ORDER BY CASE ci_pipelines.source
             WHEN (10) THEN 0
             ELSE 1
         END,
         ci_pipelines.id DESC

Query plan: https://explain.depesz.com/s/KhJw

Related issue: #35612 (closed)

Edited by Igor Drozdov

Merge request reports