From 79e42cbfe9e6308d34a596d824bf43a559d872ef Mon Sep 17 00:00:00 2001
From: Johan Lorenzo <gitlab@johan.lrnz.fr>
Date: Fri, 1 Apr 2022 16:10:41 +0200
Subject: [PATCH 1/4] [malt] Run jobgraph jobs on malt's infra

---
 .gitlab-ci.yml       | 2 ++
 gitlab-ci/config.yml | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index f495658..242be16 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -52,6 +52,8 @@ decision:
         --pipeline-source="$CI_PIPELINE_SOURCE"
         --target-jobs-method="$TARGET_JOBS_METHOD"
     stage: decision
+    tags:
+    - malt-build
     variables:
         GET_SOURCES_ATTEMPTS: 3
         # We need the full history to ensure jobgraph makes the right decisions
diff --git a/gitlab-ci/config.yml b/gitlab-ci/config.yml
index 3bd39f0..9155cda 100644
--- a/gitlab-ci/config.yml
+++ b/gitlab-ci/config.yml
@@ -14,8 +14,8 @@ jobgraph: {}
 runners:
     aliases:
         images:
-            runner_tag: gitlab-org-docker
+            runner_tag: malt-build
         misc:
-            runner_tag: gitlab-org-docker
+            runner_tag: malt-build
         t-linux:
-            runner_tag: gitlab-org-docker
+            runner_tag: malt-build
-- 
GitLab


From cd8b1208f5b763cff1caac872034d0e6ec1508a7 Mon Sep 17 00:00:00 2001
From: Johan Lorenzo <gitlab@johan.lrnz.fr>
Date: Wed, 6 Apr 2022 12:14:44 +0200
Subject: [PATCH 2/4] [malt] Do not use schedule pipelines on this fork since
 updates come from original repo

---
 gitlab-ci/stages/jobgraph_schedules/stage.yml | 53 -------------------
 1 file changed, 53 deletions(-)
 delete mode 100644 gitlab-ci/stages/jobgraph_schedules/stage.yml

diff --git a/gitlab-ci/stages/jobgraph_schedules/stage.yml b/gitlab-ci/stages/jobgraph_schedules/stage.yml
deleted file mode 100644
index 2bdc0cd..0000000
--- a/gitlab-ci/stages/jobgraph_schedules/stage.yml
+++ /dev/null
@@ -1,53 +0,0 @@
----
-loader: jobgraph.loader.transform:loader
-
-transforms:
-- jobgraph.transforms.job:transforms
-
-
-job_defaults:
-    before_script:
-    - cd "${TF_ROOT}"
-    image: {in_tree: jobgraph}
-    optimization:
-        skip_unless_changed:
-        - gitlab-ci/schedules.yml
-    # We need to get access to $JOBGRAPH_BOT_GITLAB_TOKEN even for a `terraform plan`
-    run_on_git_branches:
-    - main
-    runner_alias: misc
-    variables:
-        TF_ADDRESS: ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/terraform/state/jobgraph
-        TF_ROOT: ${CI_PROJECT_DIR}/terraform
-
-jobs:
-    plan:
-        artifacts:
-            name: plan
-            paths:
-            - ${CI_PROJECT_DIR}/plan.cache
-            reports:
-                terraform: ${CI_PROJECT_DIR}/plan.json
-
-        description: "Report what changes are going to be made to Gitlab CI schedules, based on the content of schedules.yml"
-        script:
-        - >-
-            gitlab-terraform plan
-            -var "GITLAB_PROJECT_ID=${CI_PROJECT_ID}"
-            -var "GITLAB_DEFAULT_BRANCH=${CI_DEFAULT_BRANCH}"
-            -var "JOBGRAPH_BOT_GITLAB_TOKEN=${JOBGRAPH_BOT_GITLAB_TOKEN}"
-            -var "SCHEDULES_YML_PATH=${CI_PROJECT_DIR}/gitlab-ci/schedules.yml"
-        - gitlab-terraform plan-json
-        - mv "${TF_ROOT}"/plan.* "${CI_PROJECT_DIR}"
-
-    apply:
-        description: "Change Gitlab CI schedules, based on the content of schedules.yml"
-        environment:
-            name: jobgraph
-
-        script:
-        - mv "${CI_PROJECT_DIR}/plan.cache" "${TF_ROOT}"
-        # No need to provide variable because it is going to reuse the existing cached plan.
-        - gitlab-terraform apply
-
-        upstream_dependencies: {jobgraph_schedules: "plan"}
-- 
GitLab


From 1b8b410d3797c58b3adb5dbba674e077c0e31ce9 Mon Sep 17 00:00:00 2001
From: Nicolas Demengel <nicolas.demengel@gmail.com>
Date: Thu, 14 Apr 2022 15:59:54 +0200
Subject: [PATCH 3/4] Allow for optimizing a job even if a set of tolerated
 upstream jobs are not optimized

---
 src/jobgraph/optimize/__init__.py  |  6 ++++
 src/jobgraph/test/test_optimize.py | 46 ++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/src/jobgraph/optimize/__init__.py b/src/jobgraph/optimize/__init__.py
index c70653b..e3a2bff 100644
--- a/src/jobgraph/optimize/__init__.py
+++ b/src/jobgraph/optimize/__init__.py
@@ -164,6 +164,11 @@ def remove_jobs(target_job_graph, params, optimizations, do_not_optimize, graph_
         # away. This usually means something upstream is new and we have to
         # run the job anyway
         job = target_job_graph.jobs[label]
+        tolerated_upstream_jobs = (
+            job.optimization.pop("optimize_even_if_upstream_jobs_are_not_optimized", [])
+            if job.optimization
+            else []
+        )
         named_links_dict = target_job_graph.graph.named_links_dict()
         named_job_dependencies = {
             upstream_dep_reference: upstream_dep_label
@@ -171,6 +176,7 @@ def remove_jobs(target_job_graph, params, optimizations, do_not_optimize, graph_
                 label, {}
             ).items()
             if upstream_dep_label not in removed
+            and upstream_dep_label not in tolerated_upstream_jobs
         }
         if named_job_dependencies:
             job.optimization = {}
diff --git a/src/jobgraph/test/test_optimize.py b/src/jobgraph/test/test_optimize.py
index 606fa53..9cedb8f 100644
--- a/src/jobgraph/test/test_optimize.py
+++ b/src/jobgraph/test/test_optimize.py
@@ -108,6 +108,52 @@ class TestOptimize(unittest.TestCase):
         )
         self.assert_remove_jobs(graph, {"t1"}, do_not_optimize={"t2"})
 
+    def test_optimize_job_even_if_given_upstream_job_is_not_optimized(self):
+        graph = self.make_triangle(
+            # not optimized
+            t1={},
+            # should not be optimized because of t1, but will be because
+            # optimize_even_if_upstream_jobs_are_not_optimized = ["t1"]
+            t2={
+                "remove": True,
+                "optimize_even_if_upstream_jobs_are_not_optimized": ["t1"],
+            },
+            # would be optimized, but ultimately is not because of t1
+            t3={"remove": True},
+        )
+        self.assert_remove_jobs(graph, {"t2"})
+
+    def test_do_not_optimize_job_if_only_one_of_non_optimized_upstream_jobs_is_tolerated(  # noqa E501
+        self,
+    ):
+        graph = self.make_triangle(
+            # not optimized
+            t1={},
+            # not optimized
+            t2={},
+            # not optimized because of t2
+            t3={
+                "remove": True,
+                "optimize_even_if_upstream_jobs_are_not_optimized": ["t1"],
+            },
+        )
+        self.assert_remove_jobs(graph, set())
+
+    def test_optimize_job_even_if_given_upstream_jobs_are_not_optimized(self):
+        graph = self.make_triangle(
+            # not optimized
+            t1={},
+            # not optimized
+            t2={},
+            # should not be optimized because of t1 and t2, but will be because
+            # optimize_even_if_upstream_jobs_are_not_optimized = ["t1", "t2"]
+            t3={
+                "remove": True,
+                "optimize_even_if_upstream_jobs_are_not_optimized": ["t1", "t2"],
+            },
+        )
+        self.assert_remove_jobs(graph, {"t3"})
+
     def assert_subgraph(
         self,
         graph,
-- 
GitLab


From 19d555c6a83e725249f821bb13215e345d2eb4b4 Mon Sep 17 00:00:00 2001
From: Nicolas Demengel <nicolas.demengel@gmail.com>
Date: Fri, 29 Apr 2022 15:33:52 +0200
Subject: [PATCH 4/4] fix: append new *protected suffix to cache keys as Gitlab
 does

Context:
On 2022-04-28 Gitlab introduced a regression by
automatically adding a suffix to caches based on
whether the branch was protected. The following
code adds that suffix as well to query the cache.

See:
- https://gitlab.com/gitlab-org/gitlab/-/issues/360910
- https://gitlab.com/gitlab-org/gitlab/-/merge_requests/86107/diffs#fa6cef4c2c6f398d87d46ae6584eaec407365c19_450_454
---
 src/jobgraph/optimize/cache.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/jobgraph/optimize/cache.py b/src/jobgraph/optimize/cache.py
index 208d2cf..bc8ca0e 100644
--- a/src/jobgraph/optimize/cache.py
+++ b/src/jobgraph/optimize/cache.py
@@ -38,6 +38,18 @@ def does_cache_exist(graph_config, cache_path):
     cache_type = graph_config["cache"]["type"]
     try:
         does_cache_exist_func = _registry_cache_type[cache_type]
-        return does_cache_exist_func(graph_config, cache_path)
+
+        # On 2022-04-28 Gitlab introduced a regression by automatically adding a suffix
+        # to caches based on whether the branch was protected. The following code adds
+        # that suffix as well to query the cache.
+        # See https://gitlab.com/gitlab-org/gitlab/-/issues/360910
+        #     https://gitlab.com/gitlab-org/gitlab/-/merge_requests/86107/diffs#fa6cef4c2c6f398d87d46ae6584eaec407365c19_450_454
+        cache_suffix = ("non_protected"
+                        # this prefix is added by jobgraph,
+                        # see jobgraph.transforms.job.build_push_cache_payload
+                        if "/unprotected-branches-" in cache_path
+                        else "protected")
+        cache_path_with_gitlab_suffix = f"{cache_path}-{cache_suffix}"
+        return does_cache_exist_func(graph_config, cache_path_with_gitlab_suffix)
     except KeyError:
         raise KeyError(f"Unknown cache type: {cache_type}")
-- 
GitLab