Verified Commit 44be42c1 authored by Dmitry Gruzd's avatar Dmitry Gruzd 2️⃣ Committed by GitLab
Browse files

feat: add mise dev task for GDK-connected local development

parent f716de63
Loading
Loading
Loading
Loading

.env.example

0 → 100644
+22 −0
Original line number Diff line number Diff line
# Lightweight native-process GKG dev environment configuration
# Copy to .env and adjust values as needed.
# Ensure mise shell activation is enabled so that cargo, ruby, and clickhouse
# are on PATH (the dev script shells out to ruby for ClickHouse config parsing).

# GDK_ROOT is required. The dev script reads gdk.yml from that directory and
# auto-derives GDK service ports and endpoints from it.
# GDK_DIR is also accepted as an alias for GDK_ROOT.
GDK_ROOT=/path/to/your/gdk

# Webserver
GKG_SERVER_PORT=8090
GKG_SERVER_GRPC_PORT=50054

# Indexer
GKG_INDEXER_PORT=4202
GKG_INDEXER_CONSUMER=gkg-indexer-dev

# Optional metrics (set GKG_ENABLE_METRICS=true to use)
GKG_ENABLE_METRICS=false
GKG_METRICS_PORT=9100
GKG_INDEXER_METRICS_PORT=9200
+1 −0
Original line number Diff line number Diff line
@@ -133,6 +133,7 @@ These repositories on [ops.gitlab.net](https://ops.gitlab.net) manage the Kubern
| [Readiness reviews (old)](https://gitlab.com/gitlab-com/gl-infra/readiness) | Legacy readiness repo. Siphon review [MR !231](https://gitlab.com/gitlab-com/gl-infra/readiness/-/merge_requests/231) (open, 78 comments), NATS review [MR !240](https://gitlab.com/gitlab-com/gl-infra/readiness/-/merge_requests/240) (merged). |
| In-repo dev/sandbox docs | [INFRASTRUCTURE.md](https://gitlab.com/gitlab-org/orbit/knowledge-graph/-/blob/main/docs/dev/INFRASTRUCTURE.md) -- GCP sandbox environment details (dev/sandbox only) |
| Operational runbooks | [docs/dev/runbooks/](https://gitlab.com/gitlab-org/orbit/knowledge-graph/-/tree/main/docs/dev/runbooks) -- indexing pipelines, configuration, troubleshooting |
| Local GDK-connected development | [docs/dev/local-development.md](https://gitlab.com/gitlab-org/orbit/knowledge-graph/-/blob/main/docs/dev/local-development.md) -- `mise run dev` to launch the full local stack against an existing GDK |
| [Design Specs (Figma)](https://www.figma.com/design/GOrqDStp1E1SE0Ms7lVbXF/--588317--Orbit-GA-Designs?t=SLZ2CosGuBAzjC6r-0) | UI/UX design specs and visual references for Orbit GA features |

---
+64 −1
Original line number Diff line number Diff line
@@ -81,6 +81,11 @@ impl AppConfig {
        // Sources are layered from lowest to highest precedence: the optional
        // config/default file, then mounted secret files, then GKG_* env vars.
        let config = config::Config::builder()
            .add_source(config::File::with_name("config/default").required(false))
            .add_source(SecretFileSource::new(secret_dir))
            .add_source(
                config::Environment::with_prefix("GKG")
                    // When only `separator` is set, the config crate reuses it
                    // as the prefix separator, so just GKG__NATS__URL would
                    // match. Pin the prefix separator to "_" so the documented
                    // GKG_NATS__URL / GKG_GITLAB__JWT__VERIFYING_KEY form works.
                    .prefix_separator("_")
                    .separator("__")
                    .try_parsing(true),
            )
            .build()
            .map_err(ConfigError::Config)?;

@@ -111,13 +116,14 @@ pub enum ConfigError {
    #[error("configuration error: {0}")]
    Config(#[from] config::ConfigError),
    #[error(
        "gitlab.jwt.verifying_key is required (set in config/default.yaml or mount at /etc/secrets/gitlab/jwt/verifying_key)"
        "gitlab.jwt.verifying_key is required (set GKG_GITLAB__JWT__VERIFYING_KEY, add to config/default.yaml, or mount at /etc/secrets/gitlab/jwt/verifying_key)"
    )]
    MissingJwtSecret,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::EngineConfiguration;

    /// Verifies the kebab-case handler config keys in YAML actually
@@ -195,4 +201,61 @@ handlers:
            Some(1)
        );
    }

    /// Environment source with `GKG_` prefix and `__` separator maps env
    /// vars to nested config keys:
    ///   GKG_NATS__URL -> nats.url
    ///   GKG_GRAPH__DATABASE -> graph.database
    ///
    /// The variables are handed to `config::Environment` through an in-memory
    /// map, so the crate's real prefix/separator handling is exercised without
    /// mutating process state.
    #[test]
    fn environment_source_overrides_file_values() {
        let dir = tempfile::TempDir::new().unwrap();

        // Stand-in for the process environment.
        let env_vars: config::Map<String, String> = [
            ("GKG_NATS__URL", "nats://custom:4222"),
            ("GKG_GRAPH__DATABASE", "test-graph-db"),
            ("GKG_DATALAKE__DATABASE", "test-datalake-db"),
            (
                "GKG_GITLAB__JWT__VERIFYING_KEY",
                "env-secret-at-least-32-bytes-long",
            ),
        ]
        .into_iter()
        .map(|(key, value)| (key.to_owned(), value.to_owned()))
        .collect();

        let config = config::Config::builder()
            .add_source(config::File::with_name("config/default").required(false))
            .add_source(SecretFileSource::new(dir.path()))
            .add_source(
                config::Environment::with_prefix("GKG")
                    // The prefix separator must be "_" for the single
                    // underscore after GKG to be accepted; the crate would
                    // otherwise fall back to the "__" key separator.
                    // NOTE(review): these names only resolve in the running
                    // binary if AppConfig's loader configures the same prefix
                    // separator -- confirm.
                    .prefix_separator("_")
                    .separator("__")
                    .try_parsing(true)
                    .source(Some(env_vars)),
            )
            // Provide required base config (normally from config/default.yaml)
            .set_default("nats.url", "localhost:4222")
            .unwrap()
            .set_default("datalake.url", "http://127.0.0.1:8123")
            .unwrap()
            .set_default("datalake.database", "default")
            .unwrap()
            .set_default("datalake.username", "default")
            .unwrap()
            .set_default("graph.url", "http://127.0.0.1:8123")
            .unwrap()
            .set_default("graph.database", "default")
            .unwrap()
            .set_default("graph.username", "default")
            .unwrap()
            .build()
            .unwrap();

        let config: AppConfig = config.try_deserialize().expect("config should deserialize");

        assert_eq!(config.nats.url, "nats://custom:4222");
        assert_eq!(config.graph.database, "test-graph-db");
        assert_eq!(config.datalake.database, "test-datalake-db");
        assert_eq!(
            config.gitlab.jwt.verifying_key.as_deref(),
            Some("env-secret-at-least-32-bytes-long")
        );
    }
}
+72 −0
Original line number Diff line number Diff line
@@ -264,6 +264,78 @@ scripts/gkg-dev.sh status
scripts/gkg-dev.sh stop
```

## Alternative: quick start with mise

If you prefer the repository's existing `mise` task runner, a shortcut task is
also available:

```shell
mise run dev
```

This alternative is separate from the Tilt/Kubernetes workflow above. It starts
lightweight native Rust processes directly on your host and connects them to the
existing services in your GDK instance (for example NATS, ClickHouse, GitLab,
and Gitaly), without using Tilt, Helm, Colima, or minikube.

It starts all three GKG runtime modes in the foreground:

- 1 webserver (HTTP + gRPC)
- 1 indexer
- 1 dispatcher (dispatch-indexing)

`mise run dev` orchestrates these long-running processes directly via mise
tasks, so you get mise's built-in prefixed output and Ctrl+C stops
everything.

Useful companion tasks:

```shell
mise run dev:check    # validate prerequisites
mise run dev:setup    # create graph DB + apply schema
mise run dev:status   # show derived config
mise run dev:env      # print env vars
```

`mise run gdk` is also available as an alias for the same GDK-connected local
development workflow.

Port assignments and GDK connection settings can be overridden in a gitignored
`.env` file. The only required input is `GDK_ROOT` (or `GDK_DIR` as an alias),
and the script derives GDK service ports from `gdk.yml` automatically. Start from the checked-in template
if you want to override only the GKG-local listen ports:

```shell
cp .env.example .env
```

For example, you can change the webserver and indexer ports if you want to run
multiple isolated local clusters on the same machine. You do not need to copy
GDK connection details into `.env`; those are parsed from `gdk.yml`.

Prerequisites:

- A working GDK with `nats`, `clickhouse`, and `siphon` enabled in `gdk.yml`
- PostgreSQL `wal_level = logical` (required for Siphon CDC)
- `mise` shell activation so that `cargo`, `ruby`, and `clickhouse` are on `PATH`
- Run `mise run dev:check` to validate all prerequisites

Typical usage:

```shell
export GDK_ROOT=~/workspace/gdk
mise run dev
```

On the first run, `cargo` compiles the full workspace which takes several
minutes. Subsequent runs use the cached build and start in seconds.

`mise run dev:setup` creates the graph database (default `gkg-development`) and
applies `config/graph.sql` to the configured ClickHouse instance.

This lightweight path assumes NATS, ClickHouse, Siphon, PostgreSQL, and Gitaly
come from GDK.

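For the Tilt/Kubernetes workflow, see `.gkg-dev.conf.example` for all
configuration options (K8s runtime, resource allocation, Tilt streaming mode).
The `mise run dev` path described above is configured through `.env` instead.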

+55 −3
Original line number Diff line number Diff line
@@ -6,13 +6,14 @@ postinstall = "if [ -z \"$CI\" ]; then lefthook install --force; fi"

[env]
# GDK root directory. Override with an absolute path, e.g: export GDK_ROOT=$HOME/workspace/gdk
# GDK_DIR is also accepted as an alias for GDK_ROOT.
# Do not use ~ (tilde is not expanded in variable values).
GDK_ROOT_RESOLVED = "{{env.GDK_ROOT | default(value=env.HOME ~ '/gitlab/gdk')}}"
GDK_ROOT_RESOLVED = "{{env.GDK_ROOT | default(value=env.GDK_DIR | default(value=env.HOME ~ '/gitlab/gdk'))}}"


# Path to Siphon repository for proto compilation (siphon-proto crate)
# Clone from: https://gitlab.com/gitlab-org/analytics-section/siphon
SIPHON_PROTO_ROOT = "{{env.GDK_ROOT | default(value=env.HOME ~ '/gitlab/gdk')}}/siphon"
SIPHON_PROTO_ROOT = "{{env.GDK_ROOT | default(value=env.GDK_DIR | default(value=env.HOME ~ '/gitlab/gdk'))}}/siphon"

# Auto-download prebuilt DuckDB shared library on first build (cached in target/).
DUCKDB_DOWNLOAD_LIB = "{{env.DUCKDB_DOWNLOAD_LIB | default(value='1')}}"
@@ -169,6 +170,58 @@ description = "Start the GKG server (webserver mode)"
run = './scripts/run-dispatcher.sh'
description = "Start the GKG server (dispatch-indexing mode)"

# Entry point for GDK-connected local development. The two commands run in
# order: the prerequisite check first, then a single `mise run` that is given
# the webserver, indexer, and dispatcher tasks separated by `:::` so that all
# three are launched by the same invocation.
[tasks.dev]
run = [
  "mise run dev:check",
  "mise run dev:web ::: dev:indexer ::: dev:dispatcher",
]
description = "Start a local GKG environment (webserver + indexer + dispatcher) connected to GDK services"

[tasks.gdk]
run = "mise run dev"
description = "Alias for the local GKG environment connected to GDK services"

[tasks."dev:check"]
run = "./scripts/gkg-native-dev.sh check"
description = "Verify GDK-connected local development prerequisites"

# Prints the environment derived from GDK by calling the script's `env`
# subcommand.
# NOTE(review): this is the same command as the dev:env task -- confirm whether
# a distinct status subcommand was intended here.
[tasks."dev:status"]
run = "./scripts/gkg-native-dev.sh env"
description = "Show the derived GDK-backed environment for local development"

# Re-runs the dev task. There is no stop step here, so an already running
# session has to be ended first (Ctrl+C in that session, see dev:stop).
# NOTE(review): presumably a second copy would collide on the listen ports --
# confirm.
[tasks."dev:restart"]
run = "mise run dev"
description = "Restart the local GKG environment"

[tasks."dev:stop"]
run = "echo 'Use Ctrl+C in the running mise session to stop all dev processes'"
description = "Explain how to stop the local GKG environment"

[tasks."dev:setup"]
run = "./scripts/gkg-native-dev.sh setup"
description = "Create the graph database and apply config/graph.sql to ClickHouse"

[tasks."dev:web"]
run = "GKG_BIND_ADDRESS=127.0.0.1:${GKG_SERVER_PORT:-8090} GKG_GRPC_BIND_ADDRESS=127.0.0.1:${GKG_SERVER_GRPC_PORT:-50054} GKG_METRICS__PROMETHEUS__ENABLED=${GKG_ENABLE_METRICS:-false} GKG_METRICS__PROMETHEUS__PORT=${GKG_METRICS_PORT:-9100} ./scripts/gkg-native-dev.sh webserver"
description = "Start a local GKG webserver connected to GDK services"

[tasks."dev:indexer"]
run = "GKG_INDEXER_HEALTH_BIND_ADDRESS=127.0.0.1:${GKG_INDEXER_PORT:-4202} GKG_NATS__CONSUMER_NAME=${GKG_INDEXER_CONSUMER:-gkg-indexer-dev} GKG_METRICS__PROMETHEUS__ENABLED=${GKG_ENABLE_METRICS:-false} GKG_METRICS__PROMETHEUS__PORT=${GKG_INDEXER_METRICS_PORT:-9200} ./scripts/gkg-native-dev.sh indexer"
description = "Start a local GKG indexer connected to GDK services"

[tasks."dev:dispatcher"]
run = "./scripts/gkg-native-dev.sh dispatcher"
description = "Start a local GKG dispatch-indexing process connected to GDK services"

[tasks."dev:healthcheck"]
run = "./scripts/gkg-native-dev.sh healthcheck"
description = "Start the local GKG health-check process"

[tasks."dev:env"]
run = "./scripts/gkg-native-dev.sh env"
description = "Print the derived local GKG environment"


[tasks."ontology:validate"]
run = "find config/ontology -name '*.yaml' ! -name 'reference.yaml' | xargs uvx check-jsonschema --verbose --schemafile config/schemas/ontology.schema.json"
description = "Validate ontology YAML files against JSON schema"
@@ -229,4 +282,3 @@ description = "Check for broken links in markdown files"
depends = ["lint:markdown", "lint:vale", "lint:links"]
run = "echo 'All doc linters passed'"
description = "Run all documentation linters"
Loading