Loading config/graph.sql +6 −3 Original line number Diff line number Diff line Loading @@ -246,6 +246,8 @@ CREATE TABLE IF NOT EXISTS gl_work_item ( ORDER BY (traversal_path, id) PRIMARY KEY (traversal_path, id) SETTINGS index_granularity = 2048, allow_experimental_replacing_merge_with_cleanup = 1; -- Edge table: namespace-scoped PK with adjacency projections CREATE TABLE IF NOT EXISTS gl_edge ( traversal_path String DEFAULT '0/' CODEC(ZSTD(1)), source_id Int64 CODEC(Delta(8), ZSTD(1)), Loading @@ -256,10 +258,11 @@ CREATE TABLE IF NOT EXISTS gl_edge ( _version DateTime64(6, 'UTC') DEFAULT now64(6) CODEC(ZSTD(1)), _deleted Bool DEFAULT false, INDEX idx_relationship relationship_kind TYPE set(50) GRANULARITY 2, PROJECTION by_target (SELECT * ORDER BY (target_id, relationship_kind, target_kind, source_id, traversal_path)) PROJECTION by_source (SELECT * ORDER BY (source_id, relationship_kind, target_id, traversal_path, source_kind, target_kind)), PROJECTION by_target (SELECT * ORDER BY (target_id, relationship_kind, source_id, traversal_path, source_kind, target_kind)) ) ENGINE = ReplacingMergeTree(_version, _deleted) ORDER BY (source_id, relationship_kind, target_id, traversal_path, source_kind, target_kind) PRIMARY KEY (source_id, relationship_kind, target_id) ORDER BY (traversal_path, source_id, relationship_kind, target_id, source_kind, target_kind) PRIMARY KEY (traversal_path, source_id, relationship_kind) SETTINGS index_granularity = 1024, deduplicate_merge_projection_mode = 'rebuild', allow_experimental_replacing_merge_with_cleanup = 1; -- CI graph tables Loading crates/integration-tests/tests/server/data_correctness/edge_cases.rs +171 −0 Original line number Diff line number Diff line Loading @@ -185,6 +185,177 @@ pub(super) async fn sip_target_aggregation_with_filter_returns_correct_counts(ct resp.assert_node_absent("User", 3); } /// Cross-namespace: User 2 is MEMBER_OF group 100 (ns `1/100/`) but authored /// MR 2002 in ns `1/101/1001/`. When scoped to `1/101/`, User 2 must appear /// as the MR author even though their membership edge is in a different namespace. pub(super) async fn cross_namespace_user_authors_mr_in_different_group(ctx: &TestContext) { let ctx_101 = SecurityContext::new(1, vec!["1/101/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "traversal", "nodes": [ {"id": "u", "entity": "User", "columns": ["username"]}, {"id": "mr", "entity": "MergeRequest", "columns": ["title"]} ], "relationships": [{"type": "AUTHORED", "from": "u", "to": "mr"}], "limit": 20 }"#, &allow_all(), ctx_101, ) .await; resp.assert_node_count(2); // User 2 (bob) authored MR 2002 in ns 1/101/1001/ — must be visible resp.assert_node("User", 2, |n| n.prop_str("username") == Some("bob")); resp.assert_node("MergeRequest", 2002, |n| { n.prop_str("title") == Some("Refactor C") }); resp.assert_edge_exists("User", 2, "MergeRequest", 2002, "AUTHORED"); // User 1's AUTHORED edges are in ns 1/100/1000/ — must NOT appear resp.assert_node_absent("User", 1); resp.assert_node_absent("MergeRequest", 2000); resp.assert_node_absent("MergeRequest", 2001); resp.assert_referential_integrity(); } /// Cross-namespace: Group 100 (ns `1/100/`) CONTAINS subgroup 200 (edge ns /// `1/100/200/`) and subgroup 200 CONTAINS subgroup 300 (edge ns /// `1/100/200/300/`). All containment edges must be visible when scoped to /// the parent namespace `1/100/`. pub(super) async fn cross_namespace_group_containment_across_depth(ctx: &TestContext) { let ctx_100 = SecurityContext::new(1, vec!["1/100/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "traversal", "nodes": [ {"id": "g", "entity": "Group", "columns": ["name"]}, {"id": "child", "entity": "Group", "columns": ["name"]} ], "relationships": [{"type": "CONTAINS", "from": "g", "to": "child"}], "limit": 20 }"#, &allow_all(), ctx_100, ) .await; resp.assert_node_count(3); // Group 100 contains Group 200 (edge ns 1/100/200/) resp.assert_edge_exists("Group", 100, "Group", 200, "CONTAINS"); // Group 200 contains Group 300 (edge ns 1/100/200/300/) resp.assert_edge_exists("Group", 200, "Group", 300, "CONTAINS"); resp.assert_referential_integrity(); } /// Cross-namespace isolation: scoped to `1/101/` should NOT see edges from /// `1/100/` or `1/102/`. User 1's AUTHORED MRs in `1/100/1000/` and /// User 3's MR in `1/102/1004/` must be invisible. pub(super) async fn cross_namespace_isolation_no_leakage(ctx: &TestContext) { let ctx_101 = SecurityContext::new(1, vec!["1/101/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "traversal", "nodes": [ {"id": "u", "entity": "User"}, {"id": "mr", "entity": "MergeRequest"} ], "relationships": [{"type": "AUTHORED", "from": "u", "to": "mr"}], "limit": 50 }"#, &allow_all(), ctx_101, ) .await; resp.assert_node_count(2); // Only MR 2002 is in ns 1/101/ — authored by User 2 resp.assert_node_ids("MergeRequest", &[2002]); resp.assert_edge_set("AUTHORED", &[(2, 2002)]); // MRs from other namespaces must not leak resp.assert_node_absent("MergeRequest", 2000); // ns 1/100/1000/ resp.assert_node_absent("MergeRequest", 2001); // ns 1/100/1000/ resp.assert_node_absent("MergeRequest", 2003); // ns 1/102/1004/ resp.assert_referential_integrity(); } /// Cross-namespace: narrow scope `1/100/1000/` sees AUTHORED edges in that /// project's namespace. The source User has no traversal_path filter — they /// come from any namespace. Only edges with matching traversal_path appear. pub(super) async fn cross_namespace_narrow_scope_returns_all_authors(ctx: &TestContext) { let ctx_project = SecurityContext::new(1, vec!["1/100/1000/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "traversal", "nodes": [ {"id": "u", "entity": "User", "columns": ["username"]}, {"id": "mr", "entity": "MergeRequest", "columns": ["title"]} ], "relationships": [{"type": "AUTHORED", "from": "u", "to": "mr"}], "limit": 20 }"#, &allow_all(), ctx_project, ) .await; resp.assert_node_count(3); // Both MRs 2000 and 2001 are in 1/100/1000/, authored by User 1 resp.assert_node_ids("MergeRequest", &[2000, 2001]); resp.assert_node("User", 1, |n| n.prop_str("username") == Some("alice")); resp.assert_edge_set("AUTHORED", &[(1, 2000), (1, 2001)]); // User 2's MR 2002 is in 1/101/ — must not appear resp.assert_node_absent("MergeRequest", 2002); resp.assert_referential_integrity(); } /// Cross-namespace aggregation: scoped to `1/100/`, count projects per group. /// Group 100 CONTAINS projects 1000 and 1002 via edges in `1/100/` subtree. /// Projects in `1/101/` and `1/102/` must not appear. pub(super) async fn cross_namespace_aggregation_respects_scope(ctx: &TestContext) { let ctx_100 = SecurityContext::new(1, vec!["1/100/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "aggregation", "nodes": [ {"id": "p", "entity": "Project"}, {"id": "g", "entity": "Group", "columns": ["name"]} ], "relationships": [{"type": "CONTAINS", "from": "g", "to": "p"}], "aggregations": [{"function": "count", "target": "p", "group_by": "g", "alias": "project_count"}], "limit": 20 }"#, &allow_all(), ctx_100, ) .await; // Group 100 CONTAINS projects 1000 (edge ns 1/100/1000/) and 1002 // (edge ns 1/100/1002/) — both in the 1/100/ subtree resp.assert_node_count(1); resp.assert_node("Group", 100, |n| n.prop_i64("project_count") == Some(2)); // Groups 101 and 102 have CONTAINS edges outside 1/100/ — must not appear resp.assert_node_absent("Group", 101); resp.assert_node_absent("Group", 102); } pub(super) async fn empty_result_has_valid_schema(ctx: &TestContext) { let resp = run_query( ctx, Loading crates/integration-tests/tests/server/data_correctness/mod.rs +6 −0 Original line number Diff line number Diff line Loading @@ -96,6 +96,12 @@ async fn data_correctness() { edge_cases::sip_prefilter_with_filter_returns_correct_results, edge_cases::sip_prefilter_multi_hop_returns_correct_results, edge_cases::sip_target_aggregation_with_filter_returns_correct_counts, // cross-namespace correctness edge_cases::cross_namespace_user_authors_mr_in_different_group, edge_cases::cross_namespace_group_containment_across_depth, edge_cases::cross_namespace_isolation_no_leakage, edge_cases::cross_namespace_narrow_scope_returns_all_authors, edge_cases::cross_namespace_aggregation_respects_scope, // referential integrity edge_cases::traversal_referential_integrity_on_complex_query, ); Loading crates/query-engine/compiler/src/lower.rs +3 −3 Original line number Diff line number Diff line Loading @@ -764,9 +764,9 @@ fn lower_neighbors(input: &Input) -> Result<Node> { let (limit, offset) = pagination(input); // For Direction::Both, split into UNION ALL of outgoing + incoming so // ClickHouse can use by_source and by_target projections respectively. // An OR join (source_id = X OR target_id = X) prevents projection use // and forces a full edge table scan. // ClickHouse can select the optimal access path for each direction // (base table PK or by_source/by_target projections). An OR join // (source_id = X OR target_id = X) prevents index use. if neighbors_config.direction == Direction::Both { let build_arm = |dir: Direction| -> Query { let (edge_table, edge_type_cond) = edge_scan(edge_alias, &type_filter); Loading crates/query-engine/compiler/src/optimize.rs +6 −7 Original line number Diff line number Diff line Loading @@ -122,14 +122,13 @@ fn choose_sip_root(input: &Input) -> Option<&InputNode> { /// SIP (Sideways Information Passing) pre-filter. /// /// Materializes the root node's matching IDs in a CTE and pushes them into /// the edge table scan via IN subquery. This triggers ClickHouse's `by_source` /// projection on the edge table, reducing rows scanned by up to 63%. /// the edge table scan via IN subquery. Combined with the namespace-first /// edge PK `(traversal_path, source_id, relationship_kind)`, the IN filter /// and startsWith filter work together for precise granule pruning. /// /// Applied when either: /// - The root node has explicit selectivity (filters, node_ids, cursor, id_range) /// - The root node's table has a traversal_path security filter (the security /// pass will inject startsWith into the CTE, giving the IN subquery enough /// selectivity to trigger projection-based edge scans) /// When source_id IN (...) is present without startsWith, ClickHouse selects /// the `by_source` projection instead. When both are present, the base table /// PK handles both predicates via prefix matching. fn apply_sip_prefilter(q: &mut Query, input: &Input, ctx: &SecurityContext) { if !matches!( input.query_type, Loading Loading
config/graph.sql +6 −3 Original line number Diff line number Diff line Loading @@ -246,6 +246,8 @@ CREATE TABLE IF NOT EXISTS gl_work_item ( ORDER BY (traversal_path, id) PRIMARY KEY (traversal_path, id) SETTINGS index_granularity = 2048, allow_experimental_replacing_merge_with_cleanup = 1; -- Edge table: namespace-scoped PK with adjacency projections CREATE TABLE IF NOT EXISTS gl_edge ( traversal_path String DEFAULT '0/' CODEC(ZSTD(1)), source_id Int64 CODEC(Delta(8), ZSTD(1)), Loading @@ -256,10 +258,11 @@ CREATE TABLE IF NOT EXISTS gl_edge ( _version DateTime64(6, 'UTC') DEFAULT now64(6) CODEC(ZSTD(1)), _deleted Bool DEFAULT false, INDEX idx_relationship relationship_kind TYPE set(50) GRANULARITY 2, PROJECTION by_target (SELECT * ORDER BY (target_id, relationship_kind, target_kind, source_id, traversal_path)) PROJECTION by_source (SELECT * ORDER BY (source_id, relationship_kind, target_id, traversal_path, source_kind, target_kind)), PROJECTION by_target (SELECT * ORDER BY (target_id, relationship_kind, source_id, traversal_path, source_kind, target_kind)) ) ENGINE = ReplacingMergeTree(_version, _deleted) ORDER BY (source_id, relationship_kind, target_id, traversal_path, source_kind, target_kind) PRIMARY KEY (source_id, relationship_kind, target_id) ORDER BY (traversal_path, source_id, relationship_kind, target_id, source_kind, target_kind) PRIMARY KEY (traversal_path, source_id, relationship_kind) SETTINGS index_granularity = 1024, deduplicate_merge_projection_mode = 'rebuild', allow_experimental_replacing_merge_with_cleanup = 1; -- CI graph tables Loading
crates/integration-tests/tests/server/data_correctness/edge_cases.rs +171 −0 Original line number Diff line number Diff line Loading @@ -185,6 +185,177 @@ pub(super) async fn sip_target_aggregation_with_filter_returns_correct_counts(ct resp.assert_node_absent("User", 3); } /// Cross-namespace: User 2 is MEMBER_OF group 100 (ns `1/100/`) but authored /// MR 2002 in ns `1/101/1001/`. When scoped to `1/101/`, User 2 must appear /// as the MR author even though their membership edge is in a different namespace. pub(super) async fn cross_namespace_user_authors_mr_in_different_group(ctx: &TestContext) { let ctx_101 = SecurityContext::new(1, vec!["1/101/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "traversal", "nodes": [ {"id": "u", "entity": "User", "columns": ["username"]}, {"id": "mr", "entity": "MergeRequest", "columns": ["title"]} ], "relationships": [{"type": "AUTHORED", "from": "u", "to": "mr"}], "limit": 20 }"#, &allow_all(), ctx_101, ) .await; resp.assert_node_count(2); // User 2 (bob) authored MR 2002 in ns 1/101/1001/ — must be visible resp.assert_node("User", 2, |n| n.prop_str("username") == Some("bob")); resp.assert_node("MergeRequest", 2002, |n| { n.prop_str("title") == Some("Refactor C") }); resp.assert_edge_exists("User", 2, "MergeRequest", 2002, "AUTHORED"); // User 1's AUTHORED edges are in ns 1/100/1000/ — must NOT appear resp.assert_node_absent("User", 1); resp.assert_node_absent("MergeRequest", 2000); resp.assert_node_absent("MergeRequest", 2001); resp.assert_referential_integrity(); } /// Cross-namespace: Group 100 (ns `1/100/`) CONTAINS subgroup 200 (edge ns /// `1/100/200/`) and subgroup 200 CONTAINS subgroup 300 (edge ns /// `1/100/200/300/`). All containment edges must be visible when scoped to /// the parent namespace `1/100/`. pub(super) async fn cross_namespace_group_containment_across_depth(ctx: &TestContext) { let ctx_100 = SecurityContext::new(1, vec!["1/100/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "traversal", "nodes": [ {"id": "g", "entity": "Group", "columns": ["name"]}, {"id": "child", "entity": "Group", "columns": ["name"]} ], "relationships": [{"type": "CONTAINS", "from": "g", "to": "child"}], "limit": 20 }"#, &allow_all(), ctx_100, ) .await; resp.assert_node_count(3); // Group 100 contains Group 200 (edge ns 1/100/200/) resp.assert_edge_exists("Group", 100, "Group", 200, "CONTAINS"); // Group 200 contains Group 300 (edge ns 1/100/200/300/) resp.assert_edge_exists("Group", 200, "Group", 300, "CONTAINS"); resp.assert_referential_integrity(); } /// Cross-namespace isolation: scoped to `1/101/` should NOT see edges from /// `1/100/` or `1/102/`. User 1's AUTHORED MRs in `1/100/1000/` and /// User 3's MR in `1/102/1004/` must be invisible. pub(super) async fn cross_namespace_isolation_no_leakage(ctx: &TestContext) { let ctx_101 = SecurityContext::new(1, vec!["1/101/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "traversal", "nodes": [ {"id": "u", "entity": "User"}, {"id": "mr", "entity": "MergeRequest"} ], "relationships": [{"type": "AUTHORED", "from": "u", "to": "mr"}], "limit": 50 }"#, &allow_all(), ctx_101, ) .await; resp.assert_node_count(2); // Only MR 2002 is in ns 1/101/ — authored by User 2 resp.assert_node_ids("MergeRequest", &[2002]); resp.assert_edge_set("AUTHORED", &[(2, 2002)]); // MRs from other namespaces must not leak resp.assert_node_absent("MergeRequest", 2000); // ns 1/100/1000/ resp.assert_node_absent("MergeRequest", 2001); // ns 1/100/1000/ resp.assert_node_absent("MergeRequest", 2003); // ns 1/102/1004/ resp.assert_referential_integrity(); } /// Cross-namespace: narrow scope `1/100/1000/` sees AUTHORED edges in that /// project's namespace. The source User has no traversal_path filter — they /// come from any namespace. Only edges with matching traversal_path appear. pub(super) async fn cross_namespace_narrow_scope_returns_all_authors(ctx: &TestContext) { let ctx_project = SecurityContext::new(1, vec!["1/100/1000/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "traversal", "nodes": [ {"id": "u", "entity": "User", "columns": ["username"]}, {"id": "mr", "entity": "MergeRequest", "columns": ["title"]} ], "relationships": [{"type": "AUTHORED", "from": "u", "to": "mr"}], "limit": 20 }"#, &allow_all(), ctx_project, ) .await; resp.assert_node_count(3); // Both MRs 2000 and 2001 are in 1/100/1000/, authored by User 1 resp.assert_node_ids("MergeRequest", &[2000, 2001]); resp.assert_node("User", 1, |n| n.prop_str("username") == Some("alice")); resp.assert_edge_set("AUTHORED", &[(1, 2000), (1, 2001)]); // User 2's MR 2002 is in 1/101/ — must not appear resp.assert_node_absent("MergeRequest", 2002); resp.assert_referential_integrity(); } /// Cross-namespace aggregation: scoped to `1/100/`, count projects per group. /// Group 100 CONTAINS projects 1000 and 1002 via edges in `1/100/` subtree. /// Projects in `1/101/` and `1/102/` must not appear. pub(super) async fn cross_namespace_aggregation_respects_scope(ctx: &TestContext) { let ctx_100 = SecurityContext::new(1, vec!["1/100/".into()]).unwrap(); let resp = run_query_with_security( ctx, r#"{ "query_type": "aggregation", "nodes": [ {"id": "p", "entity": "Project"}, {"id": "g", "entity": "Group", "columns": ["name"]} ], "relationships": [{"type": "CONTAINS", "from": "g", "to": "p"}], "aggregations": [{"function": "count", "target": "p", "group_by": "g", "alias": "project_count"}], "limit": 20 }"#, &allow_all(), ctx_100, ) .await; // Group 100 CONTAINS projects 1000 (edge ns 1/100/1000/) and 1002 // (edge ns 1/100/1002/) — both in the 1/100/ subtree resp.assert_node_count(1); resp.assert_node("Group", 100, |n| n.prop_i64("project_count") == Some(2)); // Groups 101 and 102 have CONTAINS edges outside 1/100/ — must not appear resp.assert_node_absent("Group", 101); resp.assert_node_absent("Group", 102); } pub(super) async fn empty_result_has_valid_schema(ctx: &TestContext) { let resp = run_query( ctx, Loading
crates/integration-tests/tests/server/data_correctness/mod.rs +6 −0 Original line number Diff line number Diff line Loading @@ -96,6 +96,12 @@ async fn data_correctness() { edge_cases::sip_prefilter_with_filter_returns_correct_results, edge_cases::sip_prefilter_multi_hop_returns_correct_results, edge_cases::sip_target_aggregation_with_filter_returns_correct_counts, // cross-namespace correctness edge_cases::cross_namespace_user_authors_mr_in_different_group, edge_cases::cross_namespace_group_containment_across_depth, edge_cases::cross_namespace_isolation_no_leakage, edge_cases::cross_namespace_narrow_scope_returns_all_authors, edge_cases::cross_namespace_aggregation_respects_scope, // referential integrity edge_cases::traversal_referential_integrity_on_complex_query, ); Loading
crates/query-engine/compiler/src/lower.rs +3 −3 Original line number Diff line number Diff line Loading @@ -764,9 +764,9 @@ fn lower_neighbors(input: &Input) -> Result<Node> { let (limit, offset) = pagination(input); // For Direction::Both, split into UNION ALL of outgoing + incoming so // ClickHouse can use by_source and by_target projections respectively. // An OR join (source_id = X OR target_id = X) prevents projection use // and forces a full edge table scan. // ClickHouse can select the optimal access path for each direction // (base table PK or by_source/by_target projections). An OR join // (source_id = X OR target_id = X) prevents index use. if neighbors_config.direction == Direction::Both { let build_arm = |dir: Direction| -> Query { let (edge_table, edge_type_cond) = edge_scan(edge_alias, &type_filter); Loading
crates/query-engine/compiler/src/optimize.rs +6 −7 Original line number Diff line number Diff line Loading @@ -122,14 +122,13 @@ fn choose_sip_root(input: &Input) -> Option<&InputNode> { /// SIP (Sideways Information Passing) pre-filter. /// /// Materializes the root node's matching IDs in a CTE and pushes them into /// the edge table scan via IN subquery. This triggers ClickHouse's `by_source` /// projection on the edge table, reducing rows scanned by up to 63%. /// the edge table scan via IN subquery. Combined with the namespace-first /// edge PK `(traversal_path, source_id, relationship_kind)`, the IN filter /// and startsWith filter work together for precise granule pruning. /// /// Applied when either: /// - The root node has explicit selectivity (filters, node_ids, cursor, id_range) /// - The root node's table has a traversal_path security filter (the security /// pass will inject startsWith into the CTE, giving the IN subquery enough /// selectivity to trigger projection-based edge scans) /// When source_id IN (...) is present without startsWith, ClickHouse selects /// the `by_source` projection instead. When both are present, the base table /// PK handles both predicates via prefix matching. fn apply_sip_prefilter(q: &mut Query, input: &Input, ctx: &SecurityContext) { if !matches!( input.query_type, Loading