Verified Commit 48171999 authored by Michael Usachenko's avatar Michael Usachenko Committed by GitLab
Browse files

perf(compiler): collapse traversal paths via trie subsumption

parent e2d4d0f2
Loading
Loading
Loading
Loading
+210 −10
Original line number Diff line number Diff line
@@ -102,33 +102,112 @@ fn build_path_filter(alias: &str, paths: &[&str]) -> Expr {
        0 => Expr::param(ChType::Bool, false),
        1 => starts_with_expr(alias, paths[0]),
        _ => {
            let owned: Vec<String> = paths.iter().map(|s| (*s).to_string()).collect();
            let prefix = lowest_common_prefix(&owned);
            let prefix_filter = starts_with_expr(alias, &prefix);
            match Expr::or_all(paths.iter().map(|p| Some(starts_with_expr(alias, p)))) {
                Some(or_filters) => Expr::and(prefix_filter, or_filters),
                None => prefix_filter,
            let collapsed = PathTrie::from_paths(paths).to_minimal_prefixes();
            if collapsed.len() == 1 {
                return starts_with_expr(alias, &collapsed[0]);
            }
            let lcp = lowest_common_prefix(&collapsed);
            let lcp_filter = starts_with_expr(alias, &lcp);
            match Expr::or_all(collapsed.iter().map(|p| Some(starts_with_expr(alias, p)))) {
                Some(or_filters) => Expr::and(lcp_filter, or_filters),
                None => lcp_filter,
            }
        }
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// PathTrie — segment-level trie for collapsing traversal paths
// ─────────────────────────────────────────────────────────────────────────────

/// A trie keyed on path segments (`"1"`, `"100"`, …).
///
/// Each node tracks whether it was explicitly inserted (i.e., the user has
/// access to that exact namespace prefix). Inserting `"1/100/"` creates the
/// chain `1 → 100` and marks the `100` node as terminal.
#[derive(Default)]
struct PathTrie {
    /// Child nodes keyed by segment text. The `BTreeMap` makes traversal, and
    /// therefore the emitted prefix order, follow string ordering of the
    /// segments (so `"10"` sorts before `"2"`).
    children: std::collections::BTreeMap<String, PathTrie>,
    /// `true` when a path ending exactly at this node was inserted.
    terminal: bool,
}

impl PathTrie {
    /// Builds a trie containing every path in `paths`.
    fn from_paths(paths: &[&str]) -> Self {
        let mut root = Self::default();
        for path in paths {
            root.insert(path);
        }
        root
    }

    /// Inserts `path`, marking the node of its last segment as terminal.
    ///
    /// Segments are the non-empty pieces between `/` separators, so leading,
    /// trailing and doubled slashes are ignored.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics when `path` contains
    /// no segments. Without debug assertions such a path is silently skipped.
    fn insert(&mut self, path: &str) {
        // Filtering empty pieces already discards whatever a trailing `/`
        // produces, so no separate trim is needed.
        let mut segments = path.split('/').filter(|s| !s.is_empty()).peekable();

        // Empty paths are impossible: SecurityContext::validate_traversal_path
        // enforces ^(\d+/)+$. Guard here to prevent the root node from being
        // marked terminal, which would emit "" and match everything.
        let has_segments = segments.peek().is_some();
        debug_assert!(has_segments, "PathTrie::insert called with empty path");
        if !has_segments {
            return;
        }

        let mut node = self;
        for seg in segments {
            node = node.children.entry(seg.to_string()).or_default();
        }
        node.terminal = true;
    }

    /// Walks the trie and returns the minimal set of prefixes that covers
    /// every inserted path.
    ///
    /// A terminal node emits its own path (with a trailing `/`) and prunes
    /// all of its descendants, because the shorter prefix already covers
    /// them (subsumption). A non-terminal node emits nothing itself; the
    /// walk simply continues into each of its children. Results are
    /// returned in the trie's traversal order.
    fn to_minimal_prefixes(&self) -> Vec<String> {
        let mut result = Vec::new();
        self.collect(&mut String::new(), &mut result);
        result
    }

    /// Recursive helper for [`PathTrie::to_minimal_prefixes`].
    ///
    /// `prefix` holds the `/`-joined segments leading to `self` (without a
    /// trailing slash) and is restored to its incoming length before
    /// returning. Emitted prefixes are appended to `out`.
    fn collect(&self, prefix: &mut String, out: &mut Vec<String>) {
        if self.terminal {
            // This node is authorized — emit the prefix, skip children.
            let mut emitted = prefix.clone();
            if !emitted.is_empty() {
                emitted.push('/');
            }
            out.push(emitted);
            return;
        }

        for (seg, child) in &self.children {
            let restore_len = prefix.len();
            if !prefix.is_empty() {
                prefix.push('/');
            }
            prefix.push_str(seg);
            child.collect(prefix, out);
            // Drop this child's segment so the next sibling starts from the
            // same parent prefix.
            prefix.truncate(restore_len);
        }
    }
}

/// Find the lowest common path prefix across a set of paths.
fn lowest_common_prefix(paths: &[String]) -> String {
    if paths.is_empty() {
        return String::new();
    }

    let segments: Vec<Vec<&str>> = paths
        .iter()
        .map(|p| p.trim_end_matches('/').split('/').collect())
        .collect();

    let first = &segments[0];
    let common_len = (0..first.len())
        .take_while(|&i| segments.iter().all(|s| s.get(i) == first.get(i)))
        .count();

    if common_len == 0 {
        String::new()
    } else {
@@ -481,6 +560,127 @@ mod tests {
        assert_eq!(lowest_common_prefix(&["42/".into()]), "42/");
    }

    #[test]
    fn path_trie_subsumes_children() {
        // The parent 1/100/ is inserted alongside two of its children, so
        // only the parent should survive.
        let inputs = ["1/100/", "1/100/200/", "1/100/201/"];
        let minimal = PathTrie::from_paths(&inputs).to_minimal_prefixes();
        assert_eq!(minimal, vec!["1/100/"]);
    }

    #[test]
    fn path_trie_keeps_siblings() {
        // Neither path is a prefix of the other, so both are emitted.
        let inputs = ["1/100/", "1/200/"];
        let minimal = PathTrie::from_paths(&inputs).to_minimal_prefixes();
        assert_eq!(minimal, vec!["1/100/", "1/200/"]);
    }

    #[test]
    fn path_trie_siblings_under_shared_parent() {
        // Three children under 1/100/ — trie keeps all three since
        // the parent 1/100/ is not itself authorized. The unrelated
        // 1/200/300/ path is kept as well.
        let t = PathTrie::from_paths(&["1/100/200/", "1/100/201/", "1/100/202/", "1/200/300/"]);
        // Pin the exact output (not just its length) so a wrong or missing
        // prefix under 1/100/ cannot slip through.
        assert_eq!(
            t.to_minimal_prefixes(),
            vec!["1/100/200/", "1/100/201/", "1/100/202/", "1/200/300/"]
        );
    }

    #[test]
    fn path_trie_single_path() {
        // A lone path comes back unchanged.
        let only = ["1/100/"];
        let minimal = PathTrie::from_paths(&only).to_minimal_prefixes();
        assert_eq!(minimal, vec!["1/100/"]);
    }

    #[test]
    fn path_trie_deduplicates() {
        // 1/100/ is supplied twice but must appear only once in the output.
        let inputs = ["1/100/", "1/100/", "1/200/"];
        let minimal = PathTrie::from_paths(&inputs).to_minimal_prefixes();
        assert_eq!(minimal, vec!["1/100/", "1/200/"]);
    }

    #[test]
    fn path_trie_deep_subsumption() {
        // The top-level 1/ covers every deeper path in the chain.
        let chain = ["1/", "1/100/", "1/100/200/", "1/100/200/300/"];
        let minimal = PathTrie::from_paths(&chain).to_minimal_prefixes();
        assert_eq!(minimal, vec!["1/"]);
    }

    #[test]
    fn path_trie_mixed_orgs() {
        // Same second segment under different first segments: both stay.
        let inputs = ["1/100/", "2/100/"];
        let minimal = PathTrie::from_paths(&inputs).to_minimal_prefixes();
        assert_eq!(minimal, vec!["1/100/", "2/100/"]);
    }

    #[test]
    fn path_trie_realistic_38_paths() {
        // A user with access to 38 groups: 30 nested under 1/10/ and 8
        // directly under 1/. No parent path is itself authorized, so
        // nothing can be subsumed and all 38 must be kept.
        let nested = (100..130).map(|i| format!("1/10/{i}/"));
        let scattered = (200..208).map(|i| format!("1/{i}/"));
        let owned: Vec<String> = nested.chain(scattered).collect();
        let borrowed: Vec<&str> = owned.iter().map(String::as_str).collect();

        let minimal = PathTrie::from_paths(&borrowed).to_minimal_prefixes();

        // Only the count is checked here — every input is a leaf group.
        assert_eq!(minimal.len(), 38);
    }

    #[test]
    fn path_trie_parent_collapses_many_children() {
        // The parent group 1/10/ is authorized together with 30 of its
        // subgroups; the parent alone should remain.
        let subgroups: Vec<String> = (100..130).map(|i| format!("1/10/{i}/")).collect();
        let paths: Vec<&str> = std::iter::once("1/10/")
            .chain(subgroups.iter().map(String::as_str))
            .collect();

        let minimal = PathTrie::from_paths(&paths).to_minimal_prefixes();
        assert_eq!(minimal, vec!["1/10/"]);
    }

    // The panic comes from a `debug_assert!`, which is compiled out when
    // debug assertions are disabled (e.g. `cargo test --release`). Gate the
    // test on the same cfg so it does not fail for lack of a panic there.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "empty path")]
    fn path_trie_empty_path_panics_in_debug() {
        // Empty paths are impossible (SecurityContext validates ^(\d+/)+$).
        // The debug_assert catches misuse during development.
        PathTrie::from_paths(&[""]);
    }

    #[test]
    fn trie_collapse_after_role_filtering() {
        use crate::types::TraversalPath;

        // Models paths_at_least merging paths from different role buckets:
        //   - Reporter (20) on 1/100/ and its child 1/100/200/
        //   - Developer (30) on 1/100/200/ and 1/300/
        let grants = vec![
            TraversalPath::new("1/100/".to_string(), 20), // Reporter
            TraversalPath::new("1/100/200/".to_string(), 20), // Reporter
            TraversalPath::new("1/100/200/".to_string(), 30), // Developer
            TraversalPath::new("1/300/".to_string(), 30), // Developer
        ];
        let security = SecurityContext::new_with_roles(1, grants).unwrap();

        // With a Reporter floor (level 20) every grant qualifies, so all
        // four entries come back.
        let eligible = security.paths_at_least(20);
        assert_eq!(eligible.len(), 4);

        // Subsumption folds 1/100/200/ into 1/100/ and leaves 1/300/ alone.
        let minimal = PathTrie::from_paths(&eligible).to_minimal_prefixes();
        assert_eq!(minimal, vec!["1/100/", "1/300/"]);

        // The generated filter should be built from startsWith predicates;
        // only their presence is checked in the debug rendering.
        let filter = build_path_filter("t", &eligible);
        let sql = format!("{filter:?}");
        assert!(
            sql.contains("startsWith"),
            "should produce startsWith predicates: {sql}"
        );
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Security injection tests
    // ─────────────────────────────────────────────────────────────────────────