Verified Commit 1a57768b authored by Michael Usachenko's avatar Michael Usachenko Committed by GitLab
Browse files

refactor(tests): extract data correctness seeds to SQL file

parent 6f766b7c
Loading
Loading
Loading
Loading
+57 −14
Original line number Diff line number Diff line
@@ -1535,7 +1535,7 @@ dependencies = [
 "parking_lot",
 "rand 0.9.2",
 "regex",
 "sqlparser",
 "sqlparser 0.59.0",
 "tempfile",
 "tokio",
 "url",
@@ -1607,7 +1607,7 @@ dependencies = [
 "log",
 "object_store",
 "paste",
 "sqlparser",
 "sqlparser 0.59.0",
 "tokio",
 "web-time",
]
@@ -1767,7 +1767,7 @@ dependencies = [
 "itertools 0.14.0",
 "paste",
 "serde_json",
 "sqlparser",
 "sqlparser 0.59.0",
]

[[package]]
@@ -2090,7 +2090,7 @@ dependencies = [
 "indexmap 2.13.0",
 "log",
 "regex",
 "sqlparser",
 "sqlparser 0.59.0",
]

[[package]]
@@ -2171,7 +2171,7 @@ dependencies = [
 "libc",
 "option-ext",
 "redox_users",
 "windows-sys 0.59.0",
 "windows-sys 0.61.2",
]

[[package]]
@@ -2355,7 +2355,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
 "libc",
 "windows-sys 0.59.0",
 "windows-sys 0.61.2",
]

[[package]]
@@ -3437,6 +3437,7 @@ dependencies = [
 "query-engine",
 "rustls",
 "serde_json",
 "sqlparser 0.61.0",
 "testcontainers",
 "tokio",
 "uuid",
@@ -4155,7 +4156,7 @@ version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
 "windows-sys 0.59.0",
 "windows-sys 0.61.2",
]

[[package]]
@@ -5064,7 +5065,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
dependencies = [
 "heck",
 "itertools 0.13.0",
 "itertools 0.14.0",
 "log",
 "multimap",
 "petgraph",
@@ -5085,7 +5086,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
dependencies = [
 "anyhow",
 "itertools 0.13.0",
 "itertools 0.14.0",
 "proc-macro2",
 "quote",
 "syn",
@@ -5387,6 +5388,26 @@ dependencies = [
 "crossbeam-utils",
]

[[package]]
name = "recursive"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e"
dependencies = [
 "recursive-proc-macro-impl",
 "stacker",
]

[[package]]
name = "recursive-proc-macro-impl"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
dependencies = [
 "quote",
 "syn",
]

[[package]]
name = "redox_syscall"
version = "0.5.18"
@@ -5653,7 +5674,7 @@ dependencies = [
 "errno",
 "libc",
 "linux-raw-sys",
 "windows-sys 0.59.0",
 "windows-sys 0.61.2",
]

[[package]]
@@ -5733,7 +5754,7 @@ dependencies = [
 "security-framework 3.7.0",
 "security-framework-sys",
 "webpki-root-certs",
 "windows-sys 0.59.0",
 "windows-sys 0.61.2",
]

[[package]]
@@ -6286,7 +6307,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f"
dependencies = [
 "log",
 "sqlparser_derive",
 "sqlparser_derive 0.3.0",
]

[[package]]
name = "sqlparser"
version = "0.61.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7"
dependencies = [
 "log",
 "recursive",
 "sqlparser_derive 0.5.0",
]

[[package]]
@@ -6300,6 +6332,17 @@ dependencies = [
 "syn",
]

[[package]]
name = "sqlparser_derive"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "stable_deref_trait"
version = "1.2.1"
@@ -6644,7 +6687,7 @@ dependencies = [
 "getrandom 0.4.2",
 "once_cell",
 "rustix",
 "windows-sys 0.59.0",
 "windows-sys 0.61.2",
]

[[package]]
@@ -7785,7 +7828,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
 "windows-sys 0.59.0",
 "windows-sys 0.61.2",
]

[[package]]
+1 −0
Original line number Diff line number Diff line
@@ -110,6 +110,7 @@ indicatif = "0.18"


regex = "1.12.2"
sqlparser = { version = "0.61.0", features = ["visitor"] }
hex = "0.4.3"
hmac = "0.12.1"
hyper-util = { version = "0.1", features = ["client", "tokio"] }
+137 −0
Original line number Diff line number Diff line
-- Data correctness seed: known-good fixture data for integration tests.
--
-- Topology:
--
--   Users:
--     1 alice   (active,  human)
--     2 bob     (active,  human)
--     3 charlie (active,  human)
--     4 diana   (active,  project_bot)
--     5 eve     (blocked, service_account)
--     6 用户_émoji_🎉 (active, human) — unicode stress test
--
--   Groups:
--     100 Public Group   (public,   path 1/100/)
--     101 Private Group  (private,  path 1/101/)
--     102 Internal Group (internal, path 1/102/)
--     200 Deep Group A   (public,   path 1/100/200/)
--     300 Deep Group B   (public,   path 1/100/200/300/)
--
--   Projects:
--     1000 Public Project   (public,   path 1/100/1000/)
--     1001 Private Project  (private,  path 1/101/1001/)
--     1002 Internal Project (internal, path 1/100/1002/)
--     1003 Secret Project   (private,  path 1/101/1003/)
--     1004 Shared Project   (public,   path 1/102/1004/)
--
--   MergeRequests:
--     2000 Add feature A (opened, path 1/100/1000/)
--     2001 Fix bug B     (opened, path 1/100/1000/)
--     2002 Refactor C    (merged, path 1/101/1001/)
--     2003 Update D      (closed, path 1/102/1004/)
--
--   MergeRequestDiffs:
--     5000 (MR 2000, collected)
--     5001 (MR 2000, collected)
--     5002 (MR 2001, collected)
--
--   Notes:
--     3000 Normal note           (MR 2000, not confidential, not internal)
--     3001 Confidential note     (MR 2001, confidential=true)
--     3002 Giant string note     (MR 2000, 10000 chars)
--     3003 SQL injection note    (MR 2000, DROP TABLE payload)
--
--   MEMBER_OF edges:
--     User 1 -> Group 100, User 1 -> Group 102
--     User 2 -> Group 100, User 3 -> Group 101
--     User 4 -> Group 101, User 4 -> Group 102, User 5 -> Group 101
--     User 6 -> Group 100, User 6 -> Group 101
--
--   CONTAINS edges:
--     Group 100 -> Project 1000, Group 100 -> Project 1002
--     Group 100 -> Group 200 (subgroup)
--     Group 200 -> Group 300 (subgroup depth 2)
--     Group 101 -> Project 1001, Group 101 -> Project 1003
--     Group 102 -> Project 1004
--
--   AUTHORED edges:
--     User 1 -> MR 2000, User 1 -> MR 2001
--     User 2 -> MR 2002, User 3 -> MR 2003
--     User 1 -> Note 3000
--
--   HAS_NOTE edges:
--     MR 2000 -> Note 3000, MR 2000 -> Note 3002, MR 2000 -> Note 3003
--     MR 2001 -> Note 3001
--
--   HAS_DIFF edges:
--     MR 2000 -> MergeRequestDiff 5000, MR 2000 -> MergeRequestDiff 5001
--     MR 2001 -> MergeRequestDiff 5002

-- Users 1-6. Covers both states used here ('active', 'blocked') and three
-- user types ('human', 'project_bot', 'service_account'). User 6 has a
-- non-ASCII username and display name to exercise Unicode handling.
INSERT INTO gl_user (id, username, name, state, user_type) VALUES
    (1, 'alice', 'Alice Admin', 'active', 'human'),
    (2, 'bob', 'Bob Builder', 'active', 'human'),
    (3, 'charlie', 'Charlie Private', 'active', 'human'),
    (4, 'diana', 'Diana Developer', 'active', 'project_bot'),
    (5, 'eve', 'Eve External', 'blocked', 'service_account'),
    (6, '用户_émoji_🎉', 'Ünïcödé Üser', 'active', 'human');

-- Groups. 100, 101 and 102 sit directly under the '1/' prefix, one per
-- visibility level. 200 is nested under 100 and 300 under 200; each nested
-- traversal_path is the parent's path with the group's own id appended.
INSERT INTO gl_group (id, name, visibility_level, traversal_path) VALUES
    (100, 'Public Group', 'public', '1/100/'),
    (101, 'Private Group', 'private', '1/101/'),
    (102, 'Internal Group', 'internal', '1/102/'),
    (200, 'Deep Group A', 'public', '1/100/200/'),
    (300, 'Deep Group B', 'public', '1/100/200/300/');

-- Projects 1000-1004. Each traversal_path is a group path from gl_group with
-- the project id appended. Project visibility does not always match its
-- group: 1002 is 'internal' under the 'public' group 100, and 1004 is
-- 'public' under the 'internal' group 102.
INSERT INTO gl_project (id, name, visibility_level, traversal_path) VALUES
    (1000, 'Public Project', 'public', '1/100/1000/'),
    (1001, 'Private Project', 'private', '1/101/1001/'),
    (1002, 'Internal Project', 'internal', '1/100/1002/'),
    (1003, 'Secret Project', 'private', '1/101/1003/'),
    (1004, 'Shared Project', 'public', '1/102/1004/');

-- Merge requests 2000-2003: two 'opened', one 'merged', one 'closed'.
-- Each traversal_path equals a project path from gl_project; 2000 and 2001
-- share project 1000's path. All target 'main'.
INSERT INTO gl_merge_request (id, iid, title, state, source_branch, target_branch, traversal_path) VALUES
    (2000, 1, 'Add feature A', 'opened', 'feature-a', 'main', '1/100/1000/'),
    (2001, 2, 'Fix bug B', 'opened', 'fix-b', 'main', '1/100/1000/'),
    (2002, 3, 'Refactor C', 'merged', 'refactor-c', 'main', '1/101/1001/'),
    (2003, 4, 'Update D', 'closed', 'update-d', 'main', '1/102/1004/');

-- Notes 3000-3003, all with noteable_type 'MergeRequest'.
--   3001 is the only confidential note.
--   3002's body is the expression repeat('x', 10000) rather than a literal.
--   3003's body contains an escaped single quote ('') followed by SQL-looking
--   text; it is data and must be stored verbatim, not executed.
-- 3002 and 3003 leave created_at NULL.
INSERT INTO gl_note (id, note, noteable_type, noteable_id, confidential, internal, created_at, traversal_path) VALUES
    (3000, 'Normal note on feature A', 'MergeRequest', 2000, false, false, '2024-01-15 10:30:00', '1/100/1000/'),
    (3001, 'Confidential feedback on bug B', 'MergeRequest', 2001, true, false, '2024-02-20 14:45:00', '1/100/1000/'),
    (3002, repeat('x', 10000), 'MergeRequest', 2000, false, false, NULL, '1/100/1000/'),
    (3003, 'Robert''); DROP TABLE gl_note;--', 'MergeRequest', 2000, false, false, NULL, '1/100/1000/');

-- Merge request diffs 5000-5002, all in state 'collected': two belong to
-- MR 2000 and one to MR 2001.
INSERT INTO gl_merge_request_diff (id, merge_request_id, state, traversal_path) VALUES
    (5000, 2000, 'collected', '1/100/1000/'),
    (5001, 2000, 'collected', '1/100/1000/'),
    (5002, 2001, 'collected', '1/100/1000/');

-- Relationship edges between the entities seeded in this file, grouped by
-- kind in this order: 9 MEMBER_OF, 7 CONTAINS, 5 AUTHORED, 4 HAS_NOTE,
-- 3 HAS_DIFF (28 rows). In every row the edge's traversal_path equals the
-- traversal_path of the target entity.
INSERT INTO gl_edge (traversal_path, source_id, source_kind, relationship_kind, target_id, target_kind) VALUES
    ('1/100/', 1, 'User', 'MEMBER_OF', 100, 'Group'),
    ('1/102/', 1, 'User', 'MEMBER_OF', 102, 'Group'),
    ('1/100/', 2, 'User', 'MEMBER_OF', 100, 'Group'),
    ('1/101/', 3, 'User', 'MEMBER_OF', 101, 'Group'),
    ('1/101/', 4, 'User', 'MEMBER_OF', 101, 'Group'),
    ('1/102/', 4, 'User', 'MEMBER_OF', 102, 'Group'),
    ('1/101/', 5, 'User', 'MEMBER_OF', 101, 'Group'),
    ('1/100/', 6, 'User', 'MEMBER_OF', 100, 'Group'),
    ('1/101/', 6, 'User', 'MEMBER_OF', 101, 'Group'),
    ('1/100/200/', 100, 'Group', 'CONTAINS', 200, 'Group'),
    ('1/100/200/300/', 200, 'Group', 'CONTAINS', 300, 'Group'),
    ('1/100/1000/', 100, 'Group', 'CONTAINS', 1000, 'Project'),
    ('1/100/1002/', 100, 'Group', 'CONTAINS', 1002, 'Project'),
    ('1/101/1001/', 101, 'Group', 'CONTAINS', 1001, 'Project'),
    ('1/101/1003/', 101, 'Group', 'CONTAINS', 1003, 'Project'),
    ('1/102/1004/', 102, 'Group', 'CONTAINS', 1004, 'Project'),
    ('1/100/1000/', 1, 'User', 'AUTHORED', 2000, 'MergeRequest'),
    ('1/100/1000/', 1, 'User', 'AUTHORED', 2001, 'MergeRequest'),
    ('1/101/1001/', 2, 'User', 'AUTHORED', 2002, 'MergeRequest'),
    ('1/102/1004/', 3, 'User', 'AUTHORED', 2003, 'MergeRequest'),
    ('1/100/1000/', 1, 'User', 'AUTHORED', 3000, 'Note'),
    ('1/100/1000/', 2000, 'MergeRequest', 'HAS_NOTE', 3000, 'Note'),
    ('1/100/1000/', 2000, 'MergeRequest', 'HAS_NOTE', 3002, 'Note'),
    ('1/100/1000/', 2000, 'MergeRequest', 'HAS_NOTE', 3003, 'Note'),
    ('1/100/1000/', 2001, 'MergeRequest', 'HAS_NOTE', 3001, 'Note'),
    ('1/100/1000/', 2000, 'MergeRequest', 'HAS_DIFF', 5000, 'MergeRequestDiff'),
    ('1/100/1000/', 2000, 'MergeRequest', 'HAS_DIFF', 5001, 'MergeRequestDiff'),
    ('1/100/1000/', 2001, 'MergeRequest', 'HAS_DIFF', 5002, 'MergeRequestDiff');
+1 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@ futures = { workspace = true }
gkg-server = { path = "../gkg-server" }
query-engine = { path = "../query-engine" }
rustls = { workspace = true }
sqlparser = { workspace = true }
serde_json = { workspace = true }
testcontainers = { workspace = true }
tokio = { workspace = true }
+107 −5
Original line number Diff line number Diff line
use crate::context::TestContext;
use sqlparser::ast::Statement;
use sqlparser::dialect::ClickHouseDialect;
use sqlparser::parser::Parser;

/// Directory holding the `.sql` seed files, taken from the `SEEDS_DIR`
/// environment variable at compile time (`env!` fails the build if it is unset).
const SEED_DIR: &str = env!("SEEDS_DIR");

@@ -6,11 +9,110 @@ pub async fn load_seed(ctx: &TestContext, name: &str) {
    let path = format!("{SEED_DIR}/{name}.sql");
    let sql = std::fs::read_to_string(&path)
        .unwrap_or_else(|e| panic!("seed '{name}' not found at {path}: {e}"));
    for statement in sql.split(';') {
        let statement = statement.trim();
        if statement.is_empty() || statement.starts_with("--") {
            continue;
    for stmt in split_sql_statements(&sql).unwrap() {
        ctx.execute(&stmt).await;
    }
        ctx.execute(statement).await;
}

/// Split a SQL seed file into individual statement strings, validated
/// by a real SQL parser.
///
/// The input is parsed with the ClickHouse dialect. Each returned string is
/// produced by calling `to_string()` on the parsed statement, so it is the
/// parser's rendering of the statement and not necessarily a byte-for-byte
/// slice of `sql`.
///
/// # Errors
///
/// * [`SplitError::Parse`] if the parser rejects the input.
/// * [`SplitError::EmptyInput`] if parsing succeeds but yields no statements.
/// * [`SplitError::UnexpectedStatement`] for the first statement that is not
///   an `INSERT`; `index` is its zero-based position and `kind` is the name
///   of the statement variant.
pub fn split_sql_statements(sql: &str) -> Result<Vec<String>, SplitError> {
    let dialect = ClickHouseDialect {};
    let stmts = Parser::parse_sql(&dialect, sql).map_err(|e| SplitError::Parse(e.to_string()))?;

    if stmts.is_empty() {
        return Err(SplitError::EmptyInput);
    }

    stmts
        .into_iter()
        .enumerate()
        .map(|(i, stmt)| match &stmt {
            Statement::Insert(_) => Ok(stmt.to_string()),
            other => {
                // The derived Debug output starts with the variant name, followed
                // by `(` for tuple variants, ` {` for struct variants, or nothing
                // for unit variants. Cutting at the first non-identifier character
                // yields just the name in all three cases; cutting only at `(`
                // would drag struct-variant fields into `kind`.
                let debug = format!("{other:?}");
                let kind = debug
                    .split(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
                    .next()
                    .filter(|name| !name.is_empty())
                    .unwrap_or("Unknown")
                    .to_string();
                Err(SplitError::UnexpectedStatement { index: i, kind })
            }
        })
        // Collecting into `Result` stops at the first rejected statement.
        .collect()
}

/// Reasons a SQL seed cannot be split into a list of `INSERT` statements.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SplitError {
    /// The SQL parser rejected the input; holds the parser's error message.
    Parse(String),
    /// The input parsed successfully but contained no statements.
    EmptyInput,
    /// A statement other than `INSERT` was found.
    UnexpectedStatement {
        /// Zero-based position of the offending statement.
        index: usize,
        /// Short name describing what kind of statement it was.
        kind: String,
    },
}

/// Human-readable, single-line rendering of each failure.
impl std::fmt::Display for SplitError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EmptyInput => f.write_str("input contains no SQL statements"),
            Self::Parse(reason) => f.write_fmt(format_args!("SQL parse error: {reason}")),
            Self::UnexpectedStatement {
                index: position,
                kind: found,
            } => f.write_fmt(format_args!(
                "statement {position} is {found}, expected INSERT"
            )),
        }
    }
}

// No underlying source error is tracked, so the default trait methods suffice.
impl std::error::Error for SplitError {}

#[cfg(test)]
mod tests {
    use super::*;

    /// Two well-formed INSERTs separated by `;` come back as two statements.
    #[test]
    fn splits_basic_inserts() {
        let split = split_sql_statements("INSERT INTO t VALUES (1); INSERT INTO t VALUES (2);")
            .unwrap();
        assert_eq!(split.len(), 2);
    }

    /// Quote escapes, keywords and comment markers inside a string literal
    /// must not be treated as statement boundaries.
    #[test]
    fn handles_escaped_quotes_and_injection() {
        let split =
            split_sql_statements("INSERT INTO t VALUES ('Robert'' DROP TABLE t --');").unwrap();
        assert_eq!(split.len(), 1);
    }

    /// A non-INSERT statement is reported together with its position.
    #[test]
    fn rejects_non_insert() {
        let input = "INSERT INTO t VALUES (1); SELECT 1;";
        assert!(matches!(
            split_sql_statements(input).unwrap_err(),
            SplitError::UnexpectedStatement { index: 1, .. }
        ));
    }

    /// An unterminated string literal surfaces as a parse error.
    #[test]
    fn rejects_syntax_errors() {
        let input = "INSERT INTO t VALUES ('unterminated;";
        let failure = split_sql_statements(input).unwrap_err();
        assert!(matches!(failure, SplitError::Parse(_)));
    }

    /// Empty input is an error rather than an empty list.
    #[test]
    fn rejects_empty() {
        let failure = split_sql_statements("").unwrap_err();
        assert_eq!(failure, SplitError::EmptyInput);
    }

    /// The checked-in data-correctness seed splits into its seven INSERTs.
    #[test]
    fn splits_data_correctness_seed() {
        let contents =
            std::fs::read_to_string(concat!(env!("SEEDS_DIR"), "/data_correctness.sql")).unwrap();
        let split = split_sql_statements(&contents).unwrap();
        assert_eq!(split.len(), 7);
    }
}
Loading