Verified Commit 4ab65f85 authored by Michael Angelo Rivera's avatar Michael Angelo Rivera Committed by GitLab
Browse files

feat(compiler): truncate Date/DateTime group_by keys to bucket units

parent ef90c83f
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
2.0.0
2.1.0
+19 −0
Original line number Diff line number Diff line
@@ -634,6 +634,10 @@
              "$ref": "#/$defs/Identifier",
              "description": "Property name to group by"
            },
            "transform": {
              "$ref": "#/$defs/PropertyTransform",
              "description": "Optional transform applied to the property before grouping (e.g. date truncation)."
            },
            "alias": {
              "$ref": "#/$defs/Identifier",
              "description": "Output column alias for this group key"
@@ -643,6 +647,21 @@
        }
      ]
    },
    "PropertyTransform": {
      "description": "Tagged enum: add new variants by `kind`.",
      "oneOf": [
        {
          "type": "object",
          "description": "Truncate a Date/DateTime property. minute/hour require node_ids or a filter on the property.",
          "required": ["kind", "unit"],
          "properties": {
            "kind": {"const": "truncate"},
            "unit": {"enum": ["minute", "hour", "day", "week", "month", "quarter", "year"]}
          },
          "additionalProperties": false
        }
      ]
    },
    "AggFunction": {
      "type": "string",
      "description": "Available aggregation functions",
+2 −2
Original line number Diff line number Diff line
@@ -106,8 +106,8 @@ mod tests {
        let condensed = condensed_query_schema().expect("Should condense");

        assert!(
            condensed.len() < 20000,
            "Condensed schema should be under 20KB, got {} bytes",
            condensed.len() < 22000,
            "Condensed schema should be under 22KB, got {} bytes",
            condensed.len()
        );
    }
+163 −0
Original line number Diff line number Diff line
@@ -86,6 +86,169 @@ fn aggregation_query() {
    assert!(rendered.contains("GROUP BY"));
}

/// A `truncate`/`month` transform on a `group_by` property key must wrap the
/// column in `toStartOfMonth(...)` and, when no alias is supplied, expose it
/// as `created_at_month`.
#[test]
fn group_by_property_truncate_month_wraps_column() {
    let query = r#"{
        "query_type": "aggregation",
        "nodes": [
            {"id": "u", "entity": "Note", "filters": {"confidential": {"op": "eq", "value": false}}}
        ],
        "aggregations": [{"function": "count", "target": "u", "alias": "n"}],
        "group_by": [
            {"kind": "property", "node": "u", "property": "created_at", "transform": {"kind": "truncate", "unit": "month"}}
        ],
        "limit": 50
    }"#;
    let ontology = test_ontology();
    let ctx = test_ctx();
    let compiled = compile(query, &ontology, &ctx).unwrap();
    let rendered = compiled.base.render();

    // Checked separately from the alias so a failure says which half broke.
    let wraps_column = rendered.contains("toStartOfMonth(u.created_at)");
    assert!(
        wraps_column,
        "expected toStartOfMonth wrapper; got:\n{rendered}"
    );

    let has_default_alias =
        rendered.contains("toStartOfMonth(u.created_at) AS created_at_month");
    assert!(
        has_default_alias,
        "expected default alias `created_at_month`; got:\n{rendered}"
    );
}

/// Every truncation unit must compile (the node is pinned with `node_ids`)
/// and render through its matching `toStartOf*` function.
#[test]
fn group_by_property_truncate_all_units_compile() {
    // Unit name paired with the SQL function expected in the rendered output.
    let cases = [
        ("minute", "toStartOfMinute"),
        ("hour", "toStartOfHour"),
        ("day", "toStartOfDay"),
        ("week", "toStartOfWeek"),
        ("month", "toStartOfMonth"),
        ("quarter", "toStartOfQuarter"),
        ("year", "toStartOfYear"),
    ];

    for (unit, expected) in cases {
        let query = format!(
            r#"{{
                "query_type": "aggregation",
                "nodes": [
                    {{"id": "u", "entity": "Note", "node_ids": [1]}}
                ],
                "aggregations": [{{"function": "count", "target": "u", "alias": "n"}}],
                "group_by": [
                    {{"kind": "property", "node": "u", "property": "created_at", "transform": {{"kind": "truncate", "unit": "{unit}"}}}}
                ],
                "limit": 10
            }}"#
        );
        let compiled = match compile(&query, &test_ontology(), &test_ctx()) {
            Ok(compiled) => compiled,
            Err(e) => panic!("compile failed for unit {unit}: {e:?}"),
        };
        let rendered = compiled.base.render();
        let uses_expected_fn = rendered.contains(expected);
        assert!(
            uses_expected_fn,
            "unit {unit}: expected {expected} in SQL; got:\n{rendered}"
        );
    }
}

/// A `minute` truncation on a node with neither `node_ids` nor filters must
/// fail to compile, and the error must name both the missing selectivity and
/// the offending unit.
#[test]
fn group_by_truncate_minute_without_selectivity_rejected() {
    let query = r#"{
        "query_type": "aggregation",
        "nodes": [
            {"id": "u", "entity": "Note"}
        ],
        "aggregations": [{"function": "count", "target": "u", "alias": "n"}],
        "group_by": [
            {"kind": "property", "node": "u", "property": "created_at", "transform": {"kind": "truncate", "unit": "minute"}}
        ],
        "limit": 10
    }"#;
    let err = compile(query, &test_ontology(), &test_ctx()).unwrap_err();
    let msg = format!("{err:?}");

    // Both fragments must appear in the debug-formatted error.
    let mentions_guard = ["requires either node_ids", "minute"]
        .iter()
        .all(|needle| msg.contains(*needle));
    assert!(
        mentions_guard,
        "expected cardinality-guard rejection; got: {msg}"
    );
}

/// A `minute` truncation is accepted when the node is pinned with `node_ids`,
/// and the rendered SQL wraps the column in `toStartOfMinute(...)`.
#[test]
fn group_by_truncate_minute_with_node_ids_accepted() {
    let json = r#"{
        "query_type": "aggregation",
        "nodes": [
            {"id": "u", "entity": "Note", "node_ids": [1, 2]}
        ],
        "aggregations": [{"function": "count", "target": "u", "alias": "n"}],
        "group_by": [
            {"kind": "property", "node": "u", "property": "created_at", "transform": {"kind": "truncate", "unit": "minute"}}
        ],
        "limit": 10
    }"#;
    let result = compile(json, &test_ontology(), &test_ctx()).unwrap();
    let rendered = result.base.render();
    // Print the SQL on failure, as the sibling truncate tests do, so a
    // regression can be diagnosed from the test output alone.
    assert!(
        rendered.contains("toStartOfMinute(u.created_at)"),
        "expected toStartOfMinute wrapper; got:\n{rendered}"
    );
}

/// An `hour` truncation is accepted when the node carries a filter on the
/// truncated property, and the rendered SQL wraps the column in
/// `toStartOfHour(...)`.
#[test]
fn group_by_truncate_hour_with_property_filter_accepted() {
    let json = r#"{
        "query_type": "aggregation",
        "nodes": [
            {"id": "u", "entity": "Note", "filters": {"created_at": {"op": "gte", "value": "2026-04-01T00:00:00Z"}}}
        ],
        "aggregations": [{"function": "count", "target": "u", "alias": "n"}],
        "group_by": [
            {"kind": "property", "node": "u", "property": "created_at", "transform": {"kind": "truncate", "unit": "hour"}}
        ],
        "limit": 50
    }"#;
    let result = compile(json, &test_ontology(), &test_ctx()).unwrap();
    let rendered = result.base.render();
    // Print the SQL on failure, as the sibling truncate tests do, so a
    // regression can be diagnosed from the test output alone.
    assert!(
        rendered.contains("toStartOfHour(u.created_at)"),
        "expected toStartOfHour wrapper; got:\n{rendered}"
    );
}

/// Truncating the `confidential` property must be rejected with an error
/// stating that a Date or DateTime property is required.
#[test]
fn group_by_truncate_on_non_date_property_rejected() {
    let query = r#"{
        "query_type": "aggregation",
        "nodes": [
            {"id": "u", "entity": "Note", "node_ids": [1]}
        ],
        "aggregations": [{"function": "count", "target": "u", "alias": "n"}],
        "group_by": [
            {"kind": "property", "node": "u", "property": "confidential", "transform": {"kind": "truncate", "unit": "month"}}
        ],
        "limit": 10
    }"#;
    let ontology = test_ontology();
    let ctx = test_ctx();
    let err = compile(query, &ontology, &ctx).unwrap_err();
    let msg = format!("{err:?}");

    let rejected_for_type = msg.contains("requires a Date or DateTime property");
    assert!(
        rejected_for_type,
        "expected data-type rejection; got: {msg}"
    );
}

/// An explicit `alias` on a truncated group key must be used verbatim in the
/// rendered SQL.
#[test]
fn group_by_truncate_custom_alias_preserved() {
    let query = r#"{
        "query_type": "aggregation",
        "nodes": [
            {"id": "u", "entity": "Note", "node_ids": [1]}
        ],
        "aggregations": [{"function": "count", "target": "u", "alias": "n"}],
        "group_by": [
            {"kind": "property", "node": "u", "property": "created_at", "transform": {"kind": "truncate", "unit": "month"}, "alias": "bucket"}
        ],
        "limit": 10
    }"#;
    let ontology = test_ontology();
    let ctx = test_ctx();
    let compiled = compile(query, &ontology, &ctx).unwrap();
    let rendered = compiled.base.render();

    let keeps_alias = rendered.contains("toStartOfMonth(u.created_at) AS bucket");
    assert!(keeps_alias, "expected alias `bucket`; got:\n{rendered}");
}

#[test]
fn path_finding_query() {
    let json = r#"{
+55 −0
Original line number Diff line number Diff line
@@ -140,6 +140,61 @@ fn neighbors() {
    assert_eq!(sql.limit_value(), Some(10));
}

/// Compiling through `compile_local` must render a month truncation as
/// `date_trunc('month', ...)` and must not emit `toStartOfMonth`.
#[test]
fn group_by_truncate_emits_duckdb_date_trunc() {
    let query = r#"{
        "query_type": "aggregation",
        "nodes": [
            {"id": "u", "entity": "Note", "node_ids": [1]}
        ],
        "aggregations": [{"function": "count", "target": "u", "alias": "n"}],
        "group_by": [
            {"kind": "property", "node": "u", "property": "created_at", "transform": {"kind": "truncate", "unit": "month"}, "alias": "bucket"}
        ],
        "limit": 10
    }"#;
    let ontology = test_ontology();
    let compiled = compile_local(query, &ontology).unwrap();
    let rendered = compiled.base.render();

    let uses_date_trunc = rendered.contains("date_trunc('month', u.created_at)");
    assert!(
        uses_date_trunc,
        "expected DuckDB date_trunc('month', ...); got:\n{rendered}"
    );

    let leaks_clickhouse_fn = rendered.contains("toStartOfMonth");
    assert!(
        !leaks_clickhouse_fn,
        "ClickHouse-only toStartOfMonth must not leak into DuckDB SQL:\n{rendered}"
    );
}

/// Every truncation unit compiled through `compile_local` must render as
/// `date_trunc('<unit>', u.created_at)`.
#[test]
fn group_by_truncate_all_units_emit_duckdb_date_trunc() {
    let units = ["minute", "hour", "day", "week", "month", "quarter", "year"];

    for unit in units {
        let query = format!(
            r#"{{
                "query_type": "aggregation",
                "nodes": [
                    {{"id": "u", "entity": "Note", "node_ids": [1]}}
                ],
                "aggregations": [{{"function": "count", "target": "u", "alias": "n"}}],
                "group_by": [
                    {{"kind": "property", "node": "u", "property": "created_at", "transform": {{"kind": "truncate", "unit": "{unit}"}}}}
                ],
                "limit": 10
            }}"#
        );
        let compiled = match compile_local(&query, &test_ontology()) {
            Ok(compiled) => compiled,
            Err(e) => panic!("compile_local failed for unit {unit}: {e:?}"),
        };
        let rendered = compiled.base.render();

        // The unit name is passed straight through as the first argument.
        let expected = format!("date_trunc('{unit}', u.created_at)");
        let uses_date_trunc = rendered.contains(&expected);
        assert!(
            uses_date_trunc,
            "unit {unit}: expected `{expected}` in DuckDB SQL; got:\n{rendered}"
        );
    }
}

#[test]
fn node_ids_expand_params() {
    let sql = parse_duckdb(
Loading