Verified Commit dd18daed authored by Michael Usachenko's avatar Michael Usachenko Committed by GitLab
Browse files

feat(ontology): add local_entities and local_exclude_properties settings

parent 9e64eea5
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -25,6 +25,24 @@ settings:
        - {name: target_kind, type: string}
  internal_column_prefix: "_gkg_"
  skip_security_filter_for_entities: [User]
  local_db:
    entities:
      - name: Directory
        exclude_properties: [traversal_path, commit_sha]
      - name: File
        exclude_properties: [traversal_path, commit_sha]
      - name: Definition
        exclude_properties: [traversal_path, commit_sha]
      - name: ImportedSymbol
        exclude_properties: [traversal_path, commit_sha]
    edge_table:
      columns:
        - {name: source_id, type: int64}
        - {name: source_kind, type: string}
        - {name: relationship_kind, type: string}
        - {name: target_id, type: int64}
        - {name: target_kind, type: string}
        - {name: _version, type: int64}
  etl:
    default_watermark: _siphon_replicated_at
    default_deleted: _siphon_deleted
+55 −0
Original line number Diff line number Diff line
@@ -372,6 +372,61 @@
          "items": { "type": "string" },
          "description": "Entity names (e.g. 'User') excluded from traversal-path security filters. Resolved to physical table names at load time. These are entities whose visibility is determined through relationships rather than direct path hierarchy."
        },
        "local_db": {
          "type": "object",
          "description": "Configuration for entities that participate in the local DuckDB graph (CLI indexing).",
          "properties": {
            "entities": {
              "type": "array",
              "items": {
                "type": "object",
                "required": ["name"],
                "properties": {
                  "name": {
                    "type": "string",
                    "description": "Entity name (must reference a declared node)."
                  },
                  "exclude_properties": {
                    "type": "array",
                    "items": { "type": "string" },
                    "description": "Property names excluded from the local table for this entity (e.g. server-only columns like 'traversal_path')."
                  }
                },
                "additionalProperties": false
              },
              "description": "Entities indexed locally, with per-entity property exclusions."
            },
            "edge_table": {
              "type": "object",
              "description": "Edge table schema for the local DuckDB graph.",
              "required": ["columns"],
              "properties": {
                "columns": {
                  "type": "array",
                  "items": {
                    "type": "object",
                    "required": ["name", "type"],
                    "properties": {
                      "name": {
                        "type": "string",
                        "description": "Column name."
                      },
                      "type": {
                        "type": "string",
                        "enum": ["string", "int64", "float64", "boolean", "date", "datetime"],
                        "description": "Column data type."
                      }
                    },
                    "additionalProperties": false
                  },
                  "description": "Columns in the local edge table, in order."
                }
              },
              "additionalProperties": false
            }
          },
          "additionalProperties": false
        },
        "etl": {
          "$ref": "#/definitions/etlSettingsDefinition",
          "description": "Default ETL settings for watermark, deleted, and edge order_by columns."
+122 −0
Original line number Diff line number Diff line
@@ -108,6 +108,11 @@ pub struct Ontology {
    pub(crate) etl_settings: EtlSettings,
    pub(crate) internal_column_prefix: String,
    pub(crate) skip_security_filter_for_tables: Vec<String>,
    /// Local entity configs keyed by entity name. Each entry lists
    /// properties to exclude from the local DuckDB table.
    pub(crate) local_entities: BTreeMap<String, Vec<String>>,
    /// Local edge table columns, if declared.
    pub(crate) local_edge_columns: Vec<EdgeColumn>,
}

impl Default for Ontology {
@@ -152,6 +157,8 @@ impl Ontology {
            },
            internal_column_prefix: "_gkg_".to_string(),
            skip_security_filter_for_tables: Vec::new(),
            local_entities: BTreeMap::new(),
            local_edge_columns: Vec::new(),
        }
    }

@@ -557,6 +564,38 @@ impl Ontology {
        &self.skip_security_filter_for_tables
    }

    /// Names of all entities configured for the local DuckDB graph.
    ///
    /// The names are the keys of the `local_entities` map, so they are
    /// yielded in the map's (sorted) key order.
    #[must_use]
    pub fn local_entity_names(&self) -> Vec<&str> {
        self.local_entities.keys().map(String::as_str).collect()
    }

    /// Fields of a local entity that belong in its local table.
    ///
    /// A field is dropped when it is virtual (`FieldSource::Virtual`) or when
    /// its name appears in the entity's `exclude_properties` list.
    ///
    /// Returns `None` if `entity_name` is not present in `local_entities`.
    ///
    /// # Panics
    ///
    /// Panics if `entity_name` is present in `local_entities` but has no
    /// matching entry in `nodes`.
    #[must_use]
    pub fn local_entity_fields(&self, entity_name: &str) -> Option<Vec<&Field>> {
        let excluded = self.local_entities.get(entity_name)?;
        let node = self
            .nodes
            .get(entity_name)
            .expect("local entity must exist in nodes");

        let kept: Vec<&Field> = node
            .fields
            .iter()
            .filter(|field| {
                let is_virtual = matches!(field.source, FieldSource::Virtual(_));
                let is_excluded = excluded.iter().any(|prop| prop == &field.name);
                !is_virtual && !is_excluded
            })
            .collect();
        Some(kept)
    }

    /// Column definitions of the local edge table, in stored order.
    ///
    /// The slice is empty when no local edge columns have been set.
    #[must_use]
    pub fn local_edge_columns(&self) -> &[EdgeColumn] {
        self.local_edge_columns.as_slice()
    }

    /// Default ORDER BY / dedup key columns for node tables.
    #[must_use]
    pub fn default_entity_sort_key(&self) -> &[String] {
@@ -1731,4 +1770,87 @@ properties:
            .modify_field("User", "bogus", |_| {});
        assert!(result.is_err());
    }

    #[test]
    fn local_entities_loaded_from_ontology() {
        let ontology = Ontology::load_from_dir(fixtures_dir()).expect("should load ontology");
        let local = ontology.local_entity_names();
        // Membership helper so each expectation reads as a single call.
        let has = |name: &str| local.iter().any(|candidate| *candidate == name);

        assert!(has("Directory"));
        assert!(has("File"));
        assert!(has("Definition"));
        assert!(has("ImportedSymbol"));
        // `User` is not listed under local_db.entities.
        assert!(!has("User"));
    }

    #[test]
    fn local_entity_fields_excludes_per_entity_properties() {
        let ontology = Ontology::load_from_dir(fixtures_dir()).expect("should load ontology");
        let names: Vec<&str> = ontology
            .local_entity_fields("Directory")
            .expect("Directory is a local entity")
            .into_iter()
            .map(|field| field.name.as_str())
            .collect();

        // Kept: regular fields and envelope fields that the YAML does not exclude.
        assert!(names.contains(&"id"));
        assert!(names.contains(&"project_id"));
        assert!(names.contains(&"branch"));
        assert!(names.contains(&"path"));
        assert!(names.contains(&"name"));
        // Dropped: listed under exclude_properties for Directory.
        assert!(!names.contains(&"traversal_path"));
        assert!(!names.contains(&"commit_sha"));
    }

    #[test]
    fn local_entity_fields_excludes_virtual_fields() {
        let ontology = Ontology::load_from_dir(fixtures_dir()).expect("should load ontology");
        let names: Vec<&str> = ontology
            .local_entity_fields("Definition")
            .expect("Definition is a local entity")
            .into_iter()
            .map(|field| field.name.as_str())
            .collect();

        // A regular field survives; the virtual `content` field is filtered out.
        assert!(names.contains(&"fqn"));
        assert!(!names.contains(&"content"), "virtual field");
    }

    #[test]
    fn local_entity_fields_returns_none_for_non_local() {
        let ontology = Ontology::load_from_dir(fixtures_dir()).expect("should load ontology");
        // `User` is not configured as a local entity, so the lookup yields nothing.
        let fields = ontology.local_entity_fields("User");
        assert!(fields.is_none());
    }

    #[test]
    fn local_edge_columns_loaded_from_ontology() {
        let ontology = Ontology::load_from_dir(fixtures_dir()).expect("should load ontology");
        let columns = ontology.local_edge_columns();
        assert!(!columns.is_empty());

        // Column order matters: it must match the order declared in the YAML.
        let expected = [
            "source_id",
            "source_kind",
            "relationship_kind",
            "target_id",
            "target_kind",
            "_version",
        ];
        let actual: Vec<&str> = columns.iter().map(|column| column.name.as_str()).collect();
        assert_eq!(actual, expected);
    }

    #[test]
    fn local_exclude_properties_validated_against_fields() {
        // Loading the real ontology must succeed; a typo in the local_db
        // settings is expected to surface as a load failure right here.
        let ontology = Ontology::load_from_dir(fixtures_dir()).expect("should load ontology");

        // Additionally, the exclusions must never strip an entity of every field.
        for entity_name in ontology.local_entity_names() {
            let remaining = ontology
                .local_entity_fields(entity_name)
                .expect("local entity should have fields");
            let has_fields = !remaining.is_empty();
            assert!(
                has_fields,
                "local entity '{entity_name}' should have at least one field after exclusions"
            );
        }
    }
}
+52 −1
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ use rust_embed::Embed;
use serde::Deserialize;
use std::path::Path;

use crate::entities::DomainInfo;
use crate::entities::{DomainInfo, EdgeColumn};
use crate::{Ontology, OntologyError};

pub(crate) use edge::EdgeYaml;
@@ -260,5 +260,56 @@ pub(crate) fn load_with(reader: &impl ReadOntologyFile) -> Result<Ontology, Onto
            .push(node.destination_table.clone());
    }

    // Validate and store local_db entity settings.
    if let Some(local_db) = schema.settings.local_db {
        for entry in local_db.entities {
            let node = ontology.nodes.get(&entry.name).ok_or_else(|| {
                OntologyError::Validation(format!(
                    "local_db.entities: unknown entity '{}'",
                    entry.name
                ))
            })?;

            // Validate exclude_properties reference actual fields.
            let field_names: std::collections::HashSet<&str> =
                node.fields.iter().map(|f| f.name.as_str()).collect();
            for prop in &entry.exclude_properties {
                if !field_names.contains(prop.as_str()) {
                    return Err(OntologyError::Validation(format!(
                        "local_db.entities: exclude_properties entry '{}' \
                         is not a declared property of '{}'",
                        prop, entry.name
                    )));
                }
            }

            ontology
                .local_entities
                .insert(entry.name, entry.exclude_properties);
        }

        if let Some(edge_table) = local_db.edge_table {
            // Validate no duplicate column names.
            let mut seen = std::collections::HashSet::new();
            for col in &edge_table.columns {
                if !seen.insert(&col.name) {
                    return Err(OntologyError::Validation(format!(
                        "local_db.edge_table: duplicate column name '{}'",
                        col.name
                    )));
                }
            }

            ontology.local_edge_columns = edge_table
                .columns
                .into_iter()
                .map(|c| EdgeColumn {
                    name: c.name,
                    data_type: c.data_type,
                })
                .collect();
        }
    }

    Ok(ontology)
}
+22 −0
Original line number Diff line number Diff line
@@ -35,9 +35,31 @@ pub(super) struct SettingsYaml {
    pub internal_column_prefix: String,
    #[serde(default)]
    pub skip_security_filter_for_entities: Vec<String>,
    #[serde(default)]
    pub local_db: Option<LocalSettingsYaml>,
    pub etl: EtlSettingsYaml,
}

/// Deserialized form of the optional `local_db` settings section.
#[derive(Debug, Deserialize)]
pub(super) struct LocalSettingsYaml {
    /// Entities configured for the local graph; empty when the key is omitted.
    #[serde(default)]
    pub entities: Vec<LocalEntityYaml>,
    /// Local edge table definition; `None` when the key is omitted.
    #[serde(default)]
    pub edge_table: Option<LocalEdgeTableYaml>,
}

/// Deserialized form of an `edge_table` entry within the local settings.
#[derive(Debug, Deserialize)]
pub(super) struct LocalEdgeTableYaml {
    /// Column definitions of the edge table. Required: there is no serde
    /// default, so deserialization fails when the key is missing.
    pub columns: Vec<EdgeColumnYaml>,
}

/// Deserialized form of a single entry in the local `entities` list.
#[derive(Debug, Deserialize)]
pub(super) struct LocalEntityYaml {
    /// Entity name. Required.
    pub name: String,
    /// Property names to leave out of the entity's local table; empty when
    /// the key is omitted.
    #[serde(default)]
    pub exclude_properties: Vec<String>,
}

#[derive(Debug, Deserialize)]
pub(super) struct EtlSettingsYaml {
    pub default_watermark: String,