Verified commit 2de70638, authored by Jean-Gabriel Doyon (PTO until 2024-04-17), committed by GitLab
Browse files

feat(indexer): replace direct Gitaly access with Rails internal API

parent 03812719
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -3368,6 +3368,7 @@ dependencies = [
 "clickhouse-client",
 "code-graph",
 "datafusion",
 "flate2",
 "futures",
 "gitaly-client",
 "gitlab-client",
@@ -3463,6 +3464,7 @@ dependencies = [
 "bytes",
 "chrono",
 "clickhouse-client",
 "flate2",
 "futures",
 "gitaly-client",
 "gitlab-client",
@@ -3478,6 +3480,7 @@ dependencies = [
 "serde_json",
 "sha2",
 "siphon-proto",
 "tempfile",
 "testcontainers",
 "testcontainers-modules",
 "tokio",
+1 −0
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@ clap = { version = "4.5.57", features = ["derive", "env"] }
dunce = "1.0.5"
async-trait = "0.1.89"
bytes = "1.11.0"
flate2 = "1.1.9"
chrono = { version = "0.4.43", features = ["serde"] }
futures = "0.3.31"
internment = "0.8.6"
+51 −23
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ use tracing::debug;

use crate::config::GitlabClientConfiguration;
use crate::error::GitlabClientError;
use crate::types::RepositoryInfo;
use crate::types::ProjectInfo;

/// JWT issuer — Rails expects this value when validating incoming tokens.
pub const JWT_ISSUER: &str = "gitlab";
@@ -23,7 +23,7 @@ pub const JWT_SUBJECT: &str = "gkg-indexer:code";

/// Custom authentication header used by the Knowledge Graph internal API.
/// The raw JWT token is sent directly as the header value (no `Bearer` prefix).
const AUTH_HEADER: &str = "Gitlab-Kg-Api-Request";
const AUTH_HEADER: &str = "Gitlab-Orbit-Api-Request";

const JWT_EXPIRY_SECONDS: i64 = 300;

@@ -92,39 +92,67 @@ impl GitlabClient {
            .map_err(|e| GitlabClientError::Unexpected(format!("failed to build HTTP client: {e}")))
    }

    pub async fn repository_info(
    /// Fetches metadata for `project_id` from the internal
    /// `/api/v4/internal/orbit/project/{id}/info` endpoint.
    ///
    /// The request is sent through `authenticated_get`, and the response
    /// status is validated by `check_status` before the body is decoded
    /// as JSON into a [`ProjectInfo`].
    ///
    /// # Errors
    ///
    /// Propagates any error from sending the request, from the status check,
    /// or from decoding the response body.
    pub async fn project_info(&self, project_id: i64) -> Result<ProjectInfo, GitlabClientError> {
        let endpoint = format!(
            "{}/api/v4/internal/orbit/project/{}/info",
            self.base_url, project_id
        );
        debug!(project_id, url = %endpoint, "fetching project info from GitLab");

        let resp = self.authenticated_get(&endpoint).await?;
        // Reject non-OK responses before attempting to parse the body.
        Self::check_status(&resp, project_id)?;

        Ok(resp.json::<ProjectInfo>().await?)
    }

    pub async fn download_archive(
        &self,
        project_id: i64,
    ) -> Result<RepositoryInfo, GitlabClientError> {
        let token = self.sign_jwt()?;
        let url = format!(
            "{}/api/v4/internal/knowledge_graph/{}/repository_info",
        ref_name: &str,
    ) -> Result<Vec<u8>, GitlabClientError> {
        let base = format!(
            "{}/api/v4/internal/orbit/project/{}/repository/archive",
            self.base_url, project_id
        );
        let url = reqwest::Url::parse_with_params(&base, &[("ref", ref_name)])
            .map_err(|e| GitlabClientError::Unexpected(format!("invalid URL: {e}")))?;

        debug!(project_id, url = %url, "fetching repository info from GitLab");
        debug!(project_id, ref_name, url = %url, "downloading archive from GitLab");

        let response = self
        let response = self.authenticated_get(url).await?;
        Self::check_status(&response, project_id)?;

        let bytes = response.bytes().await?;
        Ok(bytes.to_vec())
    }

    async fn authenticated_get(
        &self,
        url: impl reqwest::IntoUrl,
    ) -> Result<reqwest::Response, GitlabClientError> {
        let token = self.sign_jwt()?;
        Ok(self
            .http
            .get(&url)
            .get(url)
            .header(AUTH_HEADER, &token)
            .send()
            .await?;
            .await?)
    }

    fn check_status(
        response: &reqwest::Response,
        project_id: i64,
    ) -> Result<(), GitlabClientError> {
        match response.status() {
            StatusCode::OK => {}
            StatusCode::UNAUTHORIZED => return Err(GitlabClientError::Unauthorized),
            StatusCode::NOT_FOUND => return Err(GitlabClientError::NotFound(project_id)),
            status => {
                let body = response.text().await.unwrap_or_default();
                return Err(GitlabClientError::Unexpected(format!(
                    "status {status}: {body}"
                )));
            }
            StatusCode::OK => Ok(()),
            StatusCode::UNAUTHORIZED => Err(GitlabClientError::Unauthorized),
            StatusCode::NOT_FOUND => Err(GitlabClientError::NotFound(project_id)),
            status => Err(GitlabClientError::Unexpected(format!(
                "unexpected status {status}"
            ))),
        }

        let info: RepositoryInfo = response.json().await?;
        Ok(info)
    }

    fn sign_jwt(&self) -> Result<String, GitlabClientError> {
+1 −1
Original line number Diff line number Diff line
@@ -6,4 +6,4 @@ mod types;
pub use client::{GitlabClient, JWT_AUDIENCE, JWT_ISSUER, JWT_SUBJECT};
pub use config::GitlabClientConfiguration;
pub use error::GitlabClientError;
pub use types::{GitalyConnectionInfo, RepositoryInfo};
pub use types::ProjectInfo;
+2 −11
Original line number Diff line number Diff line
/// Per-project Gitaly connection details and repository metadata returned by Rails.
/// Project metadata returned by the `/info` endpoint.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct RepositoryInfo {
pub struct ProjectInfo {
    pub project_id: i64,
    pub gitaly_connection_info: GitalyConnectionInfo,
    pub default_branch: String,
}

/// Gitaly connection details, deserialized with serde.
///
/// NOTE(review): the field descriptions below are inferred from the field
/// names only — confirm against the payload that produces this struct.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct GitalyConnectionInfo {
    /// Presumably the address of the Gitaly server — TODO confirm format.
    pub address: String,
    /// Optional token (presumably for authenticating to Gitaly — verify);
    /// `None` when the payload carries no value for it.
    pub token: Option<String>,
    /// Presumably the Gitaly storage name holding the repository — TODO confirm.
    pub storage: String,
    /// Presumably the repository path within that storage — TODO confirm.
    pub path: String,
}
Loading