Skip to content

[Fleet Visibility] Implement GraphQL API for CI job performance metrics

Summary

Implement GraphQL API to expose CI job performance metrics from the ClickHouse materialized view.

Background

Part of the implementation for &18548. After creating and backfilling the materialized view (#555974 (closed), #555975), we need to expose the data via GraphQL.

Blocked by #555975: the materialized view must be created and backfilled before it can be queried via GraphQL.

Implementation Details

1. GraphQL Type Definition

Create a new type for job performance statistics:

# ee/app/graphql/types/ci/job_performance_statistics_type.rb
module Types
  module Ci
    # GraphQL object type (`CiJobPerformanceStatistics`) describing aggregated
    # performance figures for a single CI job: identity (name, stage, source,
    # ref), build counters, failure rate and duration statistics.
    #
    # NOTE(review): the resolver's ClickHouse query selects `stage_id`,
    # `p50_duration` and `p95_duration`; how those rows populate `stage` and
    # `duration_statistics` is not visible here - confirm the mapping.
    # NOTE(review): `authorize :read_build` is presumably evaluated against each
    # resolved node; confirm that works for the row objects ClickHouse returns.
    class JobPerformanceStatisticsType < BaseObject
      graphql_name 'CiJobPerformanceStatistics'

      description 'CI job performance statistics'

      authorize :read_build

      # Job identity.
      field :name,
        GraphQL::Types::String,
        null: false,
        description: 'Job name.'

      field :stage,
        GraphQL::Types::String,
        null: false,
        description: 'Job stage.'

      # Aggregated measurements.
      field :duration_statistics,
        ::Types::Ci::DurationStatisticsType,
        null: true,
        description: 'Job duration statistics.'

      field :failure_rate,
        GraphQL::Types::Float,
        null: true,
        description: 'Failure rate (0.0 to 1.0).'

      field :total_builds,
        GraphQL::Types::BigInt,
        null: false,
        description: 'Total number of builds.'

      field :failed_builds,
        GraphQL::Types::BigInt,
        null: false,
        description: 'Number of failed builds.'

      # Grouping dimensions.
      field :source,
        GraphQL::Types::String,
        null: true,
        description: 'Pipeline source.'

      field :ref,
        GraphQL::Types::String,
        null: true,
        description: 'Git ref.'
    end
  end
end

2. Resolver Implementation

# ee/app/graphql/resolvers/ci/job_performance_statistics_resolver.rb
module Resolvers
  module Ci
    # Resolves aggregated CI job performance statistics for a project by
    # querying the `ci_job_performance_daily_mv` ClickHouse materialized view.
    #
    # The resolver is mounted on a project (`object`). It authorizes the
    # current user for `:read_build`, returns `nil` when the feature flag or
    # the licensed feature is unavailable, validates the requested time range
    # and returns at most MAX_RESULTS aggregated rows.
    class JobPerformanceStatisticsResolver < BaseResolver
      include Gitlab::Graphql::Authorize::AuthorizeResource

      # Window applied when the caller does not pass `from_time`.
      DEFAULT_TIME_RANGE = 30.days
      # Largest window a single request may cover.
      MAX_TIME_RANGE = 1.year
      # Upper bound on the number of aggregated rows returned.
      MAX_RESULTS = 100

      DEFAULT_ORDER_CLAUSE = 'ORDER BY p50_duration DESC'

      # Allow-list mapping sort enum values to SQL. Only these fixed strings
      # are ever interpolated into the query text.
      ORDER_CLAUSES = {
        'P50_DURATION_ASC' => 'ORDER BY p50_duration ASC',
        'P50_DURATION_DESC' => 'ORDER BY p50_duration DESC',
        'P95_DURATION_ASC' => 'ORDER BY p95_duration ASC',
        'P95_DURATION_DESC' => 'ORDER BY p95_duration DESC',
        'FAILURE_RATE_ASC' => 'ORDER BY failure_rate ASC',
        'FAILURE_RATE_DESC' => 'ORDER BY failure_rate DESC',
        'NAME_ASC' => 'ORDER BY name ASC',
        'NAME_DESC' => 'ORDER BY name DESC'
      }.freeze

      type Types::Ci::JobPerformanceStatisticsType.connection_type, null: true

      # Ability checked by `authorize!`. The previous hand-written zero-argument
      # `authorize!` override did not match the `authorize!(project)` call site.
      authorize :read_build

      argument :from_time, Types::TimeType,
               required: false,
               description: 'Start of the requested time (in UTC). Defaults to the jobs started in the past 30 days.',
               experiment: { milestone: '18.6' }

      argument :to_time, Types::TimeType,
               required: false,
               description: 'End of the requested time (in UTC). Defaults to the jobs started before the current date.',
               experiment: { milestone: '18.6' }

      argument :job_name, GraphQL::Types::String,
               required: false,
               description: 'Filter by job name (partial match)'

      argument :source, GraphQL::Types::String,
               required: false,
               description: 'Filter by pipeline source'

      argument :ref, GraphQL::Types::String,
               required: false,
               description: 'Filter by Git ref'

      argument :sort_by, Types::Ci::JobPerformanceSortEnum,
               required: false,
               default_value: 'P50_DURATION_DESC',
               description: 'Sort order for results'

      # @param sort_by [String] one of the ORDER_CLAUSES keys
      # @param from_time [Time, nil] start of the window; defaults to
      #   `to_time - DEFAULT_TIME_RANGE`
      # @param to_time [Time, nil] end of the window; defaults to now
      # @param job_name [String, nil] substring the job name must contain
      # @param source [String, nil] exact pipeline source to match
      # @param ref [String, nil] exact Git ref to match
      # @return [Object, nil] rows returned by the ClickHouse client, or nil
      #   when the feature is unavailable or the ClickHouse query fails
      # @raise [Gitlab::Graphql::Errors::ArgumentError] on an invalid time range
      def resolve(sort_by:, from_time: nil, to_time: nil, job_name: nil, source: nil, ref: nil)
        authorize!(project)

        return unless Feature.enabled?(:ci_analytics_job_performance, project)
        return unless project.licensed_feature_available?(:ci_analytics)

        to_time ||= Time.current
        from_time ||= to_time - DEFAULT_TIME_RANGE

        # Validation and authorization errors are raised outside of any rescue
        # so that they reach the GraphQL client instead of turning into `nil`.
        validate_time_range!(from_time, to_time)

        fetch_statistics(build_query(from_time, to_time, job_name, source, ref, sort_by))
      end

      private

      def project
        object
      end

      # Rejects inverted ranges and ranges longer than MAX_TIME_RANGE.
      def validate_time_range!(from_time, to_time)
        if from_time > to_time
          raise Gitlab::Graphql::Errors::ArgumentError, "from_time must be before to_time"
        end

        return unless (to_time - from_time) > MAX_TIME_RANGE

        raise Gitlab::Graphql::Errors::ArgumentError, "Time range cannot exceed 1 year"
      end

      # Runs the query; a ClickHouse failure is tracked and degrades to `nil`
      # (best effort), which is the only error that is intentionally swallowed.
      #
      # NOTE(review): uses `::ClickHouse::Client` as in GitLab's ClickHouse
      # development guide; the earlier draft referenced
      # `::Gitlab::ClickHouse::Client` - confirm the intended constant.
      def fetch_statistics(query)
        ::ClickHouse::Client.select(query, :main)
      rescue StandardError => e
        Gitlab::ErrorTracking.track_exception(e)
        nil
      end

      # Builds a parameterized ClickHouse query. All caller-supplied values are
      # passed as typed placeholders rather than being quoted with the
      # PostgreSQL adapter and interpolated, because ClickHouse string-literal
      # escaping differs from PostgreSQL's.
      #
      # The table name is left unqualified so the database configured for the
      # connection is used instead of a hard-coded development database.
      #
      # NOTE(review): dates are taken in each Time's own zone; the argument
      # descriptions say UTC - confirm whether `.utc` should be applied first.
      # NOTE(review): `%` and `_` inside job_name still act as LIKE wildcards,
      # as they did before - confirm that is acceptable for "partial match".
      def build_query(from_time, to_time, job_name, source, ref, sort_by)
        placeholders = {
          from_date: from_time.to_date.to_s,
          to_date: to_time.to_date.to_s,
          project_id: project.id
        }
        conditions = [
          'date >= {from_date:Date}',
          'date <= {to_date:Date}',
          'project_id = {project_id:UInt64}'
        ]

        if job_name.present?
          conditions << 'name LIKE {job_name_pattern:String}'
          placeholders[:job_name_pattern] = "%#{job_name}%"
        end

        if source.present?
          conditions << 'source = {source:String}'
          placeholders[:source] = source
        end

        if ref.present?
          conditions << 'ref = {ref:String}'
          placeholders[:ref] = ref
        end

        raw_query = <<~SQL
          SELECT
            name,
            stage_id,
            source,
            ref,
            countMerge(total_builds) AS total_builds,
            countMerge(failed_builds) AS failed_builds,
            quantilesMerge(0.5, 0.95)(duration_quantiles) AS duration_percentiles,
            duration_percentiles[1] AS p50_duration,
            duration_percentiles[2] AS p95_duration,
            if(total_builds > 0, failed_builds / total_builds, 0) AS failure_rate
          FROM ci_job_performance_daily_mv
          WHERE #{conditions.join(' AND ')}
          GROUP BY name, stage_id, source, ref
          #{sort_by_to_order_clause(sort_by)}
          LIMIT #{MAX_RESULTS}
        SQL

        ::ClickHouse::Client::Query.new(raw_query: raw_query, placeholders: placeholders)
      end

      # Maps a sort enum value to its ORDER BY clause; unknown values fall back
      # to the default ordering.
      def sort_by_to_order_clause(sort_by)
        ORDER_CLAUSES.fetch(sort_by, DEFAULT_ORDER_CLAUSE)
      end
    end
  end
end

3. Sort Enum Type

# ee/app/graphql/types/ci/job_performance_sort_enum.rb
module Types
  module Ci
    # GraphQL enum (`CiJobPerformanceSort`) listing the orderings accepted for
    # CI job performance statistics. Each GraphQL value name is also used as
    # the Ruby-side value handed to the resolver.
    class JobPerformanceSortEnum < BaseEnum
      graphql_name 'CiJobPerformanceSort'
      description 'Sort order for CI job performance statistics'

      # Enum value name => description, registered in declaration order.
      {
        'P50_DURATION_ASC' => 'P50 duration ascending',
        'P50_DURATION_DESC' => 'P50 duration descending',
        'P95_DURATION_ASC' => 'P95 duration ascending',
        'P95_DURATION_DESC' => 'P95 duration descending',
        'FAILURE_RATE_ASC' => 'Failure rate ascending',
        'FAILURE_RATE_DESC' => 'Failure rate descending',
        'NAME_ASC' => 'Job name ascending',
        'NAME_DESC' => 'Job name descending'
      }.each do |enum_name, enum_description|
        value enum_name, enum_description, value: enum_name
      end
    end
  end
end

4. Add to Project Type

# ee/app/graphql/ee/types/project_type.rb
module EE
  module Types
    # EE extension of the project GraphQL type; adds the CI job performance
    # statistics field when prepended.
    module ProjectType
      extend ActiveSupport::Concern

      prepended do
        # The resolver constant is anchored with a leading `::` so that lookup
        # from inside the EE namespace cannot resolve `Resolvers` to an
        # `EE::Resolvers` module.
        #
        # Marked with `experiment:` and milestone 18.6 to match the
        # experimental arguments declared on the resolver.
        field :ci_job_performance_statistics,
              resolver: ::Resolvers::Ci::JobPerformanceStatisticsResolver,
              description: 'CI job performance statistics',
              complexity: 10,
              experiment: { milestone: '18.6' }
      end
    end
  end
end

5. Example GraphQL Query

# Fetches job performance statistics for one project, filtered by time window,
# job name, pipeline source and ref, ordered by median duration (slowest first).
# The selected fields follow the CiJobPerformanceStatistics type: `stage` and
# `durationStatistics` rather than `stageId` / `p50Duration` / `p95Duration`,
# which that type does not define.
# NOTE(review): `p50` / `p95` are assumed to be fields of the duration
# statistics type - confirm against its definition.
query getJobPerformance($projectPath: ID!) {
  project(fullPath: $projectPath) {
    ciJobPerformanceStatistics(
      fromTime: "2025-01-01T00:00:00Z"
      toTime: "2025-01-15T23:59:59Z"
      jobName: "test"
      source: "push"
      ref: "main"
      sortBy: P50_DURATION_DESC
    ) {
      nodes {
        name
        stage
        source
        ref
        durationStatistics {
          p50
          p95
        }
        failureRate
        totalBuilds
        failedBuilds
      }
      pageInfo {
        hasNextPage
        endCursor
      }
    }
  }
}

Acceptance Criteria

  • GraphQL types are implemented
  • Resolver properly queries ClickHouse
  • Authentication and authorization work correctly
  • Pagination works properly
  • Filtering and sorting work as expected
  • Feature flag ci_analytics_job_performance controls access
  • Appropriate specs are added

Testing

  • Unit tests for the resolver
  • GraphQL request specs
  • Feature flag testing
  • Performance testing with large datasets

Note: This issue was created by Claude based on the implementation discussion in epic &18548.

Edited by 🤖 GitLab Bot 🤖