Soos · Tan Le · Shinya Maeda · Shinya Maeda · renovate · Shinya Maeda
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -47,7 +47,7 @@ include:
  # Upgrades dependencies on a schedule
  # see https://gitlab.com/gitlab-com/gl-infra/common-ci-tasks/-/blob/main/renovate-bot.md
  - project: "gitlab-com/gl-infra/common-ci-tasks"
-    ref: v2.20.4 # renovate:managed
+    ref: v2.21.0 # renovate:managed
    file: renovate-bot.yml

  # Includes a base template for running kaniko easily
@@ -136,7 +136,7 @@ tests:integration:
  needs:
    - build-docker-model-gateway
  services:
-    - name: registry.gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist/model-gateway:${CI_COMMIT_SHORT_SHA}
+    - name: ${TARGET_IMAGE}
      alias: ai-gateway
  variables:
    AIGW_SELF_SIGNED_JWT__SIGNING_KEY: "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQC+ErJOBYMe4/d5\nlcVtqnQEazhGYqyefHQNtfpzQyb/WuPJa5BZu68KeS0fWKcZJluNk/jGXSb3cvei\n28wtnbBlEOdTTOzMpP217rLtoYdnzgJfO7DLj3tUL+JbQHj51bLwgkrHlALFDoUt\nzWaOrs+WO6QVLnIsl+YZ6CrKpc9fXLtlvZp0yYuic1BxKCw8Q2lWjMhj6w1fubwM\ne3MsTMcgBV5ek3twY/vHkU/Q2gjniLZSTBmzaXrHjfbS1au2vQBTsKmtPDATrnTU\nVLxLvAtb7p3bZAZObIj4oDi+2qGQGDoWVaiGYXfRquKHdle7hBLwuvOlmVGAdfHd\nN18QO9kbAgMBAAECggEAQpdJScVrnThv/PRocVXtBJlN43bxlCr0+9K8NoKZ2I/O\ny0P4lD+JgUlX8in8jPafz7e+SjzbiIkKUk/gNkEPSjFEYi5Krh3F12YT4OYdCtSl\nhxjiBUc1BZcrHh415A2m/dUf/tEPsVs3dIcIPcfn6XHmuBcIIDBtiLkKuOmjf/xh\n/2l7QUHNC0Hn1STNvE5CscnqH0/s9dw2/KYhjpZWJsoNOAJ4PWgypxlftdUcspQK\nQL+FWrNL+aa1+nPeR0ArNw3nlkKnzcX3DmIHyD4az1IwrzHqMCh1zv16eqmCBYuN\nn2QrAAkAt+SFVEwjejwN/lNLMg4O6tyMvFnuOddp2QKBgQDhXgmOZgoJJYz7Vd0u\n/U3E4Ma+HysQ7TU3utidAke5YJXSms0UJrOFxgcRKserkXjD9wbyeUcMbrKRNTxm\nPgjGayVCqVc6T3kMHio8CvSs+RYt+jR7CnFIfQbQATR7c8nzy4Z/JNsW213SmXpj\n4S3TQWwGFSrF8h+A098cgFwNGQKBgQDX6IvIumrnQGiCJn8qQW3hKGfJwSl6/e4Z\nfC/o8uFzTtV1aL39AusDKAt4c5DsVUHCZLSH5acFGeeGt4dePpqnr0dpIb02ByGJ\na6tLJm2RwjaiXFOAsedOP12yqTZELi7rinKjVqG50eOvXuV40CVVP9qXzxE9e2DP\nQGe2ZFmqUwKBgFvsCkPNtOw0J8PgKt5erRjPgeDMP7mgtyMrD+1Cn9Sp45JKV7mP\na4v10K4c3+JH7JUprei6tNMKV8VIjIE7bkLYMxN0lMKQ5dOefiWNZm0jD/vi4QMK\nqFgjvuKaiziLauUIr6wucTpqcWNT/Iq+rv1K4u+8NH0Wm+jlAkzSwjkJAoGAG5Wd\nTk7q25KyB3bNpmNnm22jHPatywXoRp9EK7nkLewzf2WbaFjYF7YlCQWSzW7zENf2\n7KndldxCZUbLZ7IN5kCRmg/ycZWlpj34S4ikVQwAGOw8yuNvzuJvoSTXRwyzd+pf\nTRkDXo8/TKeOH8pQCr02u1B8PmOl8bSjy3y0q/sCgYAkdWZ6xLJeS9HlzuuXchl3\numZM4n88lad2yNoCu3aJs2fbVCGCiHyFsThluDU0KD3xE5+RZVpAMTErcCE3HqNK\nn1rbBNBVtUTKeu1Qkw0B1X813oH7omqmZVGJx+hceIKimjKWvD7hQlr90NPoFDFz\n0Laqissu1lxAspLYIulpWg==\n-----END PRIVATE KEY-----"

--- a/.gitlab/CODEOWNERS
+++ b/.gitlab/CODEOWNERS
+* @gitlab-org/maintainers/ai-gateway @gitlab-org/production-engineering @gitlab-org/scalability
+
+^[Development]
 # https://gitlab.com/groups/gitlab-org/maintainers/ai-gateway/-/group_members?with_inherited_permissions=exclude
 * @gitlab-org/maintainers/ai-gateway
+
+^[Infrastructure]
+* @gitlab-org/production-engineering @gitlab-org/scalability
\ No newline at end of file
--- a/.gitlab/ci/lint.gitlab-ci.yml
+++ b/.gitlab/ci/lint.gitlab-ci.yml
@@ -15,7 +15,7 @@ lint:

 lint:doc:
  stage: lint
-  image: registry.gitlab.com/gitlab-org/gitlab-docs/lint-markdown:alpine-3.20-vale-3.4.2-markdownlint2-0.13.0-lychee-0.15.1
+  image: registry.gitlab.com/gitlab-org/gitlab-docs/lint-markdown:alpine-3.20-vale-3.6.1-markdownlint2-0.13.0-lychee-0.15.1
  rules:
    - changes:
        - "**/*.{md}"

--- a/.runway/env-production.yml
+++ b/.runway/env-production.yml
 AIGW_FASTAPI__API_PORT: "8080"
+AIGW_ENVIRONMENT: "production"
 AIGW_GITLAB_URL: "https://gitlab.com"
 AIGW_GITLAB_API_URL: "https://gitlab.com/api/v4/"
 AIGW_CUSTOMER_PORTAL_URL: "https://customers.gitlab.com"

--- a/.runway/env-staging.yml
+++ b/.runway/env-staging.yml
 AIGW_FASTAPI__API_PORT: "8080"
+AIGW_ENVIRONMENT: "staging"
 AIGW_GITLAB_URL: "https://staging.gitlab.com"
 AIGW_GITLAB_API_URL: "https://staging.gitlab.com/api/v4/"
 AIGW_CUSTOMER_PORTAL_URL: "https://customers.staging.gitlab.com"

--- a/.tool-versions
+++ b/.tool-versions
@@ -2,5 +2,6 @@ python 3.10.14 # datasource=github-tags depName=python/cpython
 gcloud 428.0.0 # datasource=github-tags depName=GoogleCloudPlatform/cloud-sdk-docker
 poetry 1.8.3 # datasource=pypi depName=poetry
 ruby 3.2.4 # Temporary dependency until https://gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist/-/issues/447 is done.
-vale 3.6.0 # datasource=github-releases depName=errata-ai/vale
+vale 3.7.0 # datasource=github-releases depName=errata-ai/vale
+nodejs 20.15.1 # Dependency for markdownlint-cli2
 markdownlint-cli2 0.13.0 # datasource=github-tags depName=DavidAnson/markdownlint-cli2
--- a/README.md
+++ b/README.md
@@ -212,16 +212,16 @@ In above diagram, the main components are shown.

 The Client has the following functions:

-1. Determine input parameters
-   1. Stop sequences
-   1. Gather code for the prompt
+1. Determine input parameters.
+   1. Stop sequences.
+   1. Gather code for the prompt.
 1. Send the input parameters to the AI Gateway API.
 1. Parse results from AI Gateway and present them as `inlineCompletions`.

 We are supporting the following clients:

- [GitLab VS Code Extension](https://gitlab.com/gitlab-org/gitlab-vscode-extension)
- [GitLab Language Server for Code Suggestions](https://gitlab.com/gitlab-org/editor-extensions/gitlab-language-server-for-code-suggestions)
+- [GitLab VS Code Extension](https://gitlab.com/gitlab-org/gitlab-vscode-extension).
+- [GitLab Language Server for Code Suggestions](https://gitlab.com/gitlab-org/editor-extensions/gitlab-language-server-for-code-suggestions).

 ## Deployment

@@ -237,10 +237,10 @@ The service overview dashboard is available at [https://dashboards.gitlab.net/d/

 For more information and assistance, please check out:

- [Runway - Handbook](https://about.gitlab.com/handbook/engineering/infrastructure/platforms/tools/runway/)
- [Runway - Group](https://gitlab.com/gitlab-com/gl-infra/platform/runway)
- [Runway - Docs](https://gitlab.com/gitlab-com/gl-infra/platform/runway/docs)
- [Runway - Issue Tracker](https://gitlab.com/groups/gitlab-com/gl-infra/platform/runway/-/issues)
+- [Runway - Handbook](https://about.gitlab.com/handbook/engineering/infrastructure/platforms/tools/runway/).
+- [Runway - Group](https://gitlab.com/gitlab-com/gl-infra/platform/runway).
+- [Runway - Docs](https://gitlab.com/gitlab-com/gl-infra/platform/runway/docs).
+- [Runway - Issue Tracker](https://gitlab.com/groups/gitlab-com/gl-infra/platform/runway/-/issues).
 - `#f_runway` in Slack.

 ## Release

--- a/ai_gateway/agents/base.py
+++ b/ai_gateway/agents/base.py
@@ -66,9 +66,13 @@ class BaseAgentRegistry(ABC):
        pass

    def get_on_behalf(
-        self, user: GitLabUser, agent_id: str, options: Optional[dict[str, Any]] = None
+        self,
+        user: GitLabUser,
+        agent_id: str,
+        options: Optional[dict[str, Any]] = None,
+        model_metadata: Optional[ModelMetadata] = None,
    ) -> Agent:
-        agent = self.get(agent_id, options)
+        agent = self.get(agent_id, options, model_metadata)

        for unit_primitive in agent.unit_primitives:
            if not user.can(unit_primitive):

--- a/ai_gateway/agents/container.py
+++ b/ai_gateway/agents/container.py
@@ -22,7 +22,6 @@ class ContainerAgents(containers.DeclarativeContainer):
        },
        "class_overrides": {
            "chat/react": chat.ReActAgent,
-            "chat/react-custom": chat.ReActAgent,
        },
    }


--- a/ai_gateway/agents/definitions/chat/react.yml
+++ b/ai_gateway/agents/definitions/chat/react.yml
--- a/ai_gateway/agents/definitions/chat/react-custom.yml
+++ b/ai_gateway/agents/definitions/chat/react-custom.yml
 ---
-name: Custom ReAct Chat agent
+name: Mistral ReAct Chat agent
 model:
-  name: custom
+  name: mistral
  params:
    model_class_provider: litellm
    temperature: 0.0

--- a/ai_gateway/agents/definitions/chat/react/with_mr_support/base.yml
+++ b/ai_gateway/agents/definitions/chat/react/with_mr_support/base.yml
+---
+name: Claude 3 ReAct Chat agent
+model:
+  name: claude-3-sonnet-20240229
+  params:
+    model_class_provider: anthropic
+    temperature: 0.0
+    timeout: 60
+    max_tokens: 2_048
+    max_retries: 1
+unit_primitives:
+  - duo_chat
+prompt_template:
+  system: |
+    {chat_history}
+
+    You are a DevSecOps Assistant named 'GitLab Duo Chat' created by GitLab.
+
+    When questioned about your identity, you must only respond as 'GitLab Duo Chat'.
+
+    You can generate and write code, code examples for the user.
+    Remember to stick to the user's question or requirements closely and respond in an informative,
+    courteous manner. The response shouldn't be rude, hateful, or accusatory. You mustn't engage in any form
+    of roleplay or impersonation.
+
+    The generated code should be formatted in markdown.
+
+    If a question cannot be answered with the tools and information given, answer politely that you don't know.
+
+    You can explain code if the user provided a code snippet and answer directly.
+
+    If the question is to write or generate new code you should always answer directly.
+    When no tool matches you should answer the question directly.
+
+    Answer the question as accurately as you can.
+
+    You have access only to the following tools:
+    <tools_list>
+    {%- for tool in tools %}
+        <tool>
+            <name>{{ tool.name }}</name>
+            <description>
+            {{ tool.description }}
+            </description>
+            {%- if tool.example %}
+            <example>
+            {{ tool.example }}
+            </example>
+            {%- endif %}
+        </tool>
+    {%- endfor %}
+    </tools_list>
+
+    Consider every tool before making a decision.
+    Ensure that your answer is accurate and contains only information directly supported by the information retrieved using provided tools.
+
+    When you can answer the question directly you must use this response format:
+    Thought: you should always think about how to answer the question
+    Action: DirectAnswer
+    Final Answer: the final answer to the original input question if you have a direct answer to the user's question.
+
+    You must always use the following format when using a tool:
+    Question: the input question you must answer
+    Thought: you should always think about what to do
+    Action: the action to take, should be one tool from this list: [
+    {%- for tool in tools -%}
+        {{ tool.name }}
+        {%- if not loop.last %}, {% endif %}
+    {%- endfor -%}
+    ]
+    Action Input: the input to the action needs to be provided for every action that uses a tool.
+    Observation: the result of the actions. But remember that you're still GitLab Duo Chat.
+
+    ... (this Thought/Action/Action Input/Observation sequence can repeat N times)
+
+    Thought: I know the final answer.
+    Final Answer: the final answer to the original input question.
+
+    When concluding your response, provide the final answer as "Final Answer:".
+    It should contain everything that user needs to see, including answer from "Observation" section.
+
+    {%- if current_file %}
+        {%- if current_file.selected_code %}
+            User selected code below enclosed in <code></code> tags in file {{ current_file.file_path }} to work with:
+
+            <code>
+              {{ current_file.data }}
+            </code>
+        {%- else %}
+            The current code file that user sees is {{ current_file.file_path }} and has the following content:
+            <content>
+            {{ current_file.data }}
+            </content>
+       {%- endif %}
+    {%- endif %}
+
+    You have access to the following GitLab resources:
+    {%- for tool in tools -%}
+        {% if tool.resource -%}
+            {{ tool.resource }}
+            {%- if not loop.last %}, {% endif %}
+        {%- endif %}
+    {%- endfor -%}.
+    You also have access to all information that can be helpful to someone working in software development of any kind.
+    At the moment, you do not have access to the following GitLab resources: Pipelines, Vulnerabilities.
+    At the moment, you do not have the ability to search Issues or Epics based on a description or keywords.
+    You can only read information about a specific issue/epic IF the user is on the specific issue/epic's page, or provides a URL or ID.
+    Do not use the issue_reader or epic_reader tool if you do not have these specified identifiers.
+
+    If GitLab resource of issue or epic type is present and is directly relevant to the question,
+    include the following section at the end of your response:
+    'Sources:' followed by the corresponding GitLab resource link named after the title of the resource.
+    Format the link using Markdown syntax ([title](link)) for it to be clickable.
+
+    Ask user to leave feedback.
+
+    {{context_content}}
+
+    Begin!
+  user: |
+    Question: {question}
+  assistant: |
+    {agent_scratchpad}
+stop:
+  - "Observation:"
--- a/ai_gateway/agents/definitions/code_suggestions/completions/codegemma.yml
+++ b/ai_gateway/agents/definitions/code_suggestions/completions/codegemma.yml
+---
+# Agent definition for code completions; the model is named `codegemma` and is
+# resolved through the `litellm` model class provider.
+name: 'Codegemma Code Completions'
+model:
+  name: codegemma
+  params:
+    model_class_provider: litellm
+    timeout: 60
+    temperature: 0.95
+    max_tokens: 128
+    max_retries: 1
+unit_primitives:
+  - code_suggestions
+prompt_template:
+  # Fill-in-the-middle prompt: `{prefix}` and `{suffix}` are placeholders that
+  # sit between the FIM marker tokens.
+  # NOTE(review): the `|` block scalar keeps one trailing newline after
+  # `<|fim_middle|>`; confirm that is intended (`|-` would strip it).
+  user: |
+    <|fim_prefix|>{prefix}<|fim_suffix|>{suffix}<|fim_middle|>
+# Generation stops when any of these marker tokens is produced.
+stop:
+  - '<|fim_prefix|>'
+  - '<|fim_suffix|>'
+  - '<|fim_middle|>'
+  - '<|file_separator|>'
--- a/ai_gateway/agents/registry.py
+++ b/ai_gateway/agents/registry.py
@@ -33,11 +33,13 @@ class LocalAgentRegistry(BaseAgentRegistry):
        self.agents_registered = agents_registered
        self.model_factories = model_factories

-    def _resolve_id(self, agent_id: str) -> str:
-        _, _, agent_type = agent_id.partition("/")
-        if agent_type:
-            # the `agent_id` value is already in the format of - `first/last`
-            return agent_id
+    def _resolve_id(
+        self,
+        agent_id: str,
+        model_metadata: Optional[ModelMetadata] = None,
+    ) -> str:
+        """Build the registry key for ``agent_id``.
+
+        Args:
+            agent_id: Agent identifier without the trailing model segment.
+            model_metadata: Optional model metadata; when truthy, its ``name``
+                is used as the trailing segment of the key.
+
+        Returns:
+            ``"<agent_id>/<model_metadata.name>"`` when ``model_metadata`` is
+            truthy, otherwise ``"<agent_id>/<self.key_agent_type_base>"``.
+        """
+        if model_metadata:
+            return f"{agent_id}/{model_metadata.name}"

        return f"{agent_id}/{self.key_agent_type_base}"

@@ -45,7 +47,7 @@ class LocalAgentRegistry(BaseAgentRegistry):
        self,
        config_model: ModelConfig,
        model_metadata: Optional[ModelMetadata] = None,
-    ) -> BaseChatModel:
+    ) -> Runnable:
        model_class_provider = config_model.params.model_class_provider
        if model_factory := self.model_factories.get(model_class_provider, None):
            return model_factory(
@@ -63,10 +65,10 @@ class LocalAgentRegistry(BaseAgentRegistry):
        options: Optional[dict[str, Any]] = None,
        model_metadata: Optional[ModelMetadata] = None,
    ) -> Agent:
-        agent_id = self._resolve_id(agent_id)
+        agent_id = self._resolve_id(agent_id, model_metadata)
        klass, config = self.agents_registered[agent_id]

-        model: Runnable = self._get_model(config.model, model_metadata)
+        model = self._get_model(config.model, model_metadata)

        if config.stop:
            model = model.bind(stop=config.stop)
@@ -93,15 +95,19 @@ class LocalAgentRegistry(BaseAgentRegistry):

        agents_definitions_dir = Path(__file__).parent / "definitions"
        agents_registered = {}
-        for path in agents_definitions_dir.glob("*/*.yml"):
-            agent_id = str(
-                # E.g., "chat/react", "generate_description/base", etc.
+
+        for path in agents_definitions_dir.glob("**/*.yml"):
+            agent_id_with_model_name = str(
+                # E.g., "chat/react/base", "generate_description/mistral", etc.
                path.relative_to(agents_definitions_dir).with_suffix("")
            )

+            # Remove model name, for example: to receive "chat/react" from "chat/react/mistral"
+            agent_id, _, _ = agent_id_with_model_name.rpartition("/")
+
            with open(path, "r") as fp:
                klass = class_overrides.get(agent_id, Agent)
-                agents_registered[agent_id] = AgentRegistered(
+                agents_registered[agent_id_with_model_name] = AgentRegistered(
                    klass=klass, config=AgentConfig(**yaml.safe_load(fp))
                )

@@ -113,7 +119,7 @@ class CustomModelsAgentRegistry(LocalAgentRegistry):
        self,
        config_model: ModelConfig,
        model_metadata: Optional[ModelMetadata] = None,
-    ) -> BaseChatModel:
+    ) -> Runnable:
        chat_model = super()._get_model(config_model)

        if model_metadata is None:
@@ -125,14 +131,3 @@ class CustomModelsAgentRegistry(LocalAgentRegistry):
            custom_llm_provider=model_metadata.provider,
            api_key=model_metadata.api_key,
        )
-
-    def get(
-        self,
-        agent_id: str,
-        options: Optional[dict[str, Any]] = None,
-        model_metadata: Optional[ModelMetadata] = None,
-    ) -> Agent:
-        if model_metadata is not None:
-            agent_id = f"{agent_id}-custom"
-
-        return super().get(agent_id, options, model_metadata)
--- a/ai_gateway/agents/typing.py
+++ b/ai_gateway/agents/typing.py
 from typing import Annotated, Optional

-from pydantic import AnyUrl, BaseModel, StringConstraints, UrlConstraints
+from pydantic import AnyUrl, BaseModel, StringConstraints, UrlConstraints, validator
+
+STUBBED_API_KEY = "<api-key>"


 class ModelMetadata(BaseModel):
@@ -8,3 +10,9 @@ class ModelMetadata(BaseModel):
    name: Annotated[str, StringConstraints(max_length=100)]
    provider: Annotated[str, StringConstraints(max_length=100)]
    api_key: Optional[Annotated[str, StringConstraints(max_length=100)]] = None
+
+    # OpenAI client requires api key to be set
+    @validator("api_key", pre=True, always=True)
+    @classmethod
+    def set_stubbed_api_key_if_empty(cls, v):
+        """Substitute ``STUBBED_API_KEY`` for a falsy ``api_key`` value.
+
+        Any falsy input (e.g. ``None`` or ``""``) is replaced by the stub;
+        a truthy value is returned unchanged.
+
+        NOTE(review): per Pydantic's ``validator`` semantics, ``pre=True``
+        should run this before the field's own validation and ``always=True``
+        should run it even when the field is omitted -- confirm against the
+        installed Pydantic version.
+        """
+        return v or STUBBED_API_KEY
--- a/ai_gateway/api/middleware.py
+++ b/ai_gateway/api/middleware.py
 import logging
 import time
 import traceback
+from datetime import datetime
 from typing import Optional, Tuple

 import structlog
@@ -29,6 +30,7 @@ from ai_gateway.auth import AuthProvider, UserClaims
 from ai_gateway.auth.self_signed_jwt import SELF_SIGNED_TOKEN_ISSUER
 from ai_gateway.auth.user import GitLabUser
 from ai_gateway.instrumentators.base import Telemetry, TelemetryInstrumentator
+from ai_gateway.internal_events import EventContext, current_event_context
 from ai_gateway.tracking.errors import log_exception

 __all__ = [
@@ -288,6 +290,40 @@ class MiddlewareAuthentication(Middleware):
        )


+class InternalEventMiddleware:
+    """ASGI middleware that records an ``EventContext`` for each HTTP request.
+
+    When enabled, and when the request path is not skipped by the path
+    resolver, an ``EventContext`` is built from the configured environment,
+    the request headers named by the ``X_GITLAB_*_HEADER`` constants and the
+    current correlation ID. It is stored in ``current_event_context`` before
+    the wrapped app is called.
+    """
+
+    def __init__(self, app, skip_endpoints, enabled, environment):
+        """Keep the wrapped app and the settings consulted on every call.
+
+        Args:
+            app: The wrapped ASGI application.
+            skip_endpoints: Passed to ``_PathResolver.from_optional_list`` to
+                decide which request paths are left untouched.
+            enabled: When falsy, every call is forwarded without any context.
+            environment: Value copied into ``EventContext.environment``.
+        """
+        self.app = app
+        self.environment = environment
+        self.enabled = enabled
+        self.path_resolver = _PathResolver.from_optional_list(skip_endpoints)
+
+    async def __call__(self, scope, receive, send):
+        """Set the event context when applicable, then delegate to the app.
+
+        Non-HTTP scopes, a disabled middleware and skipped paths are forwarded
+        to the wrapped app without setting ``current_event_context``.
+        """
+        if scope["type"] == "http" and self.enabled:
+            request = Request(scope)
+
+            if not self.path_resolver.skip_path(request.url.path):
+                headers = request.headers
+                event_context = EventContext(
+                    environment=self.environment,
+                    source="ai-gateway-python",
+                    realm=headers.get(X_GITLAB_REALM_HEADER),
+                    instance_id=headers.get(X_GITLAB_INSTANCE_ID_HEADER),
+                    host_name=headers.get(X_GITLAB_HOST_NAME_HEADER),
+                    instance_version=headers.get(X_GITLAB_VERSION_HEADER),
+                    global_user_id=headers.get(X_GITLAB_GLOBAL_USER_ID_HEADER),
+                    # NOTE(review): datetime.now() is naive local time --
+                    # confirm consumers do not expect a UTC/offset timestamp.
+                    context_generated_at=datetime.now().isoformat(),
+                    correlation_id=correlation_id.get(),
+                )
+                current_event_context.set(event_context)
+
+        await self.app(scope, receive, send)
+
+
 class MiddlewareModelTelemetry(Middleware):
    class TelemetryHeadersMiddleware(BaseHTTPMiddleware):
        def __init__(self, path_resolver: _PathResolver, *args, **kwargs):

--- a/ai_gateway/api/server.py
+++ b/ai_gateway/api/server.py
@@ -15,6 +15,7 @@ from starlette_context.middleware import RawContextMiddleware

 from ai_gateway.agents.instrumentator import AgentInstrumentator
 from ai_gateway.api.middleware import (
+    InternalEventMiddleware,
    MiddlewareAuthentication,
    MiddlewareLogRequest,
    MiddlewareModelTelemetry,
@@ -43,7 +44,6 @@ async def lifespan(app: FastAPI):
    config = app.extra["extra"]["config"]
    container_application = ContainerApplication()
    container_application.config.from_dict(config.model_dump())
-    container_application.init_resources()

    if config.instrumentator.thread_monitoring_enabled:
        loop = asyncio.get_running_loop()
@@ -57,14 +57,12 @@ async def lifespan(app: FastAPI):

    yield

-    container_application.shutdown_resources()
-

 def create_fast_api_server(config: Config):

    fastapi_app = FastAPI(
-        title="GitLab Code Suggestions",
-        description="GitLab Code Suggestions API to serve code completion predictions",
+        title="GitLab AI Gateway",
+        description="GitLab AI Gateway API to execute AI actions",
        openapi_url=config.fastapi.openapi_url,
        docs_url=config.fastapi.docs_url,
        redoc_url=config.fastapi.redoc_url,
@@ -95,6 +93,12 @@ def create_fast_api_server(config: Config):
                bypass_auth_with_header=config.auth.bypass_external_with_header,
                skip_endpoints=_SKIP_ENDPOINTS,
            ),
+            Middleware(
+                InternalEventMiddleware,
+                skip_endpoints=_SKIP_ENDPOINTS,
+                enabled=config.snowplow.enabled,
+                environment=config.environment,
+            ),
            MiddlewareModelTelemetry(skip_endpoints=_SKIP_ENDPOINTS),
        ],
        extra={"config": config},

--- a/ai_gateway/api/v1/chat/agent.py
+++ b/ai_gateway/api/v1/chat/agent.py
@@ -49,6 +49,10 @@ CHAT_INVOKABLES = [
        name="summarize_comments",
        unit_primitive=GitLabUnitPrimitive.SUMMARIZE_COMMENTS,
    ),
+    ChatInvokable(
+        name="troubleshoot_job",
+        unit_primitive=GitLabUnitPrimitive.TROUBLESHOOT_JOB,
+    ),
    # Deprecated. Added for backward compatibility.
    # Please, refer to `v2/chat/agent` for additional details.
    ChatInvokable(name="agent", unit_primitive=GitLabUnitPrimitive.DUO_CHAT),
@@ -74,11 +78,12 @@ async def chat(
    payload = prompt_component.payload

    try:
-        if payload.provider == KindModelProvider.LITELLM:
+        if payload.provider in (KindModelProvider.LITELLM, KindModelProvider.MISTRALAI):
            model = litellm_factory(
                name=payload.model,
                endpoint=payload.model_endpoint,
                api_key=payload.model_api_key,
+                provider=payload.provider,
            )

            completion = await model.generate(

--- a/ai_gateway/api/v1/proxy/anthropic.py
+++ b/ai_gateway/api/v1/proxy/anthropic.py
@@ -22,8 +22,8 @@ router = APIRouter()


 @router.post(f"/{KindModelProvider.ANTHROPIC.value}" + "/{path:path}")
-@feature_categories(FEATURE_CATEGORIES_FOR_PROXY_ENDPOINTS)
 @authorize_with_unit_primitive_header()
+@feature_categories(FEATURE_CATEGORIES_FOR_PROXY_ENDPOINTS)
 async def anthropic(
    request: Request,
    background_tasks: BackgroundTasks,

--- a/ai_gateway/api/v1/proxy/vertex_ai.py
+++ b/ai_gateway/api/v1/proxy/vertex_ai.py
@@ -22,8 +22,8 @@ router = APIRouter()


 @router.post(f"/{KindModelProvider.VERTEX_AI.value}" + "/{path:path}")
-@feature_categories(FEATURE_CATEGORIES_FOR_PROXY_ENDPOINTS)
 @authorize_with_unit_primitive_header()
+@feature_categories(FEATURE_CATEGORIES_FOR_PROXY_ENDPOINTS)
 async def vertex_ai(
    request: Request,
    background_tasks: BackgroundTasks,