Skip to content
Commits on Source (68)
......@@ -47,7 +47,7 @@ include:
# Upgrades dependencies on a schedule
# see https://gitlab.com/gitlab-com/gl-infra/common-ci-tasks/-/blob/main/renovate-bot.md
- project: "gitlab-com/gl-infra/common-ci-tasks"
ref: v2.20.4 # renovate:managed
ref: v2.21.0 # renovate:managed
file: renovate-bot.yml
# Includes a base template for running kaniko easily
......@@ -136,7 +136,7 @@ tests:integration:
needs:
- build-docker-model-gateway
services:
- name: registry.gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist/model-gateway:${CI_COMMIT_SHORT_SHA}
- name: ${TARGET_IMAGE}
alias: ai-gateway
variables:
AIGW_SELF_SIGNED_JWT__SIGNING_KEY: "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQC+ErJOBYMe4/d5\nlcVtqnQEazhGYqyefHQNtfpzQyb/WuPJa5BZu68KeS0fWKcZJluNk/jGXSb3cvei\n28wtnbBlEOdTTOzMpP217rLtoYdnzgJfO7DLj3tUL+JbQHj51bLwgkrHlALFDoUt\nzWaOrs+WO6QVLnIsl+YZ6CrKpc9fXLtlvZp0yYuic1BxKCw8Q2lWjMhj6w1fubwM\ne3MsTMcgBV5ek3twY/vHkU/Q2gjniLZSTBmzaXrHjfbS1au2vQBTsKmtPDATrnTU\nVLxLvAtb7p3bZAZObIj4oDi+2qGQGDoWVaiGYXfRquKHdle7hBLwuvOlmVGAdfHd\nN18QO9kbAgMBAAECggEAQpdJScVrnThv/PRocVXtBJlN43bxlCr0+9K8NoKZ2I/O\ny0P4lD+JgUlX8in8jPafz7e+SjzbiIkKUk/gNkEPSjFEYi5Krh3F12YT4OYdCtSl\nhxjiBUc1BZcrHh415A2m/dUf/tEPsVs3dIcIPcfn6XHmuBcIIDBtiLkKuOmjf/xh\n/2l7QUHNC0Hn1STNvE5CscnqH0/s9dw2/KYhjpZWJsoNOAJ4PWgypxlftdUcspQK\nQL+FWrNL+aa1+nPeR0ArNw3nlkKnzcX3DmIHyD4az1IwrzHqMCh1zv16eqmCBYuN\nn2QrAAkAt+SFVEwjejwN/lNLMg4O6tyMvFnuOddp2QKBgQDhXgmOZgoJJYz7Vd0u\n/U3E4Ma+HysQ7TU3utidAke5YJXSms0UJrOFxgcRKserkXjD9wbyeUcMbrKRNTxm\nPgjGayVCqVc6T3kMHio8CvSs+RYt+jR7CnFIfQbQATR7c8nzy4Z/JNsW213SmXpj\n4S3TQWwGFSrF8h+A098cgFwNGQKBgQDX6IvIumrnQGiCJn8qQW3hKGfJwSl6/e4Z\nfC/o8uFzTtV1aL39AusDKAt4c5DsVUHCZLSH5acFGeeGt4dePpqnr0dpIb02ByGJ\na6tLJm2RwjaiXFOAsedOP12yqTZELi7rinKjVqG50eOvXuV40CVVP9qXzxE9e2DP\nQGe2ZFmqUwKBgFvsCkPNtOw0J8PgKt5erRjPgeDMP7mgtyMrD+1Cn9Sp45JKV7mP\na4v10K4c3+JH7JUprei6tNMKV8VIjIE7bkLYMxN0lMKQ5dOefiWNZm0jD/vi4QMK\nqFgjvuKaiziLauUIr6wucTpqcWNT/Iq+rv1K4u+8NH0Wm+jlAkzSwjkJAoGAG5Wd\nTk7q25KyB3bNpmNnm22jHPatywXoRp9EK7nkLewzf2WbaFjYF7YlCQWSzW7zENf2\n7KndldxCZUbLZ7IN5kCRmg/ycZWlpj34S4ikVQwAGOw8yuNvzuJvoSTXRwyzd+pf\nTRkDXo8/TKeOH8pQCr02u1B8PmOl8bSjy3y0q/sCgYAkdWZ6xLJeS9HlzuuXchl3\numZM4n88lad2yNoCu3aJs2fbVCGCiHyFsThluDU0KD3xE5+RZVpAMTErcCE3HqNK\nn1rbBNBVtUTKeu1Qkw0B1X813oH7omqmZVGJx+hceIKimjKWvD7hQlr90NPoFDFz\n0Laqissu1lxAspLYIulpWg==\n-----END PRIVATE KEY-----"
......
* @gitlab-org/maintainers/ai-gateway @gitlab-org/production-engineering @gitlab-org/scalability
^[Development]
# https://gitlab.com/groups/gitlab-org/maintainers/ai-gateway/-/group_members?with_inherited_permissions=exclude
* @gitlab-org/maintainers/ai-gateway
^[Infrastructure]
* @gitlab-org/production-engineering @gitlab-org/scalability
\ No newline at end of file
......@@ -15,7 +15,7 @@ lint:
lint:doc:
stage: lint
image: registry.gitlab.com/gitlab-org/gitlab-docs/lint-markdown:alpine-3.20-vale-3.4.2-markdownlint2-0.13.0-lychee-0.15.1
image: registry.gitlab.com/gitlab-org/gitlab-docs/lint-markdown:alpine-3.20-vale-3.6.1-markdownlint2-0.13.0-lychee-0.15.1
rules:
- changes:
- "**/*.{md}"
......
AIGW_FASTAPI__API_PORT: "8080"
AIGW_ENVIRONMENT: "production"
AIGW_GITLAB_URL: "https://gitlab.com"
AIGW_GITLAB_API_URL: "https://gitlab.com/api/v4/"
AIGW_CUSTOMER_PORTAL_URL: "https://customers.gitlab.com"
......
AIGW_FASTAPI__API_PORT: "8080"
AIGW_ENVIRONMENT: "staging"
AIGW_GITLAB_URL: "https://staging.gitlab.com"
AIGW_GITLAB_API_URL: "https://staging.gitlab.com/api/v4/"
AIGW_CUSTOMER_PORTAL_URL: "https://customers.staging.gitlab.com"
......
......@@ -2,5 +2,6 @@ python 3.10.14 # datasource=github-tags depName=python/cpython
gcloud 428.0.0 # datasource=github-tags depName=GoogleCloudPlatform/cloud-sdk-docker
poetry 1.8.3 # datasource=pypi depName=poetry
ruby 3.2.4 # Temporary dependency until https://gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist/-/issues/447 is done.
vale 3.6.0 # datasource=github-releases depName=errata-ai/vale
vale 3.7.0 # datasource=github-releases depName=errata-ai/vale
nodejs 20.15.1 # Dependency for markdownlint-cli2
markdownlint-cli2 0.13.0 # datasource=github-tags depName=DavidAnson/markdownlint-cli2
......@@ -212,16 +212,16 @@ In above diagram, the main components are shown.
The Client has the following functions:
1. Determine input parameters
1. Stop sequences
1. Gather code for the prompt
1. Determine input parameters.
1. Stop sequences.
1. Gather code for the prompt.
1. Send the input parameters to the AI Gateway API.
1. Parse results from AI Gateway and present them as `inlineCompletions`.
We are supporting the following clients:
- [GitLab VS Code Extension](https://gitlab.com/gitlab-org/gitlab-vscode-extension)
- [GitLab Language Server for Code Suggestions](https://gitlab.com/gitlab-org/editor-extensions/gitlab-language-server-for-code-suggestions)
- [GitLab VS Code Extension](https://gitlab.com/gitlab-org/gitlab-vscode-extension).
- [GitLab Language Server for Code Suggestions](https://gitlab.com/gitlab-org/editor-extensions/gitlab-language-server-for-code-suggestions).
## Deployment
......@@ -237,10 +237,10 @@ The service overview dashboard is available at [https://dashboards.gitlab.net/d/
For more information and assistance, please check out:
- [Runway - Handbook](https://about.gitlab.com/handbook/engineering/infrastructure/platforms/tools/runway/)
- [Runway - Group](https://gitlab.com/gitlab-com/gl-infra/platform/runway)
- [Runway - Docs](https://gitlab.com/gitlab-com/gl-infra/platform/runway/docs)
- [Runway - Issue Tracker](https://gitlab.com/groups/gitlab-com/gl-infra/platform/runway/-/issues)
- [Runway - Handbook](https://about.gitlab.com/handbook/engineering/infrastructure/platforms/tools/runway/).
- [Runway - Group](https://gitlab.com/gitlab-com/gl-infra/platform/runway).
- [Runway - Docs](https://gitlab.com/gitlab-com/gl-infra/platform/runway/docs).
- [Runway - Issue Tracker](https://gitlab.com/groups/gitlab-com/gl-infra/platform/runway/-/issues).
- `#f_runway` in Slack.
## Release
......
......@@ -66,9 +66,13 @@ class BaseAgentRegistry(ABC):
pass
def get_on_behalf(
self, user: GitLabUser, agent_id: str, options: Optional[dict[str, Any]] = None
self,
user: GitLabUser,
agent_id: str,
options: Optional[dict[str, Any]] = None,
model_metadata: Optional[ModelMetadata] = None,
) -> Agent:
agent = self.get(agent_id, options)
agent = self.get(agent_id, options, model_metadata)
for unit_primitive in agent.unit_primitives:
if not user.can(unit_primitive):
......
......@@ -22,7 +22,6 @@ class ContainerAgents(containers.DeclarativeContainer):
},
"class_overrides": {
"chat/react": chat.ReActAgent,
"chat/react-custom": chat.ReActAgent,
},
}
......
---
name: Custom ReAct Chat agent
name: Mistral ReAct Chat agent
model:
name: custom
name: mistral
params:
model_class_provider: litellm
temperature: 0.0
......
---
# Agent definition for the ReAct-style Duo Chat agent backed by Claude 3 Sonnet.
name: Claude 3 ReAct Chat agent
model:
  name: claude-3-sonnet-20240229
  params:
    # Key the registry uses to look up the model factory.
    model_class_provider: anthropic
    temperature: 0.0
    timeout: 60
    # Written without a digit separator: `2_048` is only an integer under
    # YAML 1.1 loaders; plain `2048` is an integer everywhere.
    max_tokens: 2048
    max_retries: 1
unit_primitives:
  - duo_chat
prompt_template:
  # The system prompt mixes Jinja control blocks (`{% ... %}`) and Jinja
  # expressions (`{{ ... }}`). The file path in the "current code file"
  # sentence uses `{{ current_file.file_path }}`, matching the selected-code
  # branch above it.
  # NOTE(review): `{chat_history}`, `{question}` and `{agent_scratchpad}` use
  # single braces, unlike the Jinja expressions - confirm the consumer
  # substitutes them as intended.
  system: |
    {chat_history}
    You are a DevSecOps Assistant named 'GitLab Duo Chat' created by GitLab.
    When questioned about your identity, you must only respond as 'GitLab Duo Chat'.
    You can generate and write code, code examples for the user.
    Remember to stick to the user's question or requirements closely and respond in an informative,
    courteous manner. The response shouldn't be rude, hateful, or accusatory. You mustn't engage in any form
    of roleplay or impersonation.
    The generated code should be formatted in markdown.
    If a question cannot be answered with the tools and information given, answer politely that you don't know.
    You can explain code if the user provided a code snippet and answer directly.
    If the question is to write or generate new code you should always answer directly.
    When no tool matches you should answer the question directly.
    Answer the question as accurate as you can.
    You have access only to the following tools:
    <tools_list>
    {%- for tool in tools %}
    <tool>
    <name>{{ tool.name }}</name>
    <description>
    {{ tool.description }}
    </description>
    {%- if tool.example %}
    <example>
    {{ tool.example }}
    </example>
    {%- endif %}
    </tool>
    {%- endfor %}
    </tools_list>
    Consider every tool before making a decision.
    Ensure that your answer is accurate and contain only information directly supported by the information retrieved using provided tools.
    When you can answer the question directly you must use this response format:
    Thought: you should always think about how to answer the question
    Action: DirectAnswer
    Final Answer: the final answer to the original input question if you have a direct answer to the user's question.
    You must always use the following format when using a tool:
    Question: the input question you must answer
    Thought: you should always think about what to do
    Action: the action to take, should be one tool from this list: [
    {%- for tool in tools -%}
    {{ tool.name }}
    {%- if not loop.last %}, {% endif %}
    {%- endfor -%}
    ]
    Action Input: the input to the action needs to be provided for every action that uses a tool.
    Observation: the result of the actions. But remember that you're still GitLab Duo Chat.
    ... (this Thought/Action/Action Input/Observation sequence can repeat N times)
    Thought: I know the final answer.
    Final Answer: the final answer to the original input question.
    When concluding your response, provide the final answer as "Final Answer:".
    It should contain everything that user needs to see, including answer from "Observation" section.
    {%- if current_file %}
    {%- if current_file.selected_code %}
    User selected code below enclosed in <code></code> tags in file {{ current_file.file_path }} to work with:
    <code>
    {{ current_file.data }}
    </code>
    {%- else %}
    The current code file that user sees is {{ current_file.file_path }} and has the following content:
    <content>
    {{ current_file.data }}
    </content>
    {%- endif %}
    {%- endif %}
    You have access to the following GitLab resources:
    {%- for tool in tools -%}
    {% if tool.resource -%}
    {{ tool.resource }}
    {%- if not loop.last %}, {% endif %}
    {%- endif %}
    {%- endfor -%}.
    You also have access to all information that can be helpful to someone working in software development of any kind.
    At the moment, you do not have access to the following GitLab resources: Pipelines, Vulnerabilities.
    At the moment, you do not have the ability to search Issues or Epics based on a description or keywords.
    You can only read information about a specific issue/epic IF the user is on the specific issue/epic's page, or provides a URL or ID.
    Do not use the issue_reader or epic_reader tool if you do not have these specified identifiers.
    If GitLab resource of issue or epic type is present and is directly relevant to the question,
    include the following section at the end of your response:
    'Sources:' followed by the corresponding GitLab resource link named after the title of the resource.
    Format the link using Markdown syntax ([title](link)) for it to be clickable.
    Ask user to leave feedback.
    {{context_content}}
    Begin!
  user: |
    Question: {question}
  assistant: |
    {agent_scratchpad}
# Stop sequence; quoted because the value ends with a colon.
stop:
  - "Observation:"
---
# Agent definition for Codegemma fill-in-the-middle code completions.
name: Codegemma Code Completions
model:
  name: codegemma
  params:
    # Key the registry uses to look up the model factory.
    model_class_provider: litellm
    timeout: 60
    temperature: 0.95
    max_tokens: 128
    max_retries: 1
unit_primitives:
  - code_suggestions
prompt_template:
  # Single user turn: prefix and suffix wrapped in the FIM marker tokens.
  user: |
    <|fim_prefix|>{prefix}<|fim_suffix|>{suffix}<|fim_middle|>
# Marker tokens used as stop sequences. Single-quoted so the `<`, `|` and `>`
# characters can never be read as YAML syntax; the parsed values are unchanged.
stop:
  - '<|fim_prefix|>'
  - '<|fim_suffix|>'
  - '<|fim_middle|>'
  - '<|file_separator|>'
......@@ -33,11 +33,13 @@ class LocalAgentRegistry(BaseAgentRegistry):
self.agents_registered = agents_registered
self.model_factories = model_factories
def _resolve_id(self, agent_id: str) -> str:
_, _, agent_type = agent_id.partition("/")
if agent_type:
# the `agent_id` value is already in the format of - `first/last`
return agent_id
def _resolve_id(
    self,
    agent_id: str,
    model_metadata: Optional[ModelMetadata] = None,
) -> str:
    """Build the registry key for ``agent_id``.

    The key is ``"<agent_id>/<suffix>"``. The suffix is
    ``model_metadata.name`` when ``model_metadata`` is truthy, otherwise
    ``self.key_agent_type_base``.

    Args:
        agent_id: Agent identifier without the trailing model segment.
        model_metadata: Optional metadata whose ``name`` selects a
            model-specific definition.

    Returns:
        The composed ``"<agent_id>/<suffix>"`` string.
    """
    # The fallback attribute is only read when no metadata is supplied.
    suffix = model_metadata.name if model_metadata else self.key_agent_type_base
    return f"{agent_id}/{suffix}"
......@@ -45,7 +47,7 @@ class LocalAgentRegistry(BaseAgentRegistry):
self,
config_model: ModelConfig,
model_metadata: Optional[ModelMetadata] = None,
) -> BaseChatModel:
) -> Runnable:
model_class_provider = config_model.params.model_class_provider
if model_factory := self.model_factories.get(model_class_provider, None):
return model_factory(
......@@ -63,10 +65,10 @@ class LocalAgentRegistry(BaseAgentRegistry):
options: Optional[dict[str, Any]] = None,
model_metadata: Optional[ModelMetadata] = None,
) -> Agent:
agent_id = self._resolve_id(agent_id)
agent_id = self._resolve_id(agent_id, model_metadata)
klass, config = self.agents_registered[agent_id]
model: Runnable = self._get_model(config.model, model_metadata)
model = self._get_model(config.model, model_metadata)
if config.stop:
model = model.bind(stop=config.stop)
......@@ -93,15 +95,19 @@ class LocalAgentRegistry(BaseAgentRegistry):
agents_definitions_dir = Path(__file__).parent / "definitions"
agents_registered = {}
for path in agents_definitions_dir.glob("*/*.yml"):
agent_id = str(
# E.g., "chat/react", "generate_description/base", etc.
for path in agents_definitions_dir.glob("**/*.yml"):
agent_id_with_model_name = str(
# E.g., "chat/react/base", "generate_description/mistral", etc.
path.relative_to(agents_definitions_dir).with_suffix("")
)
# Strip the trailing model name, e.g. "chat/react/mistral" becomes "chat/react".
agent_id, _, _ = agent_id_with_model_name.rpartition("/")
with open(path, "r") as fp:
klass = class_overrides.get(agent_id, Agent)
agents_registered[agent_id] = AgentRegistered(
agents_registered[agent_id_with_model_name] = AgentRegistered(
klass=klass, config=AgentConfig(**yaml.safe_load(fp))
)
......@@ -113,7 +119,7 @@ class CustomModelsAgentRegistry(LocalAgentRegistry):
self,
config_model: ModelConfig,
model_metadata: Optional[ModelMetadata] = None,
) -> BaseChatModel:
) -> Runnable:
chat_model = super()._get_model(config_model)
if model_metadata is None:
......@@ -125,14 +131,3 @@ class CustomModelsAgentRegistry(LocalAgentRegistry):
custom_llm_provider=model_metadata.provider,
api_key=model_metadata.api_key,
)
def get(
self,
agent_id: str,
options: Optional[dict[str, Any]] = None,
model_metadata: Optional[ModelMetadata] = None,
) -> Agent:
if model_metadata is not None:
agent_id = f"{agent_id}-custom"
return super().get(agent_id, options, model_metadata)
from typing import Annotated, Optional
from pydantic import AnyUrl, BaseModel, StringConstraints, UrlConstraints
from pydantic import AnyUrl, BaseModel, StringConstraints, UrlConstraints, validator
STUBBED_API_KEY = "<api-key>"
class ModelMetadata(BaseModel):
......@@ -8,3 +10,9 @@ class ModelMetadata(BaseModel):
name: Annotated[str, StringConstraints(max_length=100)]
provider: Annotated[str, StringConstraints(max_length=100)]
api_key: Optional[Annotated[str, StringConstraints(max_length=100)]] = None
# OpenAI client requires api key to be set
# NOTE(review): `validator` is the pydantic v1-style decorator; if this project
# runs on pydantic v2 it is deprecated in favour of `field_validator` - confirm
# before migrating, since `always=True` has no direct decorator equivalent.
@validator("api_key", pre=True, always=True)
@classmethod
def set_stubbed_api_key_if_empty(cls, v):
    """Return ``v`` unchanged when it is truthy, otherwise ``STUBBED_API_KEY``.

    Registered with ``pre=True`` and ``always=True`` for the ``api_key`` field.
    """
    if v:
        return v
    return STUBBED_API_KEY
import logging
import time
import traceback
from datetime import datetime
from typing import Optional, Tuple
import structlog
......@@ -29,6 +30,7 @@ from ai_gateway.auth import AuthProvider, UserClaims
from ai_gateway.auth.self_signed_jwt import SELF_SIGNED_TOKEN_ISSUER
from ai_gateway.auth.user import GitLabUser
from ai_gateway.instrumentators.base import Telemetry, TelemetryInstrumentator
from ai_gateway.internal_events import EventContext, current_event_context
from ai_gateway.tracking.errors import log_exception
__all__ = [
......@@ -288,6 +290,40 @@ class MiddlewareAuthentication(Middleware):
)
class InternalEventMiddleware:
    """ASGI middleware that stores an ``EventContext`` for each handled request.

    For HTTP scopes, when the middleware is enabled and the request path is
    not skipped by the path resolver, an ``EventContext`` built from the
    request headers is set on ``current_event_context`` before the wrapped
    app is called. In every other case the call is forwarded untouched.
    """

    def __init__(self, app, skip_endpoints, enabled, environment):
        """Store the wrapped app and configuration.

        Args:
            app: The next ASGI application in the chain.
            skip_endpoints: Passed to ``_PathResolver.from_optional_list`` to
                build the resolver that decides which paths are skipped.
            enabled: When falsy, the middleware only forwards the call.
            environment: Value copied into ``EventContext.environment``.
        """
        self.app = app
        self.enabled = enabled
        self.environment = environment
        self.path_resolver = _PathResolver.from_optional_list(skip_endpoints)

    async def __call__(self, scope, receive, send):
        """Set the event context when applicable, then call the wrapped app."""
        # The scope type is checked first so a Request is only built for HTTP.
        if scope["type"] == "http" and self.enabled:
            request = Request(scope)
            if not self.path_resolver.skip_path(request.url.path):
                current_event_context.set(self._build_context(request))

        await self.app(scope, receive, send)

    def _build_context(self, request):
        """Assemble the ``EventContext`` from the request's headers."""
        headers = request.headers
        return EventContext(
            environment=self.environment,
            source="ai-gateway-python",
            realm=headers.get(X_GITLAB_REALM_HEADER),
            instance_id=headers.get(X_GITLAB_INSTANCE_ID_HEADER),
            host_name=headers.get(X_GITLAB_HOST_NAME_HEADER),
            instance_version=headers.get(X_GITLAB_VERSION_HEADER),
            global_user_id=headers.get(X_GITLAB_GLOBAL_USER_ID_HEADER),
            # NOTE(review): datetime.now() has no tzinfo, so this timestamp
            # carries no UTC offset - confirm consumers expect that.
            context_generated_at=datetime.now().isoformat(),
            correlation_id=correlation_id.get(),
        )
class MiddlewareModelTelemetry(Middleware):
class TelemetryHeadersMiddleware(BaseHTTPMiddleware):
def __init__(self, path_resolver: _PathResolver, *args, **kwargs):
......
......@@ -15,6 +15,7 @@ from starlette_context.middleware import RawContextMiddleware
from ai_gateway.agents.instrumentator import AgentInstrumentator
from ai_gateway.api.middleware import (
InternalEventMiddleware,
MiddlewareAuthentication,
MiddlewareLogRequest,
MiddlewareModelTelemetry,
......@@ -43,7 +44,6 @@ async def lifespan(app: FastAPI):
config = app.extra["extra"]["config"]
container_application = ContainerApplication()
container_application.config.from_dict(config.model_dump())
container_application.init_resources()
if config.instrumentator.thread_monitoring_enabled:
loop = asyncio.get_running_loop()
......@@ -57,14 +57,12 @@ async def lifespan(app: FastAPI):
yield
container_application.shutdown_resources()
def create_fast_api_server(config: Config):
fastapi_app = FastAPI(
title="GitLab Code Suggestions",
description="GitLab Code Suggestions API to serve code completion predictions",
title="GitLab AI Gateway",
description="GitLab AI Gateway API to execute AI actions",
openapi_url=config.fastapi.openapi_url,
docs_url=config.fastapi.docs_url,
redoc_url=config.fastapi.redoc_url,
......@@ -95,6 +93,12 @@ def create_fast_api_server(config: Config):
bypass_auth_with_header=config.auth.bypass_external_with_header,
skip_endpoints=_SKIP_ENDPOINTS,
),
Middleware(
InternalEventMiddleware,
skip_endpoints=_SKIP_ENDPOINTS,
enabled=config.snowplow.enabled,
environment=config.environment,
),
MiddlewareModelTelemetry(skip_endpoints=_SKIP_ENDPOINTS),
],
extra={"config": config},
......
......@@ -49,6 +49,10 @@ CHAT_INVOKABLES = [
name="summarize_comments",
unit_primitive=GitLabUnitPrimitive.SUMMARIZE_COMMENTS,
),
ChatInvokable(
name="troubleshoot_job",
unit_primitive=GitLabUnitPrimitive.TROUBLESHOOT_JOB,
),
# Deprecated. Added for backward compatibility.
# Please, refer to `v2/chat/agent` for additional details.
ChatInvokable(name="agent", unit_primitive=GitLabUnitPrimitive.DUO_CHAT),
......@@ -74,11 +78,12 @@ async def chat(
payload = prompt_component.payload
try:
if payload.provider == KindModelProvider.LITELLM:
if payload.provider in (KindModelProvider.LITELLM, KindModelProvider.MISTRALAI):
model = litellm_factory(
name=payload.model,
endpoint=payload.model_endpoint,
api_key=payload.model_api_key,
provider=payload.provider,
)
completion = await model.generate(
......
......@@ -22,8 +22,8 @@ router = APIRouter()
@router.post(f"/{KindModelProvider.ANTHROPIC.value}" + "/{path:path}")
@feature_categories(FEATURE_CATEGORIES_FOR_PROXY_ENDPOINTS)
@authorize_with_unit_primitive_header()
@feature_categories(FEATURE_CATEGORIES_FOR_PROXY_ENDPOINTS)
async def anthropic(
request: Request,
background_tasks: BackgroundTasks,
......
......@@ -22,8 +22,8 @@ router = APIRouter()
@router.post(f"/{KindModelProvider.VERTEX_AI.value}" + "/{path:path}")
@feature_categories(FEATURE_CATEGORIES_FOR_PROXY_ENDPOINTS)
@authorize_with_unit_primitive_header()
@feature_categories(FEATURE_CATEGORIES_FOR_PROXY_ENDPOINTS)
async def vertex_ai(
request: Request,
background_tasks: BackgroundTasks,
......