Skip to content

Track feature category per API path

Shinya Maeda requested to merge track-feature-category-in-api-path into main

What does this merge request do and why?

This MR introduces per-API-path feature category tracking in the AI Gateway. Assigning a feature category to each endpoint is a standard practice at GitLab; it clarifies who owns each endpoint/feature and who is responsible for it.

Example usage in GitLab-Rails: https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/api/releases.rb#L13

This MR is required by a downstream MR.

Related to Instrument calls to models (!501 - merged)

How to set up and validate locally

Visit the OpenAPI playground at http://0.0.0.0:5052/docs and execute a request against each of the endpoints below. The resulting access log entries should look like the following:

Duo Chat log:
{
    "url": "http://0.0.0.0:5052/v1/chat/agent",
    "path": "/v1/chat/agent",
    "status_code": 200,
    "method": "POST",
    "correlation_id": "656198238d034cccb5afdf786cc718fd",
    "http_version": "1.1",
    "client_ip": "127.0.0.1",
    "client_port": 40100,
    "duration_s": 1.4740175420010928,
    "cpu_s": 0.05742020400000003,
    "user_agent": "curl/8.4.0",
    "gitlab_instance_id": null,
    "gitlab_global_user_id": null,
    "gitlab_host_name": null,
    "gitlab_saas_namespace_ids": null,
    "gitlab_realm": null,
    "feature_category": "duo_chat",
    "logger": "api.access",
    "level": "info",
    "type": "mlops",
    "stage": "main",
    "timestamp": "2023-12-18T09:40:42.919208Z",
    "message": "127.0.0.1:40100 - \"POST /v1/chat/agent HTTP/1.1\" 200"
}
V2 Code Suggestions:
{
    "url": "http://0.0.0.0:5052/v2/code/completions",
    "path": "/v2/code/completions",
    "status_code": 200,
    "method": "POST",
    "correlation_id": "f96e45e814d54eb8b5419be2a17847fa",
    "http_version": "1.1",
    "client_ip": "127.0.0.1",
    "client_port": 46456,
    "duration_s": 1.8696396220002498,
    "cpu_s": 0.055158644000000034,
    "user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "gitlab_instance_id": null,
    "gitlab_global_user_id": null,
    "gitlab_host_name": null,
    "gitlab_saas_namespace_ids": null,
    "gitlab_realm": null,
    "feature_category": "code_suggestions",
    "model_engine": "vertex-ai",
    "model_name": "code-gecko@002",
    "prompt_length": 40,
    "prompt_length_stripped": 33,
    "inference_duration_s": 1.8655080509997788,
    "suffix_length": 0,
    "experiments": [
        {
            "name": "exp_truncate_suffix",
            "variant": 1
        }
    ],
    "lang": "",
    "editor_lang": "string",
    "model_output_length": 43,
    "model_output_length_stripped": 37,
    "model_output_score": -6.132078647613525,
    "blocked": false,
    "post_processing_duration_s": 0.00035016100082430057,
    "logger": "api.access",
    "level": "info",
    "type": "mlops",
    "stage": "main",
    "timestamp": "2023-12-18T09:41:20.567924Z",
    "message": "127.0.0.1:46456 - \"POST /v2/code/completions HTTP/1.1\" 200"
}
V2 Code Generations:
{
    "url": "http://0.0.0.0:5052/v2/code/generations",
    "path": "/v2/code/generations",
    "status_code": 200,
    "method": "POST",
    "correlation_id": "6001b9e2df9e47f19b43c1028b5b6ad8",
    "http_version": "1.1",
    "client_ip": "127.0.0.1",
    "client_port": 60908,
    "duration_s": 3.960637243999372,
    "cpu_s": 0.00851089500000013,
    "user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "gitlab_instance_id": null,
    "gitlab_global_user_id": null,
    "gitlab_host_name": null,
    "gitlab_saas_namespace_ids": null,
    "gitlab_realm": null,
    "feature_category": "code_suggestions",
    "model_engine": "vertex-ai",
    "model_name": "code-bison@002",
    "prompt_length": 15,
    "prompt_length_stripped": 13,
    "inference_duration_s": 3.9583992670013686,
    "lang": "ruby",
    "editor_lang": "ruby",
    "model_output_length": 59,
    "model_output_length_stripped": 50,
    "model_output_score": -8.439862251281738,
    "blocked": false,
    "safety_categories": [
        "Health"
    ],
    "logger": "api.access",
    "level": "info",
    "type": "mlops",
    "stage": "main",
    "timestamp": "2023-12-18T09:42:05.662924Z",
    "message": "127.0.0.1:60908 - \"POST /v2/code/generations HTTP/1.1\" 200"
}
V3 Code Suggestions:
{
    "url": "http://0.0.0.0:5052/v3/completions",
    "path": "/v3/completions",
    "status_code": 200,
    "method": "POST",
    "correlation_id": "2f94a0b2b49d419ba1a8062ffaa78415",
    "http_version": "1.1",
    "client_ip": "127.0.0.1",
    "client_port": 34562,
    "duration_s": 0.5325944449978124,
    "cpu_s": 0.004075927999999784,
    "user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "gitlab_instance_id": null,
    "gitlab_global_user_id": null,
    "gitlab_host_name": null,
    "gitlab_saas_namespace_ids": null,
    "gitlab_realm": null,
    "feature_category": "code_suggestions",
    "model_engine": "vertex-ai",
    "model_name": "code-gecko@002",
    "prompt_length": 47,
    "prompt_length_stripped": 40,
    "inference_duration_s": 0.5305382369988365,
    "suffix_length": 6,
    "experiments": [
        {
            "name": "exp_truncate_suffix",
            "variant": 1
        }
    ],
    "lang": "",
    "editor_lang": "string",
    "model_output_length": 1,
    "model_output_length_stripped": 1,
    "model_output_score": -2.1120405197143555,
    "blocked": false,
    "post_processing_duration_s": 0.00025684900174383074,
    "logger": "api.access",
    "level": "info",
    "type": "mlops",
    "stage": "main",
    "timestamp": "2023-12-18T09:42:38.278454Z",
    "message": "127.0.0.1:34562 - \"POST /v3/completions HTTP/1.1\" 200"
}

Notice that the feature_category field is now set in the request logs.

Merge request checklist

  • Tests added for new functionality. If not, please raise an issue to follow up.
  • Documentation added/updated, if needed.
Edited by Shinya Maeda

Merge request reports