Track feature category per API path
What does this merge request do and why?
This MR introduces feature category tracking per API path in the AI Gateway. This is standard practice at GitLab for clarifying the ownership of endpoints/features and the associated responsibilities.
Example usage in GitLab-Rails: https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/api/releases.rb#L13
This MR is required by a downstream MR.
Related to Instrument calls to models (!501 - merged)
How to set up and validate locally
Visit the OpenAPI playground at http://0.0.0.0:5052/docs and execute a request against each of the endpoints below. Example access log entries:
Duo Chat log:
{
"url": "http://0.0.0.0:5052/v1/chat/agent",
"path": "/v1/chat/agent",
"status_code": 200,
"method": "POST",
"correlation_id": "656198238d034cccb5afdf786cc718fd",
"http_version": "1.1",
"client_ip": "127.0.0.1",
"client_port": 40100,
"duration_s": 1.4740175420010928,
"cpu_s": 0.05742020400000003,
"user_agent": "curl/8.4.0",
"gitlab_instance_id": null,
"gitlab_global_user_id": null,
"gitlab_host_name": null,
"gitlab_saas_namespace_ids": null,
"gitlab_realm": null,
"feature_category": "duo_chat",
"logger": "api.access",
"level": "info",
"type": "mlops",
"stage": "main",
"timestamp": "2023-12-18T09:40:42.919208Z",
"message": "127.0.0.1:40100 - \"POST /v1/chat/agent HTTP/1.1\" 200"
}
V2 Code Suggestions:
{
"url": "http://0.0.0.0:5052/v2/code/completions",
"path": "/v2/code/completions",
"status_code": 200,
"method": "POST",
"correlation_id": "f96e45e814d54eb8b5419be2a17847fa",
"http_version": "1.1",
"client_ip": "127.0.0.1",
"client_port": 46456,
"duration_s": 1.8696396220002498,
"cpu_s": 0.055158644000000034,
"user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
"gitlab_instance_id": null,
"gitlab_global_user_id": null,
"gitlab_host_name": null,
"gitlab_saas_namespace_ids": null,
"gitlab_realm": null,
"feature_category": "code_suggestions",
"model_engine": "vertex-ai",
"model_name": "code-gecko@002",
"prompt_length": 40,
"prompt_length_stripped": 33,
"inference_duration_s": 1.8655080509997788,
"suffix_length": 0,
"experiments": [
{
"name": "exp_truncate_suffix",
"variant": 1
}
],
"lang": "",
"editor_lang": "string",
"model_output_length": 43,
"model_output_length_stripped": 37,
"model_output_score": -6.132078647613525,
"blocked": false,
"post_processing_duration_s": 0.00035016100082430057,
"logger": "api.access",
"level": "info",
"type": "mlops",
"stage": "main",
"timestamp": "2023-12-18T09:41:20.567924Z",
"message": "127.0.0.1:46456 - \"POST /v2/code/completions HTTP/1.1\" 200"
}
V2 Code Generations:
{
"url": "http://0.0.0.0:5052/v2/code/generations",
"path": "/v2/code/generations",
"status_code": 200,
"method": "POST",
"correlation_id": "6001b9e2df9e47f19b43c1028b5b6ad8",
"http_version": "1.1",
"client_ip": "127.0.0.1",
"client_port": 60908,
"duration_s": 3.960637243999372,
"cpu_s": 0.00851089500000013,
"user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
"gitlab_instance_id": null,
"gitlab_global_user_id": null,
"gitlab_host_name": null,
"gitlab_saas_namespace_ids": null,
"gitlab_realm": null,
"feature_category": "code_suggestions",
"model_engine": "vertex-ai",
"model_name": "code-bison@002",
"prompt_length": 15,
"prompt_length_stripped": 13,
"inference_duration_s": 3.9583992670013686,
"lang": "ruby",
"editor_lang": "ruby",
"model_output_length": 59,
"model_output_length_stripped": 50,
"model_output_score": -8.439862251281738,
"blocked": false,
"safety_categories": [
"Health"
],
"logger": "api.access",
"level": "info",
"type": "mlops",
"stage": "main",
"timestamp": "2023-12-18T09:42:05.662924Z",
"message": "127.0.0.1:60908 - \"POST /v2/code/generations HTTP/1.1\" 200"
}
V3 Code Suggestions:
{
"url": "http://0.0.0.0:5052/v3/completions",
"path": "/v3/completions",
"status_code": 200,
"method": "POST",
"correlation_id": "2f94a0b2b49d419ba1a8062ffaa78415",
"http_version": "1.1",
"client_ip": "127.0.0.1",
"client_port": 34562,
"duration_s": 0.5325944449978124,
"cpu_s": 0.004075927999999784,
"user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
"gitlab_instance_id": null,
"gitlab_global_user_id": null,
"gitlab_host_name": null,
"gitlab_saas_namespace_ids": null,
"gitlab_realm": null,
"feature_category": "code_suggestions",
"model_engine": "vertex-ai",
"model_name": "code-gecko@002",
"prompt_length": 47,
"prompt_length_stripped": 40,
"inference_duration_s": 0.5305382369988365,
"suffix_length": 6,
"experiments": [
{
"name": "exp_truncate_suffix",
"variant": 1
}
],
"lang": "",
"editor_lang": "string",
"model_output_length": 1,
"model_output_length_stripped": 1,
"model_output_score": -2.1120405197143555,
"blocked": false,
"post_processing_duration_s": 0.00025684900174383074,
"logger": "api.access",
"level": "info",
"type": "mlops",
"stage": "main",
"timestamp": "2023-12-18T09:42:38.278454Z",
"message": "127.0.0.1:34562 - \"POST /v3/completions HTTP/1.1\" 200"
}
Notice that the `feature_category` field is set in each of the request logs.
Merge request checklist
- Tests added for new functionality. If not, please raise an issue to follow up.
- Documentation added/updated, if needed.
Edited by Shinya Maeda