Improve PostgreSQL CSV query redaction
This commit introduces the plugin https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog and removes the custom file plugins.
The redactor plugin inserts several fields:
-
sql
: contains the normalized query -
duration_s
: contains the duration in seconds
The redactor also drops several fields:
-
message
: from the original CSV log -
statement
: from the extracted slow log entry
Sample slow log entry:
{
"time": "2021-02-09T09:58:31.910Z",
"user_name": "gitlab",
"database_name": "gitlabhq_production",
"process_id": "34388",
"connection_from": "127.0.0.1:49624",
"session_id": "60225ca6.8654",
"session_line_num": "3",
"command_tag": "SELECT",
"session_start_time": "2021-02-09 09:57:58 GMT",
"virtual_transaction_id": "55/321333443",
"transaction_id": "0",
"error_severity": "LOG",
"sql_state_code": "00000",
"detail": null,
"hint": null,
"internal_query": null,
"internal_query_pos": null,
"context": null,
"query_pos": null,
"location": null,
"application_name": "",
"duration_s": 4.161897,
"sql": "SELECT \"ci_builds\".* FROM \"ci_builds\" INNER JOIN \"projects\" ON \"projects\".\"id\" = \"ci_builds\".\"project_id\" LEFT JOIN project_features ON ci_builds.project_id = project_features.project_id LEFT JOIN (SELECT \"ci_builds\".\"project_id\", count(*) AS running_builds FROM \"ci_builds\" WHERE \"ci_builds\".\"type\" = $1 AND (\"ci_builds\".\"status\" IN ($2)) AND \"ci_builds\".\"runner_id\" IN (SELECT \"ci_runners\".\"id\" FROM \"ci_runners\" WHERE \"ci_runners\".\"runner_type\" = $3) GROUP BY \"ci_builds\".\"project_id\") AS project_builds ON ci_builds.project_id=project_builds.project_id WHERE (\"ci_builds\".\"status\" IN ($4)) AND \"ci_builds\".\"runner_id\" IS NULL AND \"projects\".\"shared_runners_enabled\" = $5 AND \"projects\".\"pending_delete\" = $6 AND (project_features.builds_access_level IS NULL or project_features.builds_access_level > $7) AND \"ci_builds\".\"type\" = $8 AND (\"projects\".\"visibility_level\" = $9 OR (EXISTS (WITH RECURSIVE \"base_and_ancestors\" AS ((SELECT \"namespaces\".* FROM \"namespaces\" WHERE (namespaces.id = projects.namespace_id))\nUNION\n(SELECT \"namespaces\".* FROM \"namespaces\", \"base_and_ancestors\" WHERE \"namespaces\".\"id\" = \"base_and_ancestors\".\"parent_id\")) SELECT $10 FROM \"base_and_ancestors\" AS \"namespaces\" LEFT JOIN namespace_statistics ON namespace_statistics.namespace_id = namespaces.id WHERE \"namespaces\".\"parent_id\" IS NULL AND (COALESCE(namespaces.shared_runners_minutes_limit, $11, $12) = $13 OR COALESCE(namespace_statistics.shared_runners_seconds, $14) < COALESCE((namespaces.shared_runners_minutes_limit + COALESCE(namespaces.extra_shared_runners_minutes_limit, $15)), ($16 + COALESCE(namespaces.extra_shared_runners_minutes_limit, $17)), $18) * $19)))) AND (NOT EXISTS (SELECT $20 FROM \"taggings\" WHERE \"taggings\".\"taggable_type\" = $21 AND \"taggings\".\"context\" = $22 AND (taggable_id = ci_builds.id) AND \"taggings\".\"tag_id\" NOT IN ($23, $24))) ORDER BY COALESCE(project_builds.running_builds, $25) ASC, ci_builds.id ASC /*application:web,correlation_id:01EY33MQ3H40PVG913DCNQ596Y*/",
"tag": "postgres.postgres_csv",
"environment": "gprd",
"hostname": "patroni-02-db-gprd",
"fqdn": "patroni-02-db-gprd.c.gitlab-production.internal",
"stage": "main",
"shard": "default",
"tier": "db",
"type": "patroni"
}
Relates to https://gitlab.com/gitlab-com/gl-infra/infrastructure/-/issues/10612
Merge request reports
Activity
requested review from @bjk-gitlab
mentioned in commit 7a766065
removed review request for @bjk-gitlab
Please register or sign in to reply