Improve DependencyLinker sanitization

What does this MR do and why?

Improves performance of our dependency linkers. This skips the unnecessary HTML parsing that sanitize does.

To test this, I copied the contents of this file and committed them to a local repo in GDK.

Then I loaded http://127.0.0.1:3000/gitlab-org/gitlab-test/-/blob/master/go.sum?format=json&viewer=simple

Before:

"cpu_s":0.482891,"mem_objects":681031,"mem_bytes":122544800,"mem_mallocs":611959,"mem_total_bytes":149786040,"duration_s":0.36563
Full request log
{"method":"GET","path":"/gitlab-org/gitlab-test/-/blob/master/go.sum","format":"json","controller":"Projects::BlobController","action":"show","status":200,"time":"2022-07-17T05:29:39.342Z","params":[{"key":"viewer","value":"simple"},{"key":"namespace_id","value":"gitlab-org"},{"key":"project_id","value":"gitlab-test"},{"key":"id","value":"master/go.sum"}],"correlation_id":"01G85A4D6JXJM0TSWZ4WFYQ690","meta.user":"root","meta.project":"gitlab-org/gitlab-test","meta.root_namespace":"gitlab-org","meta.client_id":"user/1","meta.caller_id":"Projects::BlobController#show","meta.remote_ip":"127.0.0.1","meta.feature_category":"source_code_management","remote_ip":"127.0.0.1","user_id":1,"username":"root","ua":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:102.0) Gecko/20100101 Firefox/102.0","request_urgency":"low","target_duration_s":5,"gitaly_calls":4,"gitaly_duration_s":0.028712,"redis_calls":7,"redis_duration_s":0.000615,"redis_read_bytes":589,"redis_write_bytes":30235,"redis_cache_calls":4,"redis_cache_duration_s":0.000405,"redis_cache_read_bytes":343,"redis_cache_write_bytes":29043,"redis_shared_state_calls":2,"redis_shared_state_duration_s":8.8e-05,"redis_shared_state_write_bytes":104,"redis_sessions_calls":1,"redis_sessions_duration_s":0.000122,"redis_sessions_read_bytes":246,"redis_sessions_write_bytes":1088,"db_count":6,"db_write_count":0,"db_cached_count":1,"db_replica_count":0,"db_primary_count":6,"db_main_count":6,"db_main_replica_count":0,"db_ci_count":0,"db_ci_replica_count":0,"db_replica_cached_count":0,"db_primary_cached_count":1,"db_main_cached_count":1,"db_main_replica_cached_count":0,"db_ci_cached_count":0,"db_ci_replica_cached_count":0,"db_replica_wal_count":0,"db_primary_wal_count":0,"db_main_wal_count":0,"db_main_replica_wal_count":0,"db_ci_wal_count":0,"db_ci_replica_wal_count":0,"db_replica_wal_cached_count":0,"db_primary_wal_cached_count":0,"db_main_wal_cached_count":0,"db_main_replica_wal_cached_count":0,"db_ci_wal_cached_count":0,"db_ci_replica_wal_cached_count":0,"db_replica_duration_s":0.0,"db_primary_duration_s":0.007,"db_main_duration_s":0.007,"db_main_replica_duration_s":0.0,"db_ci_duration_s":0.0,"db_ci_replica_duration_s":0.0,"cpu_s":0.482891,"mem_objects":681031,"mem_bytes":122544800,"mem_mallocs":611959,"mem_total_bytes":149786040,"pid":20210,"worker_id":"puma_0","rate_limiting_gates":[],"db_duration_s":0.00076,"view_duration_s":0.00834,"duration_s":0.36563}

After:

"cpu_s":0.302605,"mem_objects":519435,"mem_bytes":16529600,"mem_mallocs":222101,"mem_total_bytes":37307000,"duration_s":0.19733
Full request log
{"method":"GET","path":"/gitlab-org/gitlab-test/-/blob/master/go.sum","format":"json","controller":"Projects::BlobController","action":"show","status":200,"time":"2022-07-17T06:20:05.265Z","params":[{"key":"viewer","value":"simple"},{"key":"namespace_id","value":"gitlab-org"},{"key":"project_id","value":"gitlab-test"},{"key":"id","value":"master/go.sum"}],"correlation_id":"01G85D0RDHSVXDBKC47GRRGAXZ","meta.user":"root","meta.project":"gitlab-org/gitlab-test","meta.root_namespace":"gitlab-org","meta.client_id":"user/1","meta.caller_id":"Projects::BlobController#show","meta.remote_ip":"127.0.0.1","meta.feature_category":"source_code_management","remote_ip":"127.0.0.1","user_id":1,"username":"root","ua":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:102.0) Gecko/20100101 Firefox/102.0","request_urgency":"low","target_duration_s":5,"gitaly_calls":4,"gitaly_duration_s":0.028496,"redis_calls":7,"redis_duration_s":0.000748,"redis_read_bytes":589,"redis_write_bytes":30236,"redis_cache_calls":4,"redis_cache_duration_s":0.000501,"redis_cache_read_bytes":343,"redis_cache_write_bytes":29044,"redis_shared_state_calls":2,"redis_shared_state_duration_s":9.8e-05,"redis_shared_state_write_bytes":104,"redis_sessions_calls":1,"redis_sessions_duration_s":0.000149,"redis_sessions_read_bytes":246,"redis_sessions_write_bytes":1088,"db_count":24,"db_write_count":0,"db_cached_count":4,"db_replica_count":0,"db_primary_count":24,"db_main_count":24,"db_main_replica_count":0,"db_ci_count":0,"db_ci_replica_count":0,"db_replica_cached_count":0,"db_primary_cached_count":4,"db_main_cached_count":4,"db_main_replica_cached_count":0,"db_ci_cached_count":0,"db_ci_replica_cached_count":0,"db_replica_wal_count":0,"db_primary_wal_count":0,"db_main_wal_count":0,"db_main_replica_wal_count":0,"db_ci_wal_count":0,"db_ci_replica_wal_count":0,"db_replica_wal_cached_count":0,"db_primary_wal_cached_count":0,"db_main_wal_cached_count":0,"db_main_replica_wal_cached_count":0,"db_ci_wal_cached_count":0,"db_ci_replica_wal_cached_count":0,"db_replica_duration_s":0.0,"db_primary_duration_s":0.03,"db_main_duration_s":0.03,"db_main_replica_duration_s":0.0,"db_ci_duration_s":0.0,"db_ci_replica_duration_s":0.0,"cpu_s":0.302605,"mem_objects":519435,"mem_bytes":16529600,"mem_mallocs":222101,"mem_total_bytes":37307000,"pid":20211,"worker_id":"puma_1","rate_limiting_gates":[],"db_duration_s":0.00079,"view_duration_s":0.00776,"duration_s":0.19733}

The request is now faster (`duration_s`: 0.37 s -> 0.20 s) and uses significantly less memory (`mem_bytes`: ~122.5 MB -> ~16.5 MB).

MR acceptance checklist

This checklist encourages us to confirm any changes have been analyzed to reduce risks in quality, performance, reliability, security, and maintainability.

Edited by Heinrich Lee Yu

Merge request reports

Loading