Commit 56221ed5 authored by Hubert depesz Lubaczewski's avatar Hubert depesz Lubaczewski
Browse files

Make it possible to anonymize queries

This has to be done by Pg::Explain while anonymizing the plan; otherwise
the query would be anonymized using a different substitution
dictionary.
parent c9451165
Revision history for Pg-Explain
1.01 2020/07/27
- Make it possible to anonymize queries using the same anonymization
engine that is used to anonymize plan.
1.00 2020/06/06
- Make generated text explains more like Pg
- Parse planning time from explains from PostgreSQL 13
......
......@@ -317,6 +317,7 @@ t/55-planning-time.d/old-xml.plan
t/55-planning-time.d/old-yaml.expect
t/55-planning-time.d/old-yaml.plan
t/55-planning-time.t
t/56-anonymize-query.t
t/99-manifest.t
t/perlcriticrc
t/perltidyrc
......
......@@ -42,39 +42,39 @@
"provides" : {
"Pg::Explain" : {
"file" : "lib/Pg/Explain.pm",
"version" : "1.00"
"version" : "1.01"
},
"Pg::Explain::Analyzer" : {
"file" : "lib/Pg/Explain/Analyzer.pm",
"version" : "1.00"
"version" : "1.01"
},
"Pg::Explain::From" : {
"file" : "lib/Pg/Explain/From.pm",
"version" : "1.00"
"version" : "1.01"
},
"Pg::Explain::FromJSON" : {
"file" : "lib/Pg/Explain/FromJSON.pm",
"version" : "1.00"
"version" : "1.01"
},
"Pg::Explain::FromText" : {
"file" : "lib/Pg/Explain/FromText.pm",
"version" : "1.00"
"version" : "1.01"
},
"Pg::Explain::FromXML" : {
"file" : "lib/Pg/Explain/FromXML.pm",
"version" : "1.00"
"version" : "1.01"
},
"Pg::Explain::FromYAML" : {
"file" : "lib/Pg/Explain/FromYAML.pm",
"version" : "1.00"
"version" : "1.01"
},
"Pg::Explain::Node" : {
"file" : "lib/Pg/Explain/Node.pm",
"version" : "1.00"
"version" : "1.01"
},
"Pg::Explain::StringAnonymizer" : {
"file" : "lib/Pg/Explain/StringAnonymizer.pm",
"version" : "1.00"
"version" : "1.01"
}
},
"release_status" : "stable",
......@@ -88,6 +88,6 @@
"web" : "https://gitlab.com/depesz/Pg--Explain"
}
},
"version" : "1.00",
"version" : "1.01",
"x_serialization_backend" : "JSON::PP version 4.02"
}
......@@ -19,31 +19,31 @@ name: Pg-Explain
provides:
Pg::Explain:
file: lib/Pg/Explain.pm
version: '1.00'
version: '1.01'
Pg::Explain::Analyzer:
file: lib/Pg/Explain/Analyzer.pm
version: '1.00'
version: '1.01'
Pg::Explain::From:
file: lib/Pg/Explain/From.pm
version: '1.00'
version: '1.01'
Pg::Explain::FromJSON:
file: lib/Pg/Explain/FromJSON.pm
version: '1.00'
version: '1.01'
Pg::Explain::FromText:
file: lib/Pg/Explain/FromText.pm
version: '1.00'
version: '1.01'
Pg::Explain::FromXML:
file: lib/Pg/Explain/FromXML.pm
version: '1.00'
version: '1.01'
Pg::Explain::FromYAML:
file: lib/Pg/Explain/FromYAML.pm
version: '1.00'
version: '1.01'
Pg::Explain::Node:
file: lib/Pg/Explain/Node.pm
version: '1.00'
version: '1.01'
Pg::Explain::StringAnonymizer:
file: lib/Pg/Explain/StringAnonymizer.pm
version: '1.00'
version: '1.01'
requires:
Clone: '0'
Digest::SHA: '0'
......@@ -55,5 +55,5 @@ requires:
resources:
license: http://dev.perl.org/licenses/
repository: https://gitlab.com/depesz/Pg--Explain.git
version: '1.00'
version: '1.01'
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
......@@ -32,11 +32,11 @@ Pg::Explain - Object approach at reading explain analyze output
=head1 VERSION
Version 1.00
Version 1.01
=cut
our $VERSION = '1.00';
our $VERSION = '1.01';
=head1 SYNOPSIS
......@@ -432,17 +432,25 @@ sub get_struct {
Used to remove all individual values from the explain, while still retaining
all values that are needed to see what's wrong.
If there are any arguments, these are treated as strings, anonymized using
anonymizer used for plan, and are returned in the same order.
This is mainly useful to anonymize queries.
=cut
# Anonymizes the plan held by this object and, optionally, additional strings
# (typically the query text) with the very same anonymizer, so that the plan
# and the extra strings share one substitution dictionary.
#
# Arguments: zero or more strings to anonymize alongside the plan.
# Returns:   nothing when called without arguments; otherwise the anonymized
#            strings, in the order given (call in list context).
sub anonymize {
    my $self       = shift;
    my @extra_args = @_;

    my $anonymizer = Pg::Explain::StringAnonymizer->new();

    # Order matters: the gathering pass runs first, then the anonymizer is
    # finalized, and only then is the substitution pass applied to the plan.
    $self->top_node->anonymize_gathering( $anonymizer );
    $anonymizer->finalize();
    $self->top_node->anonymize_substitute( $anonymizer );

    # Plain plan anonymization: nothing to hand back.
    return if 0 == scalar @extra_args;

    # Reuse the already finalized anonymizer for the caller-supplied strings.
    return map { $anonymizer->anonymize_text( $_ ) } @extra_args;
}
=head1 AUTHOR
......
......@@ -26,11 +26,11 @@ Pg::Explain::Analyzer - Some helper methods to analyze explains
=head1 VERSION
Version 1.00
Version 1.01
=cut
our $VERSION = '1.00';
our $VERSION = '1.01';
=head1 SYNOPSIS
......
......@@ -26,11 +26,11 @@ Pg::Explain::From - Base class for parsers of non-text explain formats.
=head1 VERSION
Version 1.00
Version 1.01
=cut
our $VERSION = '1.00';
our $VERSION = '1.01';
=head1 SYNOPSIS
......
......@@ -27,11 +27,11 @@ Pg::Explain::FromJSON - Parser for explains in JSON format
=head1 VERSION
Version 1.00
Version 1.01
=cut
our $VERSION = '1.00';
our $VERSION = '1.01';
=head1 SYNOPSIS
......
......@@ -26,11 +26,11 @@ Pg::Explain::FromText - Parser for text based explains
=head1 VERSION
Version 1.00
Version 1.01
=cut
our $VERSION = '1.00';
our $VERSION = '1.01';
=head1 SYNOPSIS
......
......@@ -27,11 +27,11 @@ Pg::Explain::FromXML - Parser for explains in XML format
=head1 VERSION
Version 1.00
Version 1.01
=cut
our $VERSION = '1.00';
our $VERSION = '1.01';
=head1 SYNOPSIS
......
......@@ -27,11 +27,11 @@ Pg::Explain::FromYAML - Parser for explains in YAML format
=head1 VERSION
Version 1.00
Version 1.01
=cut
our $VERSION = '1.00';
our $VERSION = '1.01';
=head1 SYNOPSIS
......
......@@ -27,11 +27,11 @@ Pg::Explain::Node - Class representing single node from query plan
=head1 VERSION
Version 1.00
Version 1.01
=cut
our $VERSION = '1.00';
our $VERSION = '1.01';
=head1 SYNOPSIS
......
......@@ -26,11 +26,11 @@ Pg::Explain::StringAnonymizer - Class to anonymize sets of strings
=head1 VERSION
Version 1.00
Version 1.01
=cut
our $VERSION = '1.00';
our $VERSION = '1.01';
=head1 SYNOPSIS
......@@ -126,6 +126,9 @@ sub finalize {
$self->_stringify();
my @keys_sorted = sort { length( $b ) <=> length( $a ) } keys %{ $self->{ 'strings' } };
$self->{ 'keys_re' } = join '|', map { qr{\Q$_\E} } @keys_sorted;
return;
}
......@@ -144,6 +147,20 @@ sub anonymized {
return $self->{ 'strings' }->{ $input };
}
=head2 anonymize_text
Anonymize given text using loaded dictionary of substitutions.
=cut
# Anonymizes free-form text (for example a query) using the dictionary stored
# in this object.
#
# Arguments: $text - the string to anonymize.
# Returns:   a copy of $text in which every dictionary key that is delimited
#            on each side by a word boundary or by whitespace is replaced by
#            its anonymized value. Whitespace around replaced keys is kept.
#            The text is returned unchanged when it is undefined or when no
#            non-empty 'keys_re' pattern is stored in the object.
sub anonymize_text {
    my $self = shift;
    my $text = shift;

    my $re = $self->{ 'keys_re' };

    return $text unless defined $text;

    # An empty pattern would match at every word boundary and substitute
    # undef there, so there is nothing sensible to do without keys.
    return $text if !defined $re || '' eq $re;

    # The delimiters are zero-width on purpose: consuming the whitespace, as
    # a plain \s would, drops it from the result because only the key's
    # replacement is put back, gluing neighbouring tokens together.
    $text =~ s{(?:\b|(?<=\s))($re)(?:\b|(?=\s))}{$self->{ 'strings' }->{ $1 }}ge;

    return $text;
}
=head2 anonymization_dictionary
Returns hash reference containing all input strings and their anonymized versions, like:
......
......@@ -10,9 +10,9 @@ my $anonymizer;
lives_ok( sub { $anonymizer = Pg::Explain::StringAnonymizer->new() }, 'Base object creation' );
isa_ok( $anonymizer, 'Pg::Explain::StringAnonymizer' );
lives_ok( sub { $anonymizer->add( 'depesz' ) }, 'Adding string' );
lives_ok( sub { $anonymizer->add( 'depesz' ) }, 'Adding string' );
lives_ok( sub { $anonymizer->add( 'yyy', 'xxx', 'c' ) }, 'Adding strings' );
lives_ok( sub { $anonymizer->add( [ qw( a b c ) ] ) }, 'Adding strings as arrayref' );
lives_ok( sub { $anonymizer->add( [ qw( a b c ) ] ) }, 'Adding strings as arrayref' );
my @expected_keys = sort qw( depesz yyy xxx a b c );
my @existing_keys = sort keys %{ $anonymizer->{ 'strings' } };
......
......@@ -118,6 +118,6 @@ lives_ok(
);
ok( $explain->as_text !~ /mix_cliente_compliance|mix_relatorio_up|vendedor_loja|vendedor|public/, 'anonymize() hides table names' );
ok( $explain->as_text !~ /cnpj_cliente|regiao/, 'anonymize() hides column names' );
ok( $explain->as_text !~ /cnpj_cliente|regiao/, 'anonymize() hides column names' );
exit;
......@@ -31,9 +31,9 @@ plan 'tests' => 8 + 4 * scalar @tests;
my $ex, $an;
lives_ok( sub { $ex = Pg::Explain->new( 'source' => 'Result (cost=0.00..0.01 rows=1 width=4)' ) }, 'Pg::Explain created' );
isa_ok( $ex, 'Pg::Explain' );
dies_ok( sub { $an = Pg::Explain::Analyzer->new(); }, 'Expecting to die #1' );
dies_ok( sub { $an = Pg::Explain::Analyzer->new(); }, 'Expecting to die #1' );
dies_ok( sub { $an = Pg::Explain::Analyzer->new( 1, 2 ); }, 'Expecting to die #2' );
dies_ok( sub { $an = Pg::Explain::Analyzer->new( 1 ); }, 'Expecting to die #3' );
dies_ok( sub { $an = Pg::Explain::Analyzer->new( 1 ); }, 'Expecting to die #3' );
lives_ok( sub { $an = Pg::Explain::Analyzer->new( $ex ); }, 'Should live' );
isa_ok( $an, 'Pg::Explain::Analyzer' );
throws_ok( sub { Pg::Explain::Analyzer->new( $an ); }, qr{not Pg::Explain}, 'Expecting to die #4' );
......
#!perl
use Test::More;
use Test::Deep;
use Test::Exception;
use autodie;
plan 'tests' => 14;
use Pg::Explain;
my $plan_source = q{ QUERY PLAN
──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Sort (cost=32.93..33.01 rows=32 width=224) (actual time=0.621..0.626 rows=8 loops=1)
Sort Key: n.nspname, c.relname
Sort Method: quicksort Memory: 27kB
-> Hash Join (cost=1.09..32.13 rows=32 width=224) (actual time=0.137..0.597 rows=8 loops=1)
Hash Cond: (c.relnamespace = n.oid)
-> Seq Scan on pg_class c (cost=0.00..29.89 rows=65 width=73) (actual time=0.032..0.445 rows=137 loops=1)
Filter: ((relkind = ANY ('{r,p,v,m,S,f,""}'::"char"[])) AND pg_table_is_visible(oid))
Rows Removed by Filter: 258
-> Hash (cost=1.07..1.07 rows=2 width=68) (actual time=0.065..0.066 rows=1 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 9kB
-> Seq Scan on pg_namespace n (cost=0.00..1.07 rows=2 width=68) (actual time=0.039..0.041 rows=1 loops=1)
Filter: ((nspname <> 'pg_catalog'::name) AND (nspname <> 'information_schema'::name) AND (nspname !~ '^pg_toast'::text))
Rows Removed by Filter: 3
Planning Time: 0.626 ms
Execution Time: 0.713 ms
(15 rows)
};
my $query = q{SELECT n.nspname as "Schema",
c.relname as "Name",
CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'm' THEN 'materialized view' WHEN 'i' THEN 'index' WHEN 'S' THEN 'sequence' WHEN 's' THEN 'special' WHEN 'f' THEN 'foreign table' WHEN 'p' THEN 'partitioned table' WHEN 'I' THEN 'partitioned index' END as "Type",
pg_catalog.pg_get_userbyid(c.relowner) as "Owner"
FROM pg_catalog.pg_class c
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind IN ('r','p','v','m','S','f','')
AND n.nspname <> 'pg_catalog'
AND n.nspname <> 'information_schema'
AND n.nspname !~ '^pg_toast'
AND pg_catalog.pg_table_is_visible(c.oid)
ORDER BY 1,2;};
# Parse the plan; both the explain object and its top node have to exist
# before anonymization is attempted.
my $explain = Pg::Explain->new( 'source' => $plan_source );
isa_ok( $explain,           'Pg::Explain' );
isa_ok( $explain->top_node, 'Pg::Explain::Node' );

# Anonymize the plan and the query in a single call so that both go through
# the same anonymizer.
my $anonymized_query;
lives_ok(
    sub {
        ( $anonymized_query ) = $explain->anonymize( $query );
    },
    'Anonymization works',
);

my $textual = $explain->as_text();

# like()/unlike() are used instead of ok( ... =~ ... ) so that a failure
# prints the offending text and the pattern.

# Type casts have to survive anonymization of the plan...
like( $textual, qr/::"char"\[\]/, 'anonymize() preserves type casting' );
like( $textual, qr/::name\b/,     'anonymize() preserves type casting' );
like( $textual, qr/::text\b/,     'anonymize() preserves type casting' );

# ...while literals and identifiers must be gone from the plan...
unlike( $textual, qr/'pg_catalog'/,         'anonymize() hides string literals' );
unlike( $textual, qr/nspname/,              'anonymize() hides column names' );
unlike( $textual, qr/pg_class/,             'anonymize() hides relation names' );
unlike( $textual, qr/\{r,p,v,m,S,f,""\}/,   'anonymize() hides complex things' );

# ...and from the query that was anonymized together with it.
unlike( $anonymized_query, qr/'pg_catalog'/,       'anonymize() hides string literals in query.' );
unlike( $anonymized_query, qr/nspname/,            'anonymize() hides column names in query.' );
unlike( $anonymized_query, qr/pg_class/,           'anonymize() hides relation names in query.' );
unlike( $anonymized_query, qr/\{r,p,v,m,S,f,""\}/, 'anonymize() hides complex things in query.' );

exit;
# Recursively strips the textual keys ('extra_info', 'scan_on', 'type') from
# a plain hash reference and from every plain hash found inside its
# array-valued entries. Anything that is not an unblessed hash reference is
# left untouched. Always returns nothing.
sub just_numbers {
    my ( $node ) = @_;

    return if ref( $node ) ne 'HASH';

    # Drop all three textual keys with a single hash-slice delete.
    delete @{ $node }{ qw( extra_info scan_on type ) };

    # Only array-valued entries are descended into; each element is handed
    # back to this sub, which ignores the ones that are not hashes.
    my @lists = grep { ref( $_ ) eq 'ARRAY' } values %{ $node };
    for my $list ( @lists ) {
        for my $element ( @{ $list } ) {
            just_numbers( $element );
        }
    }

    return;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment