Commit 3c81bccd authored by Raúl Marín's avatar Raúl Marín

Speedup MVT column parsing

Closes #4145
Closes https://github.com/postgis/postgis/pull/286


git-svn-id: http://svn.osgeo.org/postgis/trunk@16685 b70326c6-7e19-0410-871a-916f4a2858ee
parent a96924e6
Pipeline #27663707 passed with stage
in 24 minutes and 13 seconds
......@@ -11,6 +11,7 @@ New since PostGIS 2.5.0beta1
- #4140, Use user-provided CFLAGS in address standardizer and the
topology module (Raúl Marín)
- #4143, Fix backend crash when ST_OffsetCurve fails (Dan Baston)
- #4145, Speedup MVT column parsing (Raúl Marín)
See PostGIS 2.5.0 section for full details
......
......@@ -295,10 +295,9 @@ static TupleDesc get_tuple_desc(mvt_agg_context *ctx)
return tupdesc;
}
static uint32_t get_key_index(mvt_agg_context *ctx, char *name)
static uint32_t get_key_index_with_size(mvt_agg_context *ctx, const char *name, size_t size)
{
struct mvt_kv_key *kv;
size_t size = strlen(name);
HASH_FIND(hh, ctx->keys_hash, name, size, kv);
if (!kv)
return UINT32_MAX;
......@@ -318,27 +317,38 @@ static uint32_t add_key(mvt_agg_context *ctx, char *name)
static void parse_column_keys(mvt_agg_context *ctx)
{
TupleDesc tupdesc = get_tuple_desc(ctx);
uint32_t natts = (uint32_t) tupdesc->natts;
uint32_t i;
uint32_t i, natts;
bool geom_found = false;
char *key;
POSTGIS_DEBUG(2, "parse_column_keys called");
ctx->column_cache.tupdesc = get_tuple_desc(ctx);
natts = ctx->column_cache.tupdesc->natts;
ctx->column_cache.column_keys_index = palloc(sizeof(uint32_t) * natts);
ctx->column_cache.column_oid = palloc(sizeof(uint32_t) * natts);
ctx->column_cache.values = palloc(sizeof(Datum) * natts);
ctx->column_cache.nulls = palloc(sizeof(bool) * natts);
for (i = 0; i < natts; i++)
{
#if POSTGIS_PGSQL_VERSION < 110
Oid typoid = getBaseType(tupdesc->attrs[i]->atttypid);
char *tkey = tupdesc->attrs[i]->attname.data;
Oid typoid = getBaseType(ctx->column_cache.tupdesc->attrs[i]->atttypid);
char *tkey = ctx->column_cache.tupdesc->attrs[i]->attname.data;
#else
Oid typoid = getBaseType(tupdesc->attrs[i].atttypid);
char *tkey = tupdesc->attrs[i].attname.data;
Oid typoid = getBaseType(ctx->column_cache.tupdesc->attrs[i].atttypid);
char *tkey = ctx->column_cache.tupdesc->attrs[i].attname.data;
#endif
ctx->column_cache.column_oid[i] = typoid;
#if POSTGIS_PGSQL_VERSION >= 94
if (typoid == JSONBOID)
{
ctx->column_cache.column_keys_index[i] = UINT32_MAX;
continue;
}
#endif
key = pstrdup(tkey);
if (ctx->geom_name == NULL)
{
if (!geom_found && typoid == TypenameGetTypid("geometry"))
......@@ -350,18 +360,19 @@ static void parse_column_keys(mvt_agg_context *ctx)
}
else
{
if (!geom_found && strcmp(key, ctx->geom_name) == 0)
if (!geom_found && strcmp(tkey, ctx->geom_name) == 0)
{
ctx->geom_index = i;
geom_found = true;
continue;
}
}
add_key(ctx, key);
ctx->column_cache.column_keys_index[i] = add_key(ctx, pstrdup(tkey));
}
if (!geom_found)
elog(ERROR, "parse_column_keys: no geometry column found");
ReleaseTupleDesc(tupdesc);
}
static void encode_keys(mvt_agg_context *ctx)
......@@ -389,7 +400,8 @@ static VectorTile__Tile__Value *create_value()
POSTGIS_DEBUG(2, "MVT_CREATE_VALUES called"); \
{ \
struct kvtype *kv; \
for (kv = ctx->hash; kv != NULL; kv=kv->hh.next) { \
for (kv = ctx->hash; kv != NULL; kv=kv->hh.next) \
{ \
VectorTile__Tile__Value *value = create_value(); \
value->hasfield = 1; \
value->valuefield = kv->valuefield; \
......@@ -433,6 +445,14 @@ static void encode_values(mvt_agg_context *ctx)
HASH_CLEAR(hh, ctx->uint_values_hash);
HASH_CLEAR(hh, ctx->sint_values_hash);
HASH_CLEAR(hh, ctx->bool_values_hash);
pfree(ctx->column_cache.column_keys_index);
pfree(ctx->column_cache.column_oid);
pfree(ctx->column_cache.values);
pfree(ctx->column_cache.nulls);
ReleaseTupleDesc(ctx->column_cache.tupdesc);
memset(&ctx->column_cache, 0, sizeof(ctx->column_cache));
}
#define MVT_PARSE_VALUE(value, kvtype, hash, valuefield, size) \
......@@ -486,11 +506,10 @@ static void encode_values(mvt_agg_context *ctx)
MVT_PARSE_INT_VALUE(value); \
}
static void add_value_as_string(mvt_agg_context *ctx,
char *value, uint32_t *tags, uint32_t k)
static void add_value_as_string_with_size(mvt_agg_context *ctx,
char *value, size_t size, uint32_t *tags, uint32_t k)
{
struct mvt_kv_string_value *kv;
size_t size = strlen(value);
POSTGIS_DEBUG(2, "add_value_as_string called");
HASH_FIND(hh, ctx->string_values_hash, value, size, kv);
if (!kv)
......@@ -508,6 +527,12 @@ static void add_value_as_string(mvt_agg_context *ctx,
tags[ctx->row_columns*2+1] = kv->id;
}
static void add_value_as_string(mvt_agg_context *ctx,
char *value, uint32_t *tags, uint32_t k)
{
return add_value_as_string_with_size(ctx, value, strlen(value), tags, k);
}
static void parse_datum_as_string(mvt_agg_context *ctx, Oid typoid,
Datum datum, uint32_t *tags, uint32_t k)
{
......@@ -542,15 +567,17 @@ static uint32_t *parse_jsonb(mvt_agg_context *ctx, Jsonb *jb,
if (r == WJB_KEY && v.type != jbvNull)
{
char *key;
key = palloc(v.val.string.len + 1 * sizeof(char));
memcpy(key, v.val.string.val, v.val.string.len);
key[v.val.string.len] = '\0';
k = get_key_index(ctx, key);
k = get_key_index_with_size(ctx, v.val.string.val, v.val.string.len);
if (k == UINT32_MAX)
{
char *key;
uint32_t newSize = ctx->keys_hash_i + 1;
key = palloc(v.val.string.len + 1);
memcpy(key, v.val.string.val, v.val.string.len);
key[v.val.string.len] = '\0';
tags = repalloc(tags, newSize * 2 * sizeof(*tags));
k = add_key(ctx, key);
}
......@@ -560,7 +587,7 @@ static uint32_t *parse_jsonb(mvt_agg_context *ctx, Jsonb *jb,
if (v.type == jbvString)
{
char *value;
value = palloc(v.val.string.len + 1 * sizeof(char));
value = palloc(v.val.string.len + 1);
memcpy(value, v.val.string.val, v.val.string.len);
value[v.val.string.len] = '\0';
add_value_as_string(ctx, value, tags, k);
......@@ -603,12 +630,23 @@ static void parse_values(mvt_agg_context *ctx)
{
uint32_t n_keys = ctx->keys_hash_i;
uint32_t *tags = palloc(n_keys * 2 * sizeof(*tags));
bool isnull;
uint32_t i, k;
TupleDesc tupdesc = get_tuple_desc(ctx);
uint32_t natts = (uint32_t) tupdesc->natts;
ctx->row_columns = 0;
uint32_t i;
mvt_column_cache cc = ctx->column_cache;
uint32_t natts = (uint32_t) cc.tupdesc->natts;
HeapTupleData tuple;
POSTGIS_DEBUG(2, "parse_values called");
ctx->row_columns = 0;
/* Build a temporary HeapTuple control structure */
tuple.t_len = HeapTupleHeaderGetDatumLength(ctx->row);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = ctx->row;
/* We use heap_deform_tuple as it costs only O(N) vs O(N^2) of GetAttributeByNum */
heap_deform_tuple(&tuple, cc.tupdesc, cc.values, cc.nulls);
POSTGIS_DEBUGF(3, "parse_values natts: %d", natts);
......@@ -616,25 +654,26 @@ static void parse_values(mvt_agg_context *ctx)
{
char *key;
Oid typoid;
Datum datum;
uint32_t k;
Datum datum = cc.values[i];
if (i == ctx->geom_index)
continue;
#if POSTGIS_PGSQL_VERSION < 110
key = tupdesc->attrs[i]->attname.data;
typoid = getBaseType(tupdesc->attrs[i]->atttypid);
#else
key = tupdesc->attrs[i].attname.data;
typoid = getBaseType(tupdesc->attrs[i].atttypid);
#endif
datum = GetAttributeByNum(ctx->row, i+1, &isnull);
k = get_key_index(ctx, key);
if (isnull)
if (cc.nulls[i])
{
POSTGIS_DEBUG(3, "parse_values isnull detected");
continue;
}
#if POSTGIS_PGSQL_VERSION < 110
key = cc.tupdesc->attrs[i]->attname.data;
#else
key = cc.tupdesc->attrs[i].attname.data;
#endif
k = cc.column_keys_index[i];
typoid = cc.column_oid[i];
#if POSTGIS_PGSQL_VERSION >= 94
if (k == UINT32_MAX && typoid != JSONBOID)
elog(ERROR, "parse_values: unexpectedly could not find parsed key name '%s'", key);
......@@ -644,7 +683,7 @@ static void parse_values(mvt_agg_context *ctx)
continue;
}
#else
if (k == -1)
if (k == UINT32_MAX)
elog(ERROR, "parse_values: unexpectedly could not find parsed key name '%s'", key);
#endif
......@@ -681,7 +720,6 @@ static void parse_values(mvt_agg_context *ctx)
ctx->row_columns++;
}
ReleaseTupleDesc(tupdesc);
ctx->feature->n_tags = ctx->row_columns * 2;
ctx->feature->tags = tags;
......@@ -857,6 +895,8 @@ void mvt_agg_init_context(mvt_agg_context *ctx)
ctx->values_hash_i = 0;
ctx->keys_hash_i = 0;
memset(&ctx->column_cache, 0, sizeof(ctx->column_cache));
layer = palloc(sizeof(*layer));
vector_tile__tile__layer__init(layer);
layer->version = 2;
......
......@@ -46,6 +46,15 @@
#include "vector_tile.pb-c.h"
typedef struct mvt_column_cache
{
uint32_t *column_keys_index;
uint32_t *column_oid;
Datum *values;
bool *nulls;
TupleDesc tupdesc;
} mvt_column_cache;
typedef struct mvt_agg_context
{
char *name;
......@@ -67,6 +76,7 @@ typedef struct mvt_agg_context
uint32_t values_hash_i;
uint32_t keys_hash_i;
uint32_t row_columns;
mvt_column_cache column_cache;
} mvt_agg_context;
/* Prototypes */
......
......@@ -343,6 +343,48 @@ FROM (
) AS geom
) AS q;
-- Strings and text
SELECT 'TA11', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (
SELECT 'AbcDfg'::varchar AS cstring,
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus sed nulla augue. Pellentesque ut vulputate ex. Nunc et odio placerat, lacinia lectus sed, fermentum sapien. Sed massa velit, ullamcorper et est quis, congue rhoncus orci. Suspendisse in ante varius, convallis enim ut, fermentum amet.'::text as ctext,
ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
) AS q;
-- Check null attributes
SELECT 'TA12', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (
SELECT 1::int AS c1, NULL::double precision AS c2, ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
UNION
SELECT NULL AS c1, 2.0 AS c2, ST_AsMVTGeom(ST_GeomFromText('POINT(26 18)'),
ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
) AS q;
SELECT 'TA13', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (
SELECT 1::int AS c1, NULL::double precision AS c2, ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
UNION
SELECT 5 AS c1, 2.0 AS c2, null AS geom
) AS q;
-- Extra geometry as parameter (casted as string)
SELECT 'TA14', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM
(
SELECT geom, St_Expand(geom, 10) as other_geom FROM
(
SELECT 'SRID=3857;MULTILINESTRING((105209.784484008 5267849.91657293,102374.204885822 5266414.05020624,99717.9874419115 5267379.35282178,90157.5689699989 5266091.78724987,86186.0622890498 5271349.34154337,78713.0972659854 5272871.78773217,76281.8581230672 5277951.00736649,81783.372341432 5289800.59747023))'::geometry as geom
) _sq
) AS q;
-- Numeric: Currently being casted as strings
SELECT 'TA15', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM
(
SELECT 1::numeric AS c1, '12.232389283223239'::numeric AS c2,
'1' AS cstring, ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
) AS q;
-- default values tests
SELECT 'D1', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT 1 AS c1, 'abcd'::text AS c2,
ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
......
......@@ -68,6 +68,23 @@ TA8|Gk8KBHRlc3QSDhIEAAABABgBIgQJMt4/Eg4SBAAAAQEYASIECTTcPxIOEgQAAQECGAEiBAk03D8a
AmMxGgJjMiICKAEiAigCIgIoAyiAIHgC
TA9|0
TA10|49
TA11|GucCCgR0ZXN0Eg4SBAAAAQEYASIECTLePxoHY3N0cmluZxoFY3RleHQiCAoGQWJjRGZnIq8CCqwC
TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4g
UGhhc2VsbHVzIHNlZCBudWxsYSBhdWd1ZS4gUGVsbGVudGVzcXVlIHV0IHZ1bHB1dGF0ZSBleC4g
TnVuYyBldCBvZGlvIHBsYWNlcmF0LCBsYWNpbmlhIGxlY3R1cyBzZWQsIGZlcm1lbnR1bSBzYXBp
ZW4uIFNlZCBtYXNzYSB2ZWxpdCwgdWxsYW1jb3JwZXIgZXQgZXN0IHF1aXMsIGNvbmd1ZSByaG9u
Y3VzIG9yY2kuIFN1c3BlbmRpc3NlIGluIGFudGUgdmFyaXVzLCBjb252YWxsaXMgZW5pbSB1dCwg
ZmVybWVudHVtIGFtZXQuKIAgeAI=
TA12|Gj4KBHRlc3QSDBICAAAYASIECTLePxIMEgIBARgBIgQJNNw/GgJjMRoCYzIiAigBIgkZAAAAAAAA
AEAogCB4Ag==
TA13|GiUKBHRlc3QSDBICAAAYASIECTLePxoCYzEaAmMyIgIoASiAIHgC
TA14|GpACCgR0ZXN0Ei8SAgAAGAIiJwny6wyShoMFOqUstRbBKYoPr5UBjxSFPpRS4XTkF/8lsE/8VZK5
ARoKb3RoZXJfZ2VvbSLFAQrCATAxMDMwMDAwMjAxMTBGMDAwMDAxMDAwMDAwMDUwMDAwMDBEOTQw
REZCQUZEOUVGMjQwNDc0RDYyNzJBODE2NTQ0MUQ5NDBERkJBRkQ5RUYyNDBDN0YzM0NBNkQ0MkQ1
NDQxNkExQTNGOEQzQ0IwRjk0MEM3RjMzQ0E2RDQyRDU0NDE2QTFBM0Y4RDNDQjBGOTQwNDc0RDYy
NzJBODE2NTQ0MUQ5NDBERkJBRkQ5RUYyNDA0NzRENjI3MkE4MTY1NDQxKIAgeAI=
TA15|GkkKBHRlc3QSEBIGAAABAQIAGAEiBAky3j8aAmMxGgJjMhoHY3N0cmluZyIDCgExIhQKEjEyLjIz
MjM4OTI4MzIyMzIzOSiAIHgC
D1|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
D2|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
D3|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
......
SELECT 'TA9', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT '{"c1":1,"c2":"abcd"}'::jsonb,
SELECT 'J1', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT '{"c1":1,"c2":"abcd"}'::jsonb,
ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom) AS q;
SELECT 'TA10', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT '{"c1":"abcd", "c2":"abcd"}'::jsonb,
SELECT 'J2', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT '{"c1":"abcd", "c2":"abcd"}'::jsonb,
ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom) AS q;
SELECT 'J3', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (
SELECT '{"c1":"abcd", "c2":"abcd"}'::jsonb,
ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'), ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
UNION ALL
SELECT '{"c3":"abasdadcd", "c1":5}'::jsonb,
ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'), ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
) AS q;
TA9|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
TA10|GisKBHRlc3QSDhIEAAABABgBIgQJMt4/GgJjMRoCYzIiBgoEYWJjZCiAIHgC
J1|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
J2|GisKBHRlc3QSDhIEAAABABgBIgQJMt4/GgJjMRoCYzIiBgoEYWJjZCiAIHgC
J3|GlAKBHRlc3QSDhIEAAABABgBIgQJMt4/Eg4SBAABAgIYASIECTLePxoCYzEaAmMyGgJjMyIGCgRh
YmNkIgIoBSILCglhYmFzZGFkY2QogCB4Ag==
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment