Commit b8d56f0c authored by Darafei Praliaskouski

ST_ClusterKMeans: handle effective K=0 when all the inputs are EMPTY

Closes #4101
Closes https://github.com/postgis/postgis/pull/254



git-svn-id: http://svn.osgeo.org/postgis/trunk@16605 b70326c6-7e19-0410-871a-916f4a2858ee
parent d8d92f21
Pipeline #23095208 passed with stage
in 18 minutes and 40 seconds
......@@ -135,20 +135,8 @@ kmeans_init(POINT2D** objs, int* clusters, uint32_t n, POINT2D** centers, POINT2
double max_dst = -1;
double dst_p1, dst_p2;
assert(k > 0);
/* k = 1: first non-null is ok, and input check guarantees there's one */
if (k == 1)
{
for (i = 0; i < n; i++)
{
if (!objs[i]) continue;
centers_raw[0] = *((POINT2D *)objs[i]);
centers[0] = &(centers_raw[0]);
return;
}
assert(0);
}
/* k=0, k=1: "clustering" is just input validation */
assert(k > 1);
/* k >= 2: find two distant points greedily */
for (i = 1; i < n; i++)
......@@ -333,9 +321,24 @@ lwgeom_cluster_2d_kmeans(const LWGEOM** geoms, uint32_t n, uint32_t k)
k = num_non_empty;
}
kmeans_init(objs, clusters, n, centers, centers_raw, k);
result = kmeans(objs, clusters, n, centers, k);
if (k > 1)
{
kmeans_init(objs, clusters, n, centers, centers_raw, k);
result = kmeans(objs, clusters, n, centers, k);
}
else
{
/* k=0: everything is unclusterable
* k=1: mark up NULL and non-NULL */
for (i = 0; i < n; i++)
{
if (k == 0 || !objs[i])
clusters[i] = KMEANS_NULL_CLUSTER;
else
clusters[i] = 0;
}
result = LW_TRUE;
}
/* Before error handling, might as well clean up all the inputs */
lwfree(objs);
......
......@@ -50,3 +50,13 @@ select '#4100a', count(distinct result) from (SELECT ST_ClusterKMeans(foo1.the_g
( ST_GeomFromEWKT('SRID=4326;POLYGON((-71.1261 42.2703 1,-71.1257 42.2703 1,-71.1257 42.2701 1,-71.126 42.2701 1,-71.1261 42.2702 1,-71.1261 42.2703 1))') ) ) As g(geom) CROSS JOIN generate_series(1,3) As i GROUP BY i )) As foo1 LIMIT 10) kmeans;
-- #4100b: two identical points clustered with k = 2; the regress
-- expectation is a single distinct cluster id.
WITH kmeans AS (
    SELECT ST_ClusterKMeans(g.geom, 2) OVER () AS cid
    FROM (
        VALUES
            ('POINT(0 0)'::geometry),
            ('POINT(0 0)')
    ) AS g (geom)
)
SELECT
    '#4100b',
    count(DISTINCT cid)
FROM kmeans;
-- #4101a: every input is an EMPTY geometry (five different types) while
-- k = 3 is requested, so the effective number of clusters is zero.
WITH foo1 AS (
    -- one EMPTY geometry per type, all in SRID 4326
    SELECT ST_GeomFromText('POINT EMPTY', 4326) AS the_geom
    UNION ALL
    SELECT ST_GeomFromText('MULTIPOINT EMPTY', 4326) AS the_geom
    UNION ALL
    SELECT ST_GeomFromText('MULTIPOLYGON EMPTY', 4326) AS the_geom
    UNION ALL
    SELECT ST_GeomFromText('LINESTRING EMPTY', 4326) AS the_geom
    UNION ALL
    SELECT ST_GeomFromText('MULTILINESTRING EMPTY', 4326) AS the_geom
),
kmeans AS (
    SELECT ST_ClusterKMeans(foo1.the_geom, 3) OVER () AS result
    FROM foo1
    LIMIT 10
)
SELECT
    '#4101a',
    count(DISTINCT result)
FROM kmeans;
-- #4101b: two EMPTY points clustered with k = 2; no non-empty input is
-- available to seed any cluster.
WITH kmeans AS (
    SELECT ST_ClusterKMeans(g.geom, 2) OVER () AS cid
    FROM (
        VALUES
            ('POINT EMPTY'::geometry),
            ('POINT EMPTY')
    ) AS g (geom)
)
SELECT
    '#4101b',
    count(DISTINCT cid)
FROM kmeans;
......@@ -34,3 +34,7 @@ NOTICE: kmeans_init: there are at least 3 duplicate inputs, number of output cl
#4100a|1
NOTICE: kmeans_init: there are at least 2 duplicate inputs, number of output clusters may be less than you requested
#4100b|1
NOTICE: lwgeom_cluster_2d_kmeans: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data
#4101a|1
NOTICE: lwgeom_cluster_2d_kmeans: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data
#4101b|1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment