PostGIS 3.0.6dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches
lwgeom_geos_cluster.c
Go to the documentation of this file.
1/**********************************************************************
2 *
3 * PostGIS - Spatial Types for PostgreSQL
4 * http://postgis.net
5 *
6 * PostGIS is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * PostGIS is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18 *
19 **********************************************************************
20 *
21 * Copyright 2015-2016 Daniel Baston <dbaston@gmail.com>
22 *
23 **********************************************************************/
24
25#include <string.h>
26#include "liblwgeom.h"
27#include "liblwgeom_internal.h"
28#include "lwgeom_log.h"
29#include "lwgeom_geos.h"
30#include "lwunionfind.h"
31
32static const int STRTREE_NODE_CAPACITY = 10;
33
34/* Utility struct used to accumulate items in GEOSSTRtree_query callback */
36{
40};
41
42/* Utility struct to keep GEOSSTRtree and associated structures to be freed after use */
43struct STRTree
44{
45 GEOSSTRtree* tree;
46 GEOSGeometry** envelopes;
47 uint32_t* geom_ids;
48 uint32_t num_geoms;
49};
50
51static struct STRTree make_strtree(void** geoms, uint32_t num_geoms, char is_lwgeom);
52static void destroy_strtree(struct STRTree * tree);
53static int union_intersecting_pairs(GEOSGeometry** geoms, uint32_t num_geoms, UNIONFIND* uf);
54static int combine_geometries(UNIONFIND* uf, void** geoms, uint32_t num_geoms, void*** clustersGeoms, uint32_t* num_clusters, char is_lwgeom);
55
56/* Make a minimal GEOSGeometry* whose Envelope covers the same 2D extent as
57 * the supplied GBOX. This is faster and uses less memory than building a
58 * five-point polygon with GBOX2GEOS.
59 */
60static GEOSGeometry*
62{
63 if (lwgeom_is_empty(g))
64 return GEOSGeom_createEmptyPolygon();
65
66 if (lwgeom_get_type(g) == POINTTYPE) {
67 const POINT2D* pt = getPoint2d_cp(lwgeom_as_lwpoint(g)->point, 0);
68 return make_geos_point(pt->x, pt->y);
69 } else {
70 const GBOX* box = lwgeom_get_bbox(g);
71 if (!box)
72 return NULL;
73
74 return make_geos_segment(box->xmin, box->ymin, box->xmax, box->ymax);
75 }
76}
77
80static struct STRTree
81make_strtree(void** geoms, uint32_t num_geoms, char is_lwgeom)
82{
83 struct STRTree tree;
84 tree.envelopes = 0;
85 tree.num_geoms = 0;
86 tree.geom_ids = 0;
87
88 tree.tree = GEOSSTRtree_create(STRTREE_NODE_CAPACITY);
89 if (tree.tree == NULL)
90 {
91 return tree;
92 }
93 tree.geom_ids = lwalloc(num_geoms * sizeof(uint32_t));
94 tree.num_geoms = num_geoms;
95
96 if (is_lwgeom)
97 {
98 uint32_t i;
99 tree.envelopes = lwalloc(num_geoms * sizeof(GEOSGeometry*));
100 for (i = 0; i < num_geoms; i++)
101 {
102 tree.geom_ids[i] = i;
103 tree.envelopes[i] = geos_envelope_surrogate(geoms[i]);
104 GEOSSTRtree_insert(tree.tree, tree.envelopes[i], &(tree.geom_ids[i]));
105 }
106 }
107 else
108 {
109 uint32_t i;
110 tree.envelopes = NULL;
111 for (i = 0; i < num_geoms; i++)
112 {
113 tree.geom_ids[i] = i;
114 GEOSSTRtree_insert(tree.tree, geoms[i], &(tree.geom_ids[i]));
115 }
116 }
117
118 return tree;
119}
120
122static void
124{
125 size_t i;
126 GEOSSTRtree_destroy(tree->tree);
127
128 if (tree->envelopes)
129 {
130 for (i = 0; i < tree->num_geoms; i++)
131 {
132 GEOSGeom_destroy(tree->envelopes[i]);
133 }
134 lwfree(tree->envelopes);
135 }
136 lwfree(tree->geom_ids);
137}
138
139static void
140query_accumulate(void* item, void* userdata)
141{
142 struct QueryContext *cxt = userdata;
143 if (!cxt->items_found)
144 {
145 cxt->items_found_size = 8;
146 cxt->items_found = lwalloc(cxt->items_found_size * sizeof(void*));
147 }
148
149 if (cxt->num_items_found >= cxt->items_found_size)
150 {
151 cxt->items_found_size = 2 * cxt->items_found_size;
152 cxt->items_found = lwrealloc(cxt->items_found, cxt->items_found_size * sizeof(void*));
153 }
154 cxt->items_found[cxt->num_items_found++] = item;
155}
156
157/* Identify intersecting geometries and mark them as being in the same set */
158static int
159union_intersecting_pairs(GEOSGeometry** geoms, uint32_t num_geoms, UNIONFIND* uf)
160{
161 uint32_t p, i;
162 struct STRTree tree;
163 struct QueryContext cxt =
164 {
165 .items_found = NULL,
166 .num_items_found = 0,
167 .items_found_size = 0
168 };
169 int success = LW_SUCCESS;
170
171 if (num_geoms <= 1)
172 return LW_SUCCESS;
173
174 tree = make_strtree((void**) geoms, num_geoms, LW_FALSE);
175 if (tree.tree == NULL)
176 {
177 destroy_strtree(&tree);
178 return LW_FAILURE;
179 }
180
181 for (p = 0; p < num_geoms; p++)
182 {
183 const GEOSPreparedGeometry* prep = NULL;
184
185 if (!geoms[p] || GEOSisEmpty(geoms[p]))
186 continue;
187
188 cxt.num_items_found = 0;
189 GEOSSTRtree_query(tree.tree, geoms[p], &query_accumulate, &cxt);
190
191 for (i = 0; i < cxt.num_items_found; i++)
192 {
193 uint32_t q = *((uint32_t*) cxt.items_found[i]);
194
195 if (p != q && UF_find(uf, p) != UF_find(uf, q))
196 {
197 int geos_type = GEOSGeomTypeId(geoms[p]);
198 int geos_result;
199
200 /* Don't build prepared a geometry around a Point or MultiPoint -
201 * there are some problems in the implementation, and it's not clear
202 * there would be a performance benefit in any case. (See #3433)
203 */
204 if (geos_type != GEOS_POINT && geos_type != GEOS_MULTIPOINT)
205 {
206 /* Lazy initialize prepared geometry */
207 if (prep == NULL)
208 {
209 prep = GEOSPrepare(geoms[p]);
210 }
211 geos_result = GEOSPreparedIntersects(prep, geoms[q]);
212 }
213 else
214 {
215 geos_result = GEOSIntersects(geoms[p], geoms[q]);
216 }
217 if (geos_result > 1)
218 {
219 success = LW_FAILURE;
220 break;
221 }
222 else if (geos_result)
223 {
224 UF_union(uf, p, q);
225 }
226 }
227 }
228
229 if (prep)
230 GEOSPreparedGeom_destroy(prep);
231
232 if (!success)
233 break;
234 }
235
236 if (cxt.items_found)
237 lwfree(cxt.items_found);
238
239 destroy_strtree(&tree);
240 return success;
241}
242
246int
247cluster_intersecting(GEOSGeometry** geoms, uint32_t num_geoms, GEOSGeometry*** clusterGeoms, uint32_t* num_clusters)
248{
249 int cluster_success;
250 UNIONFIND* uf = UF_create(num_geoms);
251
252 if (union_intersecting_pairs(geoms, num_geoms, uf) == LW_FAILURE)
253 {
254 UF_destroy(uf);
255 return LW_FAILURE;
256 }
257
258 cluster_success = combine_geometries(uf, (void**) geoms, num_geoms, (void***) clusterGeoms, num_clusters, 0);
259 UF_destroy(uf);
260 return cluster_success;
261}
262
263static int
264dbscan_update_context(GEOSSTRtree* tree, struct QueryContext* cxt, LWGEOM** geoms, uint32_t p, double eps)
265{
266 cxt->num_items_found = 0;
267
268 GEOSGeometry* query_envelope;
269 if (geoms[p]->type == POINTTYPE)
270 {
271 const POINT2D* pt = getPoint2d_cp(lwgeom_as_lwpoint(geoms[p])->point, 0);
272 query_envelope = make_geos_segment( pt->x - eps, pt->y - eps, pt->x + eps, pt->y + eps );
273 } else {
274 const GBOX* box = lwgeom_get_bbox(geoms[p]);
275 query_envelope = make_geos_segment( box->xmin - eps, box->ymin - eps, box->xmax + eps, box->ymax + eps );
276 }
277
278 if (!query_envelope)
279 return LW_FAILURE;
280
281 GEOSSTRtree_query(tree, query_envelope, &query_accumulate, cxt);
282
283 GEOSGeom_destroy(query_envelope);
284
285 return LW_SUCCESS;
286}
287
288/* Union p's cluster with q's cluster, if q is not a border point of another cluster.
289 * Applicable to DBSCAN with minpoints > 1.
290 */
291static void
292union_if_available(UNIONFIND* uf, uint32_t p, uint32_t q, char* is_in_core, char* in_a_cluster)
293{
294 if (in_a_cluster[q])
295 {
296 /* Can we merge p's cluster with q's cluster? We can do this only
297 * if both p and q are considered _core_ points of their respective
298 * clusters.
299 */
300 if (is_in_core[q])
301 {
302 UF_union(uf, p, q);
303 }
304 }
305 else
306 {
307 UF_union(uf, p, q);
308 in_a_cluster[q] = LW_TRUE;
309 }
310}
311
312/* An optimized DBSCAN union for the case where min_points == 1.
313 * If min_points == 1, then we don't care how many neighbors we find; we can union clusters
314 * on the fly, as as we go through the distance calculations. This potentially allows us
315 * to avoid some distance computations altogether.
316 */
317static int
318union_dbscan_minpoints_1(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double eps, char** in_a_cluster_ret)
319{
320 uint32_t p, i;
321 struct STRTree tree;
322 struct QueryContext cxt =
323 {
324 .items_found = NULL,
325 .num_items_found = 0,
326 .items_found_size = 0
327 };
328 int success = LW_SUCCESS;
329
330 if (in_a_cluster_ret)
331 {
332 char* in_a_cluster = lwalloc(num_geoms * sizeof(char));
333 for (i = 0; i < num_geoms; i++)
334 in_a_cluster[i] = LW_TRUE;
335 *in_a_cluster_ret = in_a_cluster;
336 }
337
338 if (num_geoms <= 1)
339 return LW_SUCCESS;
340
341 tree = make_strtree((void**) geoms, num_geoms, LW_TRUE);
342 if (tree.tree == NULL)
343 {
344 destroy_strtree(&tree);
345 return LW_FAILURE;
346 }
347
348 for (p = 0; p < num_geoms; p++)
349 {
350 if (lwgeom_is_empty(geoms[p]))
351 continue;
352
353 dbscan_update_context(tree.tree, &cxt, geoms, p, eps);
354 for (i = 0; i < cxt.num_items_found; i++)
355 {
356 uint32_t q = *((uint32_t*) cxt.items_found[i]);
357
358 if (UF_find(uf, p) != UF_find(uf, q))
359 {
360 double mindist = lwgeom_mindistance2d_tolerance(geoms[p], geoms[q], eps);
361 if (mindist == FLT_MAX)
362 {
363 success = LW_FAILURE;
364 break;
365 }
366
367 if (mindist <= eps)
368 UF_union(uf, p, q);
369 }
370 }
371 }
372
373 if (cxt.items_found)
374 lwfree(cxt.items_found);
375
376 destroy_strtree(&tree);
377
378 return success;
379}
380
381static int
382union_dbscan_general(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double eps, uint32_t min_points, char** in_a_cluster_ret)
383{
384 uint32_t p, i;
385 struct STRTree tree;
386 struct QueryContext cxt =
387 {
388 .items_found = NULL,
389 .num_items_found = 0,
390 .items_found_size = 0
391 };
392 int success = LW_SUCCESS;
393 uint32_t* neighbors;
394 char* in_a_cluster;
395 char* is_in_core;
396
397 in_a_cluster = lwalloc(num_geoms * sizeof(char));
398 memset(in_a_cluster, 0, num_geoms * sizeof(char));
399
400 if (in_a_cluster_ret)
401 *in_a_cluster_ret = in_a_cluster;
402
403 /* Bail if we don't even have enough inputs to make a cluster. */
404 if (num_geoms < min_points)
405 {
406 if (!in_a_cluster_ret)
407 lwfree(in_a_cluster);
408 return LW_SUCCESS;
409 }
410
411 tree = make_strtree((void**) geoms, num_geoms, LW_TRUE);
412 if (tree.tree == NULL)
413 {
414 destroy_strtree(&tree);
415 return LW_FAILURE;
416 }
417
418 is_in_core = lwalloc(num_geoms * sizeof(char));
419 memset(is_in_core, 0, num_geoms * sizeof(char));
420 neighbors = lwalloc(min_points * sizeof(uint32_t));
421
422 for (p = 0; p < num_geoms; p++)
423 {
424 uint32_t num_neighbors = 0;
425
426 if (lwgeom_is_empty(geoms[p]))
427 continue;
428
429 dbscan_update_context(tree.tree, &cxt, geoms, p, eps);
430
431 /* We didn't find enough points to do anything, even if they are all within eps. */
432 if (cxt.num_items_found < min_points)
433 continue;
434
435 for (i = 0; i < cxt.num_items_found; i++)
436 {
437 uint32_t q = *((uint32_t*) cxt.items_found[i]);
438
439 if (num_neighbors >= min_points)
440 {
441 /* If we've already identified p as a core point, and it's already
442 * in the same cluster in q, then there's nothing to learn by
443 * computing the distance.
444 */
445 if (UF_find(uf, p) == UF_find(uf, q))
446 continue;
447
448 /* Similarly, if q is already identifed as a border point of another
449 * cluster, there's no point figuring out what the distance is.
450 */
451 if (in_a_cluster[q] && !is_in_core[q])
452 continue;
453 }
454
455 double mindist = lwgeom_mindistance2d_tolerance(geoms[p], geoms[q], eps);
456 if (mindist == FLT_MAX)
457 {
458 success = LW_FAILURE;
459 break;
460 }
461
462 if (mindist <= eps)
463 {
464 /* If we haven't hit min_points yet, we don't know if we can union p and q.
465 * Just set q aside for now.
466 */
467 if (num_neighbors < min_points)
468 {
469 neighbors[num_neighbors++] = q;
470
471 /* If we just hit min_points, we can now union all of the neighbor geometries
472 * we've been saving.
473 */
474 if (num_neighbors == min_points)
475 {
476 uint32_t j;
477 is_in_core[p] = LW_TRUE;
478 in_a_cluster[p] = LW_TRUE;
479 for (j = 0; j < num_neighbors; j++)
480 {
481 union_if_available(uf, p, neighbors[j], is_in_core, in_a_cluster);
482 }
483 }
484 }
485 else
486 {
487 /* If we're above min_points, no need to store our neighbors, just go ahead
488 * and union them now. This may allow us to cut out some distance
489 * computations.
490 */
491 union_if_available(uf, p, q, is_in_core, in_a_cluster);
492 }
493 }
494 }
495
496 if (!success)
497 break;
498 }
499
500 lwfree(neighbors);
501 lwfree(is_in_core);
502
503 /* Free in_a_cluster if we're not giving it to our caller */
504 if (!in_a_cluster_ret)
505 lwfree(in_a_cluster);
506
507 if (cxt.items_found)
508 lwfree(cxt.items_found);
509
510 destroy_strtree(&tree);
511 return success;
512}
513
514int union_dbscan(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double eps, uint32_t min_points, char** in_a_cluster_ret)
515{
516 if (min_points <= 1)
517 return union_dbscan_minpoints_1(geoms, num_geoms, uf, eps, in_a_cluster_ret);
518 else
519 return union_dbscan_general(geoms, num_geoms, uf, eps, min_points, in_a_cluster_ret);
520}
521
525int
526cluster_within_distance(LWGEOM** geoms, uint32_t num_geoms, double tolerance, LWGEOM*** clusterGeoms, uint32_t* num_clusters)
527{
528 int cluster_success;
529 UNIONFIND* uf = UF_create(num_geoms);
530
531 if (union_dbscan(geoms, num_geoms, uf, tolerance, 1, NULL) == LW_FAILURE)
532 {
533 UF_destroy(uf);
534 return LW_FAILURE;
535 }
536
537 cluster_success = combine_geometries(uf, (void**) geoms, num_geoms, (void***) clusterGeoms, num_clusters, 1);
538 UF_destroy(uf);
539 return cluster_success;
540}
541
545static int
546combine_geometries(UNIONFIND* uf, void** geoms, uint32_t num_geoms, void*** clusterGeoms, uint32_t* num_clusters, char is_lwgeom)
547{
548 size_t i, j, k;
549
550 /* Combine components of each cluster into their own GeometryCollection */
551 *num_clusters = uf->num_clusters;
552 *clusterGeoms = lwalloc(*num_clusters * sizeof(void*));
553
554 void** geoms_in_cluster = lwalloc(num_geoms * sizeof(void*));
555 uint32_t* ordered_components = UF_ordered_by_cluster(uf);
556 for (i = 0, j = 0, k = 0; i < num_geoms; i++)
557 {
558 geoms_in_cluster[j++] = geoms[ordered_components[i]];
559
560 /* Is this the last geometry in the component? */
561 if ((i == num_geoms - 1) ||
562 (UF_find(uf, ordered_components[i]) != UF_find(uf, ordered_components[i+1])))
563 {
564 if (k >= uf->num_clusters) {
565 /* Should not get here - it means that we have more clusters than uf->num_clusters thinks we should. */
566 return LW_FAILURE;
567 }
568
569 if (is_lwgeom)
570 {
571 LWGEOM** components = lwalloc(j * sizeof(LWGEOM*));
572 memcpy(components, geoms_in_cluster, j * sizeof(LWGEOM*));
573 (*clusterGeoms)[k++] = lwcollection_construct(COLLECTIONTYPE, components[0]->srid, NULL, j, (LWGEOM**) components);
574 }
575 else
576 {
577 int srid = GEOSGetSRID(((GEOSGeometry**) geoms_in_cluster)[0]);
578 GEOSGeometry* combined = GEOSGeom_createCollection(GEOS_GEOMETRYCOLLECTION, (GEOSGeometry**) geoms_in_cluster, j);
579 GEOSSetSRID(combined, srid);
580 (*clusterGeoms)[k++] = combined;
581 }
582 j = 0;
583 }
584 }
585
586 lwfree(geoms_in_cluster);
587 lwfree(ordered_components);
588
589 return LW_SUCCESS;
590}
GEOSGeometry * make_geos_segment(double x1, double y1, double x2, double y2)
GEOSGeometry * make_geos_point(double x, double y)
LWCOLLECTION * lwcollection_construct(uint8_t type, int32_t srid, GBOX *bbox, uint32_t ngeoms, LWGEOM **geoms)
#define LW_FALSE
Definition liblwgeom.h:108
#define COLLECTIONTYPE
Definition liblwgeom.h:122
void * lwrealloc(void *mem, size_t size)
Definition lwutil.c:235
#define LW_FAILURE
Definition liblwgeom.h:110
#define LW_SUCCESS
Definition liblwgeom.h:111
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition liblwgeom.h:116
void * lwalloc(size_t size)
Definition lwutil.c:227
void lwfree(void *mem)
Definition lwutil.c:242
double lwgeom_mindistance2d_tolerance(const LWGEOM *lw1, const LWGEOM *lw2, double tolerance)
Function handling min distance calculations and dwithin calculations.
Definition measures.c:207
#define LW_TRUE
Return types for functions with status returns.
Definition liblwgeom.h:107
const GBOX * lwgeom_get_bbox(const LWGEOM *lwgeom)
Get a non-empty geometry bounding box, computing and caching it if not already there.
Definition lwgeom.c:725
This library is the generic geometry handling section of PostGIS.
static int dbscan_update_context(GEOSSTRtree *tree, struct QueryContext *cxt, LWGEOM **geoms, uint32_t p, double eps)
static int union_dbscan_general(LWGEOM **geoms, uint32_t num_geoms, UNIONFIND *uf, double eps, uint32_t min_points, char **in_a_cluster_ret)
int union_dbscan(LWGEOM **geoms, uint32_t num_geoms, UNIONFIND *uf, double eps, uint32_t min_points, char **in_a_cluster_ret)
static int union_dbscan_minpoints_1(LWGEOM **geoms, uint32_t num_geoms, UNIONFIND *uf, double eps, char **in_a_cluster_ret)
static void query_accumulate(void *item, void *userdata)
static struct STRTree make_strtree(void **geoms, uint32_t num_geoms, char is_lwgeom)
Make a GEOSSTRtree that stores a pointer to a variable containing the array index of the input geoms.
static void destroy_strtree(struct STRTree *tree)
Clean up STRTree after use.
static const int STRTREE_NODE_CAPACITY
static GEOSGeometry * geos_envelope_surrogate(const LWGEOM *g)
static void union_if_available(UNIONFIND *uf, uint32_t p, uint32_t q, char *is_in_core, char *in_a_cluster)
int cluster_within_distance(LWGEOM **geoms, uint32_t num_geoms, double tolerance, LWGEOM ***clusterGeoms, uint32_t *num_clusters)
Takes an array of LWGEOM* and constructs an array of LWGEOM*, where each element in the constructed a...
int cluster_intersecting(GEOSGeometry **geoms, uint32_t num_geoms, GEOSGeometry ***clusterGeoms, uint32_t *num_clusters)
Takes an array of GEOSGeometry* and constructs an array of GEOSGeometry*, where each element in the c...
static int union_intersecting_pairs(GEOSGeometry **geoms, uint32_t num_geoms, UNIONFIND *uf)
static int combine_geometries(UNIONFIND *uf, void **geoms, uint32_t num_geoms, void ***clustersGeoms, uint32_t *num_clusters, char is_lwgeom)
Uses a UNIONFIND to identify the set with which each input geometry is associated,...
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition lwinline.h:135
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition lwinline.h:121
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
Definition lwinline.h:193
static const POINT2D * getPoint2d_cp(const POINTARRAY *pa, uint32_t n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from.
Definition lwinline.h:91
void UF_destroy(UNIONFIND *uf)
Definition lwunionfind.c:54
uint32_t UF_find(UNIONFIND *uf, uint32_t i)
Definition lwunionfind.c:62
uint32_t * UF_ordered_by_cluster(UNIONFIND *uf)
UNIONFIND * UF_create(uint32_t N)
Definition lwunionfind.c:35
void UF_union(UNIONFIND *uf, uint32_t i, uint32_t j)
Definition lwunionfind.c:85
double ymax
Definition liblwgeom.h:343
double xmax
Definition liblwgeom.h:341
double ymin
Definition liblwgeom.h:342
double xmin
Definition liblwgeom.h:340
double y
Definition liblwgeom.h:376
double x
Definition liblwgeom.h:376
uint32_t items_found_size
GEOSGeometry ** envelopes
uint32_t * geom_ids
uint32_t num_geoms
GEOSSTRtree * tree
uint32_t num_clusters
Definition lwunionfind.h:35