PostGIS 3.0.6dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches
gserialized_estimate.c
Go to the documentation of this file.
1/**********************************************************************
2 *
3 * PostGIS - Spatial Types for PostgreSQL
4 * http://postgis.net
5 *
6 * PostGIS is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * PostGIS is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18 *
19 **********************************************************************
20 *
21 * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22 *
23 **********************************************************************/
24
25
26
27/**********************************************************************
28 THEORY OF OPERATION
29
30The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32features, one for the 2D domain (and the && operator) and one for the
33ND domain (and the &&& operator).
34
35Queries in PostgreSQL call into the selectivity sub-system to find out
36the relative effectiveness of different clauses in sub-setting
37relations. Queries with constant arguments call gserialized_gist_sel,
38queries with relations on both sides call gserialized_gist_joinsel.
39
40gserialized_gist_sel sums up the values in the histogram that overlap
41the constant search box.
42
43gserialized_gist_joinsel sums up the product of the overlapping
44cells in each relation's histogram.
45
46Depending on the operator and type, the mode of selectivity calculation
47will be 2D or ND.
48
49- geometry && geometry ==> 2D
50- geometry &&& geometry ==> ND
51- geography && geography ==> ND
52
53The 2D mode is put in effect by retrieving the 2D histogram from the
54statistics cache and then allowing the generic ND calculations to
55go to work.
56
57TO DO: More testing and examination of the &&& operator and mixed
58dimensionality cases. (2D geometry) &&& (3D column), etc.
59
60**********************************************************************/
61
62#include "postgres.h"
63
64#include "access/genam.h"
65#include "access/gin.h"
66#include "access/gist.h"
67#include "access/gist_private.h"
68#include "access/gistscan.h"
69#include "utils/datum.h"
70#include "access/heapam.h"
71#include "catalog/index.h"
72#include "catalog/pg_am.h"
73#include "miscadmin.h"
74#include "storage/lmgr.h"
75#include "catalog/namespace.h"
76#include "catalog/indexing.h"
77#if PG_VERSION_NUM >= 100000
78#include "utils/regproc.h"
79#include "utils/varlena.h"
80#endif
81#include "utils/builtins.h"
82#include "utils/datum.h"
83#include "utils/snapmgr.h"
84#include "utils/fmgroids.h"
85#include "funcapi.h"
86#include "access/heapam.h"
87#include "catalog/pg_type.h"
88#include "access/relscan.h"
89
90#include "executor/spi.h"
91#include "fmgr.h"
92#include "commands/vacuum.h"
93#if PG_VERSION_NUM < 120000
94#include "nodes/relation.h"
95#else
96#include "nodes/pathnodes.h"
97#endif
98#include "parser/parsetree.h"
99#include "utils/array.h"
100#include "utils/lsyscache.h"
101#include "utils/builtins.h"
102#include "utils/syscache.h"
103#include "utils/rel.h"
104#include "utils/selfuncs.h"
105
106#include "../postgis_config.h"
107
108#include "access/htup_details.h"
109
110#include "stringbuffer.h"
111#include "liblwgeom.h"
112#include "lwgeom_pg.h" /* For debugging macros. */
113#include "gserialized_gist.h" /* For index common functions */
114
115#include <math.h>
116#if HAVE_IEEEFP_H
117#include <ieeefp.h>
118#endif
119#include <float.h>
120#include <string.h>
121#include <stdio.h>
122#include <ctype.h>
123
124
125/************************************************************************/
126
127
128/* Prototypes */
129Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
130Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
131Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
132Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
133Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
134Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
135Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
136Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
137Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
138Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
139Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
140Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
141
142/* Local prototypes */
143static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num);
144static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num);
145
146
147/* Other prototypes */
148float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
149float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
150
151
152/* Old Prototype */
153Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
154
155/*
156 * Assign a number to the n-dimensional statistics kind
157 *
158 * tgl suggested:
159 *
160 * 1-100: reserved for assignment by the core Postgres project
161 * 100-199: reserved for assignment by PostGIS
162 * 200-9999: reserved for other globally-known stats kinds
163 * 10000-32767: reserved for private site-local use
164 */
165#define STATISTIC_KIND_ND 102
166#define STATISTIC_KIND_2D 103
167#define STATISTIC_SLOT_ND 0
168#define STATISTIC_SLOT_2D 1
169
170/*
171* The SD factor restricts the size of the statistics histogram
172* based on the standard deviation of the extent of the data.
173* SDFACTOR is the number of standard deviations from the mean
174* the histogram will extend.
175*/
176#define SDFACTOR 3.25
177
183#define ND_DIMS 4
184
191#define MIN_DIMENSION_WIDTH 0.000000001
192
197#define MAX_DIMENSION_WIDTH 1.0E+20
198
202#define DEFAULT_ND_SEL 0.0001
203#define DEFAULT_ND_JOINSEL 0.001
204
208#define FALLBACK_ND_SEL 0.2
209#define FALLBACK_ND_JOINSEL 0.3
210
216typedef struct ND_BOX_T
217{
218 float4 min[ND_DIMS];
219 float4 max[ND_DIMS];
221
225typedef struct ND_IBOX_T
226{
230
231
238typedef struct ND_STATS_T
239{
240 /* Dimensionality of the histogram. */
241 float4 ndims;
242
243 /* Size of n-d histogram in each dimension. */
244 float4 size[ND_DIMS];
245
246 /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
248
249 /* How many rows in the table itself? */
251
252 /* How many rows were in the sample that built this histogram? */
254
255 /* How many not-Null/Empty features were in the sample? */
257
258 /* How many features actually got sampled in the histogram? */
260
261 /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
263
264 /* How many cells did those histogram features cover? */
265 /* Since we are pro-rating coverage, this number should */
266 /* now always equal histogram_features */
268
269 /* Variable length # of floats for histogram */
270 float4 value[1];
272
273
274
275
282static int
283gbox_ndims(const GBOX* gbox)
284{
285 int dims = 2;
286 if ( FLAGS_GET_GEODETIC(gbox->flags) )
287 return 3;
288 if ( FLAGS_GET_Z(gbox->flags) )
289 dims++;
290 if ( FLAGS_GET_M(gbox->flags) )
291 dims++;
292 return dims;
293}
294
300static int
301text_p_get_mode(const text *txt)
302{
303 int mode = 2;
304 char *modestr;
305 if (VARSIZE_ANY_EXHDR(txt) <= 0)
306 return mode;
307 modestr = (char*)VARDATA(txt);
308 if ( modestr[0] == 'N' )
309 mode = 0;
310 return mode;
311}
312
313
/**
 * qsort()-style three-way comparison of two ints.
 *
 * Returns -1, 0 or 1 when *a is respectively less than, equal to,
 * or greater than *b.
 */
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *((const int*)a);
	const int rhs = *((const int*)b);

	/* Difference of two 0/1 comparisons: no subtraction of the operands. */
	return (lhs > rhs) - (lhs < rhs);
}
330
335static int
336range_quintile(int *vals, int nvals)
337{
338 qsort(vals, nvals, sizeof(int), cmp_int);
339 return vals[4*nvals/5] - vals[nvals/5];
340}
341
/**
 * Sum the first nvals entries of an array of doubles.
 *
 * Returns 0 when nvals is not positive.
 */
static double
total_double(const double *vals, int nvals)
{
	int i;
	/* Accumulate in double: a float accumulator silently drops precision. */
	double total = 0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
356
357#if POSTGIS_DEBUG_LEVEL >= 3
358
/**
 * Sum the first nvals entries of an array of ints.
 *
 * Returns 0 when nvals is not positive.
 */
static int
total_int(const int *vals, int nvals)
{
	const int *cursor = vals;
	int remaining;
	int sum = 0;

	/* Walk the array front to back, counting down the entries left. */
	for ( remaining = nvals; remaining > 0; remaining-- )
		sum += *cursor++;

	return sum;
}
373
/**
 * Arithmetic mean of the first nvals entries of an int array.
 *
 * The sum is formed in int arithmetic by total_int() and only then
 * divided in double precision.
 */
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	const double count = (double)nvals;

	return sum / count;
}
383
/**
 * Standard deviation of the first nvals entries of an int array,
 * dividing the sum of squared deviations by nvals (not nvals - 1).
 */
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double sum_sq = 0;
	int k;

	/* Accumulate the squared distance of every entry from the mean. */
	for ( k = 0; k < nvals; k++ )
	{
		const double entry = (double)(vals[k]);
		sum_sq += (mean - entry) * (mean - entry);
	}

	return sqrt(sum_sq / nvals);
}
402#endif /* POSTGIS_DEBUG_LEVEL >= 3 */
403
408static int
409nd_stats_value_index(const ND_STATS *stats, int *indexes)
410{
411 int d;
412 int accum = 1, vdx = 0;
413
414 /* Calculate the index into the 1-d values array that the (i,j,k,l) */
415 /* n-d histogram coordinate implies. */
416 /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
417 for ( d = 0; d < (int)(stats->ndims); d++ )
418 {
419 int size = (int)(stats->size[d]);
420 if ( indexes[d] < 0 || indexes[d] >= size )
421 {
422 POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
423 return -1;
424 }
425 vdx += indexes[d] * accum;
426 accum *= size;
427 }
428 return vdx;
429}
430
434static char*
435nd_box_to_json(const ND_BOX *nd_box, int ndims)
436{
437 char *rv;
438 int i;
440
441 stringbuffer_append(sb, "{\"min\":[");
442 for ( i = 0; i < ndims; i++ )
443 {
444 if ( i ) stringbuffer_append(sb, ",");
445 stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
446 }
447 stringbuffer_append(sb, "],\"max\":[");
448 for ( i = 0; i < ndims; i++ )
449 {
450 if ( i ) stringbuffer_append(sb, ",");
451 stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
452 }
453 stringbuffer_append(sb, "]}");
454
457 return rv;
458}
459
460
465static char*
467{
468 char *json_extent, *str;
469 int d;
471 int ndims = (int)roundf(nd_stats->ndims);
472
473 stringbuffer_append(sb, "{");
474 stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
475
476 /* Size */
477 stringbuffer_append(sb, "\"size\":[");
478 for ( d = 0; d < ndims; d++ )
479 {
480 if ( d ) stringbuffer_append(sb, ",");
481 stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
482 }
483 stringbuffer_append(sb, "],");
484
485 /* Extent */
486 json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
487 stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
488 pfree(json_extent);
489
490 stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
491 stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
492 stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
493 stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
494 stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
495 stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
496 stringbuffer_append(sb, "}");
497
500 return str;
501}
502
503
509// static char*
510// nd_stats_to_grid(const ND_STATS *stats)
511// {
512// char *rv;
513// int j, k;
514// int sizex = (int)roundf(stats->size[0]);
515// int sizey = (int)roundf(stats->size[1]);
516// stringbuffer_t *sb = stringbuffer_create();
517//
518// for ( k = 0; k < sizey; k++ )
519// {
520// for ( j = 0; j < sizex; j++ )
521// {
522// stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
523// }
524// stringbuffer_append(sb, "\n");
525// }
526//
527// rv = stringbuffer_getstringcopy(sb);
528// stringbuffer_destroy(sb);
529// return rv;
530// }
531
532
534static int
535nd_box_merge(const ND_BOX *source, ND_BOX *target)
536{
537 int d;
538 for ( d = 0; d < ND_DIMS; d++ )
539 {
540 target->min[d] = Min(target->min[d], source->min[d]);
541 target->max[d] = Max(target->max[d], source->max[d]);
542 }
543 return true;
544}
545
547static int
549{
550 memset(a, 0, sizeof(ND_BOX));
551 return true;
552}
553
559static int
561{
562 int d;
563 for ( d = 0; d < ND_DIMS; d++ )
564 {
565 a->min[d] = FLT_MAX;
566 a->max[d] = -1 * FLT_MAX;
567 }
568 return true;
569}
570
572static void
573nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
574{
575 int d = 0;
576 POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
577
578 nd_box_init(nd_box);
579 nd_box->min[d] = gbox->xmin;
580 nd_box->max[d] = gbox->xmax;
581 d++;
582 nd_box->min[d] = gbox->ymin;
583 nd_box->max[d] = gbox->ymax;
584 d++;
585 if ( FLAGS_GET_GEODETIC(gbox->flags) )
586 {
587 nd_box->min[d] = gbox->zmin;
588 nd_box->max[d] = gbox->zmax;
589 return;
590 }
591 if ( FLAGS_GET_Z(gbox->flags) )
592 {
593 nd_box->min[d] = gbox->zmin;
594 nd_box->max[d] = gbox->zmax;
595 d++;
596 }
597 if ( FLAGS_GET_M(gbox->flags) )
598 {
599 nd_box->min[d] = gbox->mmin;
600 nd_box->max[d] = gbox->mmax;
601 d++;
602 }
603 return;
604}
605
609static int
610nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
611{
612 int d;
613 for ( d = 0; d < ndims; d++ )
614 {
615 if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
616 return false;
617 }
618 return true;
619}
620
624static int
625nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
626{
627 int d;
628 for ( d = 0; d < ndims; d++ )
629 {
630 if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
631 return false;
632 }
633 return true;
634}
635
640static int
641nd_box_expand(ND_BOX *nd_box, double expansion_factor)
642{
643 int d;
644 double size;
645 for ( d = 0; d < ND_DIMS; d++ )
646 {
647 size = nd_box->max[d] - nd_box->min[d];
648 /* Avoid expanding boxes that are either too wide or too narrow*/
649 if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
650 continue;
651 nd_box->min[d] -= size * expansion_factor / 2;
652 nd_box->max[d] += size * expansion_factor / 2;
653 }
654 return true;
655}
656
661static inline int
662nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
663{
664 int d;
665
666 POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
667
668 /* Initialize ibox */
669 memset(nd_ibox, 0, sizeof(ND_IBOX));
670
671 /* In each dimension... */
672 for ( d = 0; d < nd_stats->ndims; d++ )
673 {
674 double smin = nd_stats->extent.min[d];
675 double smax = nd_stats->extent.max[d];
676 double width = smax - smin;
677
678 if (width < MIN_DIMENSION_WIDTH)
679 {
680 nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
681 }
682 else
683 {
684 int size = (int)roundf(nd_stats->size[d]);
685
686 /* ... find cells the box overlaps with in this dimension */
687 nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
688 nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
689
690 POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
691 POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
692
693 /* Push any out-of range values into range */
694 nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
695 nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
696 }
697 }
698 return true;
699}
700
704static inline double
705nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
706{
707 int d;
708 bool covered = true;
709 double ivol = 1.0;
710 double vol2 = 1.0;
711 double vol1 = 1.0;
712
713 for ( d = 0 ; d < ndims; d++ )
714 {
715 if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
716 return 0.0; /* Disjoint */
717
718 if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
719 covered = false;
720 }
721
722 if ( covered )
723 return 1.0;
724
725 for ( d = 0; d < ndims; d++ )
726 {
727 double width1 = b1->max[d] - b1->min[d];
728 double width2 = b2->max[d] - b2->min[d];
729 double imin, imax, iwidth;
730
731 vol1 *= width1;
732 vol2 *= width2;
733
734 imin = Max(b1->min[d], b2->min[d]);
735 imax = Min(b1->max[d], b2->max[d]);
736 iwidth = imax - imin;
737 iwidth = Max(0.0, iwidth);
738
739 ivol *= iwidth;
740 }
741
742 if ( vol2 == 0.0 )
743 return vol2;
744
745 return ivol / vol2;
746}
747
748/* How many bins shall we use in figuring out the distribution? */
749#define NUM_BINS 50
750
766static int
767nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
768{
769 int d, i, k, range;
770 int counts[NUM_BINS];
771 double smin, smax; /* Spatial min, spatial max */
772 double swidth; /* Spatial width of dimension */
773#if POSTGIS_DEBUG_LEVEL >= 3
774 double average, sdev, sdev_ratio;
775#endif
776 int bmin, bmax; /* Bin min, bin max */
777 const ND_BOX *ndb;
778
779 /* For each dimension... */
780 for ( d = 0; d < ndims; d++ )
781 {
782 /* Initialize counts for this dimension */
783 memset(counts, 0, sizeof(counts));
784
785 smin = extent->min[d];
786 smax = extent->max[d];
787 swidth = smax - smin;
788
789 /* Don't try and calculate distribution of overly narrow */
790 /* or overly wide dimensions. Here we're being pretty geographical, */
791 /* expecting "normal" planar or geographic coordinates. */
792 /* Otherwise we have to "handle" +/- Inf bounded features and */
793 /* the assumptions needed for that are as bad as this hack. */
794 if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
795 {
796 distribution[d] = 0;
797 continue;
798 }
799
800 /* Sum up the overlaps of each feature with the dimensional bins */
801 for ( i = 0; i < num_boxes; i++ )
802 {
803 double minoffset, maxoffset;
804
805 /* Skip null entries */
806 ndb = nd_boxes[i];
807 if ( ! ndb ) continue;
808
809 /* Where does box fall relative to the working range */
810 minoffset = ndb->min[d] - smin;
811 maxoffset = ndb->max[d] - smin;
812
813 /* Skip boxes that our outside our working range */
814 if ( minoffset < 0 || minoffset > swidth ||
815 maxoffset < 0 || maxoffset > swidth )
816 {
817 continue;
818 }
819
820 /* What bins does this range correspond to? */
821 bmin = floor(NUM_BINS * minoffset / swidth);
822 bmax = floor(NUM_BINS * maxoffset / swidth);
823
824 /* Should only happen when maxoffset==swidth */
825 if (bmax >= NUM_BINS)
826 bmax = NUM_BINS-1;
827
828 POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
829
830 /* Increment the counts in all the bins this feature overlaps */
831 for ( k = bmin; k <= bmax; k++ )
832 {
833 counts[k] += 1;
834 }
835
836 }
837
838 /* How dispersed is the distribution of features across bins? */
839 range = range_quintile(counts, NUM_BINS);
840
841#if POSTGIS_DEBUG_LEVEL >= 3
842 average = avg(counts, NUM_BINS);
843 sdev = stddev(counts, NUM_BINS);
844 sdev_ratio = sdev/average;
845
846 POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
847 POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
848 POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
849 POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
850#endif
851
852 distribution[d] = range;
853 }
854
855 return true;
856}
857
863static inline int
864nd_increment(ND_IBOX *ibox, int ndims, int *counter)
865{
866 int d = 0;
867
868 while ( d < ndims )
869 {
870 if ( counter[d] < ibox->max[d] )
871 {
872 counter[d] += 1;
873 break;
874 }
875 counter[d] = ibox->min[d];
876 d++;
877 }
878 /* That's it, cannot increment any more! */
879 if ( d == ndims )
880 return false;
881
882 /* Increment complete! */
883 return true;
884}
885
886static ND_STATS*
887pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
888{
889 int stats_kind = STATISTIC_KIND_ND;
890 int rv;
891 ND_STATS *nd_stats;
892
893 /* If we're in 2D mode, set the kind appropriately */
894 if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
895
896 /* Then read the geom status histogram from that */
897
898#if POSTGIS_PGSQL_VERSION < 100
899 {
900 float4 *floatptr;
901 int nvalues;
902
903 rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
904 NULL, NULL, NULL, &floatptr, &nvalues);
905
906 if ( ! rv ) {
907 POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
908 return NULL;
909 }
910
911 /* Clone the stats here so we can release the attstatsslot immediately */
912 nd_stats = palloc(sizeof(float) * nvalues);
913 memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
914
915 /* Clean up */
916 free_attstatsslot(0, NULL, 0, floatptr, nvalues);
917 }
918#else /* PostgreSQL 10 or higher */
919 {
920 AttStatsSlot sslot;
921 rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
922 ATTSTATSSLOT_NUMBERS);
923 if ( ! rv ) {
924 POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
925 return NULL;
926 }
927
928 /* Clone the stats here so we can release the attstatsslot immediately */
929 nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
930 memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
931
932 free_attstatsslot(&sslot);
933 }
934#endif
935
936 return nd_stats;
937}
938
943static ND_STATS*
944pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
945{
946 HeapTuple stats_tuple = NULL;
947 ND_STATS *nd_stats;
948
949 /* First pull the stats tuple for the whole tree */
950 if ( ! only_parent )
951 {
952 POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
953 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
954 if ( stats_tuple )
955 POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
956 }
957 /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
958 if ( only_parent || ! stats_tuple )
959 {
960 POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
961 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
962 if ( stats_tuple )
963 POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
964 }
965 if ( ! stats_tuple )
966 {
967 POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
968 return NULL;
969 }
970
971 nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
972 ReleaseSysCache(stats_tuple);
973 if ( ! nd_stats )
974 {
975 POSTGIS_DEBUGF(2,
976 "histogram for attribute %d of table \"%s\" does not exist?",
977 att_num, get_rel_name(table_oid));
978 }
979
980 return nd_stats;
981}
982
991static ND_STATS*
992pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
993{
994 const char *att_name = text_to_cstring(att_text);
995 AttrNumber att_num;
996
997 /* We know the name? Look up the num */
998 if ( att_text )
999 {
1000 /* Get the attribute number */
1001 att_num = get_attnum(table_oid, att_name);
1002 if ( ! att_num ) {
1003 elog(ERROR, "attribute \"%s\" does not exist", att_name);
1004 return NULL;
1005 }
1006 }
1007 else
1008 {
1009 elog(ERROR, "attribute name is null");
1010 return NULL;
1011 }
1012
1013 return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1014}
1015
1029static float8
1031{
1032 int ncells1, ncells2;
1033 int ndims1, ndims2, ndims;
1034 double ntuples_max;
1035 double ntuples_not_null1, ntuples_not_null2;
1036
1037 ND_BOX extent1, extent2;
1038 ND_IBOX ibox1, ibox2;
1039 int at1[ND_DIMS];
1040 int at2[ND_DIMS];
1041 double min1[ND_DIMS];
1042 double width1[ND_DIMS];
1043 double cellsize1[ND_DIMS];
1044 int size2[ND_DIMS];
1045 double min2[ND_DIMS];
1046 double width2[ND_DIMS];
1047 double cellsize2[ND_DIMS];
1048 int size1[ND_DIMS];
1049 int d;
1050 double val = 0;
1051 float8 selectivity;
1052
1053 /* Drop out on null inputs */
1054 if ( ! ( s1 && s2 ) )
1055 {
1056 elog(NOTICE, " estimate_join_selectivity called with null inputs");
1057 return FALLBACK_ND_SEL;
1058 }
1059
1060 /* We need to know how many cells each side has... */
1061 ncells1 = (int)roundf(s1->histogram_cells);
1062 ncells2 = (int)roundf(s2->histogram_cells);
1063
1064 /* ...so that we can drive the summation loop with the smaller histogram. */
1065 if ( ncells1 > ncells2 )
1066 {
1067 const ND_STATS *stats_tmp = s1;
1068 s1 = s2;
1069 s2 = stats_tmp;
1070 }
1071
1072 POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1073 POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1074
1075 /* Re-read that info after the swap */
1076 ncells1 = (int)roundf(s1->histogram_cells);
1077 ncells2 = (int)roundf(s2->histogram_cells);
1078
1079 /* Q: What's the largest possible join size these relations can create? */
1080 /* A: The product of the # of non-null rows in each relation. */
1081 ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1082 ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1083 ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1084
1085 /* Get the ndims as ints */
1086 ndims1 = (int)roundf(s1->ndims);
1087 ndims2 = (int)roundf(s2->ndims);
1088 ndims = Max(ndims1, ndims2);
1089
1090 /* Get the extents */
1091 extent1 = s1->extent;
1092 extent2 = s2->extent;
1093
1094 /* If relation stats do not intersect, join is very very selective. */
1095 if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1096 {
1097 POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1098 PG_RETURN_FLOAT8(0.0);
1099 }
1100
1101 /*
1102 * First find the index range of the part of the smaller
1103 * histogram that overlaps the larger one.
1104 */
1105 if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1106 {
1107 POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1108 PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1109 }
1110
1111 /* Initialize counters / constants on s1 */
1112 for ( d = 0; d < ndims1; d++ )
1113 {
1114 at1[d] = ibox1.min[d];
1115 min1[d] = s1->extent.min[d];
1116 width1[d] = s1->extent.max[d] - s1->extent.min[d];
1117 size1[d] = (int)roundf(s1->size[d]);
1118 cellsize1[d] = width1[d] / size1[d];
1119 }
1120
1121 /* Initialize counters / constants on s2 */
1122 for ( d = 0; d < ndims2; d++ )
1123 {
1124 min2[d] = s2->extent.min[d];
1125 width2[d] = s2->extent.max[d] - s2->extent.min[d];
1126 size2[d] = (int)roundf(s2->size[d]);
1127 cellsize2[d] = width2[d] / size2[d];
1128 }
1129
1130 /* For each affected cell of s1... */
1131 do
1132 {
1133 double val1;
1134 /* Construct the bounds of this cell */
1135 ND_BOX nd_cell1;
1136 nd_box_init(&nd_cell1);
1137 for ( d = 0; d < ndims1; d++ )
1138 {
1139 nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1140 nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1141 }
1142
1143 /* Find the cells of s2 that cell1 overlaps.. */
1144 nd_box_overlap(s2, &nd_cell1, &ibox2);
1145
1146 /* Initialize counter */
1147 for ( d = 0; d < ndims2; d++ )
1148 {
1149 at2[d] = ibox2.min[d];
1150 }
1151
1152 POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1153
1154 /* Get the value at this cell */
1155 val1 = s1->value[nd_stats_value_index(s1, at1)];
1156
1157 /* For each overlapped cell of s2... */
1158 do
1159 {
1160 double ratio2;
1161 double val2;
1162
1163 /* Construct the bounds of this cell */
1164 ND_BOX nd_cell2;
1165 nd_box_init(&nd_cell2);
1166 for ( d = 0; d < ndims2; d++ )
1167 {
1168 nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1169 nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1170 }
1171
1172 POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1173
1174 /* Calculate overlap ratio of the cells */
1175 ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1176
1177 /* Multiply the cell counts, scaled by overlap ratio */
1178 val2 = s2->value[nd_stats_value_index(s2, at2)];
1179 POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1180 val += val1 * (val2 * ratio2);
1181 }
1182 while ( nd_increment(&ibox2, ndims2, at2) );
1183
1184 }
1185 while( nd_increment(&ibox1, ndims1, at1) );
1186
1187 POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1188
1189 /*
1190 * In order to compare our total cell count "val" to the
1191 * ntuples_max, we need to scale val up to reflect a full
1192 * table estimate. So, multiply by ratio of table size to
1193 * sample size.
1194 */
1195 val *= (s1->table_features / s1->sample_features);
1196 val *= (s2->table_features / s2->sample_features);
1197
1198 POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1199
1200 /*
1201 * Because the cell counts are over-determined due to
1202 * double counting of features that overlap multiple cells
1203 * (see the compute_gserialized_stats routine)
1204 * we also have to scale our cell count "val" *down*
1205 * to adjust for the double counting.
1206 */
1207// val /= (s1->cells_covered / s1->histogram_features);
1208// val /= (s2->cells_covered / s2->histogram_features);
1209
1210 /*
1211 * Finally, the selectivity is the estimated number of
1212 * rows to be returned divided by the maximum possible
1213 * number of rows that can be returned.
1214 */
1215 selectivity = val / ntuples_max;
1216
1217 /* Guard against over-estimates and crazy numbers :) */
1218 if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1219 {
1220 selectivity = DEFAULT_ND_JOINSEL;
1221 }
1222 else if ( selectivity > 1.0 )
1223 {
1224 selectivity = 1.0;
1225 }
1226
1227 return selectivity;
1228}
1229
1235Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1236{
1237 PG_RETURN_DATUM(DirectFunctionCall5(
1239 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1240 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1241 Int32GetDatum(0) /* ND mode */
1242 ));
1243}
1244
1250Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1251{
1252 PG_RETURN_DATUM(DirectFunctionCall5(
1254 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1255 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1256 Int32GetDatum(2) /* 2D mode */
1257 ));
1258}
1259
1260double
1261gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
1262{
1263 float8 selectivity;
1264 Oid relid1, relid2;
1265 ND_STATS *stats1, *stats2;
1266 Node *arg1 = (Node*) linitial(args);
1267 Node *arg2 = (Node*) lsecond(args);
1268 Var *var1 = (Var*) arg1;
1269 Var *var2 = (Var*) arg2;
1270
1271 POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1272
1273 /* We only do column joins right now, no functional joins */
1274 /* TODO: handle g1 && ST_Expand(g2) */
1275 if (!IsA(arg1, Var) || !IsA(arg2, Var))
1276 {
1277 POSTGIS_DEBUGF(1, "%s called with arguments that are not column references", __func__);
1278 return DEFAULT_ND_JOINSEL;
1279 }
1280
1281 /* What are the Oids of our tables/relations? */
1282 relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1283 relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1284
1285 /* Pull the stats from the stats system. */
1286 stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1287 stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1288
1289 /* If we can't get stats, we have to stop here! */
1290 if (!stats1)
1291 {
1292 POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1293 return DEFAULT_ND_JOINSEL;
1294 }
1295 else if (!stats2)
1296 {
1297 POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1298 return DEFAULT_ND_JOINSEL;
1299 }
1300
1301 selectivity = estimate_join_selectivity(stats1, stats2);
1302 POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1303 pfree(stats1);
1304 pfree(stats2);
1305 return selectivity;
1306}
1307
1317Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1318{
1319 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1320 /* Oid operator = PG_GETARG_OID(1); */
1321 List *args = (List *) PG_GETARG_POINTER(2);
1322 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1323 int mode = PG_GETARG_INT32(4);
1324
1325 POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1326
1327 /* Check length of args and punt on > 2 */
1328 if (list_length(args) != 2)
1329 {
1330 POSTGIS_DEBUGF(2, "%s: got nargs == %d", __func__, list_length(args));
1331 PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1332 }
1333
1334 /* Only respond to an inner join/unknown context join */
1335 if (jointype != JOIN_INNER)
1336 {
1337 POSTGIS_DEBUGF(1, "%s: jointype %d not supported", __func__, jointype);
1338 PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1339 }
1340
1341 PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
1342}
1343
1344
1345
1346
1365static void
1366compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1367 int sample_rows, double total_rows, int mode)
1368{
1369 MemoryContext old_context;
1370 int d, i; /* Counters */
1371 int notnull_cnt = 0; /* # not null rows in the sample */
1372 int null_cnt = 0; /* # null rows in the sample */
1373 int histogram_features = 0; /* # rows that actually got counted in the histogram */
1374
1375 ND_STATS *nd_stats; /* Our histogram */
1376 size_t nd_stats_size; /* Size to allocate */
1377
1378 double total_width = 0; /* # of bytes used by sample */
1379 double total_sample_volume = 0; /* Area/volume coverage of the sample */
1380 double total_cell_count = 0; /* # of cells in histogram affected by sample */
1381
1382 ND_BOX sum; /* Sum of extents of sample boxes */
1383 ND_BOX avg; /* Avg of extents of sample boxes */
1384 ND_BOX stddev; /* StdDev of extents of sample boxes */
1385
1386 const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1387 ND_BOX sample_extent; /* Extent of the raw sample */
1388 int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1389 ND_BOX histo_extent; /* Spatial extent of the histogram */
1390 ND_BOX histo_extent_new; /* Temporary variable */
1391 int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1392 int histo_cells; /* Number of cells in the histogram */
1393 int histo_cells_new = 1; /* Temporary variable */
1394
1395 int ndims = 2; /* Dimensionality of the sample */
1396 int histo_ndims = 0; /* Dimensionality of the histogram */
1397 double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1398 double total_distribution; /* Total of sample_distribution */
1399
1400 int stats_slot; /* What slot is this data going into? (2D vs ND) */
1401 int stats_kind; /* And this is what? (2D vs ND) */
1402
1403 /* Initialize sum and stddev */
1404 nd_box_init(&sum);
1405 nd_box_init(&stddev);
1406 nd_box_init(&avg);
1407 nd_box_init(&histo_extent);
1408 nd_box_init(&histo_extent_new);
1409
1410 /*
1411 * This is where gserialized_analyze_nd
1412 * should put its' custom parameters.
1413 */
1414 /* void *mystats = stats->extra_data; */
1415
1416 POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1417 POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1418 POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1419
1420 /*
1421 * We might need less space, but don't think
1422 * its worth saving...
1423 */
1424 sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1425
1426 /*
1427 * First scan:
1428 * o read boxes
1429 * o find dimensionality of the sample
1430 * o find extent of the sample
1431 * o count null-infinite/not-null values
1432 * o compute total_width
1433 * o compute total features's box area (for avgFeatureArea)
1434 * o sum features box coordinates (for standard deviation)
1435 */
1436 for ( i = 0; i < sample_rows; i++ )
1437 {
1438 Datum datum;
1439 GSERIALIZED *geom;
1440 GBOX gbox;
1441 ND_BOX *nd_box;
1442 bool is_null;
1443 bool is_copy;
1444
1445 datum = fetchfunc(stats, i, &is_null);
1446
1447 /* Skip all NULLs. */
1448 if ( is_null )
1449 {
1450 POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1451 null_cnt++;
1452 continue;
1453 }
1454
1455 /* Read the bounds from the gserialized. */
1456 geom = (GSERIALIZED *)PG_DETOAST_DATUM(datum);
1457 is_copy = VARATT_IS_EXTENDED(datum);
1458 if ( LW_FAILURE == gserialized_get_gbox_p(geom, &gbox) )
1459 {
1460 /* Skip empties too. */
1461 POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1462 continue;
1463 }
1464
1465 /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1466 if ( mode == 2 )
1467 gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1468
1469 /* Check bounds for validity (finite and not NaN) */
1470 if ( ! gbox_is_valid(&gbox) )
1471 {
1472 POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1473 continue;
1474 }
1475
1476 /*
1477 * In N-D mode, set the ndims to the maximum dimensionality found
1478 * in the sample. Otherwise, leave at ndims == 2.
1479 */
1480 if ( mode != 2 )
1481 ndims = Max(gbox_ndims(&gbox), ndims);
1482
1483 /* Convert gbox to n-d box */
1484 nd_box = palloc(sizeof(ND_BOX));
1485 nd_box_from_gbox(&gbox, nd_box);
1486
1487 /* Cache n-d bounding box */
1488 sample_boxes[notnull_cnt] = nd_box;
1489
1490 /* Initialize sample extent before merging first entry */
1491 if ( ! notnull_cnt )
1492 nd_box_init_bounds(&sample_extent);
1493
1494 /* Add current sample to overall sample extent */
1495 nd_box_merge(nd_box, &sample_extent);
1496
1497 /* How many bytes does this sample use? */
1498 total_width += VARSIZE(geom);
1499
1500 /* Add bounds coordinates to sums for stddev calculation */
1501 for ( d = 0; d < ndims; d++ )
1502 {
1503 sum.min[d] += nd_box->min[d];
1504 sum.max[d] += nd_box->max[d];
1505 }
1506
1507 /* Increment our "good feature" count */
1508 notnull_cnt++;
1509
1510 /* Free up memory if our sample geometry was copied */
1511 if ( is_copy )
1512 pfree(geom);
1513
1514 /* Give backend a chance of interrupting us */
1515 vacuum_delay_point();
1516 }
1517
1518 /*
1519 * We'll build a histogram having stats->attr->attstattarget cells
1520 * on each side, within reason... we'll use ndims*10000 as the
1521 * maximum number of cells.
1522 * Also, if we're sampling a relatively small table, we'll try to ensure that
1523 * we have an average of 5 features for each cell so the histogram isn't
1524 * so sparse.
1525 */
1526 histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1527 histo_cells_target = Min(histo_cells_target, ndims * 10000);
1528 histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1529 POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1530 POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1531
1532 /* If there's no useful features, we can't work out stats */
1533 if ( ! notnull_cnt )
1534 {
1535 Oid relation_oid = stats->attr->attrelid;
1536 char *relation_name = get_rel_name(relation_oid);
1537 elog(NOTICE,
1538 "PostGIS: Unable to compute statistics for \"%s.%s\": No non-null/empty features",
1539 relation_name ? relation_name : "(NULL)",
1540 stats->attr->attname.data);
1541 stats->stats_valid = false;
1542 return;
1543 }
1544
1545 POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1546
1547 /*
1548 * Second scan:
1549 * o compute standard deviation
1550 */
1551 for ( d = 0; d < ndims; d++ )
1552 {
1553 /* Calculate average bounds values */
1554 avg.min[d] = sum.min[d] / notnull_cnt;
1555 avg.max[d] = sum.max[d] / notnull_cnt;
1556
1557 /* Calculate standard deviation for this dimension bounds */
1558 for ( i = 0; i < notnull_cnt; i++ )
1559 {
1560 const ND_BOX *ndb = sample_boxes[i];
1561 stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1562 stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1563 }
1564 stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1565 stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1566
1567 /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1568 histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1569 histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1570 }
1571
1572 /*
1573 * Third scan:
1574 * o skip hard deviants
1575 * o compute new histogram box
1576 */
1577 nd_box_init_bounds(&histo_extent_new);
1578 for ( i = 0; i < notnull_cnt; i++ )
1579 {
1580 const ND_BOX *ndb = sample_boxes[i];
1581 /* Skip any hard deviants (boxes entirely outside our histo_extent */
1582 if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1583 {
1584 POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1585 sample_boxes[i] = NULL;
1586 continue;
1587 }
1588 /* Expand our new box to fit all the other features. */
1589 nd_box_merge(ndb, &histo_extent_new);
1590 }
1591 /*
1592 * Expand the box slightly (1%) to avoid edge effects
1593 * with objects that are on the boundary
1594 */
1595 nd_box_expand(&histo_extent_new, 0.01);
1596 histo_extent = histo_extent_new;
1597
1598 /*
1599 * How should we allocate our histogram cells to the
1600 * different dimensions? We can't do it by raw dimensional width,
1601 * because in x/y/z space, the z can have different units
1602 * from the x/y. Similarly for x/y/t space.
1603 * So, we instead calculate how much features overlap
1604 * each other in their dimension to figure out which
1605 * dimensions have useful selectivity characteristics (more
1606 * variability in density) and therefor would find
1607 * more cells useful (to distinguish between dense places and
1608 * homogeneous places).
1609 */
1610 nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1611 sample_distribution);
1612
1613 /*
1614 * The sample_distribution array now tells us how spread out the
1615 * data is in each dimension, so we use that data to allocate
1616 * the histogram cells we have available.
1617 * At this point, histo_cells_target is the approximate target number
1618 * of cells.
1619 */
1620
1621 /*
1622 * Some dimensions have basically a uniform distribution, we want
1623 * to allocate no cells to those dimensions, only to dimensions
1624 * that have some interesting differences in data distribution.
1625 * Here we count up the number of interesting dimensions
1626 */
1627 for ( d = 0; d < ndims; d++ )
1628 {
1629 if ( sample_distribution[d] > 0 )
1630 histo_ndims++;
1631 }
1632
1633 if ( histo_ndims == 0 )
1634 {
1635 /* Special case: all our dimensions had low variability! */
1636 /* We just divide the cells up evenly */
1637 POSTGIS_DEBUG(3, " special case: no axes have variability");
1638 histo_cells_new = 1;
1639 for ( d = 0; d < ndims; d++ )
1640 {
1641 histo_size[d] = (int)pow((double)histo_cells_target, 1/(double)ndims);
1642 if ( ! histo_size[d] )
1643 histo_size[d] = 1;
1644 POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1645 histo_cells_new *= histo_size[d];
1646 }
1647 POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1648 }
1649 else
1650 {
1651 /*
1652 * We're going to express the amount of variability in each dimension
1653 * as a proportion of the total variability and allocate cells in that
1654 * dimension relative to that proportion.
1655 */
1656 POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1657 total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1658 POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1659 histo_cells_new = 1; /* For the number of cells in the final histogram */
1660 for ( d = 0; d < ndims; d++ )
1661 {
1662 if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1663 {
1664 histo_size[d] = 1;
1665 }
1666 else /* Interesting dimension */
1667 {
1668 /* How does this dims variability compare to the total? */
1669 float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1670 /*
1671 * Scale the target cells number by the # of dims and ratio,
1672 * then take the appropriate root to get the estimated number of cells
1673 * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1674 */
1675 histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1676 /* If something goes awry, just give this dim one slot */
1677 if ( ! histo_size[d] )
1678 histo_size[d] = 1;
1679 }
1680 histo_cells_new *= histo_size[d];
1681 }
1682 POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1683 }
1684
1685 /* Update histo_cells to the actual number of cells we need to allocate */
1686 histo_cells = histo_cells_new;
1687 POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1688
1689 /*
1690 * Create the histogram (ND_STATS) in the stats memory context
1691 */
1692 old_context = MemoryContextSwitchTo(stats->anl_context);
1693 nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1694 nd_stats = palloc(nd_stats_size);
1695 memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1696 MemoryContextSwitchTo(old_context);
1697
1698 /* Initialize the #ND_STATS objects */
1699 nd_stats->ndims = ndims;
1700 nd_stats->extent = histo_extent;
1701 nd_stats->sample_features = sample_rows;
1702 nd_stats->table_features = total_rows;
1703 nd_stats->not_null_features = notnull_cnt;
1704 /* Copy in the histogram dimensions */
1705 for ( d = 0; d < ndims; d++ )
1706 nd_stats->size[d] = histo_size[d];
1707
1708 /*
1709 * Fourth scan:
1710 * o fill histogram values with the proportion of
1711 * features' bbox overlaps: a feature's bvol
1712 * can fully overlap (1) or partially overlap
1713 * (fraction of 1) an histogram cell.
1714 *
1715 * Note that we are filling each cell with the "portion of
1716 * the feature's box that overlaps the cell". So, if we sum
1717 * up the values in the histogram, we could get the
1718 * histogram feature count.
1719 *
1720 */
1721 for ( i = 0; i < notnull_cnt; i++ )
1722 {
1723 const ND_BOX *nd_box;
1724 ND_IBOX nd_ibox;
1725 int at[ND_DIMS];
1726 int d;
1727 double num_cells = 0;
1728 double tmp_volume = 1.0;
1729 double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1730 double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1731 double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1732
1733 nd_box = sample_boxes[i];
1734 if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1735
1736 /* Give backend a chance of interrupting us */
1737 vacuum_delay_point();
1738
1739 /* Find the cells that overlap with this box and put them into the ND_IBOX */
1740 nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1741 memset(at, 0, sizeof(int)*ND_DIMS);
1742
1743 POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1744 nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1745 nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1746
1747 for ( d = 0; d < nd_stats->ndims; d++ )
1748 {
1749 /* Initialize the starting values */
1750 at[d] = nd_ibox.min[d];
1751 min[d] = nd_stats->extent.min[d];
1752 max[d] = nd_stats->extent.max[d];
1753 cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1754
1755 /* What's the volume (area) of this feature's box? */
1756 tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
1757 }
1758
1759 /* Add feature volume (area) to our total */
1760 total_sample_volume += tmp_volume;
1761
1762 /*
1763 * Move through all the overlaped histogram cells values and
1764 * add the box overlap proportion to them.
1765 */
1766 do
1767 {
1768 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1769 double ratio;
1770 /* Create a box for this histogram cell */
1771 for ( d = 0; d < nd_stats->ndims; d++ )
1772 {
1773 nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1774 nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1775 }
1776
1777 /*
1778 * If a feature box is completely inside one cell the ratio will be
1779 * 1.0. If a feature box is 50% in two cells, each cell will get
1780 * 0.5 added on.
1781 */
1782 ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1783 nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1784 num_cells += ratio;
1785 POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1786 POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1787 }
1788 while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1789
1790 /* Keep track of overall number of overlaps counted */
1791 total_cell_count += num_cells;
1792 /* How many features have we added to this histogram? */
1793 histogram_features++;
1794 }
1795
1796 POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1797 POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1798 POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1799
1800 /* Error out if we got no sample information */
1801 if ( ! histogram_features )
1802 {
1803 POSTGIS_DEBUG(3, " no stats have been gathered");
1804 elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1805 stats->stats_valid = false;
1806 return;
1807 }
1808
1809 nd_stats->histogram_features = histogram_features;
1810 nd_stats->histogram_cells = histo_cells;
1811 nd_stats->cells_covered = total_cell_count;
1812
1813 /* Put this histogram data into the right slot/kind */
1814 if ( mode == 2 )
1815 {
1816 stats_slot = STATISTIC_SLOT_2D;
1817 stats_kind = STATISTIC_KIND_2D;
1818 }
1819 else
1820 {
1821 stats_slot = STATISTIC_SLOT_ND;
1822 stats_kind = STATISTIC_KIND_ND;
1823 }
1824
1825 /* Write the statistics data */
1826 stats->stakind[stats_slot] = stats_kind;
1827 stats->staop[stats_slot] = InvalidOid;
1828 stats->stanumbers[stats_slot] = (float4*)nd_stats;
1829 stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1830 stats->stanullfrac = (float4)null_cnt/sample_rows;
1831 stats->stawidth = total_width/notnull_cnt;
1832 stats->stadistinct = -1.0;
1833 stats->stats_valid = true;
1834
1835 POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1836 POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1837 POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1838 POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1839 POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1840 POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1841 POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1842 /*
1843 POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1844 */
1845
1846 return;
1847}
1848
1849
1867static void
1868compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1869 int sample_rows, double total_rows)
1870{
1871 /* 2D Mode */
1872 compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1873
1874 if (stats->stats_valid)
1875 {
1876 /* ND Mode: Only computed if 2D was computed too (not NULL and valid) */
1877 compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1878 }
1879}
1880
1881
1910Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1911{
1912 VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1913 Form_pg_attribute attr = stats->attr;
1914
1915 POSTGIS_DEBUG(2, "gserialized_analyze_nd called");
1916
1917 /* If the attstattarget column is negative, use the default value */
1918 /* NB: it is okay to scribble on stats->attr since it's a copy */
1919 if (attr->attstattarget < 0)
1920 attr->attstattarget = default_statistics_target;
1921
1922 POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
1923
1924 /* Setup the minimum rows and the algorithm function.
1925 * 300 matches the default value set in
1926 * postgresql/src/backend/commands/analyze.c */
1927 stats->minrows = 300 * stats->attr->attstattarget;
1928 stats->compute_stats = compute_gserialized_stats;
1929
1930 POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
1931
1932 /* Indicate we are done successfully */
1933 PG_RETURN_BOOL(true);
1934}
1935
1948static float8
1949estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1950{
1951 int d; /* counter */
1952 float8 selectivity;
1953 ND_BOX nd_box;
1954 ND_IBOX nd_ibox;
1955 int at[ND_DIMS];
1956 double cell_size[ND_DIMS];
1957 double min[ND_DIMS];
1958 double max[ND_DIMS];
1959 double total_count = 0.0;
1960 int ndims_max;
1961
1962 /* Calculate the overlap of the box on the histogram */
1963 if ( ! nd_stats )
1964 {
1965 elog(NOTICE, " estimate_selectivity called with null input");
1966 return FALLBACK_ND_SEL;
1967 }
1968
1969 ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1970
1971 /* Initialize nd_box. */
1972 nd_box_from_gbox(box, &nd_box);
1973
1974 /*
1975 * To return 2D stats on an ND sample, we need to make the
1976 * 2D box cover the full range of the other dimensions in the
1977 * histogram.
1978 */
1979 POSTGIS_DEBUGF(3, " mode: %d", mode);
1980 if ( mode == 2 )
1981 {
1982 POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1983 ndims_max = 2;
1984 }
1985
1986 POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1987 POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1988
1989 /*
1990 * Search box completely misses histogram extent?
1991 * We have to intersect in all N dimensions or else we have
1992 * zero interaction under the &&& operator. It's important
1993 * to short circuit in this case, as some of the tests below
1994 * will return junk results when run on non-intersecting inputs.
1995 */
1996 if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1997 {
1998 POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1999 return 0.0;
2000 }
2001
2002 /* Search box completely contains histogram extent! */
2003 if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
2004 {
2005 POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
2006 return 1.0;
2007 }
2008
2009 /* Calculate the overlap of the box on the histogram */
2010 if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
2011 {
2012 POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2013 return FALLBACK_ND_SEL;
2014 }
2015
2016 /* Work out some measurements of the histogram */
2017 for ( d = 0; d < nd_stats->ndims; d++ )
2018 {
2019 /* Cell size in each dim */
2020 min[d] = nd_stats->extent.min[d];
2021 max[d] = nd_stats->extent.max[d];
2022 cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2023 POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2024
2025 /* Initialize the counter */
2026 at[d] = nd_ibox.min[d];
2027 }
2028
2029 /* Move through all the overlap values and sum them */
2030 do
2031 {
2032 float cell_count, ratio;
2033 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2034
2035 /* We have to pro-rate partially overlapped cells. */
2036 for ( d = 0; d < nd_stats->ndims; d++ )
2037 {
2038 nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2039 nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2040 }
2041
2042 ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2043 cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2044
2045 /* Add the pro-rated count for this cell to the overall total */
2046 total_count += cell_count * ratio;
2047 POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2048 }
2049 while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2050
2051 /* Scale by the number of features in our histogram to get the proportion */
2052 selectivity = total_count / nd_stats->histogram_features;
2053
2054 POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2055 POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2056 POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2057 POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2058
2059 /* Prevent rounding overflows */
2060 if (selectivity > 1.0) selectivity = 1.0;
2061 else if (selectivity < 0.0) selectivity = 0.0;
2062
2063 return selectivity;
2064}
2065
2066
2067
2073Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2074{
2075 Oid table_oid = PG_GETARG_OID(0);
2076 text *att_text = PG_GETARG_TEXT_P(1);
2077 ND_STATS *nd_stats;
2078 char *str;
2079 text *json;
2080 int mode = 2; /* default to 2D mode */
2081 bool only_parent = false; /* default to whole tree stats */
2082
2083 /* Check if we've been asked to not use 2d mode */
2084 if ( ! PG_ARGISNULL(2) )
2085 mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2086
2087 /* Retrieve the stats object */
2088 nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2089 if ( ! nd_stats )
2090 elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2091
2092 /* Convert to JSON */
2093 str = nd_stats_to_json(nd_stats);
2094 json = cstring_to_text(str);
2095 pfree(str);
2096 pfree(nd_stats);
2097 PG_RETURN_TEXT_P(json);
2098}
2099
2100
2106Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2107{
2108 Oid table_oid = PG_GETARG_OID(0);
2109 text *att_text = PG_GETARG_TEXT_P(1);
2110 Datum geom_datum = PG_GETARG_DATUM(2);
2111 GBOX gbox; /* search box read from gserialized datum */
2112 float8 selectivity = 0;
2113 ND_STATS *nd_stats;
2114 int mode = 2; /* 2D mode by default */
2115
2116 /* Check if we've been asked to not use 2d mode */
2117 if ( ! PG_ARGISNULL(3) )
2118 mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2119
2120 /* Retrieve the stats object */
2121 nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2122
2123 if ( ! nd_stats )
2124 elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2125
2126 /* Calculate the gbox */
2127 if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2128 elog(ERROR, "unable to calculate bounding box from geometry");
2129
2130 POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2131
2132 /* Do the estimation */
2133 selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2134
2135 pfree(nd_stats);
2136 PG_RETURN_FLOAT8(selectivity);
2137}
2138
2139
2145Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2146{
2147 Oid table_oid1 = PG_GETARG_OID(0);
2148 text *att_text1 = PG_GETARG_TEXT_P(1);
2149 Oid table_oid2 = PG_GETARG_OID(2);
2150 text *att_text2 = PG_GETARG_TEXT_P(3);
2151 ND_STATS *nd_stats1, *nd_stats2;
2152 float8 selectivity = 0;
2153 int mode = 2; /* 2D mode by default */
2154
2155
2156 /* Retrieve the stats object */
2157 nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2158 nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2159
2160 if ( ! nd_stats1 )
2161 elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2162
2163 if ( ! nd_stats2 )
2164 elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2165
2166 /* Check if we've been asked to not use 2d mode */
2167 if ( ! PG_ARGISNULL(4) )
2168 {
2169 text *modetxt = PG_GETARG_TEXT_P(4);
2170 char *modestr = text_to_cstring(modetxt);
2171 if ( modestr[0] == 'N' )
2172 mode = 0;
2173 }
2174
2175 /* Do the estimation */
2176 selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2177
2178 pfree(nd_stats1);
2179 pfree(nd_stats2);
2180 PG_RETURN_FLOAT8(selectivity);
2181}
2182
2188Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2189{
2190 PG_RETURN_DATUM(DirectFunctionCall5(
2192 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2193 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2194 Int32GetDatum(2) /* 2-D mode */
2195 ));
2196}
2197
2203Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2204{
2205 PG_RETURN_DATUM(DirectFunctionCall5(
2207 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2208 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2209 Int32GetDatum(0) /* N-D mode */
2210 ));
2211}
2212
2213
2228float8
2229gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
2230{
2231 VariableStatData vardata;
2232 Node *other = NULL;
2233 bool varonleft;
2234 ND_STATS *nd_stats = NULL;
2235
2236 GBOX search_box;
2237 float8 selectivity = 0;
2238 Const *otherConst;
2239
2240 POSTGIS_DEBUGF(2, "%s: entered function", __func__);
2241
2242 if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
2243 {
2244 POSTGIS_DEBUGF(2, "%s: could not find vardata", __func__);
2245 return DEFAULT_ND_SEL;
2246 }
2247
2248 if (!IsA(other, Const))
2249 {
2250 ReleaseVariableStats(vardata);
2251 POSTGIS_DEBUGF(2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
2252 return DEFAULT_ND_SEL;
2253 }
2254
2255 otherConst = (Const*)other;
2256 if ((!otherConst) || otherConst->constisnull)
2257 {
2258 ReleaseVariableStats(vardata);
2259 POSTGIS_DEBUGF(2, "%s: constant argument is NULL", __func__);
2260 return DEFAULT_ND_SEL;
2261 }
2262
2263 if (!gserialized_datum_get_gbox_p(otherConst->constvalue, &search_box))
2264 {
2265 ReleaseVariableStats(vardata);
2266 POSTGIS_DEBUGF(2, "%s: search box is EMPTY", __func__);
2267 return 0.0;
2268 }
2269
2270 if (!vardata.statsTuple)
2271 {
2272 POSTGIS_DEBUGF(1, "%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2273 return DEFAULT_ND_SEL;
2274 }
2275
2276 nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2277 ReleaseVariableStats(vardata);
2278 selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2279 pfree(nd_stats);
2280 return selectivity;
2281}
2282
2284Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2285{
2286 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2287 // Oid operator_oid = PG_GETARG_OID(1);
2288 List *args = (List *) PG_GETARG_POINTER(2);
2289 int varRelid = PG_GETARG_INT32(3);
2290 int mode = PG_GETARG_INT32(4);
2291 float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
2292 POSTGIS_DEBUGF(2, "%s: selectivity is %g", __func__, selectivity);
2293 PG_RETURN_FLOAT8(selectivity);
2294}
2295
2296
2297
2304Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2305{
2306 char *nsp = NULL;
2307 char *tbl = NULL;
2308 text *col = NULL;
2309 char *nsp_tbl = NULL;
2310 Oid tbl_oid, idx_oid = 0;
2311 ND_STATS *nd_stats;
2312 GBOX *gbox = NULL;
2313 bool only_parent = false;
2314 int key_type, att_num;
2315 size_t sz;
2316
2317 /* We need to initialize the internal cache to access it later via postgis_oid() */
2318 postgis_initialize_cache(fcinfo);
2319
2320 if ( PG_NARGS() == 4 )
2321 {
2322 nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2323 tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2324 col = PG_GETARG_TEXT_P(2);
2325 only_parent = PG_GETARG_BOOL(3);
2326 sz = strlen(nsp) + strlen(tbl) + 6;
2327 nsp_tbl = palloc(sz);
2328 snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2329 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2330 pfree(nsp_tbl);
2331 }
2332 else if ( PG_NARGS() == 3 )
2333 {
2334 nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2335 tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2336 col = PG_GETARG_TEXT_P(2);
2337 sz = strlen(nsp) + strlen(tbl) + 6;
2338 nsp_tbl = palloc(sz);
2339 snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2340 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2341 pfree(nsp_tbl);
2342 }
2343 else if ( PG_NARGS() == 2 )
2344 {
2345 tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2346 col = PG_GETARG_TEXT_P(1);
2347 sz = strlen(tbl) + 3;
2348 nsp_tbl = palloc(sz);
2349 snprintf(nsp_tbl, sz, "\"%s\"", tbl);
2350 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2351 pfree(nsp_tbl);
2352 }
2353 else
2354 {
2355 elog(ERROR, "estimated_extent() called with wrong number of arguments");
2356 PG_RETURN_NULL();
2357 }
2358
2359 /* Read the extent from the head of the spatial index, if there is one */
2360
2361 idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2362 if (idx_oid)
2363 {
2364 /* TODO: how about only_parent ? */
2365 gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2366 POSTGIS_DEBUGF(2, "index for \"%s.%s\" exists, reading gbox from there", tbl, text_to_cstring(col));
2367 if ( ! gbox ) PG_RETURN_NULL();
2368 }
2369 else
2370 {
2371 POSTGIS_DEBUGF(2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2372
2373 /* Fall back to reading the stats, if no index is found */
2374
2375 /* Estimated extent only returns 2D bounds, so use mode 2 */
2376 nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2377
2378 /* Error out on no stats */
2379 if ( ! nd_stats ) {
2380 elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2381 PG_RETURN_NULL();
2382 }
2383
2384 /* Construct the box */
2385 gbox = palloc(sizeof(GBOX));
2386 FLAGS_SET_GEODETIC(gbox->flags, 0);
2387 FLAGS_SET_Z(gbox->flags, 0);
2388 FLAGS_SET_M(gbox->flags, 0);
2389 gbox->xmin = nd_stats->extent.min[0];
2390 gbox->xmax = nd_stats->extent.max[0];
2391 gbox->ymin = nd_stats->extent.min[1];
2392 gbox->ymax = nd_stats->extent.max[1];
2393 pfree(nd_stats);
2394 }
2395
2396 PG_RETURN_POINTER(gbox);
2397}
2398
2406Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2407{
2408 if ( PG_NARGS() == 3 )
2409 {
2410 PG_RETURN_DATUM(
2411 DirectFunctionCall3(gserialized_estimated_extent,
2412 PG_GETARG_DATUM(0),
2413 PG_GETARG_DATUM(1),
2414 PG_GETARG_DATUM(2)));
2415 }
2416 else if ( PG_NARGS() == 2 )
2417 {
2418 PG_RETURN_DATUM(
2419 DirectFunctionCall2(gserialized_estimated_extent,
2420 PG_GETARG_DATUM(0),
2421 PG_GETARG_DATUM(1)));
2422 }
2423
2424 elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2425 PG_RETURN_NULL();
2426}
2427
2428/************************************************************************/
2429
2430static Oid
2431table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
2432{
2433 Relation tbl_rel;
2434 ListCell *lc;
2435 List *idx_list;
2436 Oid result = InvalidOid;
2437 char *colname = text_to_cstring(col);
2438
2439 /* Lookup our spatial index key types */
2440 Oid b2d_oid = postgis_oid(BOX2DFOID);
2441 Oid gdx_oid = postgis_oid(BOX3DOID);
2442
2443 if (!(b2d_oid && gdx_oid))
2444 return InvalidOid;
2445
2446 tbl_rel = RelationIdGetRelation(tbl_oid);
2447 idx_list = RelationGetIndexList(tbl_rel);
2448 RelationClose(tbl_rel);
2449
2450 /* For each index associated with this table... */
2451 foreach(lc, idx_list)
2452 {
2453 Form_pg_class idx_form;
2454 HeapTuple idx_tup;
2455 int idx_relam;
2456 Oid idx_oid = lfirst_oid(lc);
2457
2458 idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2459 if (!HeapTupleIsValid(idx_tup))
2460 elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2461 idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2462 idx_relam = idx_form->relam;
2463 ReleaseSysCache(idx_tup);
2464
2465 /* Does the index use a GIST access method? */
2466 if (idx_relam == GIST_AM_OID)
2467 {
2468 Form_pg_attribute att;
2469 Oid atttypid;
2470 int attnum;
2471 /* Is the index on the column name we are looking for? */
2472 HeapTuple att_tup = SearchSysCache2(ATTNAME,
2473 ObjectIdGetDatum(idx_oid),
2474 PointerGetDatum(colname));
2475 if (!HeapTupleIsValid(att_tup))
2476 continue;
2477
2478 att = (Form_pg_attribute) GETSTRUCT(att_tup);
2479 atttypid = att->atttypid;
2480 attnum = att->attnum;
2481 ReleaseSysCache(att_tup);
2482
2483 /* Is the column actually spatial? */
2484 if (b2d_oid == atttypid || gdx_oid == atttypid)
2485 {
2486 /* Save result, clean up, and break out */
2487 result = idx_oid;
2488 if (att_num)
2489 *att_num = attnum;
2490 if (key_type)
2491 *key_type = (atttypid == b2d_oid ? STATISTIC_SLOT_2D : STATISTIC_SLOT_ND);
2492 break;
2493 }
2494 }
2495 }
2496 return result;
2497}
2498
2499static GBOX *
2500spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
2501{
2502 BOX2DF *bounds_2df = NULL;
2503 GIDX *bounds_gidx = NULL;
2504 GBOX *gbox = NULL;
2505 Relation idx_rel;
2506 Buffer buffer;
2507 Page page;
2508 OffsetNumber offset;
2509 unsigned long offset_max;
2510
2511 if (!idx_oid)
2512 return NULL;
2513
2514 idx_rel = index_open(idx_oid, AccessShareLock);
2515 buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2516 page = (Page) BufferGetPage(buffer);
2517 offset = FirstOffsetNumber;
2518 offset_max = PageGetMaxOffsetNumber(page);
2519 while (offset <= offset_max)
2520 {
2521 ItemId iid = PageGetItemId(page, offset);
2522 IndexTuple ituple;
2523 if (!iid)
2524 {
2525 ReleaseBuffer(buffer);
2526 index_close(idx_rel, AccessShareLock);
2527 return NULL;
2528 }
2529 ituple = (IndexTuple) PageGetItem(page, iid);
2530 if (!GistTupleIsInvalid(ituple))
2531 {
2532 bool isnull;
2533 Datum idx_attr = index_getattr(ituple, att_num, idx_rel->rd_att, &isnull);
2534 if (!isnull)
2535 {
2536 if (key_type == STATISTIC_SLOT_2D)
2537 {
2538 BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2539 if (bounds_2df)
2540 box2df_merge(bounds_2df, b);
2541 else
2542 bounds_2df = box2df_copy(b);
2543 }
2544 else
2545 {
2546 GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2547 if (bounds_gidx)
2548 gidx_merge(&bounds_gidx, b);
2549 else
2550 bounds_gidx = gidx_copy(b);
2551 }
2552 }
2553 }
2554 offset++;
2555 }
2556
2557 ReleaseBuffer(buffer);
2558 index_close(idx_rel, AccessShareLock);
2559
2560 if (key_type == STATISTIC_SLOT_2D && bounds_2df)
2561 {
2562 if (box2df_is_empty(bounds_2df))
2563 return NULL;
2564 gbox = gbox_new(0);
2565 box2df_to_gbox_p(bounds_2df, gbox);
2566 }
2567 else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
2568 {
2569 if (gidx_is_unknown(bounds_gidx))
2570 return NULL;
2571 gbox = gbox_new(0);
2572 gbox_from_gidx(bounds_gidx, gbox, 0);
2573 }
2574 else
2575 return NULL;
2576
2577 return gbox;
2578}
2579
2580/*
2581CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2582 RETURNS box2d
2583 AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2584 LANGUAGE 'c' STABLE STRICT;
2585*/
2586
2589{
2590 GBOX *gbox = NULL;
2591 int key_type;
2592 int att_num;
2593 Oid tbl_oid = PG_GETARG_DATUM(0);
2594 text *col = PG_GETARG_TEXT_P(1);
2595 Oid idx_oid;
2596
2597 if(!tbl_oid)
2598 PG_RETURN_NULL();
2599
2600 /* We need to initialize the internal cache to access it later via postgis_oid() */
2601 postgis_initialize_cache(fcinfo);
2602
2603 idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2604 if (!idx_oid)
2605 PG_RETURN_NULL();
2606
2607 gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2608 if (!gbox)
2609 PG_RETURN_NULL();
2610 else
2611 PG_RETURN_POINTER(gbox);
2612}
2613
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition gbox.c:32
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition gbox.c:392
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition gbox.c:197
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *gbox)
Read the box from the GSERIALIZED or calculate it if necessary.
Definition gserialized.c:65
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
#define NUM_BINS
#define STATISTIC_SLOT_ND
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
#define SDFACTOR
#define FALLBACK_ND_JOINSEL
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define LW_FAILURE
Definition liblwgeom.h:110
#define FLAGS_GET_Z(flags)
Definition liblwgeom.h:179
#define FLAGS_GET_M(flags)
Definition liblwgeom.h:180
#define FLAGS_SET_GEODETIC(flags, value)
Definition liblwgeom.h:189
#define FLAGS_SET_M(flags, value)
Definition liblwgeom.h:187
#define FLAGS_SET_Z(flags, value)
Definition liblwgeom.h:186
#define FLAGS_GET_GEODETIC(flags)
Definition liblwgeom.h:182
This library is the generic geometry handling section of PostGIS.
#define str(s)
Datum buffer(PG_FUNCTION_ARGS)
char * text_to_cstring(const text *textptr)
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
double ymax
Definition liblwgeom.h:343
double zmax
Definition liblwgeom.h:345
double xmax
Definition liblwgeom.h:341
double zmin
Definition liblwgeom.h:344
double mmax
Definition liblwgeom.h:347
double ymin
Definition liblwgeom.h:342
double xmin
Definition liblwgeom.h:340
double mmin
Definition liblwgeom.h:346
lwflags_t flags
Definition liblwgeom.h:339
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int max[ND_DIMS]
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]
N-dimensional statistics structure.