PostGIS 3.0.6dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches

◆ nd_box_array_distribution()

static int nd_box_array_distribution ( const ND_BOX **  nd_boxes,
int  num_boxes,
const ND_BOX *  extent,
int  ndims,
double *  distribution 
)
static

Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension, returning the range_quintile of the overlap counts per cell in a uniform partition of the extent of the dimension.

A uniform distribution of counts will have a small range and will require few cells in a selectivity histogram. A diverse distribution of counts will have a larger range and require more cells in a selectivity histogram (to distinguish between areas of feature density and areas of feature sparseness). This measurement should help us identify cases like X/Y/Z data where there is lots of variability in density in X/Y (diversely in a multi-kilometer range) and far less in Z (in a few-hundred meter range).

Definition at line 767 of file gserialized_estimate.c.

768{
769 int d, i, k, range;
770 int counts[NUM_BINS];
771 double smin, smax; /* Spatial min, spatial max */
772 double swidth; /* Spatial width of dimension */
773#if POSTGIS_DEBUG_LEVEL >= 3
774 double average, sdev, sdev_ratio;
775#endif
776 int bmin, bmax; /* Bin min, bin max */
777 const ND_BOX *ndb;
778
779 /* For each dimension... */
780 for ( d = 0; d < ndims; d++ )
781 {
782 /* Initialize counts for this dimension */
783 memset(counts, 0, sizeof(counts));
784
785 smin = extent->min[d];
786 smax = extent->max[d];
787 swidth = smax - smin;
788
789 /* Don't try and calculate distribution of overly narrow */
790 /* or overly wide dimensions. Here we're being pretty geographical, */
791 /* expecting "normal" planar or geographic coordinates. */
792 /* Otherwise we have to "handle" +/- Inf bounded features and */
793 /* the assumptions needed for that are as bad as this hack. */
794 if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
795 {
796 distribution[d] = 0;
797 continue;
798 }
799
800 /* Sum up the overlaps of each feature with the dimensional bins */
801 for ( i = 0; i < num_boxes; i++ )
802 {
803 double minoffset, maxoffset;
804
805 /* Skip null entries */
806 ndb = nd_boxes[i];
807 if ( ! ndb ) continue;
808
809 /* Where does box fall relative to the working range */
810 minoffset = ndb->min[d] - smin;
811 maxoffset = ndb->max[d] - smin;
812
813 /* Skip boxes that are outside our working range */
814 if ( minoffset < 0 || minoffset > swidth ||
815 maxoffset < 0 || maxoffset > swidth )
816 {
817 continue;
818 }
819
820 /* What bins does this range correspond to? */
821 bmin = floor(NUM_BINS * minoffset / swidth);
822 bmax = floor(NUM_BINS * maxoffset / swidth);
823
824 /* Should only happen when maxoffset==swidth */
825 if (bmax >= NUM_BINS)
826 bmax = NUM_BINS-1;
827
828 POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
829
830 /* Increment the counts in all the bins this feature overlaps */
831 for ( k = bmin; k <= bmax; k++ )
832 {
833 counts[k] += 1;
834 }
835
836 }
837
838 /* How dispersed is the distribution of features across bins? */
839 range = range_quintile(counts, NUM_BINS);
840
841#if POSTGIS_DEBUG_LEVEL >= 3
842 average = avg(counts, NUM_BINS);
843 sdev = stddev(counts, NUM_BINS);
844 sdev_ratio = sdev/average;
845
846 POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
847 POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
848 POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
849 POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
850#endif
851
852 distribution[d] = range;
853 }
854
855 return true;
856}
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
#define NUM_BINS
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...

References ND_BOX_T::max, MAX_DIMENSION_WIDTH, ND_BOX_T::min, NUM_BINS, and range_quintile().

Referenced by compute_gserialized_stats_mode().

Here is the call graph for this function:
Here is the caller graph for this function: