PostGIS 3.0.6dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches

◆ gserialized_gist_picksplit()

Datum gserialized_gist_picksplit ( PG_FUNCTION_ARGS  )

Definition at line 1579 of file gserialized_gist_nd.c.

1580{
1581
1582 GistEntryVector *entryvec = (GistEntryVector *)PG_GETARG_POINTER(0);
1583
1584 GIST_SPLITVEC *v = (GIST_SPLITVEC *)PG_GETARG_POINTER(1);
1585 OffsetNumber i;
1586 /* One union box for each half of the space. */
1587 GIDX **box_union;
1588 /* One offset number list for each half of the space. */
1589 OffsetNumber **list;
1590 /* One position index for each half of the space. */
1591 int *pos;
1592 GIDX *box_pageunion;
1593 GIDX *box_current;
1594 int direction = -1;
1595 bool all_entries_equal = true;
1596 OffsetNumber max_offset;
1597 int nbytes, ndims_pageunion, d;
1598 int posmin = entryvec->n;
1599
1600 POSTGIS_DEBUG(4, "[GIST] 'picksplit' function called");
1601
1602 /*
1603 ** First calculate the bounding box and maximum number of dimensions in this page.
1604 */
1605
1606 max_offset = entryvec->n - 1;
1607 box_current = (GIDX *)DatumGetPointer(entryvec->vector[FirstOffsetNumber].key);
1608 box_pageunion = gidx_copy(box_current);
1609
1610 /* Calculate the containing box (box_pageunion) for the whole page we are going to split. */
1611 for (i = OffsetNumberNext(FirstOffsetNumber); i <= max_offset; i = OffsetNumberNext(i))
1612 {
1613 box_current = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1614
1615 if (all_entries_equal && !gidx_equals(box_pageunion, box_current))
1616 all_entries_equal = false;
1617
1618 gidx_merge(&box_pageunion, box_current);
1619 }
1620
1621 POSTGIS_DEBUGF(3, "[GIST] box_pageunion: %s", gidx_to_string(box_pageunion));
1622
1623 /* Every box in the page is the same! So, we split and just put half the boxes in each child. */
1624 if (all_entries_equal)
1625 {
1626 POSTGIS_DEBUG(4, "[GIST] picksplit finds all entries equal!");
1627 gserialized_gist_picksplit_fallback(entryvec, v);
1628 PG_RETURN_POINTER(v);
1629 }
1630
1631 /* Initialize memory structures. */
1632 nbytes = (max_offset + 2) * sizeof(OffsetNumber);
1633 ndims_pageunion = GIDX_NDIMS(box_pageunion);
1634 POSTGIS_DEBUGF(4, "[GIST] ndims_pageunion == %d", ndims_pageunion);
1635 pos = palloc(2 * ndims_pageunion * sizeof(int));
1636 list = palloc(2 * ndims_pageunion * sizeof(OffsetNumber *));
1637 box_union = palloc(2 * ndims_pageunion * sizeof(GIDX *));
1638 for (d = 0; d < ndims_pageunion; d++)
1639 {
1640 list[BELOW(d)] = (OffsetNumber *)palloc(nbytes);
1641 list[ABOVE(d)] = (OffsetNumber *)palloc(nbytes);
1642 box_union[BELOW(d)] = gidx_new(ndims_pageunion);
1643 box_union[ABOVE(d)] = gidx_new(ndims_pageunion);
1644 pos[BELOW(d)] = 0;
1645 pos[ABOVE(d)] = 0;
1646 }
1647
1648 /*
1649 ** Assign each entry in the node to the volume partitions it belongs to,
1650 ** such as "above the x/y plane, left of the y/z plane, below the x/z plane".
1651 ** Each entry thereby ends up in three of the six partitions.
1652 */
1653 POSTGIS_DEBUG(4, "[GIST] 'picksplit' calculating best split axis");
1654 for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
1655 {
1656 box_current = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1657
1658 for (d = 0; d < ndims_pageunion; d++)
1659 {
1660 if (GIDX_GET_MIN(box_current, d) - GIDX_GET_MIN(box_pageunion, d) <
1661 GIDX_GET_MAX(box_pageunion, d) - GIDX_GET_MAX(box_current, d))
1662 gserialized_gist_picksplit_addlist(
1663 list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1664 else
1665 gserialized_gist_picksplit_addlist(
1666 list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1667 }
1668 }
1669
1670 /*
1671 ** "Bad disposition", too many entries fell into one octant of the space, so no matter which
1672 ** plane we choose to split on, we're going to end up with a mostly full node. Where the
1673 ** data is pretty homogeneous (lots of duplicates) entries that are equidistant from the
1674 ** sides of the page union box can occasionally all end up in one place, leading
1675 ** to this condition.
1676 */
1677 if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion))
1678 {
1679 /*
1680 ** Instead we split on center points and see if we do better.
1681 ** First calculate the average center point for each axis.
1682 */
1683 double *avgCenter = palloc(ndims_pageunion * sizeof(double));
1684
1685 for (d = 0; d < ndims_pageunion; d++)
1686 avgCenter[d] = 0.0;
1687
1688 POSTGIS_DEBUG(4, "[GIST] picksplit can't find good split axis, trying center point method");
1689
1690 for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
1691 {
1692 box_current = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1693 for (d = 0; d < ndims_pageunion; d++)
1694 avgCenter[d] += (GIDX_GET_MAX(box_current, d) + GIDX_GET_MIN(box_current, d)) / 2.0;
1695 }
1696 for (d = 0; d < ndims_pageunion; d++)
1697 {
1698 avgCenter[d] /= max_offset;
1699 pos[BELOW(d)] = pos[ABOVE(d)] = 0; /* Re-initialize our counters. */
1700 POSTGIS_DEBUGF(4, "[GIST] picksplit average center point[%d] = %.12g", d, avgCenter[d]);
1701 }
1702
1703 /* For each of our entries... */
1704 for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
1705 {
1706 double center;
1707 box_current = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1708
1709 for (d = 0; d < ndims_pageunion; d++)
1710 {
1711 center = (GIDX_GET_MIN(box_current, d) + GIDX_GET_MAX(box_current, d)) / 2.0;
1712 if (center < avgCenter[d])
1713 gserialized_gist_picksplit_addlist(
1714 list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1715 else if (FPeq(center, avgCenter[d]))
1716 if (pos[BELOW(d)] > pos[ABOVE(d)])
1717 gserialized_gist_picksplit_addlist(list[ABOVE(d)],
1718 &(box_union[ABOVE(d)]),
1719 box_current,
1720 &(pos[ABOVE(d)]),
1721 i);
1722 else
1723 gserialized_gist_picksplit_addlist(list[BELOW(d)],
1724 &(box_union[BELOW(d)]),
1725 box_current,
1726 &(pos[BELOW(d)]),
1727 i);
1728 else
1729 gserialized_gist_picksplit_addlist(
1730 list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1731 }
1732 }
1733
1734 /* Do we have a good disposition now? If not, screw it, just cut the node in half. */
1735 if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion))
1736 {
1737 POSTGIS_DEBUG(4,
1738 "[GIST] picksplit still cannot find a good split! just cutting the node in half");
1739 gserialized_gist_picksplit_fallback(entryvec, v);
1740 PG_RETURN_POINTER(v);
1741 }
1742 }
1743
1744 /*
1745 ** Now, what splitting plane gives us the most even ratio of
1746 ** entries in our child pages? Since each split region has been apportioned entries
1747 ** against the same number of total entries, the axis that has the smallest maximum
1748 ** number of entries in its regions is the most evenly distributed.
1749 ** TODO: what if the distributions are equal in two or more axes?
1750 */
1751 for (d = 0; d < ndims_pageunion; d++)
1752 {
1753 int posd = Max(pos[ABOVE(d)], pos[BELOW(d)]);
1754 if (posd < posmin)
1755 {
1756 direction = d;
1757 posmin = posd;
1758 }
1759 }
1760 if (direction == -1 || posmin == entryvec->n)
1761 elog(ERROR, "Error in building split, unable to determine split direction.");
1762
1763 POSTGIS_DEBUGF(3, "[GIST] 'picksplit' splitting on axis %d", direction);
1764
1765 gserialized_gist_picksplit_constructsplit(v,
1766 list[BELOW(direction)],
1767 pos[BELOW(direction)],
1768 &(box_union[BELOW(direction)]),
1769 list[ABOVE(direction)],
1770 pos[ABOVE(direction)],
1771 &(box_union[ABOVE(direction)]));
1772
1773 POSTGIS_DEBUGF(4, "[GIST] spl_ldatum: %s", gidx_to_string((GIDX *)v->spl_ldatum));
1774 POSTGIS_DEBUGF(4, "[GIST] spl_rdatum: %s", gidx_to_string((GIDX *)v->spl_rdatum));
1775
1776 POSTGIS_DEBUGF(
1777 4,
1778 "[GIST] axis %d: parent range (%.12g, %.12g) left range (%.12g, %.12g), right range (%.12g, %.12g)",
1779 direction,
1780 GIDX_GET_MIN(box_pageunion, direction),
1781 GIDX_GET_MAX(box_pageunion, direction),
1782 GIDX_GET_MIN((GIDX *)v->spl_ldatum, direction),
1783 GIDX_GET_MAX((GIDX *)v->spl_ldatum, direction),
1784 GIDX_GET_MIN((GIDX *)v->spl_rdatum, direction),
1785 GIDX_GET_MAX((GIDX *)v->spl_rdatum, direction));
1786
1787 PG_RETURN_POINTER(v);
1788}
static void gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC *v, OffsetNumber *list1, int nlist1, GIDX **union1, OffsetNumber *list2, int nlist2, GIDX **union2)
static bool gserialized_gist_picksplit_badratios(int *pos, int dims)
static void gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *box_current, int *pos, int num)
#define ABOVE(d)
#define BELOW(d)
bool gidx_equals(GIDX *a, GIDX *b)
GIDX * gidx_copy(GIDX *b)
static void gserialized_gist_picksplit_fallback(GistEntryVector *entryvec, GIST_SPLITVEC *v)
void gidx_merge(GIDX **b_union, GIDX *b_new)

References ABOVE, BELOW, gidx_copy(), gidx_equals(), gidx_merge(), gserialized_gist_picksplit_addlist(), gserialized_gist_picksplit_badratios(), gserialized_gist_picksplit_constructsplit(), and gserialized_gist_picksplit_fallback().

Here is the call graph for this function: