PostGIS 3.0.6dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches

◆ kmeans_init()

static void kmeans_init ( POINT2D **  objs,
uint32_t  n,
POINT2D **  centers,
POINT2D centers_raw,
uint32_t  k 
)
static

Definition at line 129 of file lwkmeans.c.

130{
131 double* distances;
132 uint32_t p1 = 0, p2 = 0;
133 uint32_t i, j;
134 uint32_t duplicate_count = 1; /* a point is a duplicate of itself */
135 double max_dst = -1, current_distance;
136 double dst_p1, dst_p2;
137
138 /* k=0, k=1: "clustering" is just input validation */
139 assert(k > 1);
140
141 /* k >= 2: find two distant points greedily */
142 for (i = 1; i < n; i++)
143 {
144 /* skip null */
145 if (!objs[i]) continue;
146
147 /* reinit if first element happened to be null */
148 if (!objs[p1] && !objs[p2])
149 {
150 p1 = i;
151 p2 = i;
152 continue;
153 }
154
155 /* if we found a larger distance, replace our choice */
156 dst_p1 = distance2d_sqr_pt_pt(objs[i], objs[p1]);
157 dst_p2 = distance2d_sqr_pt_pt(objs[i], objs[p2]);
158 if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
159 {
160 if (dst_p1 > dst_p2)
161 {
162 max_dst = dst_p1;
163 p2 = i;
164 }
165 else
166 {
167 max_dst = dst_p2;
168 p1 = i;
169 }
170 }
171 if ((dst_p1 == 0) || (dst_p2 == 0)) duplicate_count++;
172 }
173 if (duplicate_count > 1)
174 lwnotice(
175 "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
176 __func__,
177 duplicate_count);
178
179 /* by now two points should be found and non-same */
180 assert(p1 != p2 && objs[p1] && objs[p2] && max_dst >= 0);
181
182 /* accept these two points */
183 centers_raw[0] = *((POINT2D *)objs[p1]);
184 centers[0] = &(centers_raw[0]);
185 centers_raw[1] = *((POINT2D *)objs[p2]);
186 centers[1] = &(centers_raw[1]);
187
188 if (k > 2)
189 {
190 /* array of minimum distance to a point from accepted cluster centers */
191 distances = lwalloc(sizeof(double) * n);
192
193 /* initialize array with distance to first object */
194 for (j = 0; j < n; j++)
195 {
196 if (objs[j])
197 distances[j] = distance2d_sqr_pt_pt(objs[j], centers[0]);
198 else
199 distances[j] = -1;
200 }
201 distances[p1] = -1;
202 distances[p2] = -1;
203
204 /* loop i on clusters, skip 0 and 1 as found already */
205 for (i = 2; i < k; i++)
206 {
207 uint32_t candidate_center = 0;
208 double max_distance = -DBL_MAX;
209
210 /* loop j on objs */
211 for (j = 0; j < n; j++)
212 {
213 /* empty objs and accepted clusters are already marked with distance = -1 */
214 if (distances[j] < 0) continue;
215
216 /* update minimal distance with previosuly accepted cluster */
217 current_distance = distance2d_sqr_pt_pt(objs[j], centers[i - 1]);
218 if (current_distance < distances[j])
219 distances[j] = current_distance;
220
221 /* greedily take a point that's farthest from any of accepted clusters */
222 if (distances[j] > max_distance)
223 {
224 candidate_center = j;
225 max_distance = distances[j];
226 }
227 }
228
229 /* Checked earlier by counting entries on input, just in case */
230 assert(max_distance >= 0);
231
232 /* accept candidate to centers */
233 distances[candidate_center] = -1;
234 /* Copy the point coordinates into the initial centers array
235 * Centers array is an array of pointers to points, not an array of points */
236 centers_raw[i] = *((POINT2D *)objs[candidate_center]);
237 centers[i] = &(centers_raw[i]);
238 }
239 lwfree(distances);
240 }
241}
void * lwalloc(size_t size)
Definition lwutil.c:227
void lwfree(void *mem)
Definition lwutil.c:242
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition lwutil.c:177
static double distance2d_sqr_pt_pt(const POINT2D *p1, const POINT2D *p2)
Definition lwinline.h:35

References distance2d_sqr_pt_pt(), lwalloc(), lwfree(), and lwnotice().

Referenced by lwgeom_cluster_2d_kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: