PostGIS 3.0.6dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches
shpcommon.c
Go to the documentation of this file.
1/**********************************************************************
2 *
3 * PostGIS - Spatial Types for PostgreSQL
4 * http://postgis.net
5 *
6 * Copyright (C) 2014 Sandro Santilli <strk@kbt.io>
7 * Copyright (C) 2010 Mark Cave-Ayland <mark.cave-ayland@siriusit.co.uk>
8 *
9 * This is free software; you can redistribute and/or modify it under
10 * the terms of the GNU General Public Licence. See the COPYING file.
11 *
12 **********************************************************************/
13
14/* This file contains functions that are shared between the loader and dumper */
15
16#include <stdio.h>
17#include <string.h>
18#include <stdlib.h>
19
20#include "shpcommon.h"
21
/**
 * One row of the code page lookup table used to translate between
 * DBF language driver IDs, code page numbers, iconv encoding names
 * and PostgreSQL encoding names.
 */
typedef struct
{
	int ldid;          /* DBF language driver ID (the number in "LDID/<n>") */
	int cpg;           /* bare code page number, e.g. 1252 */
	const char *desc;  /* human readable description */
	const char *iconv; /* encoding name handed to iconv */
	const char *pg;    /* PostgreSQL encoding name, "" when none is listed */
} code_page_entry;
30
31static int num_code_pages = 60;
32
33/* http://www.autopark.ru/ASBProgrammerGuide/DBFSTRUC.HTM */
34/* http://resources.arcgis.com/fr/content/kbase?fa=articleShow&d=21106 */
35
37 {0x01, 437, "U.S. MS-DOS", "CP437",""},
38 {0x02, 850, "International MS-DOS", "CP850",""},
39 {0x03, 1252, "Window ANSI", "WINDOWS-1252","WIN1252"},
40 {0x08, 865, "Danish OEM", "CP865",""},
41 {0x09, 437, "Dutch OEM", "CP437",""},
42 {0x0A, 850, "Dutch OEM*", "CP850",""},
43 {0x0B, 437, "Finnish OEM", "CP437",""},
44 {0x0D, 437, "French OEM", "CP437",""},
45 {0x0E, 850, "French OEM*", "CP850",""},
46 {0x0F, 437, "German OEM", "CP437",""},
47 {0x10, 850, "German OEM*", "CP850",""},
48 {0x11, 437, "Italian OEM", "CP437",""},
49 {0x12, 850, "Italian OEM*", "CP850",""},
50 {0x13, 932, "Japanese Shift-JIS", "CP932","SJIS"},
51 {0x14, 850, "Spanish OEM*", "CP850",""},
52 {0x15, 437, "Swedish OEM", "CP437",""},
53 {0x16, 850, "Swedish OEM*", "CP850",""},
54 {0x17, 865, "Norwegian OEM", "CP865",""},
55 {0x18, 437, "Spanish OEM", "CP865",""},
56 {0x19, 437, "English OEM (Britain)", "CP437",""},
57 {0x1A, 850, "English OEM (Britain)*", "CP850",""},
58 {0x1B, 437, "English OEM (U.S.)", "CP437",""},
59 {0x1C, 863, "French OEM (Canada)", "CP863",""},
60 {0x1D, 850, "French OEM*", "CP850",""},
61 {0x1F, 852, "Czech OEM", "CP852",""},
62 {0x22, 852, "Hungarian OEM", "CP852",""},
63 {0x23, 852, "Polish OEM", "CP852",""},
64 {0x24, 860, "Portuguese OEM", "CP860",""},
65 {0x25, 850, "Portuguese OEM*", "CP850",""},
66 {0x26, 866, "Russian OEM", "WINDOWS-866","WIN866"},
67 {0x37, 850, "English OEM (U.S.)*", "CP850",""},
68 {0x40, 852, "Romanian OEM", "CP852",""},
69 {0x4D, 936, "Chinese GBK (PRC)", "CP936",""},
70 {0x4E, 949, "Korean (ANSI/OEM)", "CP949",""},
71 {0x4F, 950, "Chinese Big 5 (Taiwan)", "CP950","BIG5"},
72 {0x50, 874, "Thai (ANSI/OEM)", "WIN874",""},
73 {0x57, 1252, "ANSI", "WINDOWS-1252",""},
74 {0x58, 1252, "Western European ANSI", "WINDOWS-1252",""},
75 {0x59, 1252, "Spanish ANSI", "WINDOWS-1252",""},
76 {0x64, 852, "Eastern European MS-DOS", "CP852",""},
77 {0x65, 866, "Russian MS-DOS", "CP866",""},
78 {0x66, 865, "Nordic MS-DOS", "CP865",""},
79 {0x67, 861, "Icelandic MS-DOS", "",""},
80 {0x6A, 737, "Greek MS-DOS (437G)", "CP737",""},
81 {0x6B, 857, "Turkish MS-DOS", "CP857",""},
82 {0x6C, 863, "French-Canadian MS-DOS", "CP863",""},
83 {0x78, 950, "Taiwan Big 5", "CP950",""},
84 {0x79, 949, "Hangul (Wansung)", "CP949",""},
85 {0x7A, 936, "PRC GBK", "CP936","GBK"},
86 {0x7B, 932, "Japanese Shift-JIS", "CP932",""},
87 {0x7C, 874, "Thai Windows/MS-DOS", "WINDOWS-874","WIN874"},
88 {0x86, 737, "Greek OEM", "CP737",""},
89 {0x87, 852, "Slovenian OEM", "CP852",""},
90 {0x88, 857, "Turkish OEM", "CP857",""},
91 {0xC8, 1250, "Eastern European Windows", "WINDOWS-1250","WIN1250"},
92 {0xC9, 1251, "Russian Windows", "WINDOWS-1251","WIN1251"},
93 {0xCA, 1254, "Turkish Windows", "WINDOWS-1254","WIN1254"},
94 {0xCB, 1253, "Greek Windows", "WINDOWS-1253","WIN1253"},
95 {0xCC, 1257, "Baltic Window", "WINDOWS-1257","WIN1257"},
96 {0xFF, 65001, "UTF-8", "UTF-8","UTF8"}
97};
98
99
100
101
102
/**
 * Escape a string that is to be used as part of a PostgreSQL
 * connection string.
 *
 * Apostrophes and backslashes are each prefixed with a backslash:
 *   '  ->  \'
 *   \  ->  \\
 *
 * @param str NUL-terminated input; it is never modified.
 * @return str itself when it is NULL or contains nothing to escape;
 *         otherwise a newly allocated escaped copy which the caller
 *         must free(); NULL if that allocation fails.
 */
char *
escape_connection_string(char *str)
{
	const char *src;
	char *result;
	char *dst;
	size_t toescape = 0;

	if (!str)
		return str;

	/* First pass: count the characters that need a backslash in front */
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
			toescape++;
	}

	/* Nothing to escape: hand back the input pointer unchanged */
	if (toescape == 0)
		return str;

	/* src now points at the terminator, so (src - str) is the input length */
	result = malloc((size_t)(src - str) + toescape + 1);
	if (!result)
		return NULL;

	/* Second pass: copy, inserting the escape character where needed */
	dst = result;
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
			*dst++ = '\\';

		*dst++ = *src;
	}
	*dst = '\0';

	return result;
}
157
158void
160{
161 map->size = 0;
162 map->pgfieldnames = NULL;
163 map->dbffieldnames = NULL;
164}
165
166void
168{
169 int i;
170 if (map != NULL){
171 if (map->size)
172 {
173 for (i = 0; i < map->size; i++)
174 {
175 if (map->pgfieldnames[i]) free(map->pgfieldnames[i]);
176 if (map->dbffieldnames[i]) free(map->dbffieldnames[i]);
177 }
178 free(map->pgfieldnames);
179 free(map->dbffieldnames);
180 }
181 }
182}
183
184const char *
185colmap_dbf_by_pg(colmap *map, const char *pgname)
186{
187 int i;
188 for (i=0; i<map->size; i++)
189 {
190 if (!strcasecmp(map->pgfieldnames[i], pgname))
191 {
192 return map->dbffieldnames[i];
193 }
194 }
195 return NULL;
196}
197
198const char *
199colmap_pg_by_dbf(colmap *map, const char *dbfname)
200{
201 int i;
202 for (i=0; i<map->size; i++)
203 {
204 if (!strcasecmp(map->dbffieldnames[i], dbfname))
205 {
206 return map->pgfieldnames[i];
207 }
208 }
209 return NULL;
210}
211
212int
213colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
214{
215 FILE *fptr;
216 char linebuffer[1024];
217 char *tmpstr;
218 int curmapsize, fieldnamesize;
219
220 /* Read column map file and load the colmap_dbffieldnames
221 * and colmap_pgfieldnames arrays */
222 fptr = fopen(filename, "r");
223 if (!fptr)
224 {
225 /* Return an error */
226 snprintf(errbuf, errbuflen, _("ERROR: Unable to open column map file %s"),
227 filename);
228 return 0;
229 }
230
231 /* First count how many columns we have... */
232 while (fgets(linebuffer, 1024, fptr) != NULL) ++map->size;
233
234 /* Now we know the final size, allocate the arrays and load the data */
235 fseek(fptr, 0, SEEK_SET);
236 map->pgfieldnames = (char **)malloc(sizeof(char *) * map->size);
237 map->dbffieldnames = (char **)malloc(sizeof(char *) * map->size);
238
239 /* Read in a line at a time... */
240 curmapsize = 0;
241 while (fgets(linebuffer, 1024, fptr) != NULL)
242 {
243 /* Split into two separate strings: pgfieldname and dbffieldname */
244 /* First locate end of first column (pgfieldname) */
245 fieldnamesize = strcspn(linebuffer, "\t\n ");
246 tmpstr = linebuffer;
247
248 /* Allocate memory and copy the string ensuring it is terminated */
249 map->pgfieldnames[curmapsize] = malloc(fieldnamesize + 1);
250 strncpy(map->pgfieldnames[curmapsize], tmpstr, fieldnamesize);
251 map->pgfieldnames[curmapsize][fieldnamesize] = '\0';
252
253 /* Now swallow up any whitespace */
254 tmpstr = linebuffer + fieldnamesize;
255 tmpstr += strspn(tmpstr, "\t\n ");
256
257 /* Finally locate end of second column (dbffieldname) */
258 fieldnamesize = strcspn(tmpstr, "\t\n ");
259
260 /* Allocate memory and copy the string ensuring it is terminated */
261 map->dbffieldnames[curmapsize] = malloc(fieldnamesize + 1);
262 strncpy(map->dbffieldnames[curmapsize], tmpstr, fieldnamesize);
263 map->dbffieldnames[curmapsize][fieldnamesize] = '\0';
264
265 /* Error out if the dbffieldname is > 10 chars */
266 if (strlen(map->dbffieldnames[curmapsize]) > 10)
267 {
268 snprintf(errbuf, errbuflen, _("ERROR: column map file specifies a DBF field name \"%s\" which is longer than 10 characters"), map->dbffieldnames[curmapsize]);
269 return 0;
270 }
271
272 ++curmapsize;
273 }
274
275 fclose(fptr);
276
277 /* Done; return success */
278 return 1;
279}
280
281/*
282* Code page info will come out of dbfopen as either a bare codepage number
283* (e.g. 1256) or as "LDID/1234" from the DBF hreader. We want to look up
284* the equivalent iconv encoding string so we can use iconv to transcode
285* the data into UTF8
286*/
287char *
288codepage2encoding(const char *cpg)
289{
290 int cpglen;
291 int is_ldid = 0;
292 int num, i;
293
294 /* Do nothing on nothing. */
295 if ( ! cpg ) return NULL;
296
297 /* Is this an LDID string? */
298 /* If so, note it and move past the "LDID/" tag */
299 cpglen = strlen(cpg);
300 if ( strstr(cpg, "LDID/") )
301 {
302 if ( cpglen > 5 )
303 {
304 cpg += 5;
305 is_ldid = 1;
306 }
307 else
308 {
309 return NULL;
310 }
311 }
312
313 /* Read the number */
314 num = atoi(cpg);
315
316 /* Can we find this number in our lookup table? */
317 for ( i = is_ldid ; i < num_code_pages; i++ )
318 {
319 if ( is_ldid )
320 {
321 if ( code_pages[i].ldid == num )
322 return strdup(code_pages[i].iconv);
323 }
324 else
325 {
326 if ( code_pages[i].cpg == num )
327 return strdup(code_pages[i].iconv);
328 }
329 }
330
331 /* Didn't find a matching entry */
332 return NULL;
333
334}
335
336/*
337* In the case where data is coming out of the database in some wierd encoding
338* we want to look up the appropriate code page entry to feed to DBFCreateEx
339*
340* Return null on error (cannot allocate memory)
341*/
342char *
343encoding2codepage(const char *encoding)
344{
345 int i;
346 for ( i = 0; i < num_code_pages; i++ )
347 {
348 if ( strcasecmp(encoding, code_pages[i].pg) == 0 )
349 {
350 if ( code_pages[i].ldid == 0xFF )
351 {
352 return strdup("UTF-8");
353 }
354 else
355 {
356 char *codepage = NULL;
357 int ret = asprintf(&codepage, "LDID/%d", code_pages[i].ldid);
358 if ( ret == -1 ) return NULL; /* return null on error */
359 return codepage;
360 }
361 }
362 }
363
364 /* OK, we give up, pretend it's UTF8 */
365 return strdup("UTF-8");
366}
#define str(s)
void * malloc(YYSIZE_T)
void free(void *)
static int num_code_pages
Definition shpcommon.c:31
char * encoding2codepage(const char *encoding)
Definition shpcommon.c:343
static code_page_entry code_pages[]
Definition shpcommon.c:36
int colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
Read the content of filename into a symbol map.
Definition shpcommon.c:213
void colmap_init(colmap *map)
Definition shpcommon.c:159
const char * colmap_dbf_by_pg(colmap *map, const char *pgname)
Definition shpcommon.c:185
char * codepage2encoding(const char *cpg)
Definition shpcommon.c:288
void colmap_clean(colmap *map)
Definition shpcommon.c:167
const char * colmap_pg_by_dbf(colmap *map, const char *dbfname)
Definition shpcommon.c:199
char * escape_connection_string(char *str)
Escape strings that are to be used as part of a PostgreSQL connection string.
Definition shpcommon.c:109
#define _(String)
Definition shpcommon.h:24
char * pg
Definition shpcommon.c:28
char * desc
Definition shpcommon.c:26
int ldid
Definition shpcommon.c:24
char * iconv
Definition shpcommon.c:27
int cpg
Definition shpcommon.c:25
Definition shpcommon.c:23
char ** pgfieldnames
Definition shpcommon.h:55
int size
Definition shpcommon.h:61
char ** dbffieldnames
Definition shpcommon.h:58