00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00038 #ifdef HAVE_CONFIG_H
00039 #include <config.h>
00040 #endif
00041
00042
00043 #include <stdlib.h>
00044 #include <string.h>
00045 #include <stdio.h>
00046 #include <ctype.h>
00047 #include <errno.h>
00048 #include <wchar.h>
00049 #include <wctype.h>
00050
00051 #include "gerbv.h"
00052 #include "csv.h"
00053 #include "csv_defines.h"
/* Parser states used by csv_parse_str() and csv_parse_wcs(). */
#define ST_START      1   /* at the start of a field, nothing collected yet */
#define ST_COLLECT    2   /* collecting the characters of a field */
#define ST_TAILSPACE  3   /* whitespace seen after a closing quote */
#define ST_END_QUOTE  4   /* a quote was just seen inside a quoted field */

/* Wide-character whitespace test under a character-type-neutral name. */
#define istspace      iswspace
00059
00060
/*
 * Byte-oriented input source for csv_parse_str().
 *
 * Characters are taken from the stdio stream when `in` is non-NULL,
 * otherwise from the memory buffer described by `src` / `sn`.
 */
struct sinput {
    FILE       *in;     /* stream to read from; NULL selects the memory buffer */
    const char *src;    /* next unread byte of the memory buffer */
    size_t      sn;     /* bytes still available at src */
    size_t      count;  /* number of characters handed out so far */
};
00067
00068
/*
 * Wide-character input source for csv_parse_wcs(); always a memory buffer.
 */
struct winput {
    const wchar_t *src;    /* next unread wide character */
    size_t         sn;     /* wide characters still available at src */
    size_t         count;  /* number of characters handed out so far */
};
00074
00075
00076 static int
00077 snextch(struct sinput *in)
00078 {
00079 int ch;
00080
00081 if (in->in) {
00082 if ((ch = fgetc(in->in)) == EOF) {
00083 if (ferror(in->in)) {
00084 GERB_MESSAGE("errno:%d", errno);
00085 return -1;
00086 }
00087 return 0;
00088 }
00089 } else {
00090 if (in->sn == 0) {
00091 return 0;
00092 }
00093 ch = (unsigned char) *(in->src)++;
00094 in->sn--;
00095 }
00096 in->count++;
00097
00098 return ch;
00099 }
00100
00101
00102 static int
00103 wnextch(struct winput *in)
00104 {
00105 int ch;
00106
00107 if (in->sn == 0) {
00108 return 0;
00109 }
00110 ch = *(in->src)++;
00111 in->sn--;
00112 in->count++;
00113
00114 return ch;
00115 }
00116
00117 static int
00118 csv_parse_str(struct sinput *in, char *buf, size_t bn, char *row[], int rn, int sep, int flags)
00119 {
00120 int trim, quotes, ch, state, r, j, t, inquotes;
00121
00122 trim = flags & CSV_TRIM;
00123 quotes = flags & CSV_QUOTES;
00124 state = ST_START;
00125 inquotes = 0;
00126 ch = r = j = t = 0;
00127
00128 memset(row, 0, sizeof(char *) * rn);
00129
00130 while (rn && bn && (ch = snextch(in)) > 0) {
00131 switch (state) {
00132 case ST_START:
00133 if (ch != '\n' && ch != sep && isspace(ch)) {
00134 if (!trim) {
00135 buf[j++] = ch; bn--;
00136 t = j;
00137 }
00138 break;
00139 } else if (quotes && ch == '"') {
00140 j = t = 0;
00141 state = ST_COLLECT;
00142 inquotes = 1;
00143 break;
00144 }
00145 state = ST_COLLECT;
00146 case ST_COLLECT:
00147 if (inquotes) {
00148 if (ch == '"') {
00149 state = ST_END_QUOTE;
00150 break;
00151 }
00152 } else if (ch == sep || ch == '\n') {
00153 row[r++] = buf; rn--;
00154 buf[t] = '\0'; bn--;
00155 buf += t + 1;
00156 j = t = 0;
00157
00158 state = ST_START;
00159 inquotes = 0;
00160 if (ch == '\n') {
00161 rn = 0;
00162 }
00163 break;
00164 } else if (quotes && ch == '"') {
00165 errno = EILSEQ;
00166 GERB_MESSAGE("%d: unexpected quote in element",errno);
00167 return -1;
00168 }
00169 buf[j++] = ch; bn--;
00170 if (!trim || isspace(ch) == 0) {
00171 t = j;
00172 }
00173 break;
00174 case ST_TAILSPACE:
00175 case ST_END_QUOTE:
00176 if (ch == sep || ch == '\n') {
00177 row[r++] = buf; rn--;
00178 buf[j] = '\0'; bn--;
00179 buf += j + 1;
00180 j = t = 0;
00181 state = ST_START;
00182 inquotes = 0;
00183 if (ch == '\n') {
00184 rn = 0;
00185 }
00186 break;
00187 } else if (quotes && ch == '"' && state != ST_TAILSPACE) {
00188 buf[j++] = '"'; bn--;
00189 t = j;
00190 state = ST_COLLECT;
00191 break;
00192 } else if (isspace(ch)) {
00193 state = ST_TAILSPACE;
00194 break;
00195 }
00196 errno = EILSEQ;
00197 GERB_MESSAGE("%d: bad end quote in element", errno);
00198 return -1;
00199 }
00200 }
00201 if (ch <= 0) {
00202
00203
00204 if(state == ST_TAILSPACE || state == ST_END_QUOTE
00205 || (state == ST_COLLECT && ! inquotes)) {
00206 row[r++] = buf; rn--;
00207 buf[j] = '\0'; bn--;
00208 buf += j + 1;
00209 inquotes = 0;
00210 rn = 0;
00211 } else {
00212
00213 return -1;
00214 }
00215 }
00216 if (bn == 0) {
00217 errno = E2BIG;
00218 GERB_MESSAGE("E2BIG %d ", errno);
00219 return -1;
00220 }
00221 if (rn) {
00222 if (inquotes) {
00223 errno = EILSEQ;
00224 GERB_MESSAGE("EILSEQ %d ", errno);
00225 return -1;
00226 }
00227 row[r] = buf;
00228 buf[t] = '\0';
00229 }
00230
00231 if (r < 4) {
00232 return -1;
00233 }
00234 return in->count;
00235 }
00236
00237
00238 static int
00239 csv_parse_wcs(struct winput *in, wchar_t *buf, size_t bn, wchar_t *row[], int rn, wint_t sep, int flags)
00240 {
00241 int trim, quotes, state, r, j, t, inquotes;
00242 wint_t ch;
00243
00244 trim = flags & CSV_TRIM;
00245 quotes = flags & CSV_QUOTES;
00246 state = ST_START;
00247 inquotes = 0;
00248 ch = r = j = t = 0;
00249
00250 memset(row, 0, sizeof(wchar_t *) * rn);
00251
00252 while (rn && bn && (ch = wnextch(in)) > 0) {
00253 switch (state) {
00254 case ST_START:
00255 if (ch != L'\n' && ch != sep && iswspace(ch)) {
00256 if (!trim) {
00257 buf[j++] = ch; bn--;
00258 t = j;
00259 }
00260 break;
00261 } else if (quotes && ch == L'"') {
00262 j = t = 0;
00263 state = ST_COLLECT;
00264 inquotes = 1;
00265 break;
00266 }
00267 state = ST_COLLECT;
00268 case ST_COLLECT:
00269 if (inquotes) {
00270 if (ch == L'"') {
00271 state = ST_END_QUOTE;
00272 break;
00273 }
00274 } else if (ch == sep || ch == L'\n') {
00275 row[r++] = buf; rn--;
00276 buf[t] = L'\0'; bn--;
00277 buf += t + 1;
00278 j = t = 0;
00279 state = ST_START;
00280 inquotes = 0;
00281 if (ch == L'\n') {
00282 rn = 0;
00283 }
00284 break;
00285 } else if (quotes && ch == L'"') {
00286 errno = EILSEQ;
00287 GERB_MESSAGE("%d: unexpected quote in element", errno);
00288 return -1;
00289 }
00290 buf[j++] = ch; bn--;
00291 if (!trim || iswspace(ch) == 0) {
00292 t = j;
00293 }
00294 break;
00295 case ST_TAILSPACE:
00296 case ST_END_QUOTE:
00297 if (ch == sep || ch == L'\n') {
00298 row[r++] = buf; rn--;
00299 buf[j] = L'\0'; bn--;
00300 buf += j + 1;
00301 j = t = 0;
00302 state = ST_START;
00303 inquotes = 0;
00304 if (ch == L'\n') {
00305 rn = 0;
00306 }
00307 break;
00308 } else if (quotes && ch == L'"' && state != ST_TAILSPACE) {
00309 buf[j++] = L'"'; bn--;
00310 t = j;
00311 state = ST_COLLECT;
00312 break;
00313 } else if (iswspace(ch)) {
00314 state = ST_TAILSPACE;
00315 break;
00316 }
00317 errno = EILSEQ;
00318 GERB_MESSAGE("%d: bad end quote in element ", errno);
00319 return -1;
00320 }
00321 }
00322 if (ch <= 0) {
00323
00324
00325 if(state == ST_TAILSPACE || state == ST_END_QUOTE
00326 || (state == ST_COLLECT && ! inquotes)) {
00327 row[r++] = buf; rn--;
00328 buf[j] = L'\0'; bn--;
00329 buf += j + 1;
00330 inquotes = 0;
00331 rn = 0;
00332 } else {
00333
00334 return -1;
00335 }
00336 }
00337 if (bn == 0) {
00338 errno = E2BIG;
00339 GERB_MESSAGE("%d", errno);
00340 return -1;
00341 }
00342 if (rn) {
00343 if (inquotes) {
00344 errno = EILSEQ;
00345 GERB_MESSAGE("%d", errno);
00346 return -1;
00347 }
00348 row[r] = buf;
00349 buf[t] = L'\0';
00350 }
00351
00352 return in->count;
00353 }
00354
00355
00356 int
00357 csv_row_parse_wcs(const wchar_t *src, size_t sn, wchar_t *buf, size_t bn, wchar_t *row[], int rn, int sep, int trim)
00358 {
00359 struct winput input;
00360 input.src = src;
00361 input.sn = sn;
00362 input.count = 0;
00363 return csv_parse_wcs(&input, buf, bn, row, rn, (wint_t)sep, trim);
00364 }
00365
00366
00367 int
00368 csv_row_parse_str(const char *src, size_t sn, char *buf, size_t bn, char *row[], int rn, int sep, int trim)
00369 {
00370 struct sinput input;
00371 input.in = NULL;
00372 input.src = src;
00373 input.sn = sn;
00374 input.count = 0;
00375 return csv_parse_str(&input, buf, bn, row, rn, sep, trim);
00376 }
00377
00378
00379 int
00380 csv_row_fread(FILE *in, char *buf, size_t bn, char *row[], int numcols, int sep, int trim)
00381 {
00382 struct sinput input;
00383 input.in = in;
00384 input.count = 0;
00385 return csv_parse_str(&input, buf, bn, row, numcols, sep, trim);
00386 }
00387