/*
  csvtotable: convert a CSV file into a NoSQL table.

  Copyright (c) 2004,2006 Carlo Strozzi

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 dated June, 1991.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

  $Id: csvtotable.c,v 1.3 2006/03/10 11:26:13 carlo Exp $

*/

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/file.h>

/* Installation paths. Every macro below is wrapped in #ifndef, so a
   definition supplied earlier (e.g. on the compiler command line)
   takes precedence over the fallback given here. */

/* Directory that holds the help texts. */
#ifndef HELPDIR
#define HELPDIR "/usr/local/nosql/help"
#endif
/* Help text shown by -h/--help (filtered through grep to drop '#' lines). */
#ifndef HELPFILE
#define HELPFILE (HELPDIR "/csvtotable.txt")
#endif

/* Directory that holds the licence documents. */
#ifndef DOCDIR 
#define DOCDIR "/usr/local/nosql/doc"
#endif
/* File printed by --show-copying. */
#ifndef COPYING
#define COPYING (DOCDIR "/COPYING")
#endif
/* File printed by --show-warranty. */
#ifndef WARRANTY
#define WARRANTY (DOCDIR "/WARRANTY")
#endif

/* global variables */
static char *progname;		/* argv[0], set at the start of main(); used in usage and error messages */

/* function definitions */

/*
  eusage: write a one-line usage synopsis to stderr.

  Always returns 1, so that callers can terminate with
  exit(eusage()) and report failure in a single statement.
*/
static int eusage(void)
{
	const int failure_status = 1;

	(void) fprintf(stderr, "Usage: %s [options]\n", progname);

	return failure_status;
}

/* Return non-zero if c is an ASCII letter (A-Z or a-z). */
static int is_ascii_letter(int c) {
	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

/* Return non-zero if c is an ASCII decimal digit (0-9). */
static int is_ascii_digit(int c) {
	return c >= '0' && c <= '9';
}

/* Re-open a standard stream on the named file; on failure print the
   system error for that path and exit with status 1. */
static void redirect_stream(const char *path, const char *mode, FILE *stream) {
	/* freopen() signals failure with a null pointer, not a negative value. */
	if (freopen(path, mode, stream) == NULL) {
		perror(path);
		exit(1);
	}
}

/*
  main: read CSV from stdin and write a TAB-separated table to stdout.

  Options (parsing stops at the first argument not starting with '-';
  arguments starting with '-' that match none of these are ignored):

    -i, --input FILE    read from FILE instead of stdin
    -o, --output FILE   write to FILE instead of stdout
    -h, --help          show the help file (via grep) and do not return
    --show-copying      show the COPYING file (via cat)
    --show-warranty     show the WARRANTY file (via cat)

  The first record is the header; all following records are the body.
  Double quotes delimit quoted fields, and a doubled quote inside a
  quoted field emits one literal quote character.

  Exit status: 0 on success; 1 on a usage error, a file that cannot be
  opened, a failed exec, an invalid header character, or a body record
  whose field count differs from the header's.
*/
int main(int argc, char **argv) {

	int c;
	int i = 0;
	int nf = 0;		/* separators seen so far in the current body record */
	int headnf = 0;		/* separators seen in the header */
	int quoted = 0;		/* inside a quoted field */
	int endquote = 0;	/* last char was a quote that may close the field */
	int nr = 1;		/* current record number; the header is record 1 */
	int newcol = 1;		/* next header character starts a column name */

	progname = argv[0];

	while (++i < argc && *argv[i] == '-') {

		if (!strcmp(argv[i], "-i") || !strcmp(argv[i], "--input")) {
			if (++i >= argc || *argv[i] == '-') exit(eusage());
			redirect_stream(argv[i], "r", stdin);
		}
		else if (!strcmp(argv[i], "-o") || !strcmp(argv[i], "--output")) {
			if (++i >= argc || *argv[i] == '-') exit(eusage());
			redirect_stream(argv[i], "w", stdout);
		}
		else if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help")) {
			/* execlp() only returns if the exec itself failed. */
			execlp("grep","grep","-v","^#",HELPFILE,(char *) 0);
			perror("grep");
			exit(1);
		}
		else if (!strcmp(argv[i], "--show-copying")) {
			execlp("cat","cat",COPYING,(char *) 0);
			perror("cat");
			exit(1);
		}
		else if (!strcmp(argv[i], "--show-warranty")) {
			execlp("cat","cat",WARRANTY,(char *) 0);
			perror("cat");
			exit(1);
		}
	}

	while ((c = getchar()) != EOF) {

		/* A quote followed by anything other than another quote was
		   the closing quote of the field, not half of an escaped pair. */
		if (endquote && c != '"') endquote = quoted = 0;

		if (nr == 1) {

			/* Table header. It must abide by more restrictive rules
			   than the table body:
			   - carriage returns are always discarded, even if quoted;
			   - unquoted spaces and tabs are discarded;
			   - an unquoted comma becomes a TAB and starts a new
			     column; a quoted comma is discarded;
			   - an unquoted newline ends the header; a quoted newline
			     is discarded, so names never contain line-breaks;
			   - every column name is preceded by a \001 byte and its
			     first character is replaced by 'X' unless it is a
			     letter;
			   - later characters must be a-zA-Z0-9: anything else
			     becomes '_' when quoted and is a fatal error when
			     unquoted. */

			switch (c) {
			  case '\r':
				continue;
			  case ' ': case '\t':
				if (!quoted) continue;
				break;
			  case ',':
				if (!quoted) {
					putchar('\t');
					newcol = 1;
					headnf++;
				}
				continue;
			  case '\n':
				if (!quoted) {
					putchar('\n');
					nf = 0;
					nr++;
				}
				continue;
			  case '"':
				if (endquote) {
					/* second quote of an escaped pair */
					putchar('"');
					endquote = 0;
				}
				else if (quoted) endquote = 1;
				else quoted = 1;
				continue;
			  default:
				break;
			}

			if (newcol) {
				putchar('\001');

				/* only a-zA-Z are allowed as first character */
				putchar(is_ascii_letter(c) ? c : 'X');
				newcol = 0;
			}
			else if (is_ascii_letter(c) || is_ascii_digit(c)) putchar(c);
			else if (quoted) putchar('_');
			else {
				/* invalid unquoted characters in input header */
				fprintf(stderr, "%s: invalid characters in input header\n", progname);
				exit(1);
			}
		}

		else {

			/* Table body: the CSV specs implemented here are those
			   listed at the following URL:
			   http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm#FileFormat

			   Unquoted spaces, tabs and carriage returns are dropped.
			   Inside quotes a tab is written as the two characters
			   backslash-t and a newline as backslash-n, so that each
			   output record stays on one line. */

			switch (c) {
			  case ' ':
			  case '\r':
				if (quoted) putchar(c);
				break;
			  case '\t':
				if (quoted) printf("\\t");
				break;
			  case ',':
				if (quoted) putchar(c);
				else {
					putchar('\t');
					nf++;
				}
				break;
			  case '\n':
				if (quoted) printf("\\n");
				else {
					putchar('\n');
					/* Check before advancing nr so that the message
					   names the record that is actually broken. */
					if (nf != headnf) {
						/* broken table */
						fprintf(stderr, "%s: invalid no. of fields at record %d\n", progname, nr);
						exit(1);
					}
					nr++;
					nf = 0;
				}
				break;
			  case '"':
				if (endquote) {
					/* second quote of an escaped pair */
					putchar('"');
					endquote = 0;
				}
				else if (quoted) endquote = 1;
				else quoted = 1;
				break;
			  default:
				putchar(c);
			}
		}
	}

	exit(0);
}

/* End of program */
