#!/usr/bin/mawk -We
# *********************************************************************
# updtable: inserts/updates/deletes table rows based on the contents
#	    of an edit table.
# Copyright (c) 2000,2006 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# *********************************************************************
# $Id: updtable,v 1.3 2006/03/10 11:26:13 carlo Exp $

# BEGIN: parse the command line, work out the key column(s), load the
# edit table into the 'edits'/'keys' arrays and arrange for the table
# being updated to become awk's main input (ARGV[1]).
#
# Globals set here and used by the later rules:
#   no_ins, no_hdr, pick_last  option flags
#   kn, kc[], klist            key column count, names, ",a,b," list
#   ep[name], en[pos]          edit-table column name <-> position maps
#   edits[key], keys[key]      edit rows (prefixed by their key) / key set
#   rc                         exit status honoured by the END rule
BEGIN {
  NULL = "" ; FS = OFS = "\t"

  # Get local settings.
  nosql_install = ENVIRON["NOSQL_INSTALL"]
  stdout = ENVIRON["NOSQL_STDOUT"]
  stderr = ENVIRON["NOSQL_STDERR"]

  # Set default values if necessary.
  if (nosql_install == NULL) nosql_install = "/usr/local/nosql"
  if (stdout == NULL) stdout = "/dev/stdout"
  if (stderr == NULL) stderr = "/dev/stderr"

  # Parse the command line. Options taking a value consume the next
  # ARGV element; the last argument not starting with '-' is taken as
  # the table name (e_file).
  while (ARGV[++i] != NULL) {
    if (ARGV[i] == "-n" || ARGV[i] == "--no-insert") no_ins = 1
    else if (ARGV[i] == "-N" || ARGV[i] == "--no-header") no_hdr = 1
    else if (ARGV[i] == "-l" || ARGV[i] == "--last") pick_last = 1
    else if (ARGV[i] == "-s" || ARGV[i] == "--stdin") swap = 1
    else if (ARGV[i] == "-w" || ARGV[i] == "--write-size") s_file = ARGV[++i]
    else if (ARGV[i] == "-K" || ARGV[i] == "--key-columns") {
       klist = ARGV[++i]
       kn = split(klist,kc,",")
       klist = "," klist ","
    }
    else if (ARGV[i] == "-h" || ARGV[i] == "--help") {
       system("grep -v '^#' " nosql_install "/help/updtable.txt")
       exit(rc=1)
    }
    else if (ARGV[i] == "--show-copying") {
       system("cat " nosql_install "/doc/COPYING")
       exit(rc=1)
    }
    else if (ARGV[i] == "--show-warranty") {
       system("cat " nosql_install "/doc/WARRANTY")
       exit(rc=1)
    }
    else if (ARGV[i] !~ /^-/) e_file = ARGV[i]
  }

  if (e_file == NULL) {
     print "Usage: updtable [options] table_1 < table_2" > stderr
     # Record the failure in rc like every other early exit does: the
     # END rule still runs and calls exit(rc), which would otherwise
     # turn this error into status 0 when '-n' was given.
     exit(rc=1)
  }

  # Infer the input key column names from the file name, unless they
  # have been explicitly specified on the command line.

  if (!kn) {
     klist = e_file

     # Handle both table and index file names (the index first!). Note
     # that _k and _x were chosen because no real column name can begin
     # with an underscore, so there's no risk of ambiguities. Note also
     # that we need to strip everything up to _x first, as in index
     # files the actual key columns are those that come after _x, and
     # they may not necessarily be the same as the key columns of the
     # main table. That is, given the main table 'table._k.col1.col2',
     # it is quite possible to have an index file name like this:
     # 'table._k.col1.col2._x.col3.col4.col5'

     if (sub(/.*\._x\./,"",klist) || sub(/.*\._k\./,"",klist)) {
        gsub(/\./,",",klist)
        sub(/-.*$/,"",klist)            # remove possible "-suffix".
        kn = split(klist,kc,",")
        klist = "," klist ","
     }
     else klist = NULL
  }

  #
  # Load the associative array with the updates. With '-s' the updates
  # come from stdin and e_file becomes the table being updated;
  # otherwise the updates come from e_file and stdin is the table
  # being updated.
  #

  i = 0
  e_src = swap ? "-" : e_file

  while ((getline < e_src) > 0) {
     if (++j == 1) {
        # Header line: map column names to positions and back.
        gsub(/\001/, "")                # Remove SOH markers.
        while (++i <= NF) {
           # For repeated names keep the first position, or the last
           # one when '-l' was given.
           if (pick_last) ep[$i] = i
           else if (ep[$i] == NULL) ep[$i] = i
           en[i] = $i
        }
     }  else {
        gsub(/ +\t/,"\t"); sub(/ +$/,NULL)   # trim trailing blanks.

        # No key known yet: default to the first edit-table column.
        if (!kn) {
           kc[1] = en[1]
           klist = "," en[1] ","
           kn = 1
        }

        # insert (possibly combined) key column; parts joined by ":".
        key = ""
        for (i=1; i<=kn; i++) {
           if (i > 1) key = key ":"
           key = key $ep[kc[i]]
        }
        $0 = key OFS $0
        edits[$1] = $0                  # Edit array.
        keys[$1] = 1                    # Key array.
     }
  }

  if (swap) {
     ### close(stdin)                   # Let's spare resources.
     ARGV[1] = e_file                   # Set new stdin stream.
  }  else {
     close(e_file)                      # Let's spare resources.
     ARGV[1] = "-"
  }

  # make sure the specified key column(s) exists in the edit table.
  for (i=1; i<=kn; i++) {
      if (ep[kc[i]] == "") {
         print "updtable: column '" kc[i] "' not found in edit table" > stderr
         exit(rc=1)
      }
  }

  # '-w file': record the number of edit rows (lines read minus header).
  if (s_file != NULL) print j-1 > s_file
  close(s_file)
  NR = 0                                # Reset record counter.
  NF = 0
  ARGC = 2                              # Fix argv[].
  # NOTE(review): NF was zeroed just above, so $0 is presumably empty
  # here and this likely yields 0; the value is not used anywhere else
  # in this file -- confirm before relying on it.
  edit_numfields = split($0, n)         # No. of fields in edit table.

  # The edit table had no data rows, so the default key was never set.
  if (!kn) {
     kc[1] = en[1]
     klist = "," en[1] ","
     kn = 1
  }

}

# At this point awk's main input (ARGV[1], as set in BEGIN) is always
# the table that is being updated, whether it is stdin or a named file.

# Header line of the table being updated: record its column layout,
# validate the key column(s) and echo the header unless '-N' was given.
NR == 1 {
  gsub(/\001/, "")                      # Drop SOH markers.
  old_numfields = split($0, n)          # Column count of the main table.

  for (i = 1; i <= NF; i++) {
    # A name seen for the first time is appended to the blank-separated
    # list that later becomes c_names[].
    if (!P[$i]) auto_col = (i == 1) ? $i : (auto_col " " $i)

    # P[] maps a column name to its position: the first occurrence
    # wins, unless '-l' asked for the last one.
    if (pick_last || !P[$i]) P[$i] = i
  }

  split(auto_col, c_names, " ")

  # Every key column must be present in the main table.
  for (i = 1; i <= kn; i++) {
    if (P[kc[i]] != "") continue
    print "updtable: column '" kc[i] "' not found in main table" > stderr
    exit(rc=1)
  }

  if (!no_hdr) {
    # Work on a copy so that $0 itself never carries SOH markers.
    out_hdr = $0
    gsub(/\t/,"\t\001",out_hdr)
    print "\001" out_hdr
    fflush(stdout)
  }

  next
}

# This is to cope with the case where 'table_2'
# itself contains the delete string, i.e. when we use
# 'updtable' against a Table Journal.

$0 ~ /(^|\t)\.\.DEL\.\.(\t|$)/ {
   # A main-table row holding the delete marker as a whole field is
   # dropped from the output.
   next
}

{
   # Trim trailing blanks: first those in front of each field
   # separator, then those at the very end of the record.
   gsub(/ +\t/,"\t")
   sub(/ +$/,NULL)
}

# Apply the updates to already existing keys.

{
   # Prefix the record with its (possibly combined) key: the values of
   # the key columns, in kc[] order, joined by ":". The key becomes $1.

   key = ""
   for (i = 1; i <= kn; i++)
       key = key (i > 1 ? ":" : "") $P[kc[i]]

   $0 = key OFS $0
}

# Fires when an edit row exists for this record's key: split() returns
# the number of fields of that edit row (0 when there is none) and
# leaves them in a[], where a[1] is the key and a[p+1] is edit-table
# column p.
split(edits[$1], a) {			# Something to do ?

   status[$1] = 1			# Mark key as updated.

   # Skip records that are to be deleted.
   if (edits[$1] ~ /(^|\t)\.\.DEL\.\.(\t|$)/) next

   # Remove the combined key. The pattern is anchored and accepts an
   # empty key; the unanchored /[^\t]+\t/ would instead delete the
   # first non-empty data field whenever the key is empty.
   sub(/^[^\t]*\t/,"")

   for (i = 1; i <= NF; i++) {
      if (i > 1) printf("\t")

      # Ignore edits on key columns. The name is looked up literally
      # in the ",k1,k2," list rather than used as a regular
      # expression, so regex metacharacters in it cannot mis-match.
      if (index(klist, "," c_names[i] ",") == 0 && ep[c_names[i]])
         repl = a[ep[c_names[i]]+1]	# take the value from the edit row.
      else repl = $i			# retain old value.

      printf("%s", repl)
   }
   printf("\n")				# Print record separator.
   next
}

# No edit for this record: remove the combined key and print it as is.
# The pattern is anchored and accepts an empty key, so a record whose
# key is empty loses only the key prefix (the unanchored /[^\t]+\t/
# would remove its first non-empty data field, or nothing at all).
{ sub(/^[^\t]*\t/,""); print }

# END: append the edit rows whose key never showed up in the main
# table (inserts), unless an error occurred or '-n' was given.
END {
   if (rc) exit(rc)
   if (no_ins) exit(rc)

   for (j in keys) {
      # Keys already applied to an existing row, and edits that carry
      # the delete marker, produce no new row.
      if (status[j]) continue
      if (edits[j] ~ /(^|\t)\.\.DEL\.\.(\t|$)/) continue

      split(edits[j], a)

      # Lay the new row out using the main table's column list; columns
      # that the edit table does not have are left empty.
      row = ""
      for (i = 1; i <= old_numfields; i++) {
         if (i > 1) row = row "\t"
         row = row (ep[c_names[i]] ? a[ep[c_names[i]]+1] : NULL)
      }
      printf("%s\n", row)
   }
}

# End of program.
