#!/usr/bin/awk -f
#****************************************************************************
#  ##   ##         #####   #####  ##     **     NoSQL RDBMS - record2sed    *
#  ###  ##        ####### ####### ##     **        $Revision: 1.1.1.1 $       *
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **   Carlo Strozzi (c) 1998-2000   *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Written by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
# NOTE: to edit, set ts=8 in 'vi' (or equivalent)
#       to print, pipe through 'pr -t -e8'
#****************************************************************************
#
# Converts a single-record table into special sed(1) substitution statements.
#
# Usage:  record2sed < table
#
# Note: options must be passed through the environment variable _awk_args,
#       i.e.: _awk_args='[options]'
# 
# Options:
#     -p|--prefix P
#           Prefix the 'name' part of each output statement with string 'P'.
# 
#     -h|--html
#           Encode NoSQL-escaped TABs and newlines into their ISO
#	    entities &#9; and &#10; respectively, i.e. in a format
#	    suitable to be rendered in a Web page. Mostly useful with
#	    Common Gateway Interface (CGI) programs.
#   
#     -C|--strip-comments
#          Print the statements necessary to remove comment-lines,
#          i.e. lines that start with a '#' character.
#
#     -l|--last
#          If the input table contains duplicated column names
#          pick the last occurrence of each. The default is to
#          pick the first one. This is sometimes useful after
#          the 'join' operator.
#
# Takes a single-record table and builds a corresponding sed(1) program
# in the form 's/\$\[name]/data/g', where 'name' is the table column name
# (preceded by the prefix, if one was given with -p) and 'data' is the
# associated column value. The program will automatically escape sed(1)
# special characters in 'data'.
#
# This operator reads a single-record table from STDIN and prints the
# corresponding sed(1) program to STDOUT. If the input table has more
# than one record, then only the last one will be used.
#
########################################################################

BEGIN {
  # Constants and field separators: tables are TAB-delimited.
  NULL = ""
  FS = OFS = "\t"

  # Options are not taken from the command line; they arrive as one
  # blank-separated string in the environment variable _awk_args.
  split( ENVIRON["_awk_args"], args, " " )

  # Scan the option words until the first empty slot, i.e. the end of
  # the list. -p consumes the following word as its value.
  for ( i = 1; args[i] != NULL; i++ )
  {
    if ( args[i] == "-p" || args[i] == "--prefix" ) prefix = args[++i]
    else if ( args[i] == "-h" || args[i] == "--html" ) html = 1
    else if ( args[i] == "-C" || args[i] == "--strip-comments" ) no_comments = 1
    else if ( args[i] == "-l" || args[i] == "--last" ) pick_last = 1
  }

  # With -C the generated sed(1) program starts by deleting every line
  # that begins with '#'.
  if ( no_comments ) printf("/^#.*$/d\n")
}

########################################################################
# Main loop.
########################################################################

# Column names.
NR == 1 {
  # Header line. Builds two tables from the column names:
  #   P[name]     field position whose value is used for 'name'
  #               (first occurrence, or last one with --last);
  #   c_names[k]  distinct names, in order of first appearance.
  #
  # The names are stored straight into c_names[] instead of being joined
  # with blanks and split again, so a name that itself contains a blank
  # is kept in one piece.
  split( "", c_names )			# start from an empty list
  n_names = 0

  for ( i = 1; i <= NF; i++ )
  {
    # A column without a name cannot be addressed as $[name]. Skipping
    # it also keeps P[""] unset, so that looking up a missing name in
    # P[] is always false (a non-zero P[""] would make a loop that
    # stops on the first false P[] entry run forever).
    if ( $i == "" ) continue

    if ( !P[$i] )
    {
      c_names[++n_names] = $i		# first time this name is seen
      P[$i] = i
    }
    else if ( pick_last ) P[$i] = i	# duplicate name: prefer the last
  }
}

# Dashline: the line that follows the column names carries no data, so
# it is skipped. Every later line is simply read; END then works on the
# last record that was read.
NR == 2 { next }

END {
  # Emits one sed(1) 's' command per distinct column name.
  #
  # With at least one data row (NR > 2) the values come from the fields
  # of the last record read; otherwise every $[name] is replaced by the
  # empty string.
  #
  # Both loops are bounded by the entries actually present in c_names[],
  # so they terminate even if P[""] happens to be non-zero (which occurs
  # when the header contains an empty column name).
  if ( NR > 2 )
  {
    for ( i = 1; ( i in c_names ) && P[c_names[i]]; i++ )
    {
      field = $P[c_names[i]]

      # Unescape tabs and newlines first.
      field = NoSQL_Unescape( field )

      # Apply additional escaping for HTML if requested.
      # The replacement must reach gsub() as '\&' to yield a literal '&'.
      # That needs "\\&" in the program text: the meaning of "\&" inside
      # a string constant is unspecified, and awks that drop the
      # backslash would insert the matched TAB/newline instead of '&'.
      if ( html )
      {
        gsub( "\t", "\\&#9;", field )		# tab
        gsub( "\n", "\\&#10;", field )		# newline
      }

      # Apply actual sed(1) escaping. Do '\' first!

      field = Mawk_Bug( field )
      gsub( "&", "\\\\&", field )
      gsub( "#", "\\\\#", field )
      gsub( "/", "\\\/", field )

      # A newline inside the replacement text of an 's' command must be
      # preceded by a backslash, otherwise sed(1) sees an unterminated
      # command. Done with split() and concatenation, and after the
      # backslash doubling above, so the added backslash stays single.
      n_lines = split( field, lines, "\n" )
      if ( n_lines > 1 )
      {
        field = lines[1]
        for ( j = 2; j <= n_lines; j++ ) field = field "\\\n" lines[j]
      }

      # Add more sed(1) escapes here if necessary.

      # Print sed(1) replacement statement.
      printf("s/\\$\\[%s%s]/%s/g\n", prefix, c_names[i], field)
    }
  }
  else {
    # No data rows in the input table; print empty assignments.
    for ( i = 1; ( i in c_names ) && P[c_names[i]]; i++ )
      printf("s/\\$\\[%s%s]//g\n", prefix, c_names[i])
  }
}

########################################################################
# NoSQL_Unescape(string)
#
# Takes a string and translates any unescaped '\t' and '\n' strings into
# physical tabs and newlines respectively. Returns the converted string.
########################################################################
function NoSQL_Unescape(s,		S,i,s_length,a,escaped) {
  # Two-state scan over the characters of 's'. 'escaped' is set while
  # the previous character was a backslash that has not been emitted yet.
  #
  #   \n  ->  newline            \t  ->  tab
  #   \x  ->  \x (unchanged) for any other character x, including '\'
  #
  # Returns the converted string; only the first argument is meant to be
  # passed by callers, the others are local variables.
  S = ""
  s_length = split(s, a, "")		# one array element per character

  for ( i = 1; i <= s_length; i++ ) {
    if ( escaped ) {
      if ( a[i] == "n" ) S = S "\n"
      else if ( a[i] == "t" ) S = S "\t"
      else S = S "\\" a[i]		# not an escape: keep both characters
      escaped = 0
    }
    else if ( a[i] == "\\" ) escaped = 1
    else S = S a[i]
  }

  # A backslash at the very end of the string has nothing to escape;
  # emit it as it is.
  if ( escaped ) S = S "\\"

  return S
}

########################################################################
# Mawk_Bug(string)
#
# Takes a string and turns all '\' characters into their escaped form
# '\\'. Returns the escaped string. This could be done with just a gsub(),
# but mawk(1) has a bug that makes it behave differently from other awk
# implementations:
#
# gsub( /\\/, "\\\\", field )		# This works with both gawk(1)
#					# and the original nawk(1).
#
# gsub( /\\/, "\\\\\\", field )		# This works just with mawk(1),
#					# otherwise it produces more
#					# backslashes than necessary,
#					# which looks rather obvious.
#
########################################################################
function Mawk_Bug( s,		a,i,j,S ) {

   # Cut the string at every backslash, then glue the pieces back
   # together with a doubled backslash between them. The result is
   # assembled from the last piece towards the first. No gsub() is
   # involved, so the outcome does not depend on how an awk treats
   # backslashes in replacement text.
   # Only 's' is meant to be passed by callers; the rest are locals.
   i = split( s, a, "\\" )
   S = a[i]
   for ( j = i - 1; j >= 1; j-- ) S = a[j] "\\\\" S
   return S
}

########################################################################
# End of program.
########################################################################
