#!/usr/bin/perl
# *********************************************************************
# Original code: indextbl,v 1.13 1994/06/20 10:46:33 hobbs
#
# Adapted to NoSQL by Carlo Strozzi
#
# summtable: NoSQL table summary builder.
# Copyright (c) 1998,2006 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# *********************************************************************
#  $Id: summtable,v 1.3 2006/03/10 11:26:13 carlo Exp $

# Get local settings and set defaults.

$NOSQL_INSTALL = $ENV{'NOSQL_INSTALL'};
$NOSQL_INSTALL = "/usr/local/nosql" if not $NOSQL_INSTALL;

$0 =~ s-.*/-- ;				# keep only the program's base name

# Parse the leading command-line options.  Parsing stops at the first
# argument that does not start with "-"; whatever remains in @ARGV is
# matched against the column names once the header line has been read.
#
# Flags set here and consumed further down in this file:
#   $SAV    collect per-column value counts while reading the table
#   $CUNIQ  print the number of unique values per column
#   $CUNUL  list only the null / blank values of each column
#   $CUQBY  list every value of each column with its count
#   $CU2    (with $CUQBY) list only values that occur more than once
#   $MAM    print min, avg, max and total per column
#   $INV    invert the column selection
while ( @ARGV && $ARGV[0] =~ /^-/ ) {
    $_ = shift ;
    if( /^-c(.*)/ || /^--count-(.*)/ ){
        $v = $1 ;
        if( $v =~ /^u/ ){ $CUNIQ++ ; $SAV++ ; }
        if( $v =~ /^(un|unique-null)$/ ){ $CUNUL++ ; }
        if( $v =~ /^(uu|unique-bycolumn)$/ ){ $CUQBY++ ; }
        if( $v =~ /^(u2|unique-bycolumn-2)$/ ){ $CUQBY++ ; $CU2++ ; }
        next ;
    }
    if( /^(-m|--totals)$/ ){ $MAM++ ; $SAV++ ; next ; }
    if( /^(-v|--revert)$/ ){ $INV++ ; next ; }
    if( /^(-h|--help)$/ ){
        # Print the help text without its '#' comment lines.  The file is
        # read directly instead of through a shell `grep`, so an install
        # path containing blanks or shell metacharacters cannot break the
        # command or be interpreted by the shell.
        my $help_file = "$NOSQL_INSTALL/help/summtable.txt" ;
        if( open( my $help_fh, "<", $help_file ) ){
            while( my $line = <$help_fh> ){
                print $line unless $line =~ /^#/ ;
            }
            close( $help_fh ) ;
        }
        else {
            warn "$0: cannot read $help_file: $!\n" ;
        }
        exit 1 ;
    }
    if( /^--show-copying.*/ ){
        # List form of system(): no shell is involved, the path is passed
        # to cat as a single argument.
        system "cat", "$NOSQL_INSTALL/doc/COPYING" ;
        exit 1 ;
    }
    if( /^--show-warranty.*/ ){
        system "cat", "$NOSQL_INSTALL/doc/WARRANTY" ;
        exit 1 ;
    }
    die "$0: bad option: $_\n", "For help type \"$0 --help\".\n" ;
}
# Read the table from STDIN.  Lines whose first non-blank character is '#'
# are skipped.  The first remaining line is the header (column names); each
# later line is a data row whose fields are separated by tabs.
#
# Results left in globals for the reporting code:
#   @H     column names from the header line
#   $rows  number of data rows read
#   %q     occurrence count per "column-index|value" key   (only if $SAV)
#   @cu    number of distinct values per column index      (only if $SAV)
while(<STDIN>){
    next if /^\s*#/ ;			# comment
    # chomp strips only a trailing newline.  chop would also eat the last
    # data character of a final line that has no line terminator.
    chomp ;
    if( $lln == 0 ){ s/[\001 ]+//g ; }	# header: remove SOH markers and blanks
    @F = split( /\t/, $_ );
    if( ++$lln == 1 ){			# col name line
        @H = @F ;			# save headers
        # With no column arguments, summarize every column, unless -v was
        # given, in which case the empty selection is inverted later.
        @ARGV = @H if ! @ARGV && ! $INV ;
        get_col_x() ;			# get, chk column indexes (fills @a)
        next ;
    }
    $rows++ ;				# data row count
    if( $SAV ){
        for (@a){			# $_ is a selected column index here
            $x = "$_|$F[$_]" ;
            if( ! $q{$x} ){ $cu[$_]++ ; }	# first sighting: unique count
            $q{$x}++ ;			# value count
        }
    }
}
# Report the number of data rows and, when unique counting was requested,
# the number of distinct values per selected column, optionally followed by
# a listing of the values themselves with their occurrence counts.
$rows = 0 if ! $rows ;			# empty table: print 0, not an empty string
print "Rows: $rows\n" ;
if( $CUNIQ ){				# count of unique stuff
    # The keys of %q have the form "column-index|value".  Sort them once
    # instead of once per column; the order only determines the order in
    # which values are listed.
    my @sorted_keys = sort( keys %q ) ;
    for (@a){
        print "Unique values for $H[$_]: ", ( $cu[$_] || 0 ), "\n" ;
        next if ! $CUQBY && ! $CUNUL ;
        for $k ( @sorted_keys ){
            # Limit the split to two fields so that a value which itself
            # contains '|' is kept whole instead of being cut short.
            my( $col, $val ) = split( /\|/, $k, 2 ) ;
            next if $col ne $_ ;	# key belongs to another column
            if( $val eq "" ){ $val = "(null)" ; }
            if( $val =~ /^\s+$/ ){ $val = "(blank)" ; }
            if( $CUNUL ){
                # Skip ordinary values rather than stopping at the first
                # one, so the listing does not depend on null and blank
                # values happening to sort first.
                next if $val ne "(null)" && $val ne "(blank)" ;
            }
            if( $CU2 ){
                next unless $q{$k} > 1 ;	# only values seen more than once
            }
            printf( "%8d %s\n", $q{$k}, $val ) ;
        }
    }
}
# Report minimum, average, maximum and total for every selected column,
# computed from the per-value counts collected in %q (keys have the form
# "column-index|value").  Values are used numerically; an empty or
# non-numeric value counts as 0.  All four figures are printed with %d,
# i.e. truncated to integers.
if( $MAM ){				# min, avg, max
    # Sorted once for all columns; the fixed order keeps the summation
    # order (and therefore the printed figures) reproducible between runs.
    my @sorted_keys = sort( keys %q ) ;
    for (@a){
        $n = $sum = $avg = 0 ;		# reset per column, $avg included
        # min and max start undefined and are seeded from the first value
        # seen, so all-negative columns get a correct max and very large
        # values a correct min.
        undef $min ;
        undef $max ;
        for $k ( @sorted_keys ){
            # Limit of 2 keeps values that contain '|' intact.
            my( $col, $val ) = split( /\|/, $k, 2 ) ;
            next if $col ne $_ ;	# key belongs to another column
            $sum += $val * $q{$k} ;
            $n += $q{$k} ;
            $min = $val if ! defined( $min ) || $val < $min ;
            $max = $val if ! defined( $max ) || $val > $max ;
        }
        $avg = $sum / $n if $n ;
        # No data for this column: report zeros.
        $min = 0 if ! defined( $min ) ;
        $max = 0 if ! defined( $max ) ;
        printf( "Min, Avg, Max, Total for %s: %d, %d, %d, %d\n",
            $H[$_], $min, $avg, $max, $sum ) ;
    }
}
# get_col_x: translate the column names in @ARGV into header positions.
#
# Reads:   @ARGV (requested column names), @H (header names), $INV
# Writes:  @a    (0-based indexes into @H; appended to, then replaced by
#                 the complementary set of indexes when $INV is set)
# Dies with "bad column name" when a requested name is not in @H.
sub get_col_x {
    for my $wanted (@ARGV){
        my $pos = -1 ;
        for my $idx ( 0 .. $#H ){
            next unless $H[$idx] eq $wanted ;
            $pos = $idx ;		# first header with this name wins
            last ;
        }
        die "$0: bad column name: $wanted\n" if $pos < 0 ;
        push( @a, $pos );
    }
    if( $INV ){				# inverse option
        # Keep every header position that was NOT selected above.
        my %chosen = map { $_ => 1 } @a ;
        @a = grep { ! $chosen{$_} } 0 .. $#H ;
    }
}

# End of program.
