/*
 * afcopy.cpp:
 *
 * Copy one AFF file to another. 
 * Resulting file is re-ordered and possibly re-compressed.
 */

/*
 * Copyright (c) 2006
 *	Simson L. Garfinkel 
 *      All rights reserved.
 *
 * This code is derived from software contributed by
 * Simson L. Garfinkel
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. [Omitted]
 * 4. Neither the name of Simson Garfinkel, Basis Technology, or other
 *    contributors to this program may be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY SIMSON GARFINKEL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SIMSON
 * GARFINKEL, BASIS TECHNOLOGY, OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include "config.h"
#include "afflib.h"
#include "afflib_i.h"
#include "quads.h"
#include "utils.h"

#include <sys/signal.h>

#ifdef HAVE_TIME_H
#include <time.h>
#endif

#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif

#include <ctype.h>
#include <zlib.h>
#include <openssl/md5.h>
#include <openssl/sha.h>
#include <assert.h>

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef HAVE_TERM_H
#include <term.h>
#endif

#ifdef HAVE_NCURSES_TERM_H
#include <ncurses/term.h>
#endif

#ifdef WIN32
#include "unix4win32.h"
#include <malloc.h>
#endif

/* Program name printed by usage() and by the -V version message. */
char *progname = "afcopy";

/*
 * Command-line option counters. Each is incremented in main() every time
 * the corresponding flag is seen, so a value >1 means the flag was repeated.
 */
int opt_verbose = 0;	/* -v: afcopy() prints each file; at >1 it prints each segment */
int opt_debug = 0;	/* -d: extra per-segment debugging output in afcopy() */
int opt_x = 0;		/* -x: set by main(); not read anywhere else in this file */
int opt_y = 0;		/* -y: set by main(); not read anywhere else in this file */
int opt_preen = 0;	/* -p: afcopy() tries preen() on every page segment */
int opt_zap =0;		/* -z: an existing destination is unlinked and recreated */
int opt_missing = 0;	/* -m: only segments absent from an existing destination are written */


/*
 * Print the program version, the three invocation forms and the option
 * summary to stdout, then terminate the process with exit status 1.
 * This function never returns.
 */
void usage()
{
    printf("%s version %s\n",progname,PACKAGE_VERSION);
    printf("usage: %s [options] file1 file2\n",progname);
    printf("                    Copies file1 to file2\n");
    printf("       %s [options] file1 file2 file3 ... dir\n",progname);
    printf("                    Copies file1.. into dir\n");
    printf("       %s [options] file1 file2 file3 ... dir1 dir2...\n",progname);
    printf("                    Copies file1.. into dir1, dir2, ...\n");
    printf("\n");
    printf("By default, all page MACs are verified on read and all segments\n");
    printf("are verified after write.\n");

    printf("Options:\n");
    printf("   -v = verbose: print each file as it is copied\n");
    printf("   -vv = very verbose: print each segment as it is copied\n");
    printf("   -d = print debugging information as well\n");
    /* NOTE(review): -c is advertised here but is not in the getopt() option
     * string used by main(), so passing it currently just prints this help. */
    printf("   -c = also verify decompression of each page\n");
    printf("   -x = don't verify hashes on reads\n");
    printf("   -y = don't verify writes\n");
    printf("   -p = preen; recompress all pages with LZMA or NULL\n");
    printf("\n");
    printf("   -h = help; print this message.\n");
    printf("   -V = print the program version and exit.\n");
    printf("   -z = zap; copy even if the destination exists.\n");
    printf("   -m = just copy the missing segments\n");
#ifdef USE_S3
    /* The only example is S3-specific, so the whole "Examples" section is
     * emitted only when S3 support is compiled in (no empty heading). */
    printf("\n");
    printf("Examples:\n");
    printf("       %s -vpy *.aff s3:///     Copy all files in current\n",progname);
    printf("                                directory to S3 default bucket\n");
#endif
    exit(1);
}


/*
 * Progress state shared between afcopy() and the SIGINFO handler.
 * afcopy() updates these as it moves from file to file and segment to
 * segment; a null pointer means "nothing of that kind in progress".
 */
const char *current_source = 0;
const char *current_dest = 0;
const char *current_seg  = 0;

/*
 * Signal handler: print a one-line progress report to stdout.
 * The destination is only shown when a source is set, and the segment only
 * when a destination is set; a newline is always printed.
 * `arg` (the signal number) is unused.
 */
void sig_info(int arg)
{
    const char *src = current_source;
    const char *dst = current_dest;
    const char *seg = current_seg;

    if(src)               printf("Copying %s ",src);
    if(src && dst)        printf("--> %s",dst);
    if(src && dst && seg) printf(" (%s) ",seg);
    printf("\n");
}


/*
 * Copy page `pagenum` from `ain` to `aout`, rewriting it with LZMA at the
 * maximum compression level.
 *
 * `arg` is the segment's flag word; its compression-algorithm bits decide
 * whether the page is eligible: pages flagged ZERO or already LZMA are
 * refused.
 *
 * Returns 0 if the page was written to `aout`, -1 if it was refused or any
 * step (page size lookup, allocation, read, write) failed. As a side effect
 * LZMA/MAX compression is enabled on `aout` once the page has been read.
 */
int preen(AFFILE *ain,AFFILE *aout,int64 pagenum,unsigned long arg)
{
    const int alg = (arg & AF_PAGE_COMP_ALG_MASK);
    if(alg==AF_PAGE_COMP_ALG_ZERO || alg==AF_PAGE_COMP_ALG_LZMA){
        return -1;			// nothing to gain from these
    }

    size_t nbytes = af_page_size(ain);
    if(nbytes==0) return -1;		// page size unavailable

    unsigned char *page = (unsigned char *)malloc(nbytes);
    if(!page) return -1;		// out of memory

    int status = -1;
    if(af_get_page(ain,pagenum,page,&nbytes)==0){
        /* Page is in memory; switch the output to maximum LZMA and store it */
        af_enable_compression(aout,AF_COMPRESSION_ALG_LZMA,AF_COMPRESSION_MAX);
        if(af_update_page(aout,pagenum,page,nbytes)==0){
            status = 0;
        }
    }

    free(page);				// single release point for every path
    return status;
}

/*
 * Remove every file named in `outfiles`, announcing each one on stdout
 * before calling unlink(). The result of unlink() is not checked.
 */
void unlink_outfiles(vector<string> outfiles)
{
    for(size_t i = 0; i < outfiles.size(); i++){
        const char *path = outfiles[i].c_str();
        printf("Unlinking %s\n",path);
        unlink(path);
    }
}

/*
 * Fallback for platforms whose <ctype.h> does not supply isnumber().
 * The argument is converted to unsigned char first because passing a
 * negative plain-char value to isdigit() is undefined behaviour.
 */
#if !defined( __BSD_VISIBLE) && !defined(isnumber)
#define isnumber(x) isdigit((unsigned char)(x))
#endif

/*
 * Copy every segment of the AFF file `infile` into each file named in
 * `outfiles`, then read the outputs back and compare their MD5s with the
 * hashes computed while reading the input.
 *
 * Behaviour is driven by the global option counters:
 *   opt_zap      an existing destination is unlinked and recreated
 *   opt_missing  an existing destination is kept and only segments it does
 *                not already contain are written (page sizes must match)
 *   opt_preen    page segments are re-written through preen() when possible
 *   opt_verbose / opt_debug control progress output
 *
 * Destinations that cannot be opened are skipped with a message.
 *
 * Returns 0 when the copy and verification completed, -1 when no output
 * file could be opened. Read, write, allocation and verification failures
 * are fatal: the process exits through err()/errx(), in most cases after
 * unlinking all files named in `outfiles`.
 *
 * NOTE(review): usage() advertises -y as "don't verify writes", but opt_y is
 * not consulted here; the verification pass always runs.
 */
int afcopy(char *infile,vector<string> &outfiles)
{
#ifdef SIGINFO
    signal(SIGINFO,sig_info);
#endif
    hashMapT hashMap;			// segment name -> MD5 of the input data

    /* Open the input file */
    AFFILE *ain = af_open(infile,O_RDONLY,0);
    if(!ain) err(1,"%s",infile);
    seglist segments;
    get_seglist(ain,&segments);

    outlist afouts;			// vector of output AFFs
    vector<int64>preened_pages;		// pages written by preen(); not hash-checked

    /* Now, try to open the output files, to see if they exist */
    current_source = infile;
    if(opt_verbose) printf("%s: ",infile);
    for(vector<string>::const_iterator o = outfiles.begin();
        o != outfiles.end();
        o++){
        const char *fname = o->c_str();

        outelement out;

        out.af = af_open(fname,O_RDWR|O_EXCL,0666);
        if(out.af){
            fprintf(stderr,"%s: file exists...  ",fname);
            if(opt_zap==0 && opt_missing==0){
                fprintf(stderr,"Will not overwrite; use -m or -z\n");
                af_close(out.af);
                continue;
            }
            if(opt_zap){
                fprintf(stderr,"zapping...\n");
                af_close(out.af);
                unlink(fname);
                out.af = 0;
            }
            else if(opt_missing){
                /* Only reached while out.af is still open: when -z and -m are
                 * both given, zapping wins and the handle is already gone. */
                fprintf(stderr,"Filling in missing segments...\n");
                if(af_page_size(ain) != af_page_size(out.af)){
                    fprintf(stderr,"%s and %s have different page sizes (%d != %d)\n",
                            af_filename(ain),
                            af_filename(out.af),
                            af_page_size(ain),
                            af_page_size(out.af));
                    af_close(out.af);
                    out.af=0;
                    continue;
                }
            }
        }
        if(out.af==0){
            out.af = af_open(fname,O_RDWR|O_EXCL|O_CREAT,0666);
            if(!out.af){
                warn("%s",fname);
                continue;
            }
            if(af_set_pagesize(out.af,af_page_size(ain))){
                errx(1,"%s: cannot set page size to %d\n", af_filename(out.af),af_page_size(ain));
            }
        }
        if(o != outfiles.begin()) printf("\t ");
        if(opt_verbose){
            printf(" => %s ",fname);
            if(opt_preen) printf(" (preening) ");
            printf("\n");
        }
        if(opt_missing) get_seglist(out.af,&out.segs);
        afouts.push_back(out);
    }

    /* If we couldn't open any output files, return */
    if(afouts.size()==0){
        af_close(ain);			// close the input file
        current_source = 0;		// nothing in progress any more
        return -1;
    }


    /* Start the copying */
    struct timeval t0,t1;
    gettimeofday(&t0,0);
    for(seglist::const_iterator seg = segments.begin();
        seg!= segments.end();seg++){

        const char *segname = seg->name.c_str();
        current_seg = segname;

        /* First call with no buffer just reports the segment length */
        size_t seglen=0;
        if(af_get_seg(ain,segname,0,0,&seglen)){
            err(1,"Cannot read length of segment '%s' on input file %s",
                segname,af_filename(ain));
        }
        /* Ask for at least one byte: malloc(0) is allowed to return NULL,
         * which would make a legitimate empty segment look like a failure. */
        unsigned char *segbuf = (unsigned char *)malloc(seglen ? seglen : 1);
        if(!segbuf){
            err(1,"Cannot allocated %d bytes for segment '%s' in %s",
                (int)seglen,segname,af_filename(ain));
        }

        /* Now get the source segment */
        unsigned long arg;
        if(af_get_seg(ain,segname,&arg,segbuf,&seglen)){
            unlink_outfiles(outfiles);
            err(1,"Cannot read segment '%s' in %s. Deleteing output file",
                segname,af_filename(ain));
        }
        int64 pagenumber = af_segname_page_number(segname);

        /* Calculate the MD5 of this segment and remember it in the map */
        md5blob md5;
        MD5(segbuf,seglen,md5.buf);
        hashMap[segname] = md5;


        /* Write the segment to each file */
        for(outlist::iterator aout = afouts.begin();
            aout != afouts.end();
            aout++){

            current_dest = af_filename(aout->af);
            if(opt_verbose>1 || opt_debug) printf("\n   %s -> %s ...", segname,af_filename(aout->af));

            /* Page segments may be recompressed instead of copied verbatim;
             * if preen() declines, fall through to the plain copy. */
            if(pagenumber>=0 && opt_preen){
                if(opt_debug) printf(" (PREENED) ");
                if(preen(ain,aout->af,pagenumber,arg)==0){
                    preened_pages.push_back(pagenumber);
                    continue;
                }
            }

            /* aout->segs is only populated in -m mode; skip what is there */
            bool already_present = false;
            for(seglist::const_iterator j = aout->segs.begin();
                j != aout->segs.end();
                j++){
                if(j->name == segname){
                    already_present = true;
                    break;
                }
            }
            if(already_present){
                printf("%s is already in %s\n",segname,af_filename(aout->af));
                continue;
            }

            if(af_update_seg(aout->af,segname,arg,segbuf,seglen)){
                unlink_outfiles(outfiles);
                err(1,"Cannot write segment '%s' to %s.", segname,af_filename(aout->af));
            }
        }
        free(segbuf);
        current_dest = 0;
        if(opt_verbose>1 || opt_debug) putchar('\n');
    }
    current_seg = 0;
    af_close(ain);			// ain must not be used past this point

    gettimeofday(&t1,0);
    if(afouts.size()==1){
        AFFILE *af = afouts.begin()->af;
        uint64 w = af->bytes_written;
        double sec = ((t1.tv_sec-t0.tv_sec)+(t1.tv_usec-t0.tv_usec)/1000000.0);
        /* A copy that finishes within the clock resolution reports rate 0
         * rather than dividing by zero. */
        double rate = (sec>0) ? (w/1000000.0) / sec : 0.0;
        printf("%s: %qd bytes transfered in %.2f seconds. xfer rate: %.2f MBytes/sec\n",
               af_filename(af),w,sec,rate);
    }

    current_seg = "VERIFYING";
    /* Now verify all of the hashes */
    if(opt_verbose || opt_debug) printf("\n\nFiles copied. Verifying...\n");
    for(seglist::const_iterator seg = segments.begin(); seg!= segments.end();seg++){

        /* Name under which the input hash was stored in hashMap */
        const char *origname = seg->name.c_str();

        for(outlist::iterator aout = afouts.begin(); aout != afouts.end(); aout++){
            if((aout->af)->v->flag & AF_VNODE_TYPE_RELIABLE){
                continue;		// no need to verify a reliable write
            }

            /* The lookup name is reset for every output so that a rename
             * needed for one destination does not leak into the next one. */
            const char *segname = origname;
            char b2[1024];
            size_t seglen=0;

            if(opt_verbose>1 || opt_debug) printf("  verifying %s...\n",segname);

            if(af_get_seg(aout->af,segname,0,0,&seglen)){
                /* A legacy "seg<N>" name may have been stored as "page<N>";
                 * try the new name once before giving up. */
                bool retried = false;
                if(segname[0]=='s' && segname[1]=='e' && segname[2]=='g' &&
                   isdigit((unsigned char)segname[3])){
                    snprintf(b2,sizeof(b2),"page%s",segname+3);
                    if(opt_verbose) printf("  Couldn't read %s; looking for %s\n",
                                           segname,b2);
                    segname = b2;
                    retried = true;
                }
                if(!retried || af_get_seg(aout->af,segname,0,0,&seglen)){
                    unlink_outfiles(outfiles);
                    errx(1,"Cannot read length of segment '%s' in output file %s",
                         segname,af_filename(aout->af));
                }
            }
            int64 pagenumber = af_segname_page_number(segname);
            if(find(preened_pages.begin(),preened_pages.end(),pagenumber) !=preened_pages.end()){
                /* TK: page pagenumber was preened.
                 * It should be checked against the original hash */
                continue;
            }

            unsigned char *segbuf = (unsigned char *)malloc(seglen ? seglen : 1);
            if(!segbuf){
                /* Report the output file: the input handle is already closed */
                err(1,"Cannot allocated %d bytes for segment '%s' in %s",
                    (int)seglen,segname,af_filename(aout->af));
            }
            unsigned long arg;
            if(af_get_seg(aout->af,segname,&arg,segbuf,&seglen)){
                err(1,"Cannot read segment '%s' in %s",
                    segname,af_filename(aout->af));
            }

            /* Calculate the MD5 of this segment and see if it matches the
             * hash recorded for the input segment. The lookup uses the
             * original name, because that is the key the copy pass stored.
             */
            unsigned char md5_read[16];
            MD5(segbuf,seglen,md5_read);
            if(memcmp(hashMap[origname].buf,md5_read,16)!=0){
                unlink_outfiles(outfiles);
                errx(1,"Hash read from %s for segment %s doesn't validate.",
                     af_filename(aout->af),segname);
            }
            free(segbuf);		// free the buffer
        }
    }
    for(outlist::iterator aout = afouts.begin(); aout != afouts.end(); aout++){
        af_close(aout->af);
    }
    if(opt_verbose>1 || opt_debug) printf("\n");
    current_source = 0;
    return 0;
}

/*
 * Entry point. Parses the option flags, then works out which trailing
 * arguments are destinations:
 *
 *   - Scanning from the last argument backwards, every argument that is an
 *     existing directory, or an "s3://" name not ending in ".aff", is taken
 *     as a destination directory. The first argument is always a source.
 *   - With no directories there must be exactly two arguments: source and
 *     destination file. An existing destination is refused unless -z or -m
 *     was given (afcopy() then zaps it or fills in missing segments).
 *   - Otherwise each remaining argument is copied into every directory
 *     under its own base name.
 *
 * Returns afcopy()'s result in the two-file form; in the directory form
 * returns 0 if every source was copied and 1 if afcopy() failed for any.
 */
int main(int argc,char **argv)
{
    int ch;

    setvbuf(stdout,0,_IONBF,0);		// turn off buffering on stdout
    while ((ch = getopt(argc, argv, "vdpVxyh?zm")) != -1) {
        switch (ch) {
        case 'v': opt_verbose++; break;
        case 'd': opt_debug++; break;
        case 'p': opt_preen++;break;
        case 'x': opt_x++;break;
        case 'y': opt_y++;break;
        case 'z': opt_zap++;break;
        case 'm': opt_missing++;break;

        case 'h':
        case '?':
        default:
            usage();
            break;
        case 'V':
            printf("%s version %s\n",progname,PACKAGE_VERSION);
            exit(0);
        }
    }
    argc -= optind;
    argv += optind;

    if(argc<2){				// at this point, we need at least two args
        usage();
    }


    /* Find any directories. Each one found is dropped from argc, so that
     * afterwards argc counts only the source files at the front of argv. */
    vector<string> dirlist;
    for(int i=argc-1;i>0;i--){
        struct stat st;

        // s3 names that do not end with ".aff" are directories
        const size_t len = strlen(argv[i]);
        const char *last4 = len>4 ? argv[i]+len-4 : "";
        if(strncmp(argv[i],"s3://",5)==0 &&
           strcmp(last4,".aff")!=0){
            dirlist.push_back(argv[i]);
            argc--;
            continue;
        }

        if(stat(argv[i],&st)!=0) break; // out of directories
        if((st.st_mode & S_IFMT)!=S_IFDIR) break; // found a non-dir
        dirlist.push_back(argv[i]);
        argc--;			// ignore the last
    }

    /* If I found no directories, then there better just be two values */
    if(dirlist.size()==0){
        if(argc!=2){
            fprintf(stderr,"Please specify a directory or just two AFF files.\n\n");
            usage();
        }
        /* Must be copying from file1 to file2. An existing file2 is only
         * acceptable when the user asked to zap it (-z) or to fill in its
         * missing segments (-m); afcopy() carries out either request. */
        if(access(argv[1],R_OK)==0){
            fprintf(stderr,"File exists: %s\n",argv[1]);
            if(!opt_zap && !opt_missing) exit(1);
        }

        vector<string> outfiles;
        outfiles.push_back(argv[1]);
        return afcopy(argv[0],outfiles);
    }

    /* Loop for each file and each directory */
    int failures = 0;
    while(argc--){
        /* Destination name is the source's base name inside each directory */
        const char *name = strrchr(*argv,'/');
        name = name ? name+1 : *argv;

        vector<string> outfiles;
        for(unsigned int i=0;i<dirlist.size();i++){
            string outfilename(dirlist[i]);
            if(outfilename[outfilename.size()-1]!='/') {
                outfilename.append("/");
            }
            outfilename.append(name);
            outfiles.push_back(outfilename);
        }
        if(afcopy(argv[0],outfiles)!=0) failures++;	// remember, keep going
        argv++;
    }
    return failures ? 1 : 0;
}


